23354 lines
602 KiB
JSON
23354 lines
602 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3330,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0009009009009009009,
|
|
"grad_norm": 34.85325259337485,
|
|
"learning_rate": 0.0,
|
|
"loss": 4.425543785095215,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0018018018018018018,
|
|
"grad_norm": 41.49064024559189,
|
|
"learning_rate": 3.003003003003003e-08,
|
|
"loss": 3.991971731185913,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.002702702702702703,
|
|
"grad_norm": 33.404734390062586,
|
|
"learning_rate": 6.006006006006006e-08,
|
|
"loss": 3.3135690689086914,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0036036036036036037,
|
|
"grad_norm": 31.301531140485675,
|
|
"learning_rate": 9.00900900900901e-08,
|
|
"loss": 3.384368419647217,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0045045045045045045,
|
|
"grad_norm": 37.16273793726143,
|
|
"learning_rate": 1.2012012012012013e-07,
|
|
"loss": 4.710058212280273,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.005405405405405406,
|
|
"grad_norm": 41.04288621830464,
|
|
"learning_rate": 1.5015015015015016e-07,
|
|
"loss": 4.387257099151611,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.006306306306306306,
|
|
"grad_norm": 38.30018644703421,
|
|
"learning_rate": 1.801801801801802e-07,
|
|
"loss": 3.776132583618164,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.007207207207207207,
|
|
"grad_norm": 33.68929257403917,
|
|
"learning_rate": 2.1021021021021025e-07,
|
|
"loss": 3.8114027976989746,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.008108108108108109,
|
|
"grad_norm": 36.25630264757824,
|
|
"learning_rate": 2.4024024024024026e-07,
|
|
"loss": 4.490676403045654,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.009009009009009009,
|
|
"grad_norm": 32.57110428951466,
|
|
"learning_rate": 2.702702702702703e-07,
|
|
"loss": 3.3798646926879883,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.00990990990990991,
|
|
"grad_norm": 43.18767365474111,
|
|
"learning_rate": 3.003003003003003e-07,
|
|
"loss": 4.468938827514648,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.010810810810810811,
|
|
"grad_norm": 35.94354919359712,
|
|
"learning_rate": 3.3033033033033036e-07,
|
|
"loss": 4.262247562408447,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.011711711711711712,
|
|
"grad_norm": 32.245799614821415,
|
|
"learning_rate": 3.603603603603604e-07,
|
|
"loss": 3.8364014625549316,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.012612612612612612,
|
|
"grad_norm": 66.58620222655038,
|
|
"learning_rate": 3.903903903903904e-07,
|
|
"loss": 3.873539924621582,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.013513513513513514,
|
|
"grad_norm": 37.19568116981887,
|
|
"learning_rate": 4.204204204204205e-07,
|
|
"loss": 4.2311906814575195,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.014414414414414415,
|
|
"grad_norm": 43.73716775935688,
|
|
"learning_rate": 4.504504504504505e-07,
|
|
"loss": 4.178625106811523,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.015315315315315315,
|
|
"grad_norm": 36.675332239183064,
|
|
"learning_rate": 4.804804804804805e-07,
|
|
"loss": 3.9813666343688965,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.016216216216216217,
|
|
"grad_norm": 32.97482553933009,
|
|
"learning_rate": 5.105105105105106e-07,
|
|
"loss": 4.1125054359436035,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.017117117117117116,
|
|
"grad_norm": 32.436790195213625,
|
|
"learning_rate": 5.405405405405406e-07,
|
|
"loss": 4.283469200134277,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.018018018018018018,
|
|
"grad_norm": 40.09255214449685,
|
|
"learning_rate": 5.705705705705706e-07,
|
|
"loss": 3.9226396083831787,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.01891891891891892,
|
|
"grad_norm": 32.93413919366413,
|
|
"learning_rate": 6.006006006006006e-07,
|
|
"loss": 3.7610692977905273,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.01981981981981982,
|
|
"grad_norm": 34.359848332773865,
|
|
"learning_rate": 6.306306306306306e-07,
|
|
"loss": 4.637616157531738,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.02072072072072072,
|
|
"grad_norm": 34.849607178010444,
|
|
"learning_rate": 6.606606606606607e-07,
|
|
"loss": 4.552515029907227,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.021621621621621623,
|
|
"grad_norm": 29.055408730985356,
|
|
"learning_rate": 6.906906906906907e-07,
|
|
"loss": 3.446662425994873,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.02252252252252252,
|
|
"grad_norm": 34.963765245521756,
|
|
"learning_rate": 7.207207207207208e-07,
|
|
"loss": 3.9494271278381348,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.023423423423423424,
|
|
"grad_norm": 25.42005234312663,
|
|
"learning_rate": 7.507507507507509e-07,
|
|
"loss": 4.037813186645508,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.024324324324324326,
|
|
"grad_norm": 30.34189312777092,
|
|
"learning_rate": 7.807807807807808e-07,
|
|
"loss": 4.118837356567383,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.025225225225225224,
|
|
"grad_norm": 27.915079938660245,
|
|
"learning_rate": 8.108108108108109e-07,
|
|
"loss": 3.494497537612915,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.026126126126126126,
|
|
"grad_norm": 23.500481293588344,
|
|
"learning_rate": 8.40840840840841e-07,
|
|
"loss": 3.9746358394622803,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.02702702702702703,
|
|
"grad_norm": 23.091569375485957,
|
|
"learning_rate": 8.708708708708709e-07,
|
|
"loss": 3.6937849521636963,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.027927927927927927,
|
|
"grad_norm": 22.312260784488128,
|
|
"learning_rate": 9.00900900900901e-07,
|
|
"loss": 3.7990822792053223,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.02882882882882883,
|
|
"grad_norm": 18.55715539198881,
|
|
"learning_rate": 9.30930930930931e-07,
|
|
"loss": 3.547938823699951,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.02972972972972973,
|
|
"grad_norm": 18.684298354977482,
|
|
"learning_rate": 9.60960960960961e-07,
|
|
"loss": 3.9032235145568848,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.03063063063063063,
|
|
"grad_norm": 18.16824230200826,
|
|
"learning_rate": 9.909909909909911e-07,
|
|
"loss": 3.70247483253479,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.03153153153153153,
|
|
"grad_norm": 22.98521490510086,
|
|
"learning_rate": 1.0210210210210212e-06,
|
|
"loss": 3.7687439918518066,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.032432432432432434,
|
|
"grad_norm": 17.91413496069695,
|
|
"learning_rate": 1.051051051051051e-06,
|
|
"loss": 4.087740421295166,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.03333333333333333,
|
|
"grad_norm": 13.573837184927248,
|
|
"learning_rate": 1.0810810810810812e-06,
|
|
"loss": 3.230196952819824,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.03423423423423423,
|
|
"grad_norm": 14.997176911133973,
|
|
"learning_rate": 1.111111111111111e-06,
|
|
"loss": 3.3304688930511475,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.03513513513513514,
|
|
"grad_norm": 13.83464233131776,
|
|
"learning_rate": 1.1411411411411411e-06,
|
|
"loss": 3.6968789100646973,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.036036036036036036,
|
|
"grad_norm": 17.303369460859294,
|
|
"learning_rate": 1.1711711711711712e-06,
|
|
"loss": 4.100399971008301,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.036936936936936934,
|
|
"grad_norm": 13.424397850983052,
|
|
"learning_rate": 1.2012012012012013e-06,
|
|
"loss": 3.976811408996582,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.03783783783783784,
|
|
"grad_norm": 22.736171499764858,
|
|
"learning_rate": 1.2312312312312314e-06,
|
|
"loss": 3.5594120025634766,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.03873873873873874,
|
|
"grad_norm": 15.927485843698738,
|
|
"learning_rate": 1.2612612612612613e-06,
|
|
"loss": 3.7348382472991943,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.03963963963963964,
|
|
"grad_norm": 22.403551454918947,
|
|
"learning_rate": 1.2912912912912913e-06,
|
|
"loss": 3.6704657077789307,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.04054054054054054,
|
|
"grad_norm": 12.316481332596359,
|
|
"learning_rate": 1.3213213213213214e-06,
|
|
"loss": 3.402862071990967,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.04144144144144144,
|
|
"grad_norm": 19.038151364071236,
|
|
"learning_rate": 1.3513513513513515e-06,
|
|
"loss": 3.531310558319092,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.04234234234234234,
|
|
"grad_norm": 25.790143129271133,
|
|
"learning_rate": 1.3813813813813814e-06,
|
|
"loss": 3.7117223739624023,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.043243243243243246,
|
|
"grad_norm": 13.170821661869951,
|
|
"learning_rate": 1.4114114114114117e-06,
|
|
"loss": 3.381789445877075,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.044144144144144144,
|
|
"grad_norm": 13.885610227951034,
|
|
"learning_rate": 1.4414414414414416e-06,
|
|
"loss": 3.204735040664673,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.04504504504504504,
|
|
"grad_norm": 31.267336865046115,
|
|
"learning_rate": 1.4714714714714714e-06,
|
|
"loss": 4.8206329345703125,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.04594594594594595,
|
|
"grad_norm": 15.877465954218119,
|
|
"learning_rate": 1.5015015015015017e-06,
|
|
"loss": 2.7905378341674805,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.04684684684684685,
|
|
"grad_norm": 16.5338114322775,
|
|
"learning_rate": 1.5315315315315316e-06,
|
|
"loss": 3.382842779159546,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.047747747747747746,
|
|
"grad_norm": 10.673008991068937,
|
|
"learning_rate": 1.5615615615615617e-06,
|
|
"loss": 2.8675003051757812,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.04864864864864865,
|
|
"grad_norm": 11.673726166435003,
|
|
"learning_rate": 1.5915915915915916e-06,
|
|
"loss": 3.3539891242980957,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.04954954954954955,
|
|
"grad_norm": 39.391244386486385,
|
|
"learning_rate": 1.6216216216216219e-06,
|
|
"loss": 3.5359396934509277,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.05045045045045045,
|
|
"grad_norm": 10.618909178532993,
|
|
"learning_rate": 1.6516516516516517e-06,
|
|
"loss": 2.644918203353882,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.051351351351351354,
|
|
"grad_norm": 13.068552382087416,
|
|
"learning_rate": 1.681681681681682e-06,
|
|
"loss": 3.2725865840911865,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.05225225225225225,
|
|
"grad_norm": 10.546533163309212,
|
|
"learning_rate": 1.711711711711712e-06,
|
|
"loss": 2.2234914302825928,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.05315315315315315,
|
|
"grad_norm": 10.810516847147817,
|
|
"learning_rate": 1.7417417417417418e-06,
|
|
"loss": 3.6590347290039062,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.05405405405405406,
|
|
"grad_norm": 12.179326338196333,
|
|
"learning_rate": 1.7717717717717719e-06,
|
|
"loss": 3.5190892219543457,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.054954954954954956,
|
|
"grad_norm": 12.55991185112027,
|
|
"learning_rate": 1.801801801801802e-06,
|
|
"loss": 3.250088691711426,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.055855855855855854,
|
|
"grad_norm": 16.16156820096353,
|
|
"learning_rate": 1.831831831831832e-06,
|
|
"loss": 3.724785804748535,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.05675675675675676,
|
|
"grad_norm": 8.10725681546285,
|
|
"learning_rate": 1.861861861861862e-06,
|
|
"loss": 3.1755051612854004,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.05765765765765766,
|
|
"grad_norm": 10.026358518437815,
|
|
"learning_rate": 1.8918918918918922e-06,
|
|
"loss": 3.3634328842163086,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.05855855855855856,
|
|
"grad_norm": 11.749792278768123,
|
|
"learning_rate": 1.921921921921922e-06,
|
|
"loss": 3.3400256633758545,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.05945945945945946,
|
|
"grad_norm": 20.134196874596437,
|
|
"learning_rate": 1.951951951951952e-06,
|
|
"loss": 3.6804957389831543,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.06036036036036036,
|
|
"grad_norm": 9.269868267334294,
|
|
"learning_rate": 1.9819819819819822e-06,
|
|
"loss": 3.1135306358337402,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.06126126126126126,
|
|
"grad_norm": 7.429607972998909,
|
|
"learning_rate": 2.012012012012012e-06,
|
|
"loss": 3.148690700531006,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.062162162162162166,
|
|
"grad_norm": 19.62825204948393,
|
|
"learning_rate": 2.0420420420420424e-06,
|
|
"loss": 3.147704601287842,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.06306306306306306,
|
|
"grad_norm": 16.19832242781432,
|
|
"learning_rate": 2.0720720720720723e-06,
|
|
"loss": 2.7632508277893066,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.06396396396396396,
|
|
"grad_norm": 13.199429470705226,
|
|
"learning_rate": 2.102102102102102e-06,
|
|
"loss": 3.867487907409668,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.06486486486486487,
|
|
"grad_norm": 13.39571462897039,
|
|
"learning_rate": 2.1321321321321325e-06,
|
|
"loss": 3.193864345550537,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.06576576576576576,
|
|
"grad_norm": 12.419152003530359,
|
|
"learning_rate": 2.1621621621621623e-06,
|
|
"loss": 3.754601001739502,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.06666666666666667,
|
|
"grad_norm": 11.957234193555557,
|
|
"learning_rate": 2.192192192192192e-06,
|
|
"loss": 3.5533010959625244,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.06756756756756757,
|
|
"grad_norm": 9.1627396522264,
|
|
"learning_rate": 2.222222222222222e-06,
|
|
"loss": 3.5297141075134277,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.06846846846846846,
|
|
"grad_norm": 8.288605162110578,
|
|
"learning_rate": 2.2522522522522524e-06,
|
|
"loss": 3.120265483856201,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.06936936936936937,
|
|
"grad_norm": 23.854354911904238,
|
|
"learning_rate": 2.2822822822822822e-06,
|
|
"loss": 3.385438919067383,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.07027027027027027,
|
|
"grad_norm": 11.830332846970656,
|
|
"learning_rate": 2.3123123123123125e-06,
|
|
"loss": 2.939337730407715,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.07117117117117117,
|
|
"grad_norm": 12.413943458644699,
|
|
"learning_rate": 2.3423423423423424e-06,
|
|
"loss": 3.3958535194396973,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.07207207207207207,
|
|
"grad_norm": 8.897920164810204,
|
|
"learning_rate": 2.3723723723723727e-06,
|
|
"loss": 2.494640588760376,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.07297297297297298,
|
|
"grad_norm": 12.477196615096766,
|
|
"learning_rate": 2.4024024024024026e-06,
|
|
"loss": 3.469362258911133,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.07387387387387387,
|
|
"grad_norm": 14.471330569401308,
|
|
"learning_rate": 2.432432432432433e-06,
|
|
"loss": 3.149597406387329,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.07477477477477477,
|
|
"grad_norm": 11.472229000158672,
|
|
"learning_rate": 2.4624624624624628e-06,
|
|
"loss": 3.11580228805542,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.07567567567567568,
|
|
"grad_norm": 21.257089734183978,
|
|
"learning_rate": 2.4924924924924926e-06,
|
|
"loss": 4.018277168273926,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.07657657657657657,
|
|
"grad_norm": 11.037133037583047,
|
|
"learning_rate": 2.5225225225225225e-06,
|
|
"loss": 3.0106778144836426,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.07747747747747748,
|
|
"grad_norm": 11.701268717670866,
|
|
"learning_rate": 2.552552552552553e-06,
|
|
"loss": 2.9505202770233154,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.07837837837837838,
|
|
"grad_norm": 13.206351745245932,
|
|
"learning_rate": 2.5825825825825827e-06,
|
|
"loss": 2.924464702606201,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.07927927927927927,
|
|
"grad_norm": 13.158912537609984,
|
|
"learning_rate": 2.612612612612613e-06,
|
|
"loss": 2.4891014099121094,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.08018018018018018,
|
|
"grad_norm": 13.2529767687829,
|
|
"learning_rate": 2.642642642642643e-06,
|
|
"loss": 3.5574870109558105,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.08108108108108109,
|
|
"grad_norm": 12.879307015129848,
|
|
"learning_rate": 2.672672672672673e-06,
|
|
"loss": 3.6157870292663574,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.08198198198198198,
|
|
"grad_norm": 7.643527481100194,
|
|
"learning_rate": 2.702702702702703e-06,
|
|
"loss": 3.330392360687256,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.08288288288288288,
|
|
"grad_norm": 19.597955391317893,
|
|
"learning_rate": 2.732732732732733e-06,
|
|
"loss": 3.15685772895813,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.08378378378378379,
|
|
"grad_norm": 12.343179251305617,
|
|
"learning_rate": 2.7627627627627628e-06,
|
|
"loss": 2.9495744705200195,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.08468468468468468,
|
|
"grad_norm": 10.291955830387405,
|
|
"learning_rate": 2.7927927927927926e-06,
|
|
"loss": 3.066584825515747,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.08558558558558559,
|
|
"grad_norm": 12.014607383069372,
|
|
"learning_rate": 2.8228228228228234e-06,
|
|
"loss": 3.218724250793457,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.08648648648648649,
|
|
"grad_norm": 10.294642412471823,
|
|
"learning_rate": 2.8528528528528532e-06,
|
|
"loss": 2.9423789978027344,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.08738738738738738,
|
|
"grad_norm": 16.625230696179347,
|
|
"learning_rate": 2.882882882882883e-06,
|
|
"loss": 3.0145962238311768,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.08828828828828829,
|
|
"grad_norm": 13.380056321484588,
|
|
"learning_rate": 2.912912912912913e-06,
|
|
"loss": 3.7270960807800293,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.0891891891891892,
|
|
"grad_norm": 6.099162968769779,
|
|
"learning_rate": 2.942942942942943e-06,
|
|
"loss": 2.628577709197998,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.09009009009009009,
|
|
"grad_norm": 15.60668918864251,
|
|
"learning_rate": 2.9729729729729736e-06,
|
|
"loss": 3.4101738929748535,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.09099099099099099,
|
|
"grad_norm": 13.899724524081691,
|
|
"learning_rate": 3.0030030030030034e-06,
|
|
"loss": 2.78098201751709,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.0918918918918919,
|
|
"grad_norm": 9.955567061468747,
|
|
"learning_rate": 3.0330330330330333e-06,
|
|
"loss": 3.241696834564209,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.09279279279279279,
|
|
"grad_norm": 13.804683010117481,
|
|
"learning_rate": 3.063063063063063e-06,
|
|
"loss": 2.9352574348449707,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.0936936936936937,
|
|
"grad_norm": 22.137954195453922,
|
|
"learning_rate": 3.0930930930930935e-06,
|
|
"loss": 3.422001838684082,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.0945945945945946,
|
|
"grad_norm": 13.171905875372056,
|
|
"learning_rate": 3.1231231231231234e-06,
|
|
"loss": 2.9945342540740967,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.09549549549549549,
|
|
"grad_norm": 12.787823101874702,
|
|
"learning_rate": 3.1531531531531532e-06,
|
|
"loss": 3.4306979179382324,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.0963963963963964,
|
|
"grad_norm": 8.719903774005465,
|
|
"learning_rate": 3.183183183183183e-06,
|
|
"loss": 3.015371799468994,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.0972972972972973,
|
|
"grad_norm": 8.620974890669817,
|
|
"learning_rate": 3.2132132132132134e-06,
|
|
"loss": 2.8143250942230225,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.0981981981981982,
|
|
"grad_norm": 8.71040069570512,
|
|
"learning_rate": 3.2432432432432437e-06,
|
|
"loss": 3.1959874629974365,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.0990990990990991,
|
|
"grad_norm": 14.16224898204618,
|
|
"learning_rate": 3.2732732732732736e-06,
|
|
"loss": 3.4496989250183105,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"grad_norm": 12.935856548843718,
|
|
"learning_rate": 3.3033033033033035e-06,
|
|
"loss": 3.289576768875122,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1009009009009009,
|
|
"grad_norm": 13.810744939226538,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 3.128309965133667,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.1018018018018018,
|
|
"grad_norm": 9.221082446996164,
|
|
"learning_rate": 3.363363363363364e-06,
|
|
"loss": 2.374311923980713,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.10270270270270271,
|
|
"grad_norm": 10.86065731811132,
|
|
"learning_rate": 3.393393393393394e-06,
|
|
"loss": 3.3044633865356445,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.1036036036036036,
|
|
"grad_norm": 14.862448426052204,
|
|
"learning_rate": 3.423423423423424e-06,
|
|
"loss": 3.6216392517089844,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.1045045045045045,
|
|
"grad_norm": 8.179463296485958,
|
|
"learning_rate": 3.4534534534534537e-06,
|
|
"loss": 2.308753728866577,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.10540540540540541,
|
|
"grad_norm": 10.032980511614772,
|
|
"learning_rate": 3.4834834834834835e-06,
|
|
"loss": 2.679088592529297,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.1063063063063063,
|
|
"grad_norm": 10.103046641973926,
|
|
"learning_rate": 3.513513513513514e-06,
|
|
"loss": 3.0781192779541016,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.10720720720720721,
|
|
"grad_norm": 11.12850524281396,
|
|
"learning_rate": 3.5435435435435437e-06,
|
|
"loss": 3.381030797958374,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.10810810810810811,
|
|
"grad_norm": 15.983530195410662,
|
|
"learning_rate": 3.573573573573574e-06,
|
|
"loss": 3.3236331939697266,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.109009009009009,
|
|
"grad_norm": 12.254133581497408,
|
|
"learning_rate": 3.603603603603604e-06,
|
|
"loss": 3.121483564376831,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.10990990990990991,
|
|
"grad_norm": 16.128153623744875,
|
|
"learning_rate": 3.633633633633634e-06,
|
|
"loss": 2.8502209186553955,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.11081081081081082,
|
|
"grad_norm": 11.018669577448238,
|
|
"learning_rate": 3.663663663663664e-06,
|
|
"loss": 3.017697334289551,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.11171171171171171,
|
|
"grad_norm": 16.557843608556773,
|
|
"learning_rate": 3.693693693693694e-06,
|
|
"loss": 3.119530439376831,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.11261261261261261,
|
|
"grad_norm": 20.247862827736306,
|
|
"learning_rate": 3.723723723723724e-06,
|
|
"loss": 3.4490034580230713,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.11351351351351352,
|
|
"grad_norm": 11.866991364178078,
|
|
"learning_rate": 3.7537537537537537e-06,
|
|
"loss": 2.71677565574646,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.11441441441441441,
|
|
"grad_norm": 13.654608561170818,
|
|
"learning_rate": 3.7837837837837844e-06,
|
|
"loss": 3.1837260723114014,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.11531531531531532,
|
|
"grad_norm": 7.888677445935044,
|
|
"learning_rate": 3.8138138138138143e-06,
|
|
"loss": 2.5487513542175293,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.11621621621621622,
|
|
"grad_norm": 10.107606792947632,
|
|
"learning_rate": 3.843843843843844e-06,
|
|
"loss": 3.1575422286987305,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.11711711711711711,
|
|
"grad_norm": 22.246801953565882,
|
|
"learning_rate": 3.8738738738738744e-06,
|
|
"loss": 2.3211145401000977,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.11801801801801802,
|
|
"grad_norm": 10.113692255019279,
|
|
"learning_rate": 3.903903903903904e-06,
|
|
"loss": 2.823888063430786,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.11891891891891893,
|
|
"grad_norm": 15.495899836725995,
|
|
"learning_rate": 3.933933933933934e-06,
|
|
"loss": 2.629729747772217,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.11981981981981982,
|
|
"grad_norm": 14.895112568402705,
|
|
"learning_rate": 3.9639639639639645e-06,
|
|
"loss": 3.0849714279174805,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.12072072072072072,
|
|
"grad_norm": 8.701812516734513,
|
|
"learning_rate": 3.993993993993994e-06,
|
|
"loss": 3.189228057861328,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.12162162162162163,
|
|
"grad_norm": 17.51105059025981,
|
|
"learning_rate": 4.024024024024024e-06,
|
|
"loss": 3.5474631786346436,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.12252252252252252,
|
|
"grad_norm": 14.232975595458623,
|
|
"learning_rate": 4.0540540540540545e-06,
|
|
"loss": 2.9190244674682617,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.12342342342342343,
|
|
"grad_norm": 11.321214340628257,
|
|
"learning_rate": 4.084084084084085e-06,
|
|
"loss": 2.755706787109375,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.12432432432432433,
|
|
"grad_norm": 10.713962083603612,
|
|
"learning_rate": 4.114114114114114e-06,
|
|
"loss": 2.820930004119873,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.12522522522522522,
|
|
"grad_norm": 20.831702291364305,
|
|
"learning_rate": 4.1441441441441446e-06,
|
|
"loss": 3.517151355743408,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.12612612612612611,
|
|
"grad_norm": 17.371083213632783,
|
|
"learning_rate": 4.174174174174174e-06,
|
|
"loss": 3.032097578048706,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.12702702702702703,
|
|
"grad_norm": 11.438025273609616,
|
|
"learning_rate": 4.204204204204204e-06,
|
|
"loss": 2.9336793422698975,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.12792792792792793,
|
|
"grad_norm": 15.581995340545928,
|
|
"learning_rate": 4.234234234234235e-06,
|
|
"loss": 3.2077693939208984,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.12882882882882882,
|
|
"grad_norm": 9.595652945396042,
|
|
"learning_rate": 4.264264264264265e-06,
|
|
"loss": 2.4009509086608887,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.12972972972972974,
|
|
"grad_norm": 12.73963901565,
|
|
"learning_rate": 4.294294294294294e-06,
|
|
"loss": 3.3759877681732178,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.13063063063063063,
|
|
"grad_norm": 8.56503387459497,
|
|
"learning_rate": 4.324324324324325e-06,
|
|
"loss": 2.4320008754730225,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.13153153153153152,
|
|
"grad_norm": 13.57118369124668,
|
|
"learning_rate": 4.354354354354355e-06,
|
|
"loss": 3.0062925815582275,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.13243243243243244,
|
|
"grad_norm": 12.135823373871956,
|
|
"learning_rate": 4.384384384384384e-06,
|
|
"loss": 3.525376319885254,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.13333333333333333,
|
|
"grad_norm": 13.861297251288068,
|
|
"learning_rate": 4.414414414414415e-06,
|
|
"loss": 3.0181641578674316,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.13423423423423422,
|
|
"grad_norm": 20.567236637049717,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 2.6696677207946777,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.13513513513513514,
|
|
"grad_norm": 11.490193587124834,
|
|
"learning_rate": 4.474474474474475e-06,
|
|
"loss": 3.0199146270751953,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.13603603603603603,
|
|
"grad_norm": 7.573235089594336,
|
|
"learning_rate": 4.504504504504505e-06,
|
|
"loss": 3.1178090572357178,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.13693693693693693,
|
|
"grad_norm": 7.5936293707160045,
|
|
"learning_rate": 4.534534534534535e-06,
|
|
"loss": 3.1521480083465576,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.13783783783783785,
|
|
"grad_norm": 13.939422708113943,
|
|
"learning_rate": 4.5645645645645645e-06,
|
|
"loss": 2.7968058586120605,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.13873873873873874,
|
|
"grad_norm": 12.151567813914179,
|
|
"learning_rate": 4.594594594594596e-06,
|
|
"loss": 2.4209094047546387,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.13963963963963963,
|
|
"grad_norm": 8.324430236189901,
|
|
"learning_rate": 4.624624624624625e-06,
|
|
"loss": 2.684305191040039,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.14054054054054055,
|
|
"grad_norm": 7.014662285936094,
|
|
"learning_rate": 4.654654654654655e-06,
|
|
"loss": 2.868997097015381,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.14144144144144144,
|
|
"grad_norm": 11.634045875682444,
|
|
"learning_rate": 4.684684684684685e-06,
|
|
"loss": 3.4053492546081543,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.14234234234234233,
|
|
"grad_norm": 17.085383352071734,
|
|
"learning_rate": 4.714714714714715e-06,
|
|
"loss": 2.8980045318603516,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.14324324324324325,
|
|
"grad_norm": 9.406901847325535,
|
|
"learning_rate": 4.7447447447447454e-06,
|
|
"loss": 3.1916074752807617,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.14414414414414414,
|
|
"grad_norm": 14.368442790302291,
|
|
"learning_rate": 4.774774774774775e-06,
|
|
"loss": 3.0805845260620117,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.14504504504504503,
|
|
"grad_norm": 8.668855178764803,
|
|
"learning_rate": 4.804804804804805e-06,
|
|
"loss": 3.4004087448120117,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.14594594594594595,
|
|
"grad_norm": 13.043494784682139,
|
|
"learning_rate": 4.8348348348348355e-06,
|
|
"loss": 3.1242763996124268,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.14684684684684685,
|
|
"grad_norm": 10.178653496544205,
|
|
"learning_rate": 4.864864864864866e-06,
|
|
"loss": 2.9134225845336914,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.14774774774774774,
|
|
"grad_norm": 6.342120643856489,
|
|
"learning_rate": 4.894894894894895e-06,
|
|
"loss": 3.055790662765503,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.14864864864864866,
|
|
"grad_norm": 12.129340765287669,
|
|
"learning_rate": 4.9249249249249255e-06,
|
|
"loss": 2.9754109382629395,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.14954954954954955,
|
|
"grad_norm": 22.73014606592528,
|
|
"learning_rate": 4.954954954954955e-06,
|
|
"loss": 3.805633068084717,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.15045045045045044,
|
|
"grad_norm": 9.118927644346341,
|
|
"learning_rate": 4.984984984984985e-06,
|
|
"loss": 2.2361717224121094,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.15135135135135136,
|
|
"grad_norm": 15.801499172740884,
|
|
"learning_rate": 5.0150150150150156e-06,
|
|
"loss": 2.229874610900879,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.15225225225225225,
|
|
"grad_norm": 11.189094205311491,
|
|
"learning_rate": 5.045045045045045e-06,
|
|
"loss": 3.1760482788085938,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.15315315315315314,
|
|
"grad_norm": 13.188780981566062,
|
|
"learning_rate": 5.075075075075075e-06,
|
|
"loss": 2.8448102474212646,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.15405405405405406,
|
|
"grad_norm": 7.481996418572799,
|
|
"learning_rate": 5.105105105105106e-06,
|
|
"loss": 2.7560243606567383,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.15495495495495495,
|
|
"grad_norm": 21.437670538552602,
|
|
"learning_rate": 5.135135135135135e-06,
|
|
"loss": 2.64109468460083,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.15585585585585585,
|
|
"grad_norm": 19.055452737668237,
|
|
"learning_rate": 5.165165165165165e-06,
|
|
"loss": 3.4684371948242188,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.15675675675675677,
|
|
"grad_norm": 8.827278141715077,
|
|
"learning_rate": 5.195195195195195e-06,
|
|
"loss": 2.9278182983398438,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.15765765765765766,
|
|
"grad_norm": 11.375037513358095,
|
|
"learning_rate": 5.225225225225226e-06,
|
|
"loss": 2.990676164627075,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.15855855855855855,
|
|
"grad_norm": 9.420956828643389,
|
|
"learning_rate": 5.255255255255256e-06,
|
|
"loss": 2.0962235927581787,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.15945945945945947,
|
|
"grad_norm": 8.729852916984361,
|
|
"learning_rate": 5.285285285285286e-06,
|
|
"loss": 2.8978538513183594,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.16036036036036036,
|
|
"grad_norm": 14.666603722531908,
|
|
"learning_rate": 5.315315315315316e-06,
|
|
"loss": 3.186276435852051,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.16126126126126125,
|
|
"grad_norm": 6.984437023251985,
|
|
"learning_rate": 5.345345345345346e-06,
|
|
"loss": 3.3925909996032715,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.16216216216216217,
|
|
"grad_norm": 10.905901577860856,
|
|
"learning_rate": 5.375375375375376e-06,
|
|
"loss": 2.7547173500061035,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.16306306306306306,
|
|
"grad_norm": 11.292530700947578,
|
|
"learning_rate": 5.405405405405406e-06,
|
|
"loss": 3.129422664642334,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.16396396396396395,
|
|
"grad_norm": 9.809577115554335,
|
|
"learning_rate": 5.4354354354354355e-06,
|
|
"loss": 3.009068489074707,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.16486486486486487,
|
|
"grad_norm": 7.844265809521192,
|
|
"learning_rate": 5.465465465465466e-06,
|
|
"loss": 3.0007176399230957,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.16576576576576577,
|
|
"grad_norm": 20.260864872887325,
|
|
"learning_rate": 5.495495495495496e-06,
|
|
"loss": 2.944118022918701,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 13.536886226756584,
|
|
"learning_rate": 5.5255255255255255e-06,
|
|
"loss": 3.304983615875244,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.16756756756756758,
|
|
"grad_norm": 24.2324837682505,
|
|
"learning_rate": 5.555555555555557e-06,
|
|
"loss": 2.6285665035247803,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.16846846846846847,
|
|
"grad_norm": 20.428575340848823,
|
|
"learning_rate": 5.585585585585585e-06,
|
|
"loss": 3.5311875343322754,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.16936936936936936,
|
|
"grad_norm": 8.628152741585867,
|
|
"learning_rate": 5.615615615615616e-06,
|
|
"loss": 2.199131965637207,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.17027027027027028,
|
|
"grad_norm": 9.494224411854544,
|
|
"learning_rate": 5.645645645645647e-06,
|
|
"loss": 3.9044899940490723,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.17117117117117117,
|
|
"grad_norm": 10.448079373150058,
|
|
"learning_rate": 5.675675675675676e-06,
|
|
"loss": 2.909975528717041,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.17207207207207206,
|
|
"grad_norm": 14.728245606199566,
|
|
"learning_rate": 5.7057057057057065e-06,
|
|
"loss": 2.8709239959716797,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.17297297297297298,
|
|
"grad_norm": 14.629062202703784,
|
|
"learning_rate": 5.735735735735736e-06,
|
|
"loss": 2.678546190261841,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.17387387387387387,
|
|
"grad_norm": 18.935077283233756,
|
|
"learning_rate": 5.765765765765766e-06,
|
|
"loss": 3.197597026824951,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.17477477477477477,
|
|
"grad_norm": 17.667465284169104,
|
|
"learning_rate": 5.7957957957957965e-06,
|
|
"loss": 2.366365909576416,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.17567567567567569,
|
|
"grad_norm": 17.576062581395792,
|
|
"learning_rate": 5.825825825825826e-06,
|
|
"loss": 2.9731223583221436,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.17657657657657658,
|
|
"grad_norm": 11.088082103017733,
|
|
"learning_rate": 5.855855855855856e-06,
|
|
"loss": 2.6038565635681152,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.17747747747747747,
|
|
"grad_norm": 7.860840897402412,
|
|
"learning_rate": 5.885885885885886e-06,
|
|
"loss": 2.7844109535217285,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.1783783783783784,
|
|
"grad_norm": 9.110969545022156,
|
|
"learning_rate": 5.915915915915916e-06,
|
|
"loss": 2.762868642807007,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.17927927927927928,
|
|
"grad_norm": 18.196233055366637,
|
|
"learning_rate": 5.945945945945947e-06,
|
|
"loss": 3.618750810623169,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.18018018018018017,
|
|
"grad_norm": 11.116332907963825,
|
|
"learning_rate": 5.975975975975976e-06,
|
|
"loss": 3.071817398071289,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.1810810810810811,
|
|
"grad_norm": 13.460412830835155,
|
|
"learning_rate": 6.006006006006007e-06,
|
|
"loss": 2.9415009021759033,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.18198198198198198,
|
|
"grad_norm": 12.230300033958319,
|
|
"learning_rate": 6.036036036036037e-06,
|
|
"loss": 2.904818534851074,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.18288288288288287,
|
|
"grad_norm": 14.440108065299071,
|
|
"learning_rate": 6.066066066066067e-06,
|
|
"loss": 3.1651129722595215,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.1837837837837838,
|
|
"grad_norm": 9.206420867526731,
|
|
"learning_rate": 6.096096096096097e-06,
|
|
"loss": 2.672524929046631,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.18468468468468469,
|
|
"grad_norm": 8.45012457455799,
|
|
"learning_rate": 6.126126126126126e-06,
|
|
"loss": 2.4215810298919678,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.18558558558558558,
|
|
"grad_norm": 13.722693744341788,
|
|
"learning_rate": 6.156156156156157e-06,
|
|
"loss": 3.09840726852417,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.1864864864864865,
|
|
"grad_norm": 11.63142498028212,
|
|
"learning_rate": 6.186186186186187e-06,
|
|
"loss": 3.1762261390686035,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.1873873873873874,
|
|
"grad_norm": 10.926226524721915,
|
|
"learning_rate": 6.2162162162162164e-06,
|
|
"loss": 3.021667242050171,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.18828828828828828,
|
|
"grad_norm": 21.89247663108361,
|
|
"learning_rate": 6.246246246246247e-06,
|
|
"loss": 3.9234015941619873,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.1891891891891892,
|
|
"grad_norm": 8.168441579655488,
|
|
"learning_rate": 6.276276276276276e-06,
|
|
"loss": 2.78212833404541,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.1900900900900901,
|
|
"grad_norm": 13.350560462462122,
|
|
"learning_rate": 6.3063063063063065e-06,
|
|
"loss": 3.166926145553589,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.19099099099099098,
|
|
"grad_norm": 9.207901682088472,
|
|
"learning_rate": 6.336336336336338e-06,
|
|
"loss": 3.0401434898376465,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.1918918918918919,
|
|
"grad_norm": 17.33282946556218,
|
|
"learning_rate": 6.366366366366366e-06,
|
|
"loss": 3.134824514389038,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.1927927927927928,
|
|
"grad_norm": 14.61459028506562,
|
|
"learning_rate": 6.396396396396397e-06,
|
|
"loss": 3.093085527420044,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.19369369369369369,
|
|
"grad_norm": 28.18324111564279,
|
|
"learning_rate": 6.426426426426427e-06,
|
|
"loss": 2.892199993133545,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.1945945945945946,
|
|
"grad_norm": 11.472836781772417,
|
|
"learning_rate": 6.456456456456457e-06,
|
|
"loss": 2.6956958770751953,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.1954954954954955,
|
|
"grad_norm": 9.299172880498764,
|
|
"learning_rate": 6.486486486486487e-06,
|
|
"loss": 3.0531160831451416,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.1963963963963964,
|
|
"grad_norm": 9.086493974514497,
|
|
"learning_rate": 6.516516516516517e-06,
|
|
"loss": 3.7178831100463867,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.1972972972972973,
|
|
"grad_norm": 17.851046987553843,
|
|
"learning_rate": 6.546546546546547e-06,
|
|
"loss": 2.9834225177764893,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.1981981981981982,
|
|
"grad_norm": 15.359167786383372,
|
|
"learning_rate": 6.5765765765765775e-06,
|
|
"loss": 2.7804837226867676,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.1990990990990991,
|
|
"grad_norm": 9.470229603191179,
|
|
"learning_rate": 6.606606606606607e-06,
|
|
"loss": 2.727168083190918,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"grad_norm": 8.050118994726809,
|
|
"learning_rate": 6.636636636636637e-06,
|
|
"loss": 3.027698278427124,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.2009009009009009,
|
|
"grad_norm": 9.701821344008287,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 3.0183022022247314,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.2018018018018018,
|
|
"grad_norm": 13.605979491933239,
|
|
"learning_rate": 6.696696696696697e-06,
|
|
"loss": 2.932844877243042,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.20270270270270271,
|
|
"grad_norm": 15.56590784626651,
|
|
"learning_rate": 6.726726726726728e-06,
|
|
"loss": 2.8303544521331787,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.2036036036036036,
|
|
"grad_norm": 6.809800820822605,
|
|
"learning_rate": 6.7567567567567575e-06,
|
|
"loss": 2.799440860748291,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.2045045045045045,
|
|
"grad_norm": 12.594818924759007,
|
|
"learning_rate": 6.786786786786788e-06,
|
|
"loss": 2.9847114086151123,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.20540540540540542,
|
|
"grad_norm": 9.128656924708121,
|
|
"learning_rate": 6.816816816816817e-06,
|
|
"loss": 3.242035388946533,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.2063063063063063,
|
|
"grad_norm": 14.073242699932655,
|
|
"learning_rate": 6.846846846846848e-06,
|
|
"loss": 2.743699550628662,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.2072072072072072,
|
|
"grad_norm": 24.485071339847323,
|
|
"learning_rate": 6.876876876876878e-06,
|
|
"loss": 2.5092387199401855,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.20810810810810812,
|
|
"grad_norm": 14.048562783521968,
|
|
"learning_rate": 6.906906906906907e-06,
|
|
"loss": 3.34120774269104,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.209009009009009,
|
|
"grad_norm": 14.000061322325454,
|
|
"learning_rate": 6.936936936936938e-06,
|
|
"loss": 2.793375015258789,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.2099099099099099,
|
|
"grad_norm": 17.43089543234574,
|
|
"learning_rate": 6.966966966966967e-06,
|
|
"loss": 3.4899823665618896,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.21081081081081082,
|
|
"grad_norm": 11.566273600684262,
|
|
"learning_rate": 6.996996996996997e-06,
|
|
"loss": 2.355807065963745,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.21171171171171171,
|
|
"grad_norm": 7.409607190052066,
|
|
"learning_rate": 7.027027027027028e-06,
|
|
"loss": 2.8509583473205566,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.2126126126126126,
|
|
"grad_norm": 8.533556541298525,
|
|
"learning_rate": 7.057057057057057e-06,
|
|
"loss": 2.945148229598999,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.21351351351351353,
|
|
"grad_norm": 11.872695772014,
|
|
"learning_rate": 7.087087087087087e-06,
|
|
"loss": 2.959815502166748,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.21441441441441442,
|
|
"grad_norm": 8.96114701476462,
|
|
"learning_rate": 7.117117117117117e-06,
|
|
"loss": 2.9691104888916016,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.2153153153153153,
|
|
"grad_norm": 16.361730848996025,
|
|
"learning_rate": 7.147147147147148e-06,
|
|
"loss": 3.477224111557007,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.21621621621621623,
|
|
"grad_norm": 10.277008215567028,
|
|
"learning_rate": 7.177177177177178e-06,
|
|
"loss": 2.737469434738159,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.21711711711711712,
|
|
"grad_norm": 9.913033317953237,
|
|
"learning_rate": 7.207207207207208e-06,
|
|
"loss": 3.0314159393310547,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.218018018018018,
|
|
"grad_norm": 11.01638420875838,
|
|
"learning_rate": 7.237237237237238e-06,
|
|
"loss": 2.6725852489471436,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.21891891891891893,
|
|
"grad_norm": 16.416028513538844,
|
|
"learning_rate": 7.267267267267268e-06,
|
|
"loss": 3.2464849948883057,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.21981981981981982,
|
|
"grad_norm": 10.231502040024115,
|
|
"learning_rate": 7.297297297297298e-06,
|
|
"loss": 2.8645577430725098,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.22072072072072071,
|
|
"grad_norm": 11.10775422396275,
|
|
"learning_rate": 7.327327327327328e-06,
|
|
"loss": 3.2867000102996826,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.22162162162162163,
|
|
"grad_norm": 11.317639611848502,
|
|
"learning_rate": 7.3573573573573575e-06,
|
|
"loss": 2.9636595249176025,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.22252252252252253,
|
|
"grad_norm": 8.974086359979514,
|
|
"learning_rate": 7.387387387387388e-06,
|
|
"loss": 2.8476545810699463,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.22342342342342342,
|
|
"grad_norm": 21.21552137551918,
|
|
"learning_rate": 7.417417417417418e-06,
|
|
"loss": 1.956771731376648,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.22432432432432434,
|
|
"grad_norm": 23.44471075215663,
|
|
"learning_rate": 7.447447447447448e-06,
|
|
"loss": 3.0224685668945312,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.22522522522522523,
|
|
"grad_norm": 10.507608150607815,
|
|
"learning_rate": 7.477477477477479e-06,
|
|
"loss": 2.8447585105895996,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.22612612612612612,
|
|
"grad_norm": 8.406551181013327,
|
|
"learning_rate": 7.507507507507507e-06,
|
|
"loss": 3.091822624206543,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.22702702702702704,
|
|
"grad_norm": 15.439471955279746,
|
|
"learning_rate": 7.5375375375375385e-06,
|
|
"loss": 2.596545696258545,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.22792792792792793,
|
|
"grad_norm": 32.28371488979198,
|
|
"learning_rate": 7.567567567567569e-06,
|
|
"loss": 2.685854434967041,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.22882882882882882,
|
|
"grad_norm": 8.850471884422483,
|
|
"learning_rate": 7.597597597597598e-06,
|
|
"loss": 2.3421695232391357,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.22972972972972974,
|
|
"grad_norm": 11.577982513842546,
|
|
"learning_rate": 7.6276276276276285e-06,
|
|
"loss": 2.624809741973877,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.23063063063063063,
|
|
"grad_norm": 11.15519392482013,
|
|
"learning_rate": 7.657657657657658e-06,
|
|
"loss": 3.2881715297698975,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.23153153153153153,
|
|
"grad_norm": 8.044318807407215,
|
|
"learning_rate": 7.687687687687688e-06,
|
|
"loss": 3.0412468910217285,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.23243243243243245,
|
|
"grad_norm": 9.730521210200404,
|
|
"learning_rate": 7.717717717717719e-06,
|
|
"loss": 2.589629650115967,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.23333333333333334,
|
|
"grad_norm": 10.635194903021821,
|
|
"learning_rate": 7.747747747747749e-06,
|
|
"loss": 2.9791548252105713,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.23423423423423423,
|
|
"grad_norm": 16.18630342454132,
|
|
"learning_rate": 7.77777777777778e-06,
|
|
"loss": 2.757498025894165,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.23513513513513515,
|
|
"grad_norm": 14.686981686819545,
|
|
"learning_rate": 7.807807807807808e-06,
|
|
"loss": 3.445099115371704,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.23603603603603604,
|
|
"grad_norm": 12.096856501251967,
|
|
"learning_rate": 7.837837837837838e-06,
|
|
"loss": 2.058835029602051,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.23693693693693693,
|
|
"grad_norm": 13.50183691044128,
|
|
"learning_rate": 7.867867867867868e-06,
|
|
"loss": 2.9921302795410156,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.23783783783783785,
|
|
"grad_norm": 7.67222528315355,
|
|
"learning_rate": 7.897897897897899e-06,
|
|
"loss": 2.6035046577453613,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.23873873873873874,
|
|
"grad_norm": 14.639662719034007,
|
|
"learning_rate": 7.927927927927929e-06,
|
|
"loss": 2.464315414428711,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.23963963963963963,
|
|
"grad_norm": 14.682415018493252,
|
|
"learning_rate": 7.95795795795796e-06,
|
|
"loss": 2.6213533878326416,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.24054054054054055,
|
|
"grad_norm": 11.196079589876906,
|
|
"learning_rate": 7.987987987987988e-06,
|
|
"loss": 3.0618228912353516,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.24144144144144145,
|
|
"grad_norm": 11.835065916234305,
|
|
"learning_rate": 8.018018018018018e-06,
|
|
"loss": 2.7994627952575684,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.24234234234234234,
|
|
"grad_norm": 8.459248820549243,
|
|
"learning_rate": 8.048048048048048e-06,
|
|
"loss": 2.644664764404297,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.24324324324324326,
|
|
"grad_norm": 56.00595141573437,
|
|
"learning_rate": 8.078078078078079e-06,
|
|
"loss": 3.402913808822632,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.24414414414414415,
|
|
"grad_norm": 12.388902204738192,
|
|
"learning_rate": 8.108108108108109e-06,
|
|
"loss": 3.0297579765319824,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.24504504504504504,
|
|
"grad_norm": 17.5746724225366,
|
|
"learning_rate": 8.13813813813814e-06,
|
|
"loss": 3.002163887023926,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.24594594594594596,
|
|
"grad_norm": 12.411588847757736,
|
|
"learning_rate": 8.16816816816817e-06,
|
|
"loss": 2.925816774368286,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.24684684684684685,
|
|
"grad_norm": 10.7631886033697,
|
|
"learning_rate": 8.198198198198198e-06,
|
|
"loss": 3.161365509033203,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.24774774774774774,
|
|
"grad_norm": 19.59729985285342,
|
|
"learning_rate": 8.228228228228229e-06,
|
|
"loss": 3.6167702674865723,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.24864864864864866,
|
|
"grad_norm": 8.56052894464566,
|
|
"learning_rate": 8.258258258258259e-06,
|
|
"loss": 3.3377461433410645,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.24954954954954955,
|
|
"grad_norm": 12.5188207748136,
|
|
"learning_rate": 8.288288288288289e-06,
|
|
"loss": 1.945539116859436,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.25045045045045045,
|
|
"grad_norm": 10.559017592650845,
|
|
"learning_rate": 8.31831831831832e-06,
|
|
"loss": 3.231947898864746,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.25135135135135134,
|
|
"grad_norm": 11.363006986404962,
|
|
"learning_rate": 8.348348348348348e-06,
|
|
"loss": 2.5976619720458984,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.25225225225225223,
|
|
"grad_norm": 9.456506874294009,
|
|
"learning_rate": 8.378378378378378e-06,
|
|
"loss": 3.0002479553222656,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.2531531531531532,
|
|
"grad_norm": 7.233256385798434,
|
|
"learning_rate": 8.408408408408409e-06,
|
|
"loss": 2.9631714820861816,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.25405405405405407,
|
|
"grad_norm": 12.740994539211504,
|
|
"learning_rate": 8.438438438438439e-06,
|
|
"loss": 3.342411518096924,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.25495495495495496,
|
|
"grad_norm": 16.975791181138625,
|
|
"learning_rate": 8.46846846846847e-06,
|
|
"loss": 3.3739967346191406,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.25585585585585585,
|
|
"grad_norm": 8.000096311194891,
|
|
"learning_rate": 8.4984984984985e-06,
|
|
"loss": 3.274104118347168,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.25675675675675674,
|
|
"grad_norm": 7.915871738254484,
|
|
"learning_rate": 8.52852852852853e-06,
|
|
"loss": 2.7849321365356445,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.25765765765765763,
|
|
"grad_norm": 14.470173491798448,
|
|
"learning_rate": 8.55855855855856e-06,
|
|
"loss": 2.959172248840332,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.2585585585585586,
|
|
"grad_norm": 13.094243355506956,
|
|
"learning_rate": 8.588588588588589e-06,
|
|
"loss": 2.1489405632019043,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.2594594594594595,
|
|
"grad_norm": 10.28241813049188,
|
|
"learning_rate": 8.618618618618619e-06,
|
|
"loss": 2.9859347343444824,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.26036036036036037,
|
|
"grad_norm": 21.002045335848507,
|
|
"learning_rate": 8.64864864864865e-06,
|
|
"loss": 2.8425345420837402,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.26126126126126126,
|
|
"grad_norm": 6.556019554933329,
|
|
"learning_rate": 8.67867867867868e-06,
|
|
"loss": 2.2806668281555176,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.26216216216216215,
|
|
"grad_norm": 10.248207037492623,
|
|
"learning_rate": 8.70870870870871e-06,
|
|
"loss": 2.6596124172210693,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.26306306306306304,
|
|
"grad_norm": 14.058370075599388,
|
|
"learning_rate": 8.738738738738739e-06,
|
|
"loss": 3.0518083572387695,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.263963963963964,
|
|
"grad_norm": 14.981022948019561,
|
|
"learning_rate": 8.768768768768769e-06,
|
|
"loss": 2.9386940002441406,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.2648648648648649,
|
|
"grad_norm": 9.126114352264482,
|
|
"learning_rate": 8.798798798798799e-06,
|
|
"loss": 2.7580277919769287,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.26576576576576577,
|
|
"grad_norm": 24.283955472813055,
|
|
"learning_rate": 8.82882882882883e-06,
|
|
"loss": 3.1863598823547363,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.26666666666666666,
|
|
"grad_norm": 13.512487486565014,
|
|
"learning_rate": 8.85885885885886e-06,
|
|
"loss": 2.6051082611083984,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.26756756756756755,
|
|
"grad_norm": 8.476380947832192,
|
|
"learning_rate": 8.888888888888888e-06,
|
|
"loss": 2.938279867172241,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.26846846846846845,
|
|
"grad_norm": 8.989581462618583,
|
|
"learning_rate": 8.91891891891892e-06,
|
|
"loss": 2.6073546409606934,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.2693693693693694,
|
|
"grad_norm": 6.791346567014697,
|
|
"learning_rate": 8.94894894894895e-06,
|
|
"loss": 2.4681522846221924,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.2702702702702703,
|
|
"grad_norm": 7.570697729070026,
|
|
"learning_rate": 8.97897897897898e-06,
|
|
"loss": 2.572007656097412,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.2711711711711712,
|
|
"grad_norm": 12.873385521201621,
|
|
"learning_rate": 9.00900900900901e-06,
|
|
"loss": 3.200517177581787,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.27207207207207207,
|
|
"grad_norm": 9.919996447413737,
|
|
"learning_rate": 9.03903903903904e-06,
|
|
"loss": 3.6645684242248535,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.27297297297297296,
|
|
"grad_norm": 20.811230325398483,
|
|
"learning_rate": 9.06906906906907e-06,
|
|
"loss": 3.234799385070801,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.27387387387387385,
|
|
"grad_norm": 11.509767162102126,
|
|
"learning_rate": 9.0990990990991e-06,
|
|
"loss": 2.761482000350952,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.2747747747747748,
|
|
"grad_norm": 14.033212157225655,
|
|
"learning_rate": 9.129129129129129e-06,
|
|
"loss": 2.790647268295288,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.2756756756756757,
|
|
"grad_norm": 7.497673527220171,
|
|
"learning_rate": 9.15915915915916e-06,
|
|
"loss": 2.823512554168701,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.2765765765765766,
|
|
"grad_norm": 8.811148929818787,
|
|
"learning_rate": 9.189189189189191e-06,
|
|
"loss": 2.7691304683685303,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.2774774774774775,
|
|
"grad_norm": 14.432072629353575,
|
|
"learning_rate": 9.21921921921922e-06,
|
|
"loss": 2.9669785499572754,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.27837837837837837,
|
|
"grad_norm": 11.676462619066314,
|
|
"learning_rate": 9.24924924924925e-06,
|
|
"loss": 2.898813247680664,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.27927927927927926,
|
|
"grad_norm": 11.938478478590634,
|
|
"learning_rate": 9.27927927927928e-06,
|
|
"loss": 2.7341861724853516,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.2801801801801802,
|
|
"grad_norm": 7.771546309843594,
|
|
"learning_rate": 9.30930930930931e-06,
|
|
"loss": 3.296088218688965,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.2810810810810811,
|
|
"grad_norm": 15.101892303227435,
|
|
"learning_rate": 9.339339339339341e-06,
|
|
"loss": 2.7351455688476562,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.281981981981982,
|
|
"grad_norm": 15.354103308230707,
|
|
"learning_rate": 9.36936936936937e-06,
|
|
"loss": 2.7281081676483154,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.2828828828828829,
|
|
"grad_norm": 10.32489615719569,
|
|
"learning_rate": 9.3993993993994e-06,
|
|
"loss": 3.0136618614196777,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.28378378378378377,
|
|
"grad_norm": 10.964373335445094,
|
|
"learning_rate": 9.42942942942943e-06,
|
|
"loss": 2.7031970024108887,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.28468468468468466,
|
|
"grad_norm": 13.771411818974284,
|
|
"learning_rate": 9.45945945945946e-06,
|
|
"loss": 3.1682801246643066,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.2855855855855856,
|
|
"grad_norm": 17.41638771477687,
|
|
"learning_rate": 9.489489489489491e-06,
|
|
"loss": 2.969046115875244,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.2864864864864865,
|
|
"grad_norm": 8.761151924702018,
|
|
"learning_rate": 9.51951951951952e-06,
|
|
"loss": 2.702993392944336,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.2873873873873874,
|
|
"grad_norm": 19.846208771744315,
|
|
"learning_rate": 9.54954954954955e-06,
|
|
"loss": 3.4983010292053223,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.2882882882882883,
|
|
"grad_norm": 10.962444668108311,
|
|
"learning_rate": 9.57957957957958e-06,
|
|
"loss": 2.782130718231201,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.2891891891891892,
|
|
"grad_norm": 8.817707648077757,
|
|
"learning_rate": 9.60960960960961e-06,
|
|
"loss": 2.8512470722198486,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.29009009009009007,
|
|
"grad_norm": 25.911846990827954,
|
|
"learning_rate": 9.63963963963964e-06,
|
|
"loss": 2.665433645248413,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.290990990990991,
|
|
"grad_norm": 8.934711465258212,
|
|
"learning_rate": 9.669669669669671e-06,
|
|
"loss": 3.3899407386779785,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.2918918918918919,
|
|
"grad_norm": 19.379009729221924,
|
|
"learning_rate": 9.699699699699701e-06,
|
|
"loss": 2.7415874004364014,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.2927927927927928,
|
|
"grad_norm": 7.27095865249855,
|
|
"learning_rate": 9.729729729729732e-06,
|
|
"loss": 2.793801784515381,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.2936936936936937,
|
|
"grad_norm": 10.159554727073187,
|
|
"learning_rate": 9.75975975975976e-06,
|
|
"loss": 3.259864091873169,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.2945945945945946,
|
|
"grad_norm": 10.694615892664855,
|
|
"learning_rate": 9.78978978978979e-06,
|
|
"loss": 2.6160967350006104,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.2954954954954955,
|
|
"grad_norm": 9.5064054134046,
|
|
"learning_rate": 9.81981981981982e-06,
|
|
"loss": 2.9735093116760254,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.2963963963963964,
|
|
"grad_norm": 9.9763523327945,
|
|
"learning_rate": 9.849849849849851e-06,
|
|
"loss": 2.9225847721099854,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.2972972972972973,
|
|
"grad_norm": 15.841275634630133,
|
|
"learning_rate": 9.879879879879881e-06,
|
|
"loss": 3.438632011413574,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.2981981981981982,
|
|
"grad_norm": 13.39081074070422,
|
|
"learning_rate": 9.90990990990991e-06,
|
|
"loss": 2.254317283630371,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.2990990990990991,
|
|
"grad_norm": 6.984182721517865,
|
|
"learning_rate": 9.93993993993994e-06,
|
|
"loss": 3.190903425216675,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"grad_norm": 9.309658146682933,
|
|
"learning_rate": 9.96996996996997e-06,
|
|
"loss": 3.059541702270508,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.3009009009009009,
|
|
"grad_norm": 14.937607378881662,
|
|
"learning_rate": 1e-05,
|
|
"loss": 2.9330413341522217,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.30180180180180183,
|
|
"grad_norm": 13.51602566312119,
|
|
"learning_rate": 9.999997252952125e-06,
|
|
"loss": 2.683208703994751,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.3027027027027027,
|
|
"grad_norm": 12.800960609903019,
|
|
"learning_rate": 9.999989011811516e-06,
|
|
"loss": 2.7787554264068604,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.3036036036036036,
|
|
"grad_norm": 13.71715277590532,
|
|
"learning_rate": 9.99997527658723e-06,
|
|
"loss": 2.9442594051361084,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.3045045045045045,
|
|
"grad_norm": 8.545557973152443,
|
|
"learning_rate": 9.99995604729436e-06,
|
|
"loss": 2.9743549823760986,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.3054054054054054,
|
|
"grad_norm": 21.6858655013613,
|
|
"learning_rate": 9.999931323954033e-06,
|
|
"loss": 3.2620232105255127,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.3063063063063063,
|
|
"grad_norm": 8.534962992180814,
|
|
"learning_rate": 9.999901106593418e-06,
|
|
"loss": 3.381075620651245,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.30720720720720723,
|
|
"grad_norm": 11.249096787047085,
|
|
"learning_rate": 9.999865395245715e-06,
|
|
"loss": 2.0019235610961914,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.3081081081081081,
|
|
"grad_norm": 28.871822700019855,
|
|
"learning_rate": 9.999824189950168e-06,
|
|
"loss": 3.112780809402466,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.309009009009009,
|
|
"grad_norm": 12.415582532638952,
|
|
"learning_rate": 9.999777490752056e-06,
|
|
"loss": 1.8625688552856445,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.3099099099099099,
|
|
"grad_norm": 11.271771743558347,
|
|
"learning_rate": 9.999725297702687e-06,
|
|
"loss": 3.349148750305176,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.3108108108108108,
|
|
"grad_norm": 23.20299513286698,
|
|
"learning_rate": 9.999667610859416e-06,
|
|
"loss": 3.0870251655578613,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.3117117117117117,
|
|
"grad_norm": 6.300127629414208,
|
|
"learning_rate": 9.999604430285628e-06,
|
|
"loss": 2.8080344200134277,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.31261261261261264,
|
|
"grad_norm": 14.291296171659212,
|
|
"learning_rate": 9.999535756050749e-06,
|
|
"loss": 2.659015655517578,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.31351351351351353,
|
|
"grad_norm": 9.428101003216518,
|
|
"learning_rate": 9.999461588230238e-06,
|
|
"loss": 2.754297971725464,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.3144144144144144,
|
|
"grad_norm": 17.324448052496745,
|
|
"learning_rate": 9.999381926905592e-06,
|
|
"loss": 2.883801221847534,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.3153153153153153,
|
|
"grad_norm": 44.62966117466432,
|
|
"learning_rate": 9.999296772164347e-06,
|
|
"loss": 2.813298225402832,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.3162162162162162,
|
|
"grad_norm": 8.03713677990249,
|
|
"learning_rate": 9.99920612410007e-06,
|
|
"loss": 2.9977688789367676,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.3171171171171171,
|
|
"grad_norm": 12.171635869334452,
|
|
"learning_rate": 9.999109982812368e-06,
|
|
"loss": 2.778214693069458,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.31801801801801804,
|
|
"grad_norm": 8.896688386600355,
|
|
"learning_rate": 9.99900834840688e-06,
|
|
"loss": 2.320204496383667,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.31891891891891894,
|
|
"grad_norm": 31.70762213957997,
|
|
"learning_rate": 9.998901220995288e-06,
|
|
"loss": 2.4865245819091797,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.31981981981981983,
|
|
"grad_norm": 11.559063762294295,
|
|
"learning_rate": 9.998788600695304e-06,
|
|
"loss": 2.8191728591918945,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.3207207207207207,
|
|
"grad_norm": 8.430927558151229,
|
|
"learning_rate": 9.998670487630677e-06,
|
|
"loss": 3.1708900928497314,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.3216216216216216,
|
|
"grad_norm": 9.369043149871663,
|
|
"learning_rate": 9.998546881931193e-06,
|
|
"loss": 3.329425811767578,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.3225225225225225,
|
|
"grad_norm": 18.13050608374191,
|
|
"learning_rate": 9.99841778373267e-06,
|
|
"loss": 2.7754788398742676,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.32342342342342345,
|
|
"grad_norm": 9.164569001044327,
|
|
"learning_rate": 9.998283193176965e-06,
|
|
"loss": 2.8335437774658203,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.32432432432432434,
|
|
"grad_norm": 14.19126210540288,
|
|
"learning_rate": 9.99814311041197e-06,
|
|
"loss": 2.9259860515594482,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.32522522522522523,
|
|
"grad_norm": 14.102586629742616,
|
|
"learning_rate": 9.99799753559161e-06,
|
|
"loss": 2.768043041229248,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.3261261261261261,
|
|
"grad_norm": 13.742512069399446,
|
|
"learning_rate": 9.997846468875842e-06,
|
|
"loss": 2.8981881141662598,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.327027027027027,
|
|
"grad_norm": 9.548610700595049,
|
|
"learning_rate": 9.997689910430665e-06,
|
|
"loss": 3.3076324462890625,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.3279279279279279,
|
|
"grad_norm": 13.513857277910285,
|
|
"learning_rate": 9.997527860428108e-06,
|
|
"loss": 3.220381736755371,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.32882882882882886,
|
|
"grad_norm": 14.10797346437793,
|
|
"learning_rate": 9.997360319046234e-06,
|
|
"loss": 3.3665030002593994,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.32972972972972975,
|
|
"grad_norm": 26.072002812869076,
|
|
"learning_rate": 9.997187286469139e-06,
|
|
"loss": 3.1026194095611572,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.33063063063063064,
|
|
"grad_norm": 18.001123799541062,
|
|
"learning_rate": 9.997008762886957e-06,
|
|
"loss": 3.223040819168091,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.33153153153153153,
|
|
"grad_norm": 22.442381380443265,
|
|
"learning_rate": 9.996824748495852e-06,
|
|
"loss": 3.1964216232299805,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.3324324324324324,
|
|
"grad_norm": 11.751647444389079,
|
|
"learning_rate": 9.996635243498023e-06,
|
|
"loss": 2.9581570625305176,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 8.490228785197973,
|
|
"learning_rate": 9.9964402481017e-06,
|
|
"loss": 2.4892563819885254,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.3342342342342342,
|
|
"grad_norm": 22.120876400061814,
|
|
"learning_rate": 9.996239762521152e-06,
|
|
"loss": 2.7092278003692627,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.33513513513513515,
|
|
"grad_norm": 49.07721434544736,
|
|
"learning_rate": 9.99603378697667e-06,
|
|
"loss": 2.9483840465545654,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.33603603603603605,
|
|
"grad_norm": 10.959565962421676,
|
|
"learning_rate": 9.99582232169459e-06,
|
|
"loss": 2.596356153488159,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.33693693693693694,
|
|
"grad_norm": 10.446306364592443,
|
|
"learning_rate": 9.995605366907271e-06,
|
|
"loss": 2.5975446701049805,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.33783783783783783,
|
|
"grad_norm": 10.807935753392998,
|
|
"learning_rate": 9.995382922853106e-06,
|
|
"loss": 2.6463375091552734,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.3387387387387387,
|
|
"grad_norm": 14.468879138447939,
|
|
"learning_rate": 9.995154989776523e-06,
|
|
"loss": 2.805997371673584,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.3396396396396396,
|
|
"grad_norm": 12.627586198607087,
|
|
"learning_rate": 9.994921567927979e-06,
|
|
"loss": 2.336535930633545,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.34054054054054056,
|
|
"grad_norm": 14.897162080513253,
|
|
"learning_rate": 9.99468265756396e-06,
|
|
"loss": 2.5676686763763428,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.34144144144144145,
|
|
"grad_norm": 29.61898488885318,
|
|
"learning_rate": 9.99443825894699e-06,
|
|
"loss": 3.419049024581909,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.34234234234234234,
|
|
"grad_norm": 10.532926204598702,
|
|
"learning_rate": 9.994188372345615e-06,
|
|
"loss": 2.9493656158447266,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.34324324324324323,
|
|
"grad_norm": 14.91480845391336,
|
|
"learning_rate": 9.993932998034417e-06,
|
|
"loss": 3.0369420051574707,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.3441441441441441,
|
|
"grad_norm": 30.467746807491423,
|
|
"learning_rate": 9.993672136294004e-06,
|
|
"loss": 3.0565989017486572,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.345045045045045,
|
|
"grad_norm": 11.506205536111601,
|
|
"learning_rate": 9.993405787411017e-06,
|
|
"loss": 2.8990392684936523,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.34594594594594597,
|
|
"grad_norm": 19.374470886381832,
|
|
"learning_rate": 9.993133951678126e-06,
|
|
"loss": 2.2662172317504883,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.34684684684684686,
|
|
"grad_norm": 15.790913391354422,
|
|
"learning_rate": 9.99285662939403e-06,
|
|
"loss": 3.020148277282715,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.34774774774774775,
|
|
"grad_norm": 14.007809354711547,
|
|
"learning_rate": 9.992573820863455e-06,
|
|
"loss": 3.2606804370880127,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.34864864864864864,
|
|
"grad_norm": 11.517630489972202,
|
|
"learning_rate": 9.992285526397156e-06,
|
|
"loss": 2.9776859283447266,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.34954954954954953,
|
|
"grad_norm": 14.821112490097487,
|
|
"learning_rate": 9.991991746311916e-06,
|
|
"loss": 2.882371425628662,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.3504504504504504,
|
|
"grad_norm": 8.489287848634739,
|
|
"learning_rate": 9.991692480930548e-06,
|
|
"loss": 3.1440539360046387,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.35135135135135137,
|
|
"grad_norm": 16.962765685771142,
|
|
"learning_rate": 9.99138773058189e-06,
|
|
"loss": 3.225944995880127,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.35225225225225226,
|
|
"grad_norm": 9.881647023432432,
|
|
"learning_rate": 9.991077495600806e-06,
|
|
"loss": 2.9112517833709717,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.35315315315315315,
|
|
"grad_norm": 12.774530733801422,
|
|
"learning_rate": 9.990761776328188e-06,
|
|
"loss": 2.7864723205566406,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.35405405405405405,
|
|
"grad_norm": 11.246619416376717,
|
|
"learning_rate": 9.990440573110959e-06,
|
|
"loss": 2.6985116004943848,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.35495495495495494,
|
|
"grad_norm": 14.885147823780443,
|
|
"learning_rate": 9.990113886302057e-06,
|
|
"loss": 3.236449956893921,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.35585585585585583,
|
|
"grad_norm": 10.90873280846296,
|
|
"learning_rate": 9.989781716260456e-06,
|
|
"loss": 2.6477572917938232,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.3567567567567568,
|
|
"grad_norm": 11.210641614835065,
|
|
"learning_rate": 9.989444063351148e-06,
|
|
"loss": 2.5131397247314453,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.35765765765765767,
|
|
"grad_norm": 7.430746275852679,
|
|
"learning_rate": 9.989100927945155e-06,
|
|
"loss": 2.6379480361938477,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.35855855855855856,
|
|
"grad_norm": 9.221946585921508,
|
|
"learning_rate": 9.988752310419518e-06,
|
|
"loss": 2.8938801288604736,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.35945945945945945,
|
|
"grad_norm": 14.126719322984972,
|
|
"learning_rate": 9.988398211157308e-06,
|
|
"loss": 2.8114590644836426,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.36036036036036034,
|
|
"grad_norm": 20.861023641973205,
|
|
"learning_rate": 9.988038630547613e-06,
|
|
"loss": 2.9054737091064453,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.36126126126126124,
|
|
"grad_norm": 13.948322199293036,
|
|
"learning_rate": 9.98767356898555e-06,
|
|
"loss": 3.147998571395874,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.3621621621621622,
|
|
"grad_norm": 25.70114904388452,
|
|
"learning_rate": 9.987303026872252e-06,
|
|
"loss": 2.8002514839172363,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.3630630630630631,
|
|
"grad_norm": 9.063663563693485,
|
|
"learning_rate": 9.986927004614881e-06,
|
|
"loss": 3.302854299545288,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.36396396396396397,
|
|
"grad_norm": 14.173772835618514,
|
|
"learning_rate": 9.986545502626616e-06,
|
|
"loss": 3.1031603813171387,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.36486486486486486,
|
|
"grad_norm": 17.929471705345165,
|
|
"learning_rate": 9.986158521326659e-06,
|
|
"loss": 3.3072774410247803,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.36576576576576575,
|
|
"grad_norm": 10.510314331851996,
|
|
"learning_rate": 9.985766061140233e-06,
|
|
"loss": 3.089600086212158,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.36666666666666664,
|
|
"grad_norm": 16.161964021767908,
|
|
"learning_rate": 9.98536812249858e-06,
|
|
"loss": 3.111833095550537,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.3675675675675676,
|
|
"grad_norm": 12.299022871195842,
|
|
"learning_rate": 9.98496470583896e-06,
|
|
"loss": 2.920365333557129,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.3684684684684685,
|
|
"grad_norm": 11.919251390163824,
|
|
"learning_rate": 9.984555811604662e-06,
|
|
"loss": 2.449800491333008,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.36936936936936937,
|
|
"grad_norm": 9.051532590643848,
|
|
"learning_rate": 9.984141440244978e-06,
|
|
"loss": 2.639411449432373,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.37027027027027026,
|
|
"grad_norm": 10.664942728836646,
|
|
"learning_rate": 9.983721592215235e-06,
|
|
"loss": 2.888113498687744,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.37117117117117115,
|
|
"grad_norm": 8.057489950991233,
|
|
"learning_rate": 9.983296267976766e-06,
|
|
"loss": 2.8491649627685547,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.37207207207207205,
|
|
"grad_norm": 6.843779683988893,
|
|
"learning_rate": 9.982865467996925e-06,
|
|
"loss": 2.389925003051758,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.372972972972973,
|
|
"grad_norm": 14.58676875082707,
|
|
"learning_rate": 9.982429192749085e-06,
|
|
"loss": 2.9418718814849854,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.3738738738738739,
|
|
"grad_norm": 31.64837532039199,
|
|
"learning_rate": 9.981987442712634e-06,
|
|
"loss": 2.4360032081604004,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.3747747747747748,
|
|
"grad_norm": 11.906904188716325,
|
|
"learning_rate": 9.981540218372973e-06,
|
|
"loss": 2.563695192337036,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.37567567567567567,
|
|
"grad_norm": 11.99370443118536,
|
|
"learning_rate": 9.981087520221522e-06,
|
|
"loss": 2.920173406600952,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.37657657657657656,
|
|
"grad_norm": 8.474036221130298,
|
|
"learning_rate": 9.980629348755714e-06,
|
|
"loss": 2.159977912902832,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.37747747747747745,
|
|
"grad_norm": 8.909386719480173,
|
|
"learning_rate": 9.980165704478999e-06,
|
|
"loss": 2.4905076026916504,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.3783783783783784,
|
|
"grad_norm": 10.712045905675597,
|
|
"learning_rate": 9.979696587900836e-06,
|
|
"loss": 3.200435161590576,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.3792792792792793,
|
|
"grad_norm": 21.35121092434891,
|
|
"learning_rate": 9.9792219995367e-06,
|
|
"loss": 3.889805555343628,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.3801801801801802,
|
|
"grad_norm": 8.791945223524177,
|
|
"learning_rate": 9.978741939908076e-06,
|
|
"loss": 2.8661367893218994,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.3810810810810811,
|
|
"grad_norm": 12.216964876719446,
|
|
"learning_rate": 9.978256409542463e-06,
|
|
"loss": 2.6770262718200684,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.38198198198198197,
|
|
"grad_norm": 10.595853465936564,
|
|
"learning_rate": 9.977765408973374e-06,
|
|
"loss": 2.4664907455444336,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.38288288288288286,
|
|
"grad_norm": 12.598600276222705,
|
|
"learning_rate": 9.977268938740328e-06,
|
|
"loss": 2.2553420066833496,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.3837837837837838,
|
|
"grad_norm": 8.339940756295746,
|
|
"learning_rate": 9.976766999388854e-06,
|
|
"loss": 2.898794174194336,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.3846846846846847,
|
|
"grad_norm": 50.14623659446829,
|
|
"learning_rate": 9.976259591470496e-06,
|
|
"loss": 2.727290391921997,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.3855855855855856,
|
|
"grad_norm": 11.631026669444639,
|
|
"learning_rate": 9.975746715542803e-06,
|
|
"loss": 3.2811455726623535,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.3864864864864865,
|
|
"grad_norm": 11.7664612995829,
|
|
"learning_rate": 9.97522837216933e-06,
|
|
"loss": 2.847942590713501,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.38738738738738737,
|
|
"grad_norm": 14.446151150330858,
|
|
"learning_rate": 9.974704561919645e-06,
|
|
"loss": 3.5835790634155273,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.38828828828828826,
|
|
"grad_norm": 7.375059114002889,
|
|
"learning_rate": 9.97417528536932e-06,
|
|
"loss": 2.6707923412323,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.3891891891891892,
|
|
"grad_norm": 20.214165459908816,
|
|
"learning_rate": 9.973640543099936e-06,
|
|
"loss": 2.7764248847961426,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.3900900900900901,
|
|
"grad_norm": 8.803838609901794,
|
|
"learning_rate": 9.973100335699075e-06,
|
|
"loss": 2.204397201538086,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.390990990990991,
|
|
"grad_norm": 10.738546550577066,
|
|
"learning_rate": 9.97255466376033e-06,
|
|
"loss": 2.7971489429473877,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.3918918918918919,
|
|
"grad_norm": 10.776977738087297,
|
|
"learning_rate": 9.972003527883295e-06,
|
|
"loss": 2.568075656890869,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.3927927927927928,
|
|
"grad_norm": 6.921417468230944,
|
|
"learning_rate": 9.971446928673566e-06,
|
|
"loss": 2.9334769248962402,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.39369369369369367,
|
|
"grad_norm": 12.307022988169349,
|
|
"learning_rate": 9.970884866742748e-06,
|
|
"loss": 2.8420519828796387,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.3945945945945946,
|
|
"grad_norm": 10.81190899170059,
|
|
"learning_rate": 9.970317342708444e-06,
|
|
"loss": 2.915928602218628,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.3954954954954955,
|
|
"grad_norm": 15.502005814041977,
|
|
"learning_rate": 9.969744357194262e-06,
|
|
"loss": 2.37294864654541,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.3963963963963964,
|
|
"grad_norm": 18.933632612278064,
|
|
"learning_rate": 9.969165910829807e-06,
|
|
"loss": 2.4669623374938965,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.3972972972972973,
|
|
"grad_norm": 9.062906848264465,
|
|
"learning_rate": 9.96858200425069e-06,
|
|
"loss": 2.9604015350341797,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.3981981981981982,
|
|
"grad_norm": 11.843111873808983,
|
|
"learning_rate": 9.967992638098517e-06,
|
|
"loss": 2.989811658859253,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.3990990990990991,
|
|
"grad_norm": 7.270834648998459,
|
|
"learning_rate": 9.967397813020892e-06,
|
|
"loss": 2.972478151321411,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"grad_norm": 16.076482807125995,
|
|
"learning_rate": 9.966797529671424e-06,
|
|
"loss": 2.6020092964172363,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.4009009009009009,
|
|
"grad_norm": 11.032887388520376,
|
|
"learning_rate": 9.966191788709716e-06,
|
|
"loss": 2.7426490783691406,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.4018018018018018,
|
|
"grad_norm": 10.299569964339257,
|
|
"learning_rate": 9.965580590801364e-06,
|
|
"loss": 3.3576645851135254,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.4027027027027027,
|
|
"grad_norm": 11.75702071005093,
|
|
"learning_rate": 9.96496393661797e-06,
|
|
"loss": 2.7325360774993896,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.4036036036036036,
|
|
"grad_norm": 11.199429295188697,
|
|
"learning_rate": 9.96434182683712e-06,
|
|
"loss": 3.0111613273620605,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.4045045045045045,
|
|
"grad_norm": 12.438250339217415,
|
|
"learning_rate": 9.963714262142402e-06,
|
|
"loss": 2.617116689682007,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.40540540540540543,
|
|
"grad_norm": 6.967396475542938,
|
|
"learning_rate": 9.963081243223396e-06,
|
|
"loss": 2.620596408843994,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.4063063063063063,
|
|
"grad_norm": 11.90892747992436,
|
|
"learning_rate": 9.962442770775675e-06,
|
|
"loss": 2.8448939323425293,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.4072072072072072,
|
|
"grad_norm": 8.872921289600368,
|
|
"learning_rate": 9.961798845500808e-06,
|
|
"loss": 2.4782495498657227,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.4081081081081081,
|
|
"grad_norm": 11.848919914627832,
|
|
"learning_rate": 9.961149468106346e-06,
|
|
"loss": 2.8514678478240967,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.409009009009009,
|
|
"grad_norm": 14.842331093483242,
|
|
"learning_rate": 9.960494639305843e-06,
|
|
"loss": 3.000009775161743,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.4099099099099099,
|
|
"grad_norm": 15.653102268407324,
|
|
"learning_rate": 9.959834359818836e-06,
|
|
"loss": 3.2245235443115234,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.41081081081081083,
|
|
"grad_norm": 8.003740413363506,
|
|
"learning_rate": 9.95916863037085e-06,
|
|
"loss": 2.044393539428711,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.4117117117117117,
|
|
"grad_norm": 15.394222737473838,
|
|
"learning_rate": 9.958497451693406e-06,
|
|
"loss": 3.533689022064209,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.4126126126126126,
|
|
"grad_norm": 9.791143790822147,
|
|
"learning_rate": 9.957820824524003e-06,
|
|
"loss": 2.6153087615966797,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.4135135135135135,
|
|
"grad_norm": 9.589947001463136,
|
|
"learning_rate": 9.957138749606134e-06,
|
|
"loss": 2.8632655143737793,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.4144144144144144,
|
|
"grad_norm": 13.966272316065108,
|
|
"learning_rate": 9.956451227689278e-06,
|
|
"loss": 3.260765790939331,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.4153153153153153,
|
|
"grad_norm": 8.677815370787325,
|
|
"learning_rate": 9.955758259528895e-06,
|
|
"loss": 2.7139787673950195,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.41621621621621624,
|
|
"grad_norm": 7.5992044736880695,
|
|
"learning_rate": 9.955059845886432e-06,
|
|
"loss": 2.8179216384887695,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.41711711711711713,
|
|
"grad_norm": 20.189192906766145,
|
|
"learning_rate": 9.954355987529319e-06,
|
|
"loss": 1.8593086004257202,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.418018018018018,
|
|
"grad_norm": 9.294365372544148,
|
|
"learning_rate": 9.95364668523097e-06,
|
|
"loss": 3.129530668258667,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.4189189189189189,
|
|
"grad_norm": 11.10229210239617,
|
|
"learning_rate": 9.95293193977078e-06,
|
|
"loss": 2.4548749923706055,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.4198198198198198,
|
|
"grad_norm": 12.303290597408346,
|
|
"learning_rate": 9.952211751934125e-06,
|
|
"loss": 2.833526611328125,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.4207207207207207,
|
|
"grad_norm": 18.39720072416737,
|
|
"learning_rate": 9.951486122512358e-06,
|
|
"loss": 3.272202253341675,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.42162162162162165,
|
|
"grad_norm": 10.962511020348291,
|
|
"learning_rate": 9.950755052302819e-06,
|
|
"loss": 2.5285682678222656,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.42252252252252254,
|
|
"grad_norm": 11.343724042754738,
|
|
"learning_rate": 9.950018542108818e-06,
|
|
"loss": 3.135453939437866,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.42342342342342343,
|
|
"grad_norm": 7.162285143051499,
|
|
"learning_rate": 9.949276592739652e-06,
|
|
"loss": 2.7263050079345703,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.4243243243243243,
|
|
"grad_norm": 8.271823810331844,
|
|
"learning_rate": 9.948529205010583e-06,
|
|
"loss": 2.757145404815674,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.4252252252252252,
|
|
"grad_norm": 18.888485463079643,
|
|
"learning_rate": 9.94777637974286e-06,
|
|
"loss": 3.6016228199005127,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.4261261261261261,
|
|
"grad_norm": 13.705038817017533,
|
|
"learning_rate": 9.947018117763698e-06,
|
|
"loss": 3.1560139656066895,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.42702702702702705,
|
|
"grad_norm": 8.131771829742979,
|
|
"learning_rate": 9.946254419906293e-06,
|
|
"loss": 2.994487762451172,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.42792792792792794,
|
|
"grad_norm": 8.192010903051235,
|
|
"learning_rate": 9.945485287009808e-06,
|
|
"loss": 2.746253728866577,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.42882882882882883,
|
|
"grad_norm": 21.46422155759696,
|
|
"learning_rate": 9.944710719919381e-06,
|
|
"loss": 3.2966389656066895,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.4297297297297297,
|
|
"grad_norm": 12.75970618129319,
|
|
"learning_rate": 9.943930719486123e-06,
|
|
"loss": 2.8765969276428223,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.4306306306306306,
|
|
"grad_norm": 11.278838807173917,
|
|
"learning_rate": 9.943145286567114e-06,
|
|
"loss": 2.933793067932129,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.4315315315315315,
|
|
"grad_norm": 17.89762340765894,
|
|
"learning_rate": 9.942354422025402e-06,
|
|
"loss": 2.363278865814209,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.43243243243243246,
|
|
"grad_norm": 13.648964264768017,
|
|
"learning_rate": 9.94155812673e-06,
|
|
"loss": 2.65312123298645,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.43333333333333335,
|
|
"grad_norm": 20.414504599109517,
|
|
"learning_rate": 9.940756401555899e-06,
|
|
"loss": 2.994137763977051,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.43423423423423424,
|
|
"grad_norm": 8.091874246038742,
|
|
"learning_rate": 9.939949247384046e-06,
|
|
"loss": 2.7260451316833496,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.43513513513513513,
|
|
"grad_norm": 11.05681404847883,
|
|
"learning_rate": 9.939136665101359e-06,
|
|
"loss": 3.468167781829834,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.436036036036036,
|
|
"grad_norm": 8.502576465292318,
|
|
"learning_rate": 9.938318655600716e-06,
|
|
"loss": 2.5464885234832764,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.4369369369369369,
|
|
"grad_norm": 29.646600363505314,
|
|
"learning_rate": 9.937495219780968e-06,
|
|
"loss": 3.2312614917755127,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.43783783783783786,
|
|
"grad_norm": 14.764378214629252,
|
|
"learning_rate": 9.936666358546915e-06,
|
|
"loss": 2.948831796646118,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.43873873873873875,
|
|
"grad_norm": 10.897389925985015,
|
|
"learning_rate": 9.935832072809329e-06,
|
|
"loss": 2.6337313652038574,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.43963963963963965,
|
|
"grad_norm": 29.19644238276464,
|
|
"learning_rate": 9.93499236348494e-06,
|
|
"loss": 3.6373133659362793,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.44054054054054054,
|
|
"grad_norm": 14.453672964670863,
|
|
"learning_rate": 9.934147231496434e-06,
|
|
"loss": 2.855248212814331,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.44144144144144143,
|
|
"grad_norm": 27.842608083205935,
|
|
"learning_rate": 9.933296677772462e-06,
|
|
"loss": 3.682950019836426,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.4423423423423423,
|
|
"grad_norm": 11.201091380440992,
|
|
"learning_rate": 9.932440703247623e-06,
|
|
"loss": 2.6520917415618896,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.44324324324324327,
|
|
"grad_norm": 8.675835563073338,
|
|
"learning_rate": 9.931579308862484e-06,
|
|
"loss": 2.5899949073791504,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.44414414414414416,
|
|
"grad_norm": 36.134451593989255,
|
|
"learning_rate": 9.930712495563559e-06,
|
|
"loss": 2.4611892700195312,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.44504504504504505,
|
|
"grad_norm": 17.104554033233082,
|
|
"learning_rate": 9.929840264303318e-06,
|
|
"loss": 2.9234981536865234,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.44594594594594594,
|
|
"grad_norm": 11.425194577937127,
|
|
"learning_rate": 9.928962616040187e-06,
|
|
"loss": 3.180088996887207,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.44684684684684683,
|
|
"grad_norm": 14.72422917409879,
|
|
"learning_rate": 9.928079551738542e-06,
|
|
"loss": 3.261711835861206,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.4477477477477477,
|
|
"grad_norm": 18.04668472832457,
|
|
"learning_rate": 9.927191072368714e-06,
|
|
"loss": 2.876741886138916,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.4486486486486487,
|
|
"grad_norm": 10.242062741938264,
|
|
"learning_rate": 9.926297178906976e-06,
|
|
"loss": 3.0803256034851074,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.44954954954954957,
|
|
"grad_norm": 9.858260384743199,
|
|
"learning_rate": 9.925397872335558e-06,
|
|
"loss": 1.8135777711868286,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.45045045045045046,
|
|
"grad_norm": 16.353432239840764,
|
|
"learning_rate": 9.924493153642636e-06,
|
|
"loss": 2.367913246154785,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.45135135135135135,
|
|
"grad_norm": 12.678071821463053,
|
|
"learning_rate": 9.92358302382233e-06,
|
|
"loss": 3.121692180633545,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.45225225225225224,
|
|
"grad_norm": 11.986141950956682,
|
|
"learning_rate": 9.92266748387471e-06,
|
|
"loss": 2.9279327392578125,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.45315315315315313,
|
|
"grad_norm": 17.26673357479762,
|
|
"learning_rate": 9.921746534805789e-06,
|
|
"loss": 2.9483375549316406,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.4540540540540541,
|
|
"grad_norm": 12.8745885638904,
|
|
"learning_rate": 9.920820177627522e-06,
|
|
"loss": 2.940941095352173,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.45495495495495497,
|
|
"grad_norm": 13.512081445743542,
|
|
"learning_rate": 9.919888413357808e-06,
|
|
"loss": 2.897225856781006,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.45585585585585586,
|
|
"grad_norm": 11.575210720999843,
|
|
"learning_rate": 9.918951243020489e-06,
|
|
"loss": 2.8494417667388916,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.45675675675675675,
|
|
"grad_norm": 27.703000681652096,
|
|
"learning_rate": 9.918008667645344e-06,
|
|
"loss": 2.347216844558716,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.45765765765765765,
|
|
"grad_norm": 8.180610209870624,
|
|
"learning_rate": 9.917060688268094e-06,
|
|
"loss": 3.1521518230438232,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.45855855855855854,
|
|
"grad_norm": 13.417460116546783,
|
|
"learning_rate": 9.916107305930397e-06,
|
|
"loss": 2.6805672645568848,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.4594594594594595,
|
|
"grad_norm": 8.35493484028015,
|
|
"learning_rate": 9.915148521679848e-06,
|
|
"loss": 2.8929569721221924,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.4603603603603604,
|
|
"grad_norm": 18.11039317600781,
|
|
"learning_rate": 9.914184336569973e-06,
|
|
"loss": 1.871511459350586,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.46126126126126127,
|
|
"grad_norm": 16.339316213391253,
|
|
"learning_rate": 9.913214751660244e-06,
|
|
"loss": 3.3061211109161377,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.46216216216216216,
|
|
"grad_norm": 17.68052245730394,
|
|
"learning_rate": 9.912239768016057e-06,
|
|
"loss": 2.3791470527648926,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.46306306306306305,
|
|
"grad_norm": 11.245574736173385,
|
|
"learning_rate": 9.911259386708742e-06,
|
|
"loss": 2.556948184967041,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.46396396396396394,
|
|
"grad_norm": 8.915739531623856,
|
|
"learning_rate": 9.91027360881556e-06,
|
|
"loss": 2.971670150756836,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.4648648648648649,
|
|
"grad_norm": 5.929578462713964,
|
|
"learning_rate": 9.909282435419703e-06,
|
|
"loss": 2.971108913421631,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.4657657657657658,
|
|
"grad_norm": 10.688852817948094,
|
|
"learning_rate": 9.908285867610292e-06,
|
|
"loss": 2.6687498092651367,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.4666666666666667,
|
|
"grad_norm": 18.48653768523157,
|
|
"learning_rate": 9.907283906482374e-06,
|
|
"loss": 3.280163526535034,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.46756756756756757,
|
|
"grad_norm": 20.344949506929094,
|
|
"learning_rate": 9.906276553136924e-06,
|
|
"loss": 2.7002642154693604,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.46846846846846846,
|
|
"grad_norm": 13.383833626515251,
|
|
"learning_rate": 9.90526380868084e-06,
|
|
"loss": 2.652203321456909,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.46936936936936935,
|
|
"grad_norm": 22.95836676576381,
|
|
"learning_rate": 9.904245674226948e-06,
|
|
"loss": 3.252615451812744,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.4702702702702703,
|
|
"grad_norm": 9.730269158871646,
|
|
"learning_rate": 9.90322215089399e-06,
|
|
"loss": 2.8116307258605957,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.4711711711711712,
|
|
"grad_norm": 10.828805525393937,
|
|
"learning_rate": 9.902193239806634e-06,
|
|
"loss": 2.7895991802215576,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.4720720720720721,
|
|
"grad_norm": 12.86168869720143,
|
|
"learning_rate": 9.901158942095468e-06,
|
|
"loss": 2.8977463245391846,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.47297297297297297,
|
|
"grad_norm": 11.771464906214417,
|
|
"learning_rate": 9.900119258896998e-06,
|
|
"loss": 2.2192132472991943,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.47387387387387386,
|
|
"grad_norm": 11.30141430142043,
|
|
"learning_rate": 9.899074191353649e-06,
|
|
"loss": 3.1374659538269043,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.47477477477477475,
|
|
"grad_norm": 13.897590331761034,
|
|
"learning_rate": 9.898023740613758e-06,
|
|
"loss": 3.357194423675537,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.4756756756756757,
|
|
"grad_norm": 7.449387794038159,
|
|
"learning_rate": 9.896967907831581e-06,
|
|
"loss": 2.854480266571045,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.4765765765765766,
|
|
"grad_norm": 8.878874765157995,
|
|
"learning_rate": 9.895906694167291e-06,
|
|
"loss": 2.738018035888672,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.4774774774774775,
|
|
"grad_norm": 38.00148277537286,
|
|
"learning_rate": 9.894840100786966e-06,
|
|
"loss": 2.8189690113067627,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.4783783783783784,
|
|
"grad_norm": 17.3491501566159,
|
|
"learning_rate": 9.893768128862601e-06,
|
|
"loss": 2.9495034217834473,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.47927927927927927,
|
|
"grad_norm": 16.50728344098491,
|
|
"learning_rate": 9.892690779572098e-06,
|
|
"loss": 2.541929244995117,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.48018018018018016,
|
|
"grad_norm": 13.275241172557463,
|
|
"learning_rate": 9.891608054099271e-06,
|
|
"loss": 2.7305493354797363,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.4810810810810811,
|
|
"grad_norm": 10.577891946131855,
|
|
"learning_rate": 9.89051995363384e-06,
|
|
"loss": 3.0362510681152344,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.481981981981982,
|
|
"grad_norm": 9.552120076240959,
|
|
"learning_rate": 9.889426479371427e-06,
|
|
"loss": 3.040802001953125,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.4828828828828829,
|
|
"grad_norm": 23.697875741603628,
|
|
"learning_rate": 9.888327632513563e-06,
|
|
"loss": 3.7922098636627197,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.4837837837837838,
|
|
"grad_norm": 8.507680677535628,
|
|
"learning_rate": 9.887223414267686e-06,
|
|
"loss": 2.7465715408325195,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.4846846846846847,
|
|
"grad_norm": 15.26647905177071,
|
|
"learning_rate": 9.88611382584713e-06,
|
|
"loss": 2.9226627349853516,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.48558558558558557,
|
|
"grad_norm": 16.897882002125424,
|
|
"learning_rate": 9.88499886847113e-06,
|
|
"loss": 2.804058074951172,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.4864864864864865,
|
|
"grad_norm": 7.317461306084804,
|
|
"learning_rate": 9.883878543364824e-06,
|
|
"loss": 2.7797727584838867,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.4873873873873874,
|
|
"grad_norm": 14.644555104597616,
|
|
"learning_rate": 9.882752851759247e-06,
|
|
"loss": 3.074551820755005,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.4882882882882883,
|
|
"grad_norm": 10.955052355379598,
|
|
"learning_rate": 9.881621794891332e-06,
|
|
"loss": 3.0798258781433105,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.4891891891891892,
|
|
"grad_norm": 7.009829826155904,
|
|
"learning_rate": 9.880485374003902e-06,
|
|
"loss": 2.5292856693267822,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.4900900900900901,
|
|
"grad_norm": 8.328587975820678,
|
|
"learning_rate": 9.879343590345682e-06,
|
|
"loss": 2.8059566020965576,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.49099099099099097,
|
|
"grad_norm": 8.910851525333204,
|
|
"learning_rate": 9.878196445171281e-06,
|
|
"loss": 2.900643825531006,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.4918918918918919,
|
|
"grad_norm": 8.920847529814719,
|
|
"learning_rate": 9.877043939741211e-06,
|
|
"loss": 2.661362648010254,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.4927927927927928,
|
|
"grad_norm": 19.722150751686048,
|
|
"learning_rate": 9.87588607532186e-06,
|
|
"loss": 2.3552534580230713,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.4936936936936937,
|
|
"grad_norm": 11.697970053360999,
|
|
"learning_rate": 9.874722853185519e-06,
|
|
"loss": 2.4047014713287354,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.4945945945945946,
|
|
"grad_norm": 22.169788506007976,
|
|
"learning_rate": 9.87355427461035e-06,
|
|
"loss": 2.575777530670166,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.4954954954954955,
|
|
"grad_norm": 12.80578817852263,
|
|
"learning_rate": 9.872380340880416e-06,
|
|
"loss": 2.420854330062866,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.4963963963963964,
|
|
"grad_norm": 11.369342456286857,
|
|
"learning_rate": 9.871201053285658e-06,
|
|
"loss": 2.5156991481781006,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.4972972972972973,
|
|
"grad_norm": 12.316050038110454,
|
|
"learning_rate": 9.870016413121894e-06,
|
|
"loss": 3.14607310295105,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.4981981981981982,
|
|
"grad_norm": 16.239571696929776,
|
|
"learning_rate": 9.868826421690835e-06,
|
|
"loss": 3.397555351257324,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.4990990990990991,
|
|
"grad_norm": 8.854516430326528,
|
|
"learning_rate": 9.867631080300063e-06,
|
|
"loss": 2.690509796142578,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 9.376344141476533,
|
|
"learning_rate": 9.866430390263044e-06,
|
|
"loss": 2.900862216949463,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.5009009009009009,
|
|
"grad_norm": 9.423648915581877,
|
|
"learning_rate": 9.86522435289912e-06,
|
|
"loss": 2.855050563812256,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.5018018018018018,
|
|
"grad_norm": 8.101093503468741,
|
|
"learning_rate": 9.864012969533505e-06,
|
|
"loss": 3.2957873344421387,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.5027027027027027,
|
|
"grad_norm": 10.100404830767033,
|
|
"learning_rate": 9.862796241497291e-06,
|
|
"loss": 2.6753034591674805,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.5036036036036036,
|
|
"grad_norm": 6.802573845910635,
|
|
"learning_rate": 9.861574170127446e-06,
|
|
"loss": 2.527581214904785,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.5045045045045045,
|
|
"grad_norm": 7.109905831565619,
|
|
"learning_rate": 9.8603467567668e-06,
|
|
"loss": 2.5860238075256348,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.5054054054054054,
|
|
"grad_norm": 77.48281332985208,
|
|
"learning_rate": 9.85911400276406e-06,
|
|
"loss": 2.9344871044158936,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.5063063063063064,
|
|
"grad_norm": 17.204696265849268,
|
|
"learning_rate": 9.857875909473801e-06,
|
|
"loss": 2.4911441802978516,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.5072072072072072,
|
|
"grad_norm": 33.333109411986456,
|
|
"learning_rate": 9.856632478256465e-06,
|
|
"loss": 3.2600035667419434,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.5081081081081081,
|
|
"grad_norm": 18.927800261993358,
|
|
"learning_rate": 9.855383710478353e-06,
|
|
"loss": 3.0671844482421875,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.509009009009009,
|
|
"grad_norm": 11.659851933898024,
|
|
"learning_rate": 9.85412960751164e-06,
|
|
"loss": 2.565697193145752,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.5099099099099099,
|
|
"grad_norm": 17.64516165490412,
|
|
"learning_rate": 9.852870170734354e-06,
|
|
"loss": 3.133984088897705,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.5108108108108108,
|
|
"grad_norm": 12.590469859202411,
|
|
"learning_rate": 9.851605401530391e-06,
|
|
"loss": 2.9112048149108887,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.5117117117117117,
|
|
"grad_norm": 9.161002149446931,
|
|
"learning_rate": 9.850335301289504e-06,
|
|
"loss": 2.987330913543701,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.5126126126126126,
|
|
"grad_norm": 13.323533234558495,
|
|
"learning_rate": 9.849059871407303e-06,
|
|
"loss": 2.5533223152160645,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.5135135135135135,
|
|
"grad_norm": 6.344853206795556,
|
|
"learning_rate": 9.847779113285254e-06,
|
|
"loss": 2.8108911514282227,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.5144144144144144,
|
|
"grad_norm": 16.14781390884599,
|
|
"learning_rate": 9.846493028330678e-06,
|
|
"loss": 3.010362148284912,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.5153153153153153,
|
|
"grad_norm": 13.724464009158758,
|
|
"learning_rate": 9.845201617956752e-06,
|
|
"loss": 2.64815354347229,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.5162162162162162,
|
|
"grad_norm": 13.755497363882526,
|
|
"learning_rate": 9.8439048835825e-06,
|
|
"loss": 2.952627658843994,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.5171171171171172,
|
|
"grad_norm": 12.747606100104184,
|
|
"learning_rate": 9.842602826632799e-06,
|
|
"loss": 2.591431140899658,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.5180180180180181,
|
|
"grad_norm": 21.745555288655588,
|
|
"learning_rate": 9.841295448538377e-06,
|
|
"loss": 3.343477487564087,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.518918918918919,
|
|
"grad_norm": 9.598784757276308,
|
|
"learning_rate": 9.839982750735804e-06,
|
|
"loss": 2.577162742614746,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.5198198198198198,
|
|
"grad_norm": 9.254856439318155,
|
|
"learning_rate": 9.838664734667496e-06,
|
|
"loss": 2.9017882347106934,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.5207207207207207,
|
|
"grad_norm": 8.530699856357325,
|
|
"learning_rate": 9.837341401781715e-06,
|
|
"loss": 2.5971288681030273,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.5216216216216216,
|
|
"grad_norm": 6.5891797705212625,
|
|
"learning_rate": 9.836012753532566e-06,
|
|
"loss": 2.6294662952423096,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.5225225225225225,
|
|
"grad_norm": 12.609165447431351,
|
|
"learning_rate": 9.834678791379992e-06,
|
|
"loss": 2.7069082260131836,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.5234234234234234,
|
|
"grad_norm": 12.163112172436126,
|
|
"learning_rate": 9.833339516789778e-06,
|
|
"loss": 2.796908378601074,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.5243243243243243,
|
|
"grad_norm": 12.871569991902243,
|
|
"learning_rate": 9.831994931233542e-06,
|
|
"loss": 3.122313976287842,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.5252252252252252,
|
|
"grad_norm": 14.596195641635775,
|
|
"learning_rate": 9.83064503618874e-06,
|
|
"loss": 3.2716140747070312,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.5261261261261261,
|
|
"grad_norm": 12.543798988950169,
|
|
"learning_rate": 9.829289833138667e-06,
|
|
"loss": 3.1897201538085938,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.527027027027027,
|
|
"grad_norm": 13.507227668169904,
|
|
"learning_rate": 9.827929323572441e-06,
|
|
"loss": 4.371722221374512,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.527927927927928,
|
|
"grad_norm": 6.877900168304943,
|
|
"learning_rate": 9.826563508985017e-06,
|
|
"loss": 2.0487256050109863,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.5288288288288289,
|
|
"grad_norm": 19.668259971076097,
|
|
"learning_rate": 9.82519239087718e-06,
|
|
"loss": 3.2878916263580322,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.5297297297297298,
|
|
"grad_norm": 11.170925627790286,
|
|
"learning_rate": 9.823815970755542e-06,
|
|
"loss": 3.1439385414123535,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.5306306306306307,
|
|
"grad_norm": 10.390653138517692,
|
|
"learning_rate": 9.822434250132535e-06,
|
|
"loss": 2.6878585815429688,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.5315315315315315,
|
|
"grad_norm": 7.073174266005421,
|
|
"learning_rate": 9.821047230526425e-06,
|
|
"loss": 2.1680750846862793,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.5324324324324324,
|
|
"grad_norm": 9.093168029084392,
|
|
"learning_rate": 9.819654913461292e-06,
|
|
"loss": 2.9089152812957764,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.5333333333333333,
|
|
"grad_norm": 12.850144871048814,
|
|
"learning_rate": 9.818257300467045e-06,
|
|
"loss": 2.6065568923950195,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.5342342342342342,
|
|
"grad_norm": 8.32330763975957,
|
|
"learning_rate": 9.816854393079402e-06,
|
|
"loss": 3.0583200454711914,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.5351351351351351,
|
|
"grad_norm": 12.069825241338958,
|
|
"learning_rate": 9.815446192839908e-06,
|
|
"loss": 3.030487060546875,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.536036036036036,
|
|
"grad_norm": 10.949716889539568,
|
|
"learning_rate": 9.814032701295923e-06,
|
|
"loss": 2.8246138095855713,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.5369369369369369,
|
|
"grad_norm": 26.699912146577983,
|
|
"learning_rate": 9.812613920000613e-06,
|
|
"loss": 3.470756769180298,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.5378378378378378,
|
|
"grad_norm": 16.392118624059524,
|
|
"learning_rate": 9.811189850512965e-06,
|
|
"loss": 3.1673455238342285,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.5387387387387388,
|
|
"grad_norm": 9.259366588758454,
|
|
"learning_rate": 9.809760494397776e-06,
|
|
"loss": 2.5202136039733887,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.5396396396396397,
|
|
"grad_norm": 9.007590254502421,
|
|
"learning_rate": 9.808325853225645e-06,
|
|
"loss": 3.080294132232666,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.5405405405405406,
|
|
"grad_norm": 13.32109150521752,
|
|
"learning_rate": 9.806885928572984e-06,
|
|
"loss": 2.2382287979125977,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.5414414414414415,
|
|
"grad_norm": 7.902638433971261,
|
|
"learning_rate": 9.805440722022015e-06,
|
|
"loss": 2.7696096897125244,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.5423423423423424,
|
|
"grad_norm": 9.29351832414914,
|
|
"learning_rate": 9.803990235160753e-06,
|
|
"loss": 3.026676654815674,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.5432432432432432,
|
|
"grad_norm": 15.78925805226816,
|
|
"learning_rate": 9.802534469583022e-06,
|
|
"loss": 3.3322205543518066,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.5441441441441441,
|
|
"grad_norm": 17.1472967796494,
|
|
"learning_rate": 9.801073426888447e-06,
|
|
"loss": 2.838545799255371,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.545045045045045,
|
|
"grad_norm": 13.725380880291375,
|
|
"learning_rate": 9.79960710868245e-06,
|
|
"loss": 2.4575085639953613,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.5459459459459459,
|
|
"grad_norm": 11.343607830029514,
|
|
"learning_rate": 9.798135516576246e-06,
|
|
"loss": 2.8615031242370605,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.5468468468468468,
|
|
"grad_norm": 8.149097053351092,
|
|
"learning_rate": 9.796658652186852e-06,
|
|
"loss": 2.528751850128174,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.5477477477477477,
|
|
"grad_norm": 10.722467414146747,
|
|
"learning_rate": 9.795176517137072e-06,
|
|
"loss": 2.9100944995880127,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.5486486486486486,
|
|
"grad_norm": 13.849500897141981,
|
|
"learning_rate": 9.793689113055507e-06,
|
|
"loss": 3.3990378379821777,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.5495495495495496,
|
|
"grad_norm": 44.885020827823546,
|
|
"learning_rate": 9.792196441576544e-06,
|
|
"loss": 2.527492046356201,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.5504504504504505,
|
|
"grad_norm": 9.276202543493147,
|
|
"learning_rate": 9.79069850434036e-06,
|
|
"loss": 2.643711805343628,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.5513513513513514,
|
|
"grad_norm": 9.985398691316842,
|
|
"learning_rate": 9.789195302992914e-06,
|
|
"loss": 2.638700485229492,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.5522522522522523,
|
|
"grad_norm": 8.124092970725826,
|
|
"learning_rate": 9.787686839185954e-06,
|
|
"loss": 3.1427161693573,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.5531531531531532,
|
|
"grad_norm": 11.9797011769303,
|
|
"learning_rate": 9.786173114577012e-06,
|
|
"loss": 1.6746983528137207,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.5540540540540541,
|
|
"grad_norm": 10.760629779017707,
|
|
"learning_rate": 9.784654130829394e-06,
|
|
"loss": 2.682283639907837,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.554954954954955,
|
|
"grad_norm": 8.134445546653057,
|
|
"learning_rate": 9.78312988961219e-06,
|
|
"loss": 2.845862865447998,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.5558558558558558,
|
|
"grad_norm": 13.050361292942267,
|
|
"learning_rate": 9.781600392600264e-06,
|
|
"loss": 3.153568744659424,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.5567567567567567,
|
|
"grad_norm": 31.881308227440655,
|
|
"learning_rate": 9.780065641474257e-06,
|
|
"loss": 2.7752227783203125,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.5576576576576576,
|
|
"grad_norm": 17.67219029389856,
|
|
"learning_rate": 9.778525637920587e-06,
|
|
"loss": 2.7249202728271484,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.5585585585585585,
|
|
"grad_norm": 12.22272888684173,
|
|
"learning_rate": 9.776980383631432e-06,
|
|
"loss": 2.489539623260498,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.5594594594594594,
|
|
"grad_norm": 8.729293975204598,
|
|
"learning_rate": 9.775429880304753e-06,
|
|
"loss": 2.6470212936401367,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.5603603603603604,
|
|
"grad_norm": 12.057004192073059,
|
|
"learning_rate": 9.773874129644268e-06,
|
|
"loss": 2.4962947368621826,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.5612612612612613,
|
|
"grad_norm": 15.658988410504156,
|
|
"learning_rate": 9.77231313335947e-06,
|
|
"loss": 2.730250120162964,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.5621621621621622,
|
|
"grad_norm": 9.968299463426975,
|
|
"learning_rate": 9.77074689316561e-06,
|
|
"loss": 2.25225830078125,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.5630630630630631,
|
|
"grad_norm": 9.657069192556015,
|
|
"learning_rate": 9.769175410783703e-06,
|
|
"loss": 3.269899845123291,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.563963963963964,
|
|
"grad_norm": 19.251856009582813,
|
|
"learning_rate": 9.767598687940523e-06,
|
|
"loss": 2.8722891807556152,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.5648648648648649,
|
|
"grad_norm": 14.428545606993785,
|
|
"learning_rate": 9.766016726368604e-06,
|
|
"loss": 3.03021502494812,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.5657657657657658,
|
|
"grad_norm": 14.16030624645081,
|
|
"learning_rate": 9.764429527806233e-06,
|
|
"loss": 2.89723539352417,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.5666666666666667,
|
|
"grad_norm": 8.32130764827128,
|
|
"learning_rate": 9.76283709399746e-06,
|
|
"loss": 2.671215772628784,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.5675675675675675,
|
|
"grad_norm": 11.716176911854502,
|
|
"learning_rate": 9.761239426692077e-06,
|
|
"loss": 2.804887056350708,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.5684684684684684,
|
|
"grad_norm": 11.769213920438558,
|
|
"learning_rate": 9.759636527645633e-06,
|
|
"loss": 3.2259230613708496,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.5693693693693693,
|
|
"grad_norm": 12.718068774896336,
|
|
"learning_rate": 9.758028398619423e-06,
|
|
"loss": 2.9922332763671875,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.5702702702702702,
|
|
"grad_norm": 24.023232094252386,
|
|
"learning_rate": 9.756415041380493e-06,
|
|
"loss": 2.8729424476623535,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.5711711711711712,
|
|
"grad_norm": 12.166461400696965,
|
|
"learning_rate": 9.754796457701628e-06,
|
|
"loss": 2.605339527130127,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.5720720720720721,
|
|
"grad_norm": 10.402685081844133,
|
|
"learning_rate": 9.753172649361358e-06,
|
|
"loss": 2.934504270553589,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.572972972972973,
|
|
"grad_norm": 10.272109745887343,
|
|
"learning_rate": 9.751543618143958e-06,
|
|
"loss": 2.570463180541992,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.5738738738738739,
|
|
"grad_norm": 13.711719588706229,
|
|
"learning_rate": 9.749909365839436e-06,
|
|
"loss": 3.169706344604492,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.5747747747747748,
|
|
"grad_norm": 33.59789891103926,
|
|
"learning_rate": 9.748269894243541e-06,
|
|
"loss": 2.3556222915649414,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.5756756756756757,
|
|
"grad_norm": 7.975103574740825,
|
|
"learning_rate": 9.746625205157755e-06,
|
|
"loss": 1.1413840055465698,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.5765765765765766,
|
|
"grad_norm": 22.735513423510085,
|
|
"learning_rate": 9.744975300389295e-06,
|
|
"loss": 2.070692300796509,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.5774774774774775,
|
|
"grad_norm": 16.36291204510473,
|
|
"learning_rate": 9.743320181751105e-06,
|
|
"loss": 3.360299825668335,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.5783783783783784,
|
|
"grad_norm": 13.992378484017673,
|
|
"learning_rate": 9.741659851061866e-06,
|
|
"loss": 2.6247687339782715,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.5792792792792792,
|
|
"grad_norm": 10.452818390696052,
|
|
"learning_rate": 9.739994310145977e-06,
|
|
"loss": 2.7434468269348145,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.5801801801801801,
|
|
"grad_norm": 14.842864249759991,
|
|
"learning_rate": 9.73832356083357e-06,
|
|
"loss": 2.6327593326568604,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.581081081081081,
|
|
"grad_norm": 11.334382859755081,
|
|
"learning_rate": 9.736647604960492e-06,
|
|
"loss": 2.6178431510925293,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.581981981981982,
|
|
"grad_norm": 10.990080398430045,
|
|
"learning_rate": 9.734966444368317e-06,
|
|
"loss": 2.822669744491577,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.5828828828828829,
|
|
"grad_norm": 11.880254008410231,
|
|
"learning_rate": 9.733280080904337e-06,
|
|
"loss": 2.7857983112335205,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.5837837837837838,
|
|
"grad_norm": 11.920915789197798,
|
|
"learning_rate": 9.731588516421562e-06,
|
|
"loss": 3.316427230834961,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.5846846846846847,
|
|
"grad_norm": 14.414450806441018,
|
|
"learning_rate": 9.729891752778712e-06,
|
|
"loss": 3.260469436645508,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.5855855855855856,
|
|
"grad_norm": 11.80034688226506,
|
|
"learning_rate": 9.728189791840227e-06,
|
|
"loss": 2.9232394695281982,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.5864864864864865,
|
|
"grad_norm": 15.725196900408367,
|
|
"learning_rate": 9.726482635476252e-06,
|
|
"loss": 2.745842456817627,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.5873873873873874,
|
|
"grad_norm": 8.0300708831056,
|
|
"learning_rate": 9.724770285562642e-06,
|
|
"loss": 2.5746424198150635,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.5882882882882883,
|
|
"grad_norm": 17.331180975895887,
|
|
"learning_rate": 9.723052743980963e-06,
|
|
"loss": 2.8071985244750977,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.5891891891891892,
|
|
"grad_norm": 13.071760646959074,
|
|
"learning_rate": 9.72133001261848e-06,
|
|
"loss": 2.8666656017303467,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.5900900900900901,
|
|
"grad_norm": 8.582467250156132,
|
|
"learning_rate": 9.719602093368165e-06,
|
|
"loss": 2.601950168609619,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.590990990990991,
|
|
"grad_norm": 9.076812716735017,
|
|
"learning_rate": 9.717868988128688e-06,
|
|
"loss": 3.026968479156494,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.5918918918918918,
|
|
"grad_norm": 10.619332033928004,
|
|
"learning_rate": 9.716130698804418e-06,
|
|
"loss": 1.9140194654464722,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.5927927927927928,
|
|
"grad_norm": 28.80427013899261,
|
|
"learning_rate": 9.714387227305422e-06,
|
|
"loss": 2.499068021774292,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.5936936936936937,
|
|
"grad_norm": 9.552137001045008,
|
|
"learning_rate": 9.712638575547458e-06,
|
|
"loss": 3.0285205841064453,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.5945945945945946,
|
|
"grad_norm": 44.90766293089647,
|
|
"learning_rate": 9.710884745451979e-06,
|
|
"loss": 3.3245625495910645,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.5954954954954955,
|
|
"grad_norm": 9.12125793416734,
|
|
"learning_rate": 9.709125738946126e-06,
|
|
"loss": 2.6970860958099365,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.5963963963963964,
|
|
"grad_norm": 19.76964852034373,
|
|
"learning_rate": 9.707361557962728e-06,
|
|
"loss": 2.7420012950897217,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.5972972972972973,
|
|
"grad_norm": 28.95380432258598,
|
|
"learning_rate": 9.705592204440306e-06,
|
|
"loss": 3.1635239124298096,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.5981981981981982,
|
|
"grad_norm": 12.148321113338309,
|
|
"learning_rate": 9.703817680323055e-06,
|
|
"loss": 2.6524462699890137,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.5990990990990991,
|
|
"grad_norm": 22.42931971897512,
|
|
"learning_rate": 9.702037987560859e-06,
|
|
"loss": 1.996198058128357,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"grad_norm": 9.760729582324164,
|
|
"learning_rate": 9.700253128109275e-06,
|
|
"loss": 2.959993839263916,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.6009009009009009,
|
|
"grad_norm": 11.039657436693238,
|
|
"learning_rate": 9.698463103929542e-06,
|
|
"loss": 2.4604763984680176,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.6018018018018018,
|
|
"grad_norm": 12.591538232890576,
|
|
"learning_rate": 9.696667916988576e-06,
|
|
"loss": 2.6671557426452637,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.6027027027027027,
|
|
"grad_norm": 13.018778973089516,
|
|
"learning_rate": 9.694867569258957e-06,
|
|
"loss": 2.5060935020446777,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.6036036036036037,
|
|
"grad_norm": 24.812787978360838,
|
|
"learning_rate": 9.693062062718947e-06,
|
|
"loss": 2.6539478302001953,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.6045045045045045,
|
|
"grad_norm": 9.28865529697274,
|
|
"learning_rate": 9.691251399352468e-06,
|
|
"loss": 3.0585227012634277,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.6054054054054054,
|
|
"grad_norm": 12.489042706136434,
|
|
"learning_rate": 9.689435581149114e-06,
|
|
"loss": 2.2748842239379883,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.6063063063063063,
|
|
"grad_norm": 12.788764883935377,
|
|
"learning_rate": 9.687614610104137e-06,
|
|
"loss": 2.742896795272827,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.6072072072072072,
|
|
"grad_norm": 10.400479488548925,
|
|
"learning_rate": 9.68578848821846e-06,
|
|
"loss": 2.913947582244873,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.6081081081081081,
|
|
"grad_norm": 10.356969659317713,
|
|
"learning_rate": 9.683957217498657e-06,
|
|
"loss": 3.195772647857666,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.609009009009009,
|
|
"grad_norm": 11.177217351847228,
|
|
"learning_rate": 9.682120799956961e-06,
|
|
"loss": 2.561089515686035,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.6099099099099099,
|
|
"grad_norm": 11.776725641662114,
|
|
"learning_rate": 9.68027923761127e-06,
|
|
"loss": 2.9913177490234375,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.6108108108108108,
|
|
"grad_norm": 20.340971853249492,
|
|
"learning_rate": 9.678432532485122e-06,
|
|
"loss": 2.9433279037475586,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.6117117117117117,
|
|
"grad_norm": 13.062691511533304,
|
|
"learning_rate": 9.676580686607714e-06,
|
|
"loss": 2.7461581230163574,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.6126126126126126,
|
|
"grad_norm": 10.550831289023261,
|
|
"learning_rate": 9.67472370201389e-06,
|
|
"loss": 2.2804391384124756,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.6135135135135135,
|
|
"grad_norm": 14.04652689177321,
|
|
"learning_rate": 9.672861580744142e-06,
|
|
"loss": 2.3337719440460205,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.6144144144144145,
|
|
"grad_norm": 11.726406448121304,
|
|
"learning_rate": 9.6709943248446e-06,
|
|
"loss": 3.0193614959716797,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.6153153153153154,
|
|
"grad_norm": 7.698304365102145,
|
|
"learning_rate": 9.669121936367043e-06,
|
|
"loss": 2.7147281169891357,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.6162162162162163,
|
|
"grad_norm": 10.794221131633092,
|
|
"learning_rate": 9.667244417368888e-06,
|
|
"loss": 2.859605073928833,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.6171171171171171,
|
|
"grad_norm": 10.03370727731534,
|
|
"learning_rate": 9.665361769913187e-06,
|
|
"loss": 2.639362096786499,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.618018018018018,
|
|
"grad_norm": 15.312131543900419,
|
|
"learning_rate": 9.663473996068631e-06,
|
|
"loss": 2.8004603385925293,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.6189189189189189,
|
|
"grad_norm": 12.006983237628706,
|
|
"learning_rate": 9.661581097909542e-06,
|
|
"loss": 2.2756056785583496,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.6198198198198198,
|
|
"grad_norm": 7.178982852760417,
|
|
"learning_rate": 9.659683077515871e-06,
|
|
"loss": 2.5561704635620117,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.6207207207207207,
|
|
"grad_norm": 17.558612337023416,
|
|
"learning_rate": 9.6577799369732e-06,
|
|
"loss": 2.2039928436279297,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.6216216216216216,
|
|
"grad_norm": 12.664437522074346,
|
|
"learning_rate": 9.655871678372735e-06,
|
|
"loss": 2.7276079654693604,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.6225225225225225,
|
|
"grad_norm": 9.598029782608371,
|
|
"learning_rate": 9.65395830381131e-06,
|
|
"loss": 3.5972723960876465,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.6234234234234234,
|
|
"grad_norm": 10.084799153198551,
|
|
"learning_rate": 9.652039815391376e-06,
|
|
"loss": 2.7443714141845703,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.6243243243243243,
|
|
"grad_norm": 11.19197717861924,
|
|
"learning_rate": 9.650116215221006e-06,
|
|
"loss": 3.679194688796997,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.6252252252252253,
|
|
"grad_norm": 13.429386304946776,
|
|
"learning_rate": 9.648187505413887e-06,
|
|
"loss": 2.4218192100524902,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.6261261261261262,
|
|
"grad_norm": 9.445783315531548,
|
|
"learning_rate": 9.646253688089321e-06,
|
|
"loss": 2.255427122116089,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.6270270270270271,
|
|
"grad_norm": 13.604349677903247,
|
|
"learning_rate": 9.644314765372227e-06,
|
|
"loss": 2.967381000518799,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.627927927927928,
|
|
"grad_norm": 13.267927862733156,
|
|
"learning_rate": 9.64237073939313e-06,
|
|
"loss": 2.0716745853424072,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.6288288288288288,
|
|
"grad_norm": 20.78254766537362,
|
|
"learning_rate": 9.64042161228816e-06,
|
|
"loss": 2.4729197025299072,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.6297297297297297,
|
|
"grad_norm": 10.735150542974305,
|
|
"learning_rate": 9.638467386199057e-06,
|
|
"loss": 2.4997925758361816,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.6306306306306306,
|
|
"grad_norm": 9.654671058509456,
|
|
"learning_rate": 9.636508063273161e-06,
|
|
"loss": 2.097930908203125,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.6315315315315315,
|
|
"grad_norm": 8.12843638998296,
|
|
"learning_rate": 9.634543645663417e-06,
|
|
"loss": 3.2768378257751465,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.6324324324324324,
|
|
"grad_norm": 9.035173235571136,
|
|
"learning_rate": 9.63257413552836e-06,
|
|
"loss": 2.5695743560791016,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.6333333333333333,
|
|
"grad_norm": 10.465715380961658,
|
|
"learning_rate": 9.63059953503213e-06,
|
|
"loss": 2.83211088180542,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.6342342342342342,
|
|
"grad_norm": 8.531194596728515,
|
|
"learning_rate": 9.628619846344453e-06,
|
|
"loss": 3.39132022857666,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.6351351351351351,
|
|
"grad_norm": 10.723427942794773,
|
|
"learning_rate": 9.626635071640648e-06,
|
|
"loss": 2.4200494289398193,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.6360360360360361,
|
|
"grad_norm": 9.667355089191147,
|
|
"learning_rate": 9.624645213101627e-06,
|
|
"loss": 2.2413747310638428,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.636936936936937,
|
|
"grad_norm": 7.524620829749232,
|
|
"learning_rate": 9.62265027291388e-06,
|
|
"loss": 2.371335029602051,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.6378378378378379,
|
|
"grad_norm": 10.477549393486395,
|
|
"learning_rate": 9.62065025326949e-06,
|
|
"loss": 3.217474937438965,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.6387387387387388,
|
|
"grad_norm": 11.236334668871198,
|
|
"learning_rate": 9.618645156366113e-06,
|
|
"loss": 2.601595640182495,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.6396396396396397,
|
|
"grad_norm": 12.085562514268764,
|
|
"learning_rate": 9.616634984406992e-06,
|
|
"loss": 3.323373794555664,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.6405405405405405,
|
|
"grad_norm": 7.864682656572068,
|
|
"learning_rate": 9.61461973960094e-06,
|
|
"loss": 2.608567476272583,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.6414414414414414,
|
|
"grad_norm": 11.02629949211748,
|
|
"learning_rate": 9.612599424162344e-06,
|
|
"loss": 3.1566262245178223,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.6423423423423423,
|
|
"grad_norm": 7.927060068649259,
|
|
"learning_rate": 9.61057404031117e-06,
|
|
"loss": 2.9061496257781982,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.6432432432432432,
|
|
"grad_norm": 16.4660133303867,
|
|
"learning_rate": 9.608543590272947e-06,
|
|
"loss": 2.9211766719818115,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.6441441441441441,
|
|
"grad_norm": 10.311485765880613,
|
|
"learning_rate": 9.606508076278772e-06,
|
|
"loss": 2.5923843383789062,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.645045045045045,
|
|
"grad_norm": 11.027862149795704,
|
|
"learning_rate": 9.604467500565305e-06,
|
|
"loss": 2.2774062156677246,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.6459459459459459,
|
|
"grad_norm": 10.9047541264129,
|
|
"learning_rate": 9.602421865374774e-06,
|
|
"loss": 2.7341184616088867,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.6468468468468469,
|
|
"grad_norm": 26.06312075565098,
|
|
"learning_rate": 9.600371172954957e-06,
|
|
"loss": 2.2477073669433594,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.6477477477477478,
|
|
"grad_norm": 9.903233360954676,
|
|
"learning_rate": 9.598315425559199e-06,
|
|
"loss": 2.342525005340576,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.6486486486486487,
|
|
"grad_norm": 18.65596381212552,
|
|
"learning_rate": 9.596254625446391e-06,
|
|
"loss": 2.6345980167388916,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.6495495495495496,
|
|
"grad_norm": 20.153467171902868,
|
|
"learning_rate": 9.594188774880981e-06,
|
|
"loss": 2.5606279373168945,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.6504504504504505,
|
|
"grad_norm": 6.441711489089664,
|
|
"learning_rate": 9.592117876132965e-06,
|
|
"loss": 2.866764783859253,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.6513513513513514,
|
|
"grad_norm": 9.81198891053071,
|
|
"learning_rate": 9.590041931477887e-06,
|
|
"loss": 3.1564278602600098,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.6522522522522523,
|
|
"grad_norm": 17.684208371078892,
|
|
"learning_rate": 9.587960943196834e-06,
|
|
"loss": 2.7963242530822754,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.6531531531531531,
|
|
"grad_norm": 15.342811240268174,
|
|
"learning_rate": 9.585874913576435e-06,
|
|
"loss": 3.0183424949645996,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.654054054054054,
|
|
"grad_norm": 14.623353023945262,
|
|
"learning_rate": 9.583783844908861e-06,
|
|
"loss": 3.3839101791381836,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.6549549549549549,
|
|
"grad_norm": 8.398692776443163,
|
|
"learning_rate": 9.581687739491816e-06,
|
|
"loss": 2.6206607818603516,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.6558558558558558,
|
|
"grad_norm": 12.37961380763781,
|
|
"learning_rate": 9.579586599628542e-06,
|
|
"loss": 2.7061638832092285,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.6567567567567567,
|
|
"grad_norm": 10.678438728195196,
|
|
"learning_rate": 9.577480427627814e-06,
|
|
"loss": 2.508704900741577,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.6576576576576577,
|
|
"grad_norm": 25.932935898496996,
|
|
"learning_rate": 9.57536922580393e-06,
|
|
"loss": 1.9583587646484375,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.6585585585585586,
|
|
"grad_norm": 27.66965432336236,
|
|
"learning_rate": 9.573252996476722e-06,
|
|
"loss": 3.129885196685791,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.6594594594594595,
|
|
"grad_norm": 9.735259990519928,
|
|
"learning_rate": 9.571131741971543e-06,
|
|
"loss": 2.7763378620147705,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.6603603603603604,
|
|
"grad_norm": 8.032203064987039,
|
|
"learning_rate": 9.569005464619267e-06,
|
|
"loss": 2.2813687324523926,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.6612612612612613,
|
|
"grad_norm": 8.041170678642661,
|
|
"learning_rate": 9.566874166756288e-06,
|
|
"loss": 2.840353012084961,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.6621621621621622,
|
|
"grad_norm": 28.158206672083676,
|
|
"learning_rate": 9.564737850724518e-06,
|
|
"loss": 2.8474009037017822,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.6630630630630631,
|
|
"grad_norm": 5.815000573446517,
|
|
"learning_rate": 9.562596518871382e-06,
|
|
"loss": 2.8207173347473145,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.663963963963964,
|
|
"grad_norm": 34.14413849056543,
|
|
"learning_rate": 9.560450173549816e-06,
|
|
"loss": 3.46616792678833,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.6648648648648648,
|
|
"grad_norm": 9.09455757919389,
|
|
"learning_rate": 9.558298817118263e-06,
|
|
"loss": 3.2472267150878906,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.6657657657657657,
|
|
"grad_norm": 11.745734417740652,
|
|
"learning_rate": 9.55614245194068e-06,
|
|
"loss": 2.990772247314453,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 10.126106707154532,
|
|
"learning_rate": 9.553981080386517e-06,
|
|
"loss": 2.621983766555786,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.6675675675675675,
|
|
"grad_norm": 10.013984136877077,
|
|
"learning_rate": 9.551814704830734e-06,
|
|
"loss": 2.8704495429992676,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.6684684684684684,
|
|
"grad_norm": 17.553893999524107,
|
|
"learning_rate": 9.549643327653784e-06,
|
|
"loss": 2.6300485134124756,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.6693693693693694,
|
|
"grad_norm": 8.905039694665708,
|
|
"learning_rate": 9.54746695124162e-06,
|
|
"loss": 2.985562801361084,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.6702702702702703,
|
|
"grad_norm": 19.481838616003518,
|
|
"learning_rate": 9.545285577985683e-06,
|
|
"loss": 3.3478264808654785,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.6711711711711712,
|
|
"grad_norm": 10.728554308127086,
|
|
"learning_rate": 9.543099210282911e-06,
|
|
"loss": 2.5533034801483154,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.6720720720720721,
|
|
"grad_norm": 6.0652069579138645,
|
|
"learning_rate": 9.540907850535723e-06,
|
|
"loss": 2.541428565979004,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.672972972972973,
|
|
"grad_norm": 17.782409649378508,
|
|
"learning_rate": 9.53871150115203e-06,
|
|
"loss": 3.083484172821045,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.6738738738738739,
|
|
"grad_norm": 28.609892675952906,
|
|
"learning_rate": 9.536510164545223e-06,
|
|
"loss": 2.3928310871124268,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.6747747747747748,
|
|
"grad_norm": 16.4195531284808,
|
|
"learning_rate": 9.534303843134171e-06,
|
|
"loss": 2.2515366077423096,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.6756756756756757,
|
|
"grad_norm": 7.548044911736927,
|
|
"learning_rate": 9.532092539343221e-06,
|
|
"loss": 2.632566452026367,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.6765765765765765,
|
|
"grad_norm": 14.289205489681361,
|
|
"learning_rate": 9.5298762556022e-06,
|
|
"loss": 2.8126754760742188,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.6774774774774774,
|
|
"grad_norm": 16.292231519879333,
|
|
"learning_rate": 9.5276549943464e-06,
|
|
"loss": 2.814695358276367,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.6783783783783783,
|
|
"grad_norm": 25.04265360040195,
|
|
"learning_rate": 9.525428758016586e-06,
|
|
"loss": 2.972036123275757,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.6792792792792792,
|
|
"grad_norm": 23.661749713598404,
|
|
"learning_rate": 9.523197549058992e-06,
|
|
"loss": 2.619868755340576,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.6801801801801802,
|
|
"grad_norm": 17.407043836170345,
|
|
"learning_rate": 9.520961369925308e-06,
|
|
"loss": 4.805351257324219,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.6810810810810811,
|
|
"grad_norm": 10.468744127830053,
|
|
"learning_rate": 9.518720223072693e-06,
|
|
"loss": 3.144011974334717,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.681981981981982,
|
|
"grad_norm": 9.704633564788162,
|
|
"learning_rate": 9.516474110963762e-06,
|
|
"loss": 2.9169135093688965,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.6828828828828829,
|
|
"grad_norm": 7.024733080156754,
|
|
"learning_rate": 9.514223036066587e-06,
|
|
"loss": 2.531320571899414,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.6837837837837838,
|
|
"grad_norm": 9.295575693758463,
|
|
"learning_rate": 9.511967000854691e-06,
|
|
"loss": 2.128255844116211,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.6846846846846847,
|
|
"grad_norm": 11.08585967285018,
|
|
"learning_rate": 9.50970600780705e-06,
|
|
"loss": 2.904832363128662,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.6855855855855856,
|
|
"grad_norm": 8.611078781100211,
|
|
"learning_rate": 9.507440059408081e-06,
|
|
"loss": 3.224320888519287,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.6864864864864865,
|
|
"grad_norm": 19.933373813383568,
|
|
"learning_rate": 9.50516915814766e-06,
|
|
"loss": 2.992894172668457,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.6873873873873874,
|
|
"grad_norm": 17.629980801907795,
|
|
"learning_rate": 9.502893306521092e-06,
|
|
"loss": 2.5204880237579346,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.6882882882882883,
|
|
"grad_norm": 8.426783809290349,
|
|
"learning_rate": 9.500612507029128e-06,
|
|
"loss": 2.420421838760376,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.6891891891891891,
|
|
"grad_norm": 21.42446866639724,
|
|
"learning_rate": 9.498326762177952e-06,
|
|
"loss": 2.415316581726074,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.69009009009009,
|
|
"grad_norm": 11.706220780072188,
|
|
"learning_rate": 9.496036074479184e-06,
|
|
"loss": 2.944035053253174,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.690990990990991,
|
|
"grad_norm": 9.802219173432976,
|
|
"learning_rate": 9.49374044644988e-06,
|
|
"loss": 2.377190589904785,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.6918918918918919,
|
|
"grad_norm": 4.558892645831176,
|
|
"learning_rate": 9.491439880612513e-06,
|
|
"loss": 1.1236885786056519,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.6927927927927928,
|
|
"grad_norm": 7.80491444951165,
|
|
"learning_rate": 9.489134379494996e-06,
|
|
"loss": 3.0552773475646973,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.6936936936936937,
|
|
"grad_norm": 18.374039901625448,
|
|
"learning_rate": 9.486823945630654e-06,
|
|
"loss": 3.0332956314086914,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.6945945945945946,
|
|
"grad_norm": 31.234558943180797,
|
|
"learning_rate": 9.484508581558236e-06,
|
|
"loss": 2.4991304874420166,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.6954954954954955,
|
|
"grad_norm": 13.190032949087422,
|
|
"learning_rate": 9.48218828982191e-06,
|
|
"loss": 3.1494534015655518,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.6963963963963964,
|
|
"grad_norm": 5.571744569472321,
|
|
"learning_rate": 9.479863072971254e-06,
|
|
"loss": 2.624263048171997,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.6972972972972973,
|
|
"grad_norm": 15.149735769109464,
|
|
"learning_rate": 9.477532933561264e-06,
|
|
"loss": 2.8947343826293945,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.6981981981981982,
|
|
"grad_norm": 13.33064496622505,
|
|
"learning_rate": 9.47519787415234e-06,
|
|
"loss": 2.5602176189422607,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.6990990990990991,
|
|
"grad_norm": 15.786645382596577,
|
|
"learning_rate": 9.47285789731029e-06,
|
|
"loss": 2.399599552154541,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"grad_norm": 8.671795375113534,
|
|
"learning_rate": 9.470513005606327e-06,
|
|
"loss": 3.5607237815856934,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.7009009009009008,
|
|
"grad_norm": 9.359263464644386,
|
|
"learning_rate": 9.468163201617063e-06,
|
|
"loss": 2.7475228309631348,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.7018018018018019,
|
|
"grad_norm": 10.50780876599671,
|
|
"learning_rate": 9.465808487924503e-06,
|
|
"loss": 3.2652931213378906,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.7027027027027027,
|
|
"grad_norm": 9.381052152519086,
|
|
"learning_rate": 9.463448867116057e-06,
|
|
"loss": 2.7883598804473877,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.7036036036036036,
|
|
"grad_norm": 10.272838500948234,
|
|
"learning_rate": 9.461084341784519e-06,
|
|
"loss": 2.684918165206909,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.7045045045045045,
|
|
"grad_norm": 21.94060348072659,
|
|
"learning_rate": 9.458714914528076e-06,
|
|
"loss": 2.7922565937042236,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.7054054054054054,
|
|
"grad_norm": 16.382327778672522,
|
|
"learning_rate": 9.4563405879503e-06,
|
|
"loss": 2.7418885231018066,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.7063063063063063,
|
|
"grad_norm": 11.490590318903909,
|
|
"learning_rate": 9.453961364660143e-06,
|
|
"loss": 2.6633377075195312,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.7072072072072072,
|
|
"grad_norm": 12.885767656647163,
|
|
"learning_rate": 9.451577247271945e-06,
|
|
"loss": 2.512943983078003,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.7081081081081081,
|
|
"grad_norm": 12.034839864833085,
|
|
"learning_rate": 9.449188238405417e-06,
|
|
"loss": 2.8916306495666504,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.709009009009009,
|
|
"grad_norm": 10.52612701188939,
|
|
"learning_rate": 9.446794340685653e-06,
|
|
"loss": 2.993307590484619,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.7099099099099099,
|
|
"grad_norm": 6.9540578958384165,
|
|
"learning_rate": 9.444395556743106e-06,
|
|
"loss": 2.479743242263794,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.7108108108108108,
|
|
"grad_norm": 10.290522905456008,
|
|
"learning_rate": 9.441991889213613e-06,
|
|
"loss": 2.46384596824646,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.7117117117117117,
|
|
"grad_norm": 6.698040329706862,
|
|
"learning_rate": 9.439583340738365e-06,
|
|
"loss": 2.5311758518218994,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.7126126126126127,
|
|
"grad_norm": 7.906102550506939,
|
|
"learning_rate": 9.437169913963924e-06,
|
|
"loss": 2.1800713539123535,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.7135135135135136,
|
|
"grad_norm": 15.987084764349607,
|
|
"learning_rate": 9.434751611542208e-06,
|
|
"loss": 2.688724994659424,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.7144144144144144,
|
|
"grad_norm": 8.655721817445965,
|
|
"learning_rate": 9.432328436130493e-06,
|
|
"loss": 2.7366106510162354,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.7153153153153153,
|
|
"grad_norm": 7.824353996578687,
|
|
"learning_rate": 9.429900390391415e-06,
|
|
"loss": 2.5832180976867676,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.7162162162162162,
|
|
"grad_norm": 19.995905128135163,
|
|
"learning_rate": 9.42746747699295e-06,
|
|
"loss": 2.926548957824707,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.7171171171171171,
|
|
"grad_norm": 14.474967706949847,
|
|
"learning_rate": 9.425029698608438e-06,
|
|
"loss": 2.584516763687134,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.718018018018018,
|
|
"grad_norm": 9.194292118045457,
|
|
"learning_rate": 9.42258705791655e-06,
|
|
"loss": 2.5635998249053955,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.7189189189189189,
|
|
"grad_norm": 22.858682720554423,
|
|
"learning_rate": 9.42013955760131e-06,
|
|
"loss": 2.6104507446289062,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.7198198198198198,
|
|
"grad_norm": 10.425797078712982,
|
|
"learning_rate": 9.417687200352077e-06,
|
|
"loss": 2.803596019744873,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.7207207207207207,
|
|
"grad_norm": 8.694257724524336,
|
|
"learning_rate": 9.415229988863548e-06,
|
|
"loss": 2.6353211402893066,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.7216216216216216,
|
|
"grad_norm": 9.41087506713879,
|
|
"learning_rate": 9.412767925835753e-06,
|
|
"loss": 3.070380210876465,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.7225225225225225,
|
|
"grad_norm": 12.144455674929114,
|
|
"learning_rate": 9.410301013974056e-06,
|
|
"loss": 2.652477741241455,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.7234234234234235,
|
|
"grad_norm": 26.120427483925482,
|
|
"learning_rate": 9.40782925598915e-06,
|
|
"loss": 2.8056678771972656,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.7243243243243244,
|
|
"grad_norm": 9.277478281120864,
|
|
"learning_rate": 9.405352654597042e-06,
|
|
"loss": 3.2002205848693848,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.7252252252252253,
|
|
"grad_norm": 20.25434885403409,
|
|
"learning_rate": 9.402871212519074e-06,
|
|
"loss": 2.261554718017578,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.7261261261261261,
|
|
"grad_norm": 20.2554986876722,
|
|
"learning_rate": 9.400384932481902e-06,
|
|
"loss": 2.0507774353027344,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.727027027027027,
|
|
"grad_norm": 16.07366356988039,
|
|
"learning_rate": 9.397893817217497e-06,
|
|
"loss": 2.8768179416656494,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.7279279279279279,
|
|
"grad_norm": 48.89208536633896,
|
|
"learning_rate": 9.395397869463145e-06,
|
|
"loss": 2.4264047145843506,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.7288288288288288,
|
|
"grad_norm": 17.83944897537209,
|
|
"learning_rate": 9.392897091961442e-06,
|
|
"loss": 3.395022392272949,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.7297297297297297,
|
|
"grad_norm": 15.946872351454127,
|
|
"learning_rate": 9.390391487460286e-06,
|
|
"loss": 3.188384532928467,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.7306306306306306,
|
|
"grad_norm": 8.178557885088305,
|
|
"learning_rate": 9.387881058712888e-06,
|
|
"loss": 3.1175644397735596,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.7315315315315315,
|
|
"grad_norm": 16.724591664532905,
|
|
"learning_rate": 9.385365808477755e-06,
|
|
"loss": 2.8816864490509033,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.7324324324324324,
|
|
"grad_norm": 14.780641087159573,
|
|
"learning_rate": 9.382845739518688e-06,
|
|
"loss": 2.7165169715881348,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.7333333333333333,
|
|
"grad_norm": 20.646142007780085,
|
|
"learning_rate": 9.380320854604792e-06,
|
|
"loss": 2.3554203510284424,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.7342342342342343,
|
|
"grad_norm": 18.385887494793863,
|
|
"learning_rate": 9.377791156510456e-06,
|
|
"loss": 3.113914966583252,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.7351351351351352,
|
|
"grad_norm": 28.416780525776396,
|
|
"learning_rate": 9.37525664801536e-06,
|
|
"loss": 2.461071491241455,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.7360360360360361,
|
|
"grad_norm": 8.120471090329623,
|
|
"learning_rate": 9.372717331904472e-06,
|
|
"loss": 2.44002103805542,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.736936936936937,
|
|
"grad_norm": 9.178800276935084,
|
|
"learning_rate": 9.370173210968041e-06,
|
|
"loss": 2.982046127319336,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.7378378378378379,
|
|
"grad_norm": 9.291849128138498,
|
|
"learning_rate": 9.367624288001596e-06,
|
|
"loss": 2.580765962600708,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.7387387387387387,
|
|
"grad_norm": 14.215598673679969,
|
|
"learning_rate": 9.365070565805941e-06,
|
|
"loss": 2.9165632724761963,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.7396396396396396,
|
|
"grad_norm": 20.65767647017504,
|
|
"learning_rate": 9.362512047187159e-06,
|
|
"loss": 2.551032304763794,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.7405405405405405,
|
|
"grad_norm": 9.285553299169587,
|
|
"learning_rate": 9.359948734956591e-06,
|
|
"loss": 2.451357841491699,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.7414414414414414,
|
|
"grad_norm": 10.736946004417806,
|
|
"learning_rate": 9.357380631930863e-06,
|
|
"loss": 2.8412632942199707,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.7423423423423423,
|
|
"grad_norm": 13.588522754081062,
|
|
"learning_rate": 9.35480774093185e-06,
|
|
"loss": 2.455108165740967,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.7432432432432432,
|
|
"grad_norm": 8.066711806396757,
|
|
"learning_rate": 9.352230064786696e-06,
|
|
"loss": 2.8943047523498535,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.7441441441441441,
|
|
"grad_norm": 15.193609099494148,
|
|
"learning_rate": 9.349647606327798e-06,
|
|
"loss": 2.916165351867676,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.7450450450450451,
|
|
"grad_norm": 14.04662684891242,
|
|
"learning_rate": 9.347060368392816e-06,
|
|
"loss": 2.5018086433410645,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.745945945945946,
|
|
"grad_norm": 7.39177413129495,
|
|
"learning_rate": 9.344468353824653e-06,
|
|
"loss": 2.697756052017212,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.7468468468468469,
|
|
"grad_norm": 17.813385065952076,
|
|
"learning_rate": 9.341871565471464e-06,
|
|
"loss": 2.811149835586548,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.7477477477477478,
|
|
"grad_norm": 15.113644784678222,
|
|
"learning_rate": 9.33927000618665e-06,
|
|
"loss": 2.9072134494781494,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.7486486486486487,
|
|
"grad_norm": 10.464234570824074,
|
|
"learning_rate": 9.336663678828859e-06,
|
|
"loss": 2.5747392177581787,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.7495495495495496,
|
|
"grad_norm": 10.25650921301874,
|
|
"learning_rate": 9.334052586261965e-06,
|
|
"loss": 2.636551856994629,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.7504504504504504,
|
|
"grad_norm": 16.38634375299398,
|
|
"learning_rate": 9.331436731355093e-06,
|
|
"loss": 2.807694911956787,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.7513513513513513,
|
|
"grad_norm": 11.603882134317555,
|
|
"learning_rate": 9.32881611698259e-06,
|
|
"loss": 2.8333516120910645,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.7522522522522522,
|
|
"grad_norm": 9.144476288348503,
|
|
"learning_rate": 9.326190746024041e-06,
|
|
"loss": 2.7606048583984375,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.7531531531531531,
|
|
"grad_norm": 8.565934481185979,
|
|
"learning_rate": 9.323560621364253e-06,
|
|
"loss": 2.768097400665283,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.754054054054054,
|
|
"grad_norm": 10.316465266491164,
|
|
"learning_rate": 9.320925745893257e-06,
|
|
"loss": 2.4527711868286133,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.7549549549549549,
|
|
"grad_norm": 9.976588781572577,
|
|
"learning_rate": 9.318286122506304e-06,
|
|
"loss": 3.151270627975464,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.7558558558558559,
|
|
"grad_norm": 15.224079481612058,
|
|
"learning_rate": 9.315641754103863e-06,
|
|
"loss": 2.5614888668060303,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.7567567567567568,
|
|
"grad_norm": 11.589239774949226,
|
|
"learning_rate": 9.312992643591617e-06,
|
|
"loss": 2.7102653980255127,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.7576576576576577,
|
|
"grad_norm": 7.516970212020474,
|
|
"learning_rate": 9.310338793880458e-06,
|
|
"loss": 2.6227128505706787,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.7585585585585586,
|
|
"grad_norm": 8.043716387922379,
|
|
"learning_rate": 9.30768020788649e-06,
|
|
"loss": 2.1668949127197266,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.7594594594594595,
|
|
"grad_norm": 11.933802574887105,
|
|
"learning_rate": 9.305016888531013e-06,
|
|
"loss": 2.6135597229003906,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.7603603603603604,
|
|
"grad_norm": 7.370709474714002,
|
|
"learning_rate": 9.302348838740539e-06,
|
|
"loss": 2.7681655883789062,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.7612612612612613,
|
|
"grad_norm": 16.425951235185856,
|
|
"learning_rate": 9.29967606144677e-06,
|
|
"loss": 2.987060546875,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.7621621621621621,
|
|
"grad_norm": 9.549678592141358,
|
|
"learning_rate": 9.296998559586603e-06,
|
|
"loss": 2.931814670562744,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.763063063063063,
|
|
"grad_norm": 18.44660477260879,
|
|
"learning_rate": 9.294316336102132e-06,
|
|
"loss": 2.7123594284057617,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.7639639639639639,
|
|
"grad_norm": 19.372543945414222,
|
|
"learning_rate": 9.291629393940631e-06,
|
|
"loss": 2.546339988708496,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.7648648648648648,
|
|
"grad_norm": 9.501187725957108,
|
|
"learning_rate": 9.288937736054568e-06,
|
|
"loss": 2.3622288703918457,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.7657657657657657,
|
|
"grad_norm": 19.46303995758465,
|
|
"learning_rate": 9.286241365401585e-06,
|
|
"loss": 2.243159294128418,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.7666666666666667,
|
|
"grad_norm": 8.549177641261387,
|
|
"learning_rate": 9.283540284944507e-06,
|
|
"loss": 2.2970545291900635,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.7675675675675676,
|
|
"grad_norm": 10.303935337104333,
|
|
"learning_rate": 9.280834497651334e-06,
|
|
"loss": 2.5987138748168945,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.7684684684684685,
|
|
"grad_norm": 9.753998036349868,
|
|
"learning_rate": 9.278124006495234e-06,
|
|
"loss": 2.220139980316162,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.7693693693693694,
|
|
"grad_norm": 9.188191929831634,
|
|
"learning_rate": 9.27540881445455e-06,
|
|
"loss": 2.614650011062622,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.7702702702702703,
|
|
"grad_norm": 11.225502603819619,
|
|
"learning_rate": 9.272688924512783e-06,
|
|
"loss": 2.9019663333892822,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.7711711711711712,
|
|
"grad_norm": 14.050951696540693,
|
|
"learning_rate": 9.269964339658605e-06,
|
|
"loss": 2.776390790939331,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.7720720720720721,
|
|
"grad_norm": 34.83073169335098,
|
|
"learning_rate": 9.267235062885838e-06,
|
|
"loss": 4.928389072418213,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.772972972972973,
|
|
"grad_norm": 7.4228290420989085,
|
|
"learning_rate": 9.264501097193465e-06,
|
|
"loss": 2.3369925022125244,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.7738738738738739,
|
|
"grad_norm": 23.330497182576813,
|
|
"learning_rate": 9.26176244558562e-06,
|
|
"loss": 2.757167339324951,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.7747747747747747,
|
|
"grad_norm": 8.526306477591158,
|
|
"learning_rate": 9.259019111071587e-06,
|
|
"loss": 3.1688270568847656,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.7756756756756756,
|
|
"grad_norm": 23.494552058234884,
|
|
"learning_rate": 9.256271096665792e-06,
|
|
"loss": 2.630497694015503,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.7765765765765765,
|
|
"grad_norm": 10.565389505665538,
|
|
"learning_rate": 9.253518405387808e-06,
|
|
"loss": 2.744032382965088,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.7774774774774775,
|
|
"grad_norm": 14.766385774718497,
|
|
"learning_rate": 9.250761040262344e-06,
|
|
"loss": 2.5588674545288086,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.7783783783783784,
|
|
"grad_norm": 9.207452155362795,
|
|
"learning_rate": 9.247999004319245e-06,
|
|
"loss": 2.6305861473083496,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.7792792792792793,
|
|
"grad_norm": 8.75815825583094,
|
|
"learning_rate": 9.24523230059349e-06,
|
|
"loss": 3.0089213848114014,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.7801801801801802,
|
|
"grad_norm": 21.26373014356904,
|
|
"learning_rate": 9.242460932125185e-06,
|
|
"loss": 3.4457340240478516,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.7810810810810811,
|
|
"grad_norm": 14.92483027731681,
|
|
"learning_rate": 9.239684901959565e-06,
|
|
"loss": 2.5607290267944336,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.781981981981982,
|
|
"grad_norm": 6.90303732761023,
|
|
"learning_rate": 9.236904213146982e-06,
|
|
"loss": 2.2297580242156982,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.7828828828828829,
|
|
"grad_norm": 13.062000921870082,
|
|
"learning_rate": 9.234118868742911e-06,
|
|
"loss": 3.182875871658325,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.7837837837837838,
|
|
"grad_norm": 8.967717184430734,
|
|
"learning_rate": 9.231328871807943e-06,
|
|
"loss": 2.0097410678863525,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.7846846846846847,
|
|
"grad_norm": 13.639231514475798,
|
|
"learning_rate": 9.228534225407781e-06,
|
|
"loss": 3.289792060852051,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.7855855855855856,
|
|
"grad_norm": 8.795213936439309,
|
|
"learning_rate": 9.225734932613233e-06,
|
|
"loss": 1.7139835357666016,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.7864864864864864,
|
|
"grad_norm": 29.48172548724543,
|
|
"learning_rate": 9.222930996500218e-06,
|
|
"loss": 2.708808183670044,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.7873873873873873,
|
|
"grad_norm": 12.608981891953999,
|
|
"learning_rate": 9.220122420149753e-06,
|
|
"loss": 2.9573237895965576,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.7882882882882883,
|
|
"grad_norm": 10.05828507973409,
|
|
"learning_rate": 9.217309206647955e-06,
|
|
"loss": 2.8175604343414307,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.7891891891891892,
|
|
"grad_norm": 18.025305074516382,
|
|
"learning_rate": 9.21449135908604e-06,
|
|
"loss": 2.6104860305786133,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.7900900900900901,
|
|
"grad_norm": 10.529882797070544,
|
|
"learning_rate": 9.21166888056031e-06,
|
|
"loss": 2.855435609817505,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.790990990990991,
|
|
"grad_norm": 9.608056253818662,
|
|
"learning_rate": 9.208841774172159e-06,
|
|
"loss": 2.761814832687378,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.7918918918918919,
|
|
"grad_norm": 9.47187691572592,
|
|
"learning_rate": 9.206010043028066e-06,
|
|
"loss": 2.776505470275879,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.7927927927927928,
|
|
"grad_norm": 14.241497175937738,
|
|
"learning_rate": 9.203173690239591e-06,
|
|
"loss": 2.668419361114502,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.7936936936936937,
|
|
"grad_norm": 14.832385901867733,
|
|
"learning_rate": 9.200332718923374e-06,
|
|
"loss": 2.8139710426330566,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.7945945945945946,
|
|
"grad_norm": 23.079974992143335,
|
|
"learning_rate": 9.197487132201129e-06,
|
|
"loss": 3.2440996170043945,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.7954954954954955,
|
|
"grad_norm": 16.23973883783376,
|
|
"learning_rate": 9.194636933199637e-06,
|
|
"loss": 2.396348476409912,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.7963963963963964,
|
|
"grad_norm": 11.451286076420798,
|
|
"learning_rate": 9.191782125050757e-06,
|
|
"loss": 3.1295599937438965,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.7972972972972973,
|
|
"grad_norm": 9.549326162841712,
|
|
"learning_rate": 9.188922710891401e-06,
|
|
"loss": 3.130983829498291,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.7981981981981981,
|
|
"grad_norm": 9.54142098168785,
|
|
"learning_rate": 9.186058693863554e-06,
|
|
"loss": 2.2931318283081055,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.7990990990990992,
|
|
"grad_norm": 37.205104515681505,
|
|
"learning_rate": 9.18319007711425e-06,
|
|
"loss": 3.0152671337127686,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 11.628006866071045,
|
|
"learning_rate": 9.180316863795578e-06,
|
|
"loss": 2.771811008453369,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.8009009009009009,
|
|
"grad_norm": 12.902006492396236,
|
|
"learning_rate": 9.177439057064684e-06,
|
|
"loss": 2.614129066467285,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.8018018018018018,
|
|
"grad_norm": 19.478417774026575,
|
|
"learning_rate": 9.174556660083751e-06,
|
|
"loss": 2.6148383617401123,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.8027027027027027,
|
|
"grad_norm": 12.43984899089034,
|
|
"learning_rate": 9.17166967602002e-06,
|
|
"loss": 2.7329466342926025,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.8036036036036036,
|
|
"grad_norm": 12.761357469110417,
|
|
"learning_rate": 9.16877810804576e-06,
|
|
"loss": 2.4950637817382812,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.8045045045045045,
|
|
"grad_norm": 11.275645132613704,
|
|
"learning_rate": 9.165881959338279e-06,
|
|
"loss": 2.7276556491851807,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.8054054054054054,
|
|
"grad_norm": 11.39602365398208,
|
|
"learning_rate": 9.162981233079925e-06,
|
|
"loss": 2.9622392654418945,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.8063063063063063,
|
|
"grad_norm": 11.83232194085143,
|
|
"learning_rate": 9.160075932458069e-06,
|
|
"loss": 2.2887325286865234,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.8072072072072072,
|
|
"grad_norm": 16.802448060782268,
|
|
"learning_rate": 9.157166060665113e-06,
|
|
"loss": 2.6609508991241455,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.8081081081081081,
|
|
"grad_norm": 10.679228399556099,
|
|
"learning_rate": 9.154251620898475e-06,
|
|
"loss": 2.6985628604888916,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.809009009009009,
|
|
"grad_norm": 7.695041715720657,
|
|
"learning_rate": 9.151332616360604e-06,
|
|
"loss": 2.8990566730499268,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.80990990990991,
|
|
"grad_norm": 9.526552739953779,
|
|
"learning_rate": 9.148409050258956e-06,
|
|
"loss": 2.71006441116333,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.8108108108108109,
|
|
"grad_norm": 12.888284126466246,
|
|
"learning_rate": 9.145480925805998e-06,
|
|
"loss": 2.5472047328948975,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.8117117117117117,
|
|
"grad_norm": 11.468867576029266,
|
|
"learning_rate": 9.142548246219212e-06,
|
|
"loss": 2.5543947219848633,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.8126126126126126,
|
|
"grad_norm": 14.2108048341138,
|
|
"learning_rate": 9.139611014721082e-06,
|
|
"loss": 2.2650206089019775,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.8135135135135135,
|
|
"grad_norm": 10.510519818486246,
|
|
"learning_rate": 9.136669234539093e-06,
|
|
"loss": 2.8357656002044678,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.8144144144144144,
|
|
"grad_norm": 8.430176591688056,
|
|
"learning_rate": 9.133722908905733e-06,
|
|
"loss": 2.9496519565582275,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.8153153153153153,
|
|
"grad_norm": 14.294982303531594,
|
|
"learning_rate": 9.130772041058478e-06,
|
|
"loss": 2.9449565410614014,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.8162162162162162,
|
|
"grad_norm": 13.848470272783437,
|
|
"learning_rate": 9.127816634239798e-06,
|
|
"loss": 2.7125496864318848,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.8171171171171171,
|
|
"grad_norm": 9.712082839910767,
|
|
"learning_rate": 9.124856691697152e-06,
|
|
"loss": 2.622316360473633,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.818018018018018,
|
|
"grad_norm": 8.282921934671824,
|
|
"learning_rate": 9.121892216682981e-06,
|
|
"loss": 2.7912187576293945,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.8189189189189189,
|
|
"grad_norm": 8.898346246381115,
|
|
"learning_rate": 9.118923212454706e-06,
|
|
"loss": 3.250605583190918,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.8198198198198198,
|
|
"grad_norm": 24.150519660685216,
|
|
"learning_rate": 9.115949682274727e-06,
|
|
"loss": 2.956425666809082,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.8207207207207208,
|
|
"grad_norm": 10.87116408089112,
|
|
"learning_rate": 9.112971629410416e-06,
|
|
"loss": 3.0385324954986572,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.8216216216216217,
|
|
"grad_norm": 17.483675464713595,
|
|
"learning_rate": 9.109989057134113e-06,
|
|
"loss": 2.362560749053955,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.8225225225225226,
|
|
"grad_norm": 15.918245368373386,
|
|
"learning_rate": 9.107001968723127e-06,
|
|
"loss": 2.489495038986206,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.8234234234234235,
|
|
"grad_norm": 15.174666429652001,
|
|
"learning_rate": 9.104010367459728e-06,
|
|
"loss": 2.519327163696289,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.8243243243243243,
|
|
"grad_norm": 11.008930408753894,
|
|
"learning_rate": 9.101014256631144e-06,
|
|
"loss": 2.9351871013641357,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.8252252252252252,
|
|
"grad_norm": 5.441422734794709,
|
|
"learning_rate": 9.098013639529557e-06,
|
|
"loss": 2.472674608230591,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.8261261261261261,
|
|
"grad_norm": 15.615971571768284,
|
|
"learning_rate": 9.095008519452108e-06,
|
|
"loss": 3.1283674240112305,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.827027027027027,
|
|
"grad_norm": 9.72553659169402,
|
|
"learning_rate": 9.091998899700876e-06,
|
|
"loss": 2.636320114135742,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.8279279279279279,
|
|
"grad_norm": 22.068351344641943,
|
|
"learning_rate": 9.08898478358289e-06,
|
|
"loss": 2.253181219100952,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.8288288288288288,
|
|
"grad_norm": 11.65588326241802,
|
|
"learning_rate": 9.085966174410118e-06,
|
|
"loss": 2.6036620140075684,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.8297297297297297,
|
|
"grad_norm": 7.70849813769218,
|
|
"learning_rate": 9.082943075499467e-06,
|
|
"loss": 2.9057793617248535,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.8306306306306306,
|
|
"grad_norm": 15.145793198315555,
|
|
"learning_rate": 9.079915490172775e-06,
|
|
"loss": 3.194100856781006,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.8315315315315316,
|
|
"grad_norm": 20.301754986462093,
|
|
"learning_rate": 9.07688342175681e-06,
|
|
"loss": 2.951569080352783,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.8324324324324325,
|
|
"grad_norm": 8.06496478388907,
|
|
"learning_rate": 9.073846873583268e-06,
|
|
"loss": 2.6843085289001465,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 34.72774833106996,
|
|
"learning_rate": 9.070805848988763e-06,
|
|
"loss": 3.4267823696136475,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.8342342342342343,
|
|
"grad_norm": 18.05664547289118,
|
|
"learning_rate": 9.067760351314838e-06,
|
|
"loss": 2.6139276027679443,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.8351351351351352,
|
|
"grad_norm": 15.825387603740007,
|
|
"learning_rate": 9.06471038390794e-06,
|
|
"loss": 2.70221209526062,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.836036036036036,
|
|
"grad_norm": 12.785079120226515,
|
|
"learning_rate": 9.06165595011943e-06,
|
|
"loss": 2.9434709548950195,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.8369369369369369,
|
|
"grad_norm": 11.366277573937527,
|
|
"learning_rate": 9.058597053305581e-06,
|
|
"loss": 2.491422176361084,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.8378378378378378,
|
|
"grad_norm": 9.676716238112755,
|
|
"learning_rate": 9.055533696827567e-06,
|
|
"loss": 2.3882524967193604,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.8387387387387387,
|
|
"grad_norm": 22.644503500032666,
|
|
"learning_rate": 9.05246588405146e-06,
|
|
"loss": 3.4896740913391113,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.8396396396396396,
|
|
"grad_norm": 10.849687971098547,
|
|
"learning_rate": 9.049393618348237e-06,
|
|
"loss": 2.70531964302063,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.8405405405405405,
|
|
"grad_norm": 12.569047526859508,
|
|
"learning_rate": 9.046316903093757e-06,
|
|
"loss": 2.8259475231170654,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.8414414414414414,
|
|
"grad_norm": 19.438957664171042,
|
|
"learning_rate": 9.043235741668775e-06,
|
|
"loss": 2.8464393615722656,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.8423423423423423,
|
|
"grad_norm": 19.936953235312018,
|
|
"learning_rate": 9.040150137458931e-06,
|
|
"loss": 3.2039737701416016,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.8432432432432433,
|
|
"grad_norm": 8.613752918501891,
|
|
"learning_rate": 9.037060093854748e-06,
|
|
"loss": 2.366600275039673,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.8441441441441442,
|
|
"grad_norm": 11.072376563504072,
|
|
"learning_rate": 9.033965614251623e-06,
|
|
"loss": 2.9386978149414062,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.8450450450450451,
|
|
"grad_norm": 11.053325721807497,
|
|
"learning_rate": 9.030866702049828e-06,
|
|
"loss": 2.8608415126800537,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.845945945945946,
|
|
"grad_norm": 13.210636717606068,
|
|
"learning_rate": 9.027763360654509e-06,
|
|
"loss": 2.7908477783203125,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.8468468468468469,
|
|
"grad_norm": 12.165099599643389,
|
|
"learning_rate": 9.024655593475675e-06,
|
|
"loss": 2.92722487449646,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.8477477477477477,
|
|
"grad_norm": 20.884372936174117,
|
|
"learning_rate": 9.021543403928202e-06,
|
|
"loss": 2.5187618732452393,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.8486486486486486,
|
|
"grad_norm": 7.200474379907157,
|
|
"learning_rate": 9.018426795431825e-06,
|
|
"loss": 1.8800894021987915,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.8495495495495495,
|
|
"grad_norm": 10.238654769472504,
|
|
"learning_rate": 9.015305771411128e-06,
|
|
"loss": 2.726329803466797,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.8504504504504504,
|
|
"grad_norm": 8.044861147047099,
|
|
"learning_rate": 9.012180335295558e-06,
|
|
"loss": 2.646120548248291,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.8513513513513513,
|
|
"grad_norm": 6.750949554243864,
|
|
"learning_rate": 9.0090504905194e-06,
|
|
"loss": 2.664979934692383,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.8522522522522522,
|
|
"grad_norm": 10.468759833010632,
|
|
"learning_rate": 9.005916240521788e-06,
|
|
"loss": 2.7094297409057617,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.8531531531531531,
|
|
"grad_norm": 10.091214984327587,
|
|
"learning_rate": 9.002777588746698e-06,
|
|
"loss": 2.7719886302948,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.8540540540540541,
|
|
"grad_norm": 10.820966698368546,
|
|
"learning_rate": 8.999634538642938e-06,
|
|
"loss": 2.6184051036834717,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.854954954954955,
|
|
"grad_norm": 14.412149092715156,
|
|
"learning_rate": 8.996487093664152e-06,
|
|
"loss": 1.899958848953247,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.8558558558558559,
|
|
"grad_norm": 8.781129108710493,
|
|
"learning_rate": 8.993335257268814e-06,
|
|
"loss": 2.7084133625030518,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.8567567567567568,
|
|
"grad_norm": 11.727531617720302,
|
|
"learning_rate": 8.990179032920222e-06,
|
|
"loss": 2.895404100418091,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.8576576576576577,
|
|
"grad_norm": 11.556277303840965,
|
|
"learning_rate": 8.987018424086496e-06,
|
|
"loss": 2.6565675735473633,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.8585585585585586,
|
|
"grad_norm": 10.473320014625909,
|
|
"learning_rate": 8.983853434240573e-06,
|
|
"loss": 2.8310439586639404,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.8594594594594595,
|
|
"grad_norm": 13.148854857824386,
|
|
"learning_rate": 8.980684066860203e-06,
|
|
"loss": 3.1059255599975586,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.8603603603603603,
|
|
"grad_norm": 17.525522015414296,
|
|
"learning_rate": 8.97751032542795e-06,
|
|
"loss": 4.3065009117126465,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.8612612612612612,
|
|
"grad_norm": 27.81509585595586,
|
|
"learning_rate": 8.974332213431182e-06,
|
|
"loss": 2.922642230987549,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.8621621621621621,
|
|
"grad_norm": 30.277550053473007,
|
|
"learning_rate": 8.971149734362067e-06,
|
|
"loss": 3.379426956176758,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.863063063063063,
|
|
"grad_norm": 13.70645291398646,
|
|
"learning_rate": 8.967962891717575e-06,
|
|
"loss": 2.701871871948242,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.8639639639639639,
|
|
"grad_norm": 13.008491648469082,
|
|
"learning_rate": 8.96477168899947e-06,
|
|
"loss": 3.1602060794830322,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.8648648648648649,
|
|
"grad_norm": 12.744955511737194,
|
|
"learning_rate": 8.961576129714307e-06,
|
|
"loss": 2.9068946838378906,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.8657657657657658,
|
|
"grad_norm": 8.665898985016119,
|
|
"learning_rate": 8.958376217373428e-06,
|
|
"loss": 2.813460350036621,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.8666666666666667,
|
|
"grad_norm": 16.13879321331645,
|
|
"learning_rate": 8.955171955492956e-06,
|
|
"loss": 3.479156494140625,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.8675675675675676,
|
|
"grad_norm": 14.285750763371981,
|
|
"learning_rate": 8.951963347593797e-06,
|
|
"loss": 3.6346793174743652,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.8684684684684685,
|
|
"grad_norm": 24.895785145623265,
|
|
"learning_rate": 8.948750397201631e-06,
|
|
"loss": 3.2489395141601562,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.8693693693693694,
|
|
"grad_norm": 12.19008178226641,
|
|
"learning_rate": 8.94553310784691e-06,
|
|
"loss": 2.697892665863037,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.8702702702702703,
|
|
"grad_norm": 7.760892193309789,
|
|
"learning_rate": 8.942311483064849e-06,
|
|
"loss": 3.010653495788574,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.8711711711711712,
|
|
"grad_norm": 13.600426235180727,
|
|
"learning_rate": 8.939085526395435e-06,
|
|
"loss": 2.576807975769043,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.872072072072072,
|
|
"grad_norm": 9.293504375965979,
|
|
"learning_rate": 8.93585524138341e-06,
|
|
"loss": 1.9543118476867676,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.8729729729729729,
|
|
"grad_norm": 14.268656864628506,
|
|
"learning_rate": 8.932620631578273e-06,
|
|
"loss": 2.6119332313537598,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.8738738738738738,
|
|
"grad_norm": 6.2148919940604515,
|
|
"learning_rate": 8.929381700534275e-06,
|
|
"loss": 2.7489614486694336,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.8747747747747747,
|
|
"grad_norm": 12.017346068841979,
|
|
"learning_rate": 8.926138451810415e-06,
|
|
"loss": 2.812819004058838,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.8756756756756757,
|
|
"grad_norm": 9.31122675418436,
|
|
"learning_rate": 8.92289088897044e-06,
|
|
"loss": 2.209747314453125,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.8765765765765766,
|
|
"grad_norm": 6.994540232509598,
|
|
"learning_rate": 8.91963901558283e-06,
|
|
"loss": 2.4728050231933594,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.8774774774774775,
|
|
"grad_norm": 8.989192809882589,
|
|
"learning_rate": 8.916382835220807e-06,
|
|
"loss": 2.2611050605773926,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.8783783783783784,
|
|
"grad_norm": 10.16692604773728,
|
|
"learning_rate": 8.913122351462325e-06,
|
|
"loss": 3.0471978187561035,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.8792792792792793,
|
|
"grad_norm": 9.936796580361097,
|
|
"learning_rate": 8.909857567890066e-06,
|
|
"loss": 2.8904948234558105,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.8801801801801802,
|
|
"grad_norm": 11.190809672301613,
|
|
"learning_rate": 8.906588488091437e-06,
|
|
"loss": 2.7144124507904053,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.8810810810810811,
|
|
"grad_norm": 12.623352491069394,
|
|
"learning_rate": 8.903315115658564e-06,
|
|
"loss": 2.7676889896392822,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.881981981981982,
|
|
"grad_norm": 17.349761940508067,
|
|
"learning_rate": 8.900037454188293e-06,
|
|
"loss": 2.701625347137451,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.8828828828828829,
|
|
"grad_norm": 12.414156999805867,
|
|
"learning_rate": 8.89675550728218e-06,
|
|
"loss": 2.732914447784424,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.8837837837837837,
|
|
"grad_norm": 16.099666044563033,
|
|
"learning_rate": 8.893469278546492e-06,
|
|
"loss": 2.825678825378418,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.8846846846846846,
|
|
"grad_norm": 13.838046661247263,
|
|
"learning_rate": 8.890178771592198e-06,
|
|
"loss": 2.8472774028778076,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.8855855855855855,
|
|
"grad_norm": 12.563819633459376,
|
|
"learning_rate": 8.886883990034973e-06,
|
|
"loss": 3.426520824432373,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.8864864864864865,
|
|
"grad_norm": 9.893840320317794,
|
|
"learning_rate": 8.883584937495185e-06,
|
|
"loss": 2.547525405883789,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.8873873873873874,
|
|
"grad_norm": 27.712967334706622,
|
|
"learning_rate": 8.880281617597895e-06,
|
|
"loss": 2.8002498149871826,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.8882882882882883,
|
|
"grad_norm": 8.552225351097242,
|
|
"learning_rate": 8.876974033972855e-06,
|
|
"loss": 2.2080230712890625,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.8891891891891892,
|
|
"grad_norm": 12.181276388042427,
|
|
"learning_rate": 8.873662190254503e-06,
|
|
"loss": 3.1568124294281006,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.8900900900900901,
|
|
"grad_norm": 7.284003438251827,
|
|
"learning_rate": 8.870346090081954e-06,
|
|
"loss": 3.1568026542663574,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.890990990990991,
|
|
"grad_norm": 8.030153735651504,
|
|
"learning_rate": 8.867025737099003e-06,
|
|
"loss": 2.9712777137756348,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.8918918918918919,
|
|
"grad_norm": 13.56543096571292,
|
|
"learning_rate": 8.863701134954116e-06,
|
|
"loss": 2.6984872817993164,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.8927927927927928,
|
|
"grad_norm": 9.699493597923782,
|
|
"learning_rate": 8.860372287300432e-06,
|
|
"loss": 2.580648422241211,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.8936936936936937,
|
|
"grad_norm": 11.566024306750842,
|
|
"learning_rate": 8.857039197795751e-06,
|
|
"loss": 3.0338239669799805,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.8945945945945946,
|
|
"grad_norm": 15.284229479177787,
|
|
"learning_rate": 8.853701870102536e-06,
|
|
"loss": 2.9295122623443604,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.8954954954954955,
|
|
"grad_norm": 22.242804170201218,
|
|
"learning_rate": 8.850360307887906e-06,
|
|
"loss": 3.161966323852539,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.8963963963963963,
|
|
"grad_norm": 18.63466452079311,
|
|
"learning_rate": 8.847014514823635e-06,
|
|
"loss": 2.4487550258636475,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.8972972972972973,
|
|
"grad_norm": 7.759208826565421,
|
|
"learning_rate": 8.843664494586144e-06,
|
|
"loss": 2.9561946392059326,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.8981981981981982,
|
|
"grad_norm": 12.32861462876588,
|
|
"learning_rate": 8.840310250856498e-06,
|
|
"loss": 2.5366580486297607,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.8990990990990991,
|
|
"grad_norm": 12.50596301253408,
|
|
"learning_rate": 8.836951787320407e-06,
|
|
"loss": 2.808290481567383,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"grad_norm": 7.0421358171911095,
|
|
"learning_rate": 8.833589107668212e-06,
|
|
"loss": 2.9124622344970703,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.9009009009009009,
|
|
"grad_norm": 18.95726436434203,
|
|
"learning_rate": 8.83022221559489e-06,
|
|
"loss": 2.8594493865966797,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.9018018018018018,
|
|
"grad_norm": 14.360861662237461,
|
|
"learning_rate": 8.82685111480005e-06,
|
|
"loss": 3.033987283706665,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.9027027027027027,
|
|
"grad_norm": 12.725727994537058,
|
|
"learning_rate": 8.823475808987918e-06,
|
|
"loss": 2.891810894012451,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.9036036036036036,
|
|
"grad_norm": 13.572670302653405,
|
|
"learning_rate": 8.820096301867346e-06,
|
|
"loss": 2.401615619659424,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.9045045045045045,
|
|
"grad_norm": 8.41678694622558,
|
|
"learning_rate": 8.816712597151805e-06,
|
|
"loss": 2.6334965229034424,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.9054054054054054,
|
|
"grad_norm": 8.854776168215475,
|
|
"learning_rate": 8.813324698559367e-06,
|
|
"loss": 2.9537599086761475,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.9063063063063063,
|
|
"grad_norm": 11.12285578151776,
|
|
"learning_rate": 8.809932609812727e-06,
|
|
"loss": 2.487921714782715,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.9072072072072072,
|
|
"grad_norm": 17.024347913585913,
|
|
"learning_rate": 8.806536334639171e-06,
|
|
"loss": 3.0856282711029053,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.9081081081081082,
|
|
"grad_norm": 10.816465698621194,
|
|
"learning_rate": 8.803135876770596e-06,
|
|
"loss": 3.0241355895996094,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.909009009009009,
|
|
"grad_norm": 9.68705192478706,
|
|
"learning_rate": 8.799731239943488e-06,
|
|
"loss": 2.661813974380493,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.9099099099099099,
|
|
"grad_norm": 9.619989580445472,
|
|
"learning_rate": 8.796322427898928e-06,
|
|
"loss": 2.7854785919189453,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.9108108108108108,
|
|
"grad_norm": 10.915941939454688,
|
|
"learning_rate": 8.792909444382583e-06,
|
|
"loss": 2.263568878173828,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.9117117117117117,
|
|
"grad_norm": 10.503261281888763,
|
|
"learning_rate": 8.789492293144706e-06,
|
|
"loss": 2.716181993484497,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.9126126126126126,
|
|
"grad_norm": 9.257983129719602,
|
|
"learning_rate": 8.786070977940126e-06,
|
|
"loss": 2.7381417751312256,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.9135135135135135,
|
|
"grad_norm": 8.31630031526566,
|
|
"learning_rate": 8.782645502528252e-06,
|
|
"loss": 3.162320137023926,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.9144144144144144,
|
|
"grad_norm": 16.43338595527805,
|
|
"learning_rate": 8.77921587067306e-06,
|
|
"loss": 2.9138591289520264,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.9153153153153153,
|
|
"grad_norm": 14.235643594684289,
|
|
"learning_rate": 8.775782086143099e-06,
|
|
"loss": 2.0848324298858643,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.9162162162162162,
|
|
"grad_norm": 7.891020172544132,
|
|
"learning_rate": 8.772344152711471e-06,
|
|
"loss": 2.5524768829345703,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.9171171171171171,
|
|
"grad_norm": 16.24503078546184,
|
|
"learning_rate": 8.768902074155848e-06,
|
|
"loss": 2.9793877601623535,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.918018018018018,
|
|
"grad_norm": 8.785763725472759,
|
|
"learning_rate": 8.765455854258451e-06,
|
|
"loss": 2.7830615043640137,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.918918918918919,
|
|
"grad_norm": 11.653157311412453,
|
|
"learning_rate": 8.762005496806049e-06,
|
|
"loss": 3.0042476654052734,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.9198198198198199,
|
|
"grad_norm": 16.60196923530908,
|
|
"learning_rate": 8.758551005589967e-06,
|
|
"loss": 2.7907586097717285,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.9207207207207208,
|
|
"grad_norm": 12.108493842364739,
|
|
"learning_rate": 8.75509238440606e-06,
|
|
"loss": 2.2653555870056152,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.9216216216216216,
|
|
"grad_norm": 10.496920522396609,
|
|
"learning_rate": 8.751629637054732e-06,
|
|
"loss": 2.5076041221618652,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.9225225225225225,
|
|
"grad_norm": 11.443885242245704,
|
|
"learning_rate": 8.748162767340913e-06,
|
|
"loss": 2.913900375366211,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.9234234234234234,
|
|
"grad_norm": 22.94493743951495,
|
|
"learning_rate": 8.744691779074067e-06,
|
|
"loss": 3.189591407775879,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.9243243243243243,
|
|
"grad_norm": 15.007894138964957,
|
|
"learning_rate": 8.741216676068182e-06,
|
|
"loss": 3.1167805194854736,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.9252252252252252,
|
|
"grad_norm": 10.701832402692443,
|
|
"learning_rate": 8.73773746214177e-06,
|
|
"loss": 3.0631814002990723,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.9261261261261261,
|
|
"grad_norm": 22.839782379389632,
|
|
"learning_rate": 8.734254141117854e-06,
|
|
"loss": 2.710697889328003,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.927027027027027,
|
|
"grad_norm": 13.701631951913418,
|
|
"learning_rate": 8.730766716823974e-06,
|
|
"loss": 3.417142868041992,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.9279279279279279,
|
|
"grad_norm": 14.065120927128005,
|
|
"learning_rate": 8.727275193092182e-06,
|
|
"loss": 2.857334852218628,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.9288288288288288,
|
|
"grad_norm": 10.303680600969198,
|
|
"learning_rate": 8.723779573759028e-06,
|
|
"loss": 2.1903722286224365,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.9297297297297298,
|
|
"grad_norm": 10.694191816249965,
|
|
"learning_rate": 8.720279862665568e-06,
|
|
"loss": 2.833894729614258,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.9306306306306307,
|
|
"grad_norm": 10.646006782741036,
|
|
"learning_rate": 8.71677606365735e-06,
|
|
"loss": 2.849029541015625,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.9315315315315316,
|
|
"grad_norm": 7.103070319164133,
|
|
"learning_rate": 8.713268180584418e-06,
|
|
"loss": 2.9669673442840576,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.9324324324324325,
|
|
"grad_norm": 61.3378003575111,
|
|
"learning_rate": 8.709756217301297e-06,
|
|
"loss": 2.606581687927246,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.9333333333333333,
|
|
"grad_norm": 7.857691032867668,
|
|
"learning_rate": 8.706240177667003e-06,
|
|
"loss": 2.770477294921875,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.9342342342342342,
|
|
"grad_norm": 11.728458658229151,
|
|
"learning_rate": 8.702720065545024e-06,
|
|
"loss": 2.768040418624878,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.9351351351351351,
|
|
"grad_norm": 22.532803809334503,
|
|
"learning_rate": 8.69919588480333e-06,
|
|
"loss": 3.213409423828125,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.936036036036036,
|
|
"grad_norm": 13.299290384074183,
|
|
"learning_rate": 8.695667639314356e-06,
|
|
"loss": 2.9722280502319336,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.9369369369369369,
|
|
"grad_norm": 10.179605937395092,
|
|
"learning_rate": 8.692135332955008e-06,
|
|
"loss": 2.879065752029419,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.9378378378378378,
|
|
"grad_norm": 10.469521987091323,
|
|
"learning_rate": 8.68859896960665e-06,
|
|
"loss": 2.9380648136138916,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.9387387387387387,
|
|
"grad_norm": 19.18919294251035,
|
|
"learning_rate": 8.685058553155108e-06,
|
|
"loss": 2.389150619506836,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.9396396396396396,
|
|
"grad_norm": 9.85030798714381,
|
|
"learning_rate": 8.681514087490656e-06,
|
|
"loss": 2.6265854835510254,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.9405405405405406,
|
|
"grad_norm": 15.751685941479064,
|
|
"learning_rate": 8.677965576508023e-06,
|
|
"loss": 3.252495527267456,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.9414414414414415,
|
|
"grad_norm": 17.470155615456466,
|
|
"learning_rate": 8.67441302410638e-06,
|
|
"loss": 2.970489978790283,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.9423423423423424,
|
|
"grad_norm": 10.515437271818497,
|
|
"learning_rate": 8.670856434189341e-06,
|
|
"loss": 2.4407780170440674,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.9432432432432433,
|
|
"grad_norm": 14.247174716756376,
|
|
"learning_rate": 8.667295810664953e-06,
|
|
"loss": 2.4543590545654297,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.9441441441441442,
|
|
"grad_norm": 12.75533365867589,
|
|
"learning_rate": 8.663731157445701e-06,
|
|
"loss": 3.617422580718994,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.945045045045045,
|
|
"grad_norm": 16.220959393701147,
|
|
"learning_rate": 8.660162478448488e-06,
|
|
"loss": 1.659257173538208,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.9459459459459459,
|
|
"grad_norm": 13.986671940370561,
|
|
"learning_rate": 8.656589777594653e-06,
|
|
"loss": 2.4097299575805664,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.9468468468468468,
|
|
"grad_norm": 8.65526211507018,
|
|
"learning_rate": 8.653013058809945e-06,
|
|
"loss": 3.0357894897460938,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.9477477477477477,
|
|
"grad_norm": 9.449523178663169,
|
|
"learning_rate": 8.649432326024531e-06,
|
|
"loss": 2.2048110961914062,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.9486486486486486,
|
|
"grad_norm": 18.198976515034804,
|
|
"learning_rate": 8.64584758317299e-06,
|
|
"loss": 2.9834446907043457,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.9495495495495495,
|
|
"grad_norm": 14.176661243192957,
|
|
"learning_rate": 8.642258834194307e-06,
|
|
"loss": 2.716215133666992,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.9504504504504504,
|
|
"grad_norm": 16.24849823374468,
|
|
"learning_rate": 8.638666083031864e-06,
|
|
"loss": 2.4051785469055176,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.9513513513513514,
|
|
"grad_norm": 10.323688529598419,
|
|
"learning_rate": 8.635069333633449e-06,
|
|
"loss": 2.5208535194396973,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.9522522522522523,
|
|
"grad_norm": 11.942409294214032,
|
|
"learning_rate": 8.631468589951236e-06,
|
|
"loss": 2.975149393081665,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.9531531531531532,
|
|
"grad_norm": 24.540734605676096,
|
|
"learning_rate": 8.627863855941794e-06,
|
|
"loss": 2.916090965270996,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.9540540540540541,
|
|
"grad_norm": 8.728220424100492,
|
|
"learning_rate": 8.624255135566071e-06,
|
|
"loss": 2.4554600715637207,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.954954954954955,
|
|
"grad_norm": 13.06212302080104,
|
|
"learning_rate": 8.6206424327894e-06,
|
|
"loss": 2.824169158935547,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.9558558558558559,
|
|
"grad_norm": 20.581661868346774,
|
|
"learning_rate": 8.61702575158149e-06,
|
|
"loss": 3.1394472122192383,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.9567567567567568,
|
|
"grad_norm": 13.824699726387482,
|
|
"learning_rate": 8.613405095916415e-06,
|
|
"loss": 2.6784205436706543,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.9576576576576576,
|
|
"grad_norm": 7.226778164286236,
|
|
"learning_rate": 8.609780469772623e-06,
|
|
"loss": 2.159748077392578,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.9585585585585585,
|
|
"grad_norm": 17.74386991419334,
|
|
"learning_rate": 8.606151877132922e-06,
|
|
"loss": 2.42501163482666,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.9594594594594594,
|
|
"grad_norm": 11.786811385744368,
|
|
"learning_rate": 8.60251932198448e-06,
|
|
"loss": 2.690016508102417,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.9603603603603603,
|
|
"grad_norm": 12.530584920708565,
|
|
"learning_rate": 8.598882808318818e-06,
|
|
"loss": 2.0007379055023193,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.9612612612612612,
|
|
"grad_norm": 12.032977450254815,
|
|
"learning_rate": 8.595242340131806e-06,
|
|
"loss": 2.8771276473999023,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.9621621621621622,
|
|
"grad_norm": 12.990670627995613,
|
|
"learning_rate": 8.591597921423661e-06,
|
|
"loss": 2.881491184234619,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.9630630630630631,
|
|
"grad_norm": 8.478639408597052,
|
|
"learning_rate": 8.58794955619894e-06,
|
|
"loss": 2.4792816638946533,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.963963963963964,
|
|
"grad_norm": 10.808242878186473,
|
|
"learning_rate": 8.584297248466536e-06,
|
|
"loss": 2.679600715637207,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.9648648648648649,
|
|
"grad_norm": 10.666407154007661,
|
|
"learning_rate": 8.580641002239676e-06,
|
|
"loss": 2.258665084838867,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.9657657657657658,
|
|
"grad_norm": 13.089016586138516,
|
|
"learning_rate": 8.57698082153591e-06,
|
|
"loss": 2.685615062713623,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.9666666666666667,
|
|
"grad_norm": 11.897872127993535,
|
|
"learning_rate": 8.573316710377119e-06,
|
|
"loss": 2.5039196014404297,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.9675675675675676,
|
|
"grad_norm": 7.790603844140429,
|
|
"learning_rate": 8.569648672789496e-06,
|
|
"loss": 2.5071516036987305,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.9684684684684685,
|
|
"grad_norm": 8.517147853409444,
|
|
"learning_rate": 8.565976712803551e-06,
|
|
"loss": 2.6650633811950684,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.9693693693693693,
|
|
"grad_norm": 14.218988165724692,
|
|
"learning_rate": 8.562300834454106e-06,
|
|
"loss": 2.593008279800415,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.9702702702702702,
|
|
"grad_norm": 13.720113145665367,
|
|
"learning_rate": 8.558621041780283e-06,
|
|
"loss": 2.5849051475524902,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.9711711711711711,
|
|
"grad_norm": 34.20313502324281,
|
|
"learning_rate": 8.554937338825511e-06,
|
|
"loss": 2.3197808265686035,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.972072072072072,
|
|
"grad_norm": 10.755292958039405,
|
|
"learning_rate": 8.551249729637514e-06,
|
|
"loss": 2.836639165878296,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.972972972972973,
|
|
"grad_norm": 9.542291164874905,
|
|
"learning_rate": 8.547558218268308e-06,
|
|
"loss": 2.4726526737213135,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.9738738738738739,
|
|
"grad_norm": 9.101503649495474,
|
|
"learning_rate": 8.543862808774193e-06,
|
|
"loss": 2.8820559978485107,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.9747747747747748,
|
|
"grad_norm": 27.756006165054163,
|
|
"learning_rate": 8.540163505215758e-06,
|
|
"loss": 2.6131694316864014,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.9756756756756757,
|
|
"grad_norm": 11.900513829471146,
|
|
"learning_rate": 8.536460311657868e-06,
|
|
"loss": 3.004784107208252,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.9765765765765766,
|
|
"grad_norm": 12.282451059049643,
|
|
"learning_rate": 8.532753232169663e-06,
|
|
"loss": 3.2131333351135254,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.9774774774774775,
|
|
"grad_norm": 6.207133910941661,
|
|
"learning_rate": 8.529042270824552e-06,
|
|
"loss": 2.8020894527435303,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.9783783783783784,
|
|
"grad_norm": 8.98261289615078,
|
|
"learning_rate": 8.525327431700215e-06,
|
|
"loss": 2.8358824253082275,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.9792792792792793,
|
|
"grad_norm": 5.879296428796706,
|
|
"learning_rate": 8.521608718878582e-06,
|
|
"loss": 2.6441617012023926,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.9801801801801802,
|
|
"grad_norm": 14.704560095815832,
|
|
"learning_rate": 8.517886136445851e-06,
|
|
"loss": 3.0834951400756836,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.981081081081081,
|
|
"grad_norm": 8.908562992873398,
|
|
"learning_rate": 8.514159688492464e-06,
|
|
"loss": 2.323145866394043,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.9819819819819819,
|
|
"grad_norm": 9.355692462449275,
|
|
"learning_rate": 8.510429379113114e-06,
|
|
"loss": 2.854309558868408,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.9828828828828828,
|
|
"grad_norm": 14.242696861776297,
|
|
"learning_rate": 8.506695212406734e-06,
|
|
"loss": 2.8959994316101074,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.9837837837837838,
|
|
"grad_norm": 9.655187837242087,
|
|
"learning_rate": 8.502957192476505e-06,
|
|
"loss": 2.3906760215759277,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.9846846846846847,
|
|
"grad_norm": 16.686220459430942,
|
|
"learning_rate": 8.499215323429828e-06,
|
|
"loss": 3.729126453399658,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.9855855855855856,
|
|
"grad_norm": 10.688427431513956,
|
|
"learning_rate": 8.495469609378342e-06,
|
|
"loss": 2.0590858459472656,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.9864864864864865,
|
|
"grad_norm": 12.418419957345947,
|
|
"learning_rate": 8.491720054437911e-06,
|
|
"loss": 2.6640067100524902,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.9873873873873874,
|
|
"grad_norm": 11.15755443159756,
|
|
"learning_rate": 8.487966662728615e-06,
|
|
"loss": 2.7714591026306152,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.9882882882882883,
|
|
"grad_norm": 8.003587800163373,
|
|
"learning_rate": 8.484209438374755e-06,
|
|
"loss": 3.144829511642456,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.9891891891891892,
|
|
"grad_norm": 11.562203253058653,
|
|
"learning_rate": 8.480448385504842e-06,
|
|
"loss": 2.6076714992523193,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.9900900900900901,
|
|
"grad_norm": 12.795449782864218,
|
|
"learning_rate": 8.476683508251591e-06,
|
|
"loss": 2.9385275840759277,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.990990990990991,
|
|
"grad_norm": 8.34869623402233,
|
|
"learning_rate": 8.47291481075192e-06,
|
|
"loss": 2.556445598602295,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.9918918918918919,
|
|
"grad_norm": 41.06221827535907,
|
|
"learning_rate": 8.469142297146949e-06,
|
|
"loss": 3.650862216949463,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.9927927927927928,
|
|
"grad_norm": 8.429716966478185,
|
|
"learning_rate": 8.465365971581988e-06,
|
|
"loss": 2.6668899059295654,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.9936936936936936,
|
|
"grad_norm": 8.58753720175306,
|
|
"learning_rate": 8.461585838206531e-06,
|
|
"loss": 2.888392925262451,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.9945945945945946,
|
|
"grad_norm": 10.625126330975723,
|
|
"learning_rate": 8.457801901174267e-06,
|
|
"loss": 2.9486002922058105,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.9954954954954955,
|
|
"grad_norm": 9.686365195239674,
|
|
"learning_rate": 8.454014164643056e-06,
|
|
"loss": 2.6803948879241943,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.9963963963963964,
|
|
"grad_norm": 20.539892083201337,
|
|
"learning_rate": 8.450222632774934e-06,
|
|
"loss": 2.840132236480713,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.9972972972972973,
|
|
"grad_norm": 10.384738292731727,
|
|
"learning_rate": 8.446427309736111e-06,
|
|
"loss": 2.4560065269470215,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.9981981981981982,
|
|
"grad_norm": 11.742221200846808,
|
|
"learning_rate": 8.442628199696961e-06,
|
|
"loss": 2.985318660736084,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.9990990990990991,
|
|
"grad_norm": 11.539013810526916,
|
|
"learning_rate": 8.438825306832016e-06,
|
|
"loss": 2.8573508262634277,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 9.764784001874446,
|
|
"learning_rate": 8.435018635319971e-06,
|
|
"loss": 2.8428144454956055,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 1.000900900900901,
|
|
"grad_norm": 11.020427019859978,
|
|
"learning_rate": 8.43120818934367e-06,
|
|
"loss": 1.6871743202209473,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 1.0018018018018018,
|
|
"grad_norm": 16.195969495168665,
|
|
"learning_rate": 8.427393973090099e-06,
|
|
"loss": 1.8083250522613525,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 1.0027027027027027,
|
|
"grad_norm": 10.095894005649189,
|
|
"learning_rate": 8.423575990750395e-06,
|
|
"loss": 1.6103458404541016,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 1.0036036036036036,
|
|
"grad_norm": 21.321239667766413,
|
|
"learning_rate": 8.41975424651983e-06,
|
|
"loss": 1.87693452835083,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 1.0045045045045045,
|
|
"grad_norm": 13.705097911530245,
|
|
"learning_rate": 8.415928744597809e-06,
|
|
"loss": 2.0128674507141113,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 1.0054054054054054,
|
|
"grad_norm": 11.458298437524217,
|
|
"learning_rate": 8.412099489187869e-06,
|
|
"loss": 1.6997014284133911,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 1.0063063063063062,
|
|
"grad_norm": 8.195963817611196,
|
|
"learning_rate": 8.408266484497664e-06,
|
|
"loss": 1.5195136070251465,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 1.0072072072072071,
|
|
"grad_norm": 14.923918708563708,
|
|
"learning_rate": 8.40442973473898e-06,
|
|
"loss": 1.493704915046692,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 1.008108108108108,
|
|
"grad_norm": 11.105529141527436,
|
|
"learning_rate": 8.400589244127706e-06,
|
|
"loss": 1.435099720954895,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 1.009009009009009,
|
|
"grad_norm": 14.94678429310199,
|
|
"learning_rate": 8.396745016883849e-06,
|
|
"loss": 1.0782595872879028,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 1.0099099099099098,
|
|
"grad_norm": 18.55316224056508,
|
|
"learning_rate": 8.39289705723152e-06,
|
|
"loss": 1.224260687828064,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 1.0108108108108107,
|
|
"grad_norm": 13.383219077438845,
|
|
"learning_rate": 8.389045369398927e-06,
|
|
"loss": 2.4057981967926025,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 1.0117117117117118,
|
|
"grad_norm": 13.853889947376945,
|
|
"learning_rate": 8.385189957618383e-06,
|
|
"loss": 1.8272724151611328,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 1.0126126126126127,
|
|
"grad_norm": 14.417095255534402,
|
|
"learning_rate": 8.381330826126284e-06,
|
|
"loss": 1.3936996459960938,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 1.0135135135135136,
|
|
"grad_norm": 9.362334521585238,
|
|
"learning_rate": 8.377467979163121e-06,
|
|
"loss": 1.6082737445831299,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 1.0144144144144145,
|
|
"grad_norm": 19.61330240989115,
|
|
"learning_rate": 8.373601420973464e-06,
|
|
"loss": 1.5716181993484497,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 1.0153153153153154,
|
|
"grad_norm": 16.02149253836458,
|
|
"learning_rate": 8.36973115580596e-06,
|
|
"loss": 2.0253872871398926,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 1.0162162162162163,
|
|
"grad_norm": 8.868636627253291,
|
|
"learning_rate": 8.365857187913329e-06,
|
|
"loss": 1.5251405239105225,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 1.0171171171171172,
|
|
"grad_norm": 9.224258602542248,
|
|
"learning_rate": 8.361979521552363e-06,
|
|
"loss": 1.3686174154281616,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 1.018018018018018,
|
|
"grad_norm": 9.932368168789786,
|
|
"learning_rate": 8.358098160983916e-06,
|
|
"loss": 1.1146045923233032,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 1.018918918918919,
|
|
"grad_norm": 13.307457528809199,
|
|
"learning_rate": 8.354213110472903e-06,
|
|
"loss": 2.0996854305267334,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 1.0198198198198198,
|
|
"grad_norm": 14.850252436116673,
|
|
"learning_rate": 8.350324374288289e-06,
|
|
"loss": 2.0421838760375977,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 1.0207207207207207,
|
|
"grad_norm": 10.125209745260454,
|
|
"learning_rate": 8.34643195670309e-06,
|
|
"loss": 1.5646802186965942,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 1.0216216216216216,
|
|
"grad_norm": 11.54911553947335,
|
|
"learning_rate": 8.342535861994374e-06,
|
|
"loss": 1.8787198066711426,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 1.0225225225225225,
|
|
"grad_norm": 18.523592449785106,
|
|
"learning_rate": 8.338636094443242e-06,
|
|
"loss": 1.1811740398406982,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 1.0234234234234234,
|
|
"grad_norm": 15.057537445556772,
|
|
"learning_rate": 8.334732658334834e-06,
|
|
"loss": 2.080415725708008,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 1.0243243243243243,
|
|
"grad_norm": 10.837715905790452,
|
|
"learning_rate": 8.33082555795832e-06,
|
|
"loss": 0.9543228149414062,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 1.0252252252252252,
|
|
"grad_norm": 9.028361152261766,
|
|
"learning_rate": 8.326914797606897e-06,
|
|
"loss": 1.548874855041504,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 1.026126126126126,
|
|
"grad_norm": 11.198019677870143,
|
|
"learning_rate": 8.323000381577783e-06,
|
|
"loss": 2.275304079055786,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 1.027027027027027,
|
|
"grad_norm": 10.149551619289737,
|
|
"learning_rate": 8.319082314172213e-06,
|
|
"loss": 1.595092535018921,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 1.0279279279279279,
|
|
"grad_norm": 7.274664038664192,
|
|
"learning_rate": 8.315160599695434e-06,
|
|
"loss": 1.22855544090271,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 1.0288288288288288,
|
|
"grad_norm": 8.944842389439918,
|
|
"learning_rate": 8.311235242456703e-06,
|
|
"loss": 1.3381730318069458,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 1.0297297297297296,
|
|
"grad_norm": 13.677904292942957,
|
|
"learning_rate": 8.307306246769275e-06,
|
|
"loss": 1.7892537117004395,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 1.0306306306306305,
|
|
"grad_norm": 16.091411976470066,
|
|
"learning_rate": 8.303373616950408e-06,
|
|
"loss": 2.1872668266296387,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 1.0315315315315314,
|
|
"grad_norm": 18.49183870526625,
|
|
"learning_rate": 8.299437357321349e-06,
|
|
"loss": 1.2070658206939697,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 1.0324324324324325,
|
|
"grad_norm": 14.845403652544013,
|
|
"learning_rate": 8.295497472207338e-06,
|
|
"loss": 2.032404661178589,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 1.0333333333333334,
|
|
"grad_norm": 14.313150837493948,
|
|
"learning_rate": 8.291553965937596e-06,
|
|
"loss": 1.3609435558319092,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 1.0342342342342343,
|
|
"grad_norm": 11.387225351747333,
|
|
"learning_rate": 8.28760684284532e-06,
|
|
"loss": 1.6761469841003418,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 1.0351351351351352,
|
|
"grad_norm": 13.154008351902426,
|
|
"learning_rate": 8.283656107267686e-06,
|
|
"loss": 1.7343151569366455,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 1.0360360360360361,
|
|
"grad_norm": 9.847030852367354,
|
|
"learning_rate": 8.279701763545838e-06,
|
|
"loss": 2.2546045780181885,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 1.036936936936937,
|
|
"grad_norm": 8.091196662552067,
|
|
"learning_rate": 8.275743816024886e-06,
|
|
"loss": 1.8532781600952148,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 1.037837837837838,
|
|
"grad_norm": 8.382867243646281,
|
|
"learning_rate": 8.271782269053899e-06,
|
|
"loss": 1.4469783306121826,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 1.0387387387387388,
|
|
"grad_norm": 11.461998428693212,
|
|
"learning_rate": 8.267817126985898e-06,
|
|
"loss": 1.674446940422058,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 1.0396396396396397,
|
|
"grad_norm": 10.267039583398452,
|
|
"learning_rate": 8.263848394177856e-06,
|
|
"loss": 1.6920478343963623,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 1.0405405405405406,
|
|
"grad_norm": 16.231667928814414,
|
|
"learning_rate": 8.259876074990698e-06,
|
|
"loss": 1.8150936365127563,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 1.0414414414414415,
|
|
"grad_norm": 13.39039416726503,
|
|
"learning_rate": 8.25590017378928e-06,
|
|
"loss": 1.7284342050552368,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 1.0423423423423424,
|
|
"grad_norm": 16.86825951513754,
|
|
"learning_rate": 8.251920694942399e-06,
|
|
"loss": 1.5762327909469604,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 1.0432432432432432,
|
|
"grad_norm": 8.302781692845457,
|
|
"learning_rate": 8.247937642822783e-06,
|
|
"loss": 1.9574871063232422,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 1.0441441441441441,
|
|
"grad_norm": 14.050467961204983,
|
|
"learning_rate": 8.243951021807085e-06,
|
|
"loss": 1.2647265195846558,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 1.045045045045045,
|
|
"grad_norm": 17.840809821088865,
|
|
"learning_rate": 8.239960836275886e-06,
|
|
"loss": 1.3962913751602173,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 1.045945945945946,
|
|
"grad_norm": 10.101731759748537,
|
|
"learning_rate": 8.23596709061367e-06,
|
|
"loss": 1.5938246250152588,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 1.0468468468468468,
|
|
"grad_norm": 10.759880870958964,
|
|
"learning_rate": 8.231969789208848e-06,
|
|
"loss": 1.6824748516082764,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 1.0477477477477477,
|
|
"grad_norm": 9.610035985087961,
|
|
"learning_rate": 8.227968936453725e-06,
|
|
"loss": 1.4494415521621704,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 1.0486486486486486,
|
|
"grad_norm": 10.436591920309185,
|
|
"learning_rate": 8.22396453674452e-06,
|
|
"loss": 1.639620065689087,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 1.0495495495495495,
|
|
"grad_norm": 10.421869292017382,
|
|
"learning_rate": 8.219956594481342e-06,
|
|
"loss": 1.3855957984924316,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 1.0504504504504504,
|
|
"grad_norm": 13.09735512969942,
|
|
"learning_rate": 8.215945114068196e-06,
|
|
"loss": 1.7121726274490356,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 1.0513513513513513,
|
|
"grad_norm": 10.621685505079233,
|
|
"learning_rate": 8.21193009991297e-06,
|
|
"loss": 1.5693837404251099,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 1.0522522522522522,
|
|
"grad_norm": 8.661909260403378,
|
|
"learning_rate": 8.207911556427442e-06,
|
|
"loss": 1.9569733142852783,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 1.053153153153153,
|
|
"grad_norm": 10.961758764007032,
|
|
"learning_rate": 8.203889488027263e-06,
|
|
"loss": 2.3044838905334473,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 1.054054054054054,
|
|
"grad_norm": 10.705919878277495,
|
|
"learning_rate": 8.19986389913196e-06,
|
|
"loss": 1.9391883611679077,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 1.054954954954955,
|
|
"grad_norm": 17.11264626842883,
|
|
"learning_rate": 8.195834794164925e-06,
|
|
"loss": 2.016961097717285,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 1.055855855855856,
|
|
"grad_norm": 9.28350029324163,
|
|
"learning_rate": 8.191802177553419e-06,
|
|
"loss": 1.9359326362609863,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 1.0567567567567568,
|
|
"grad_norm": 29.005868757379027,
|
|
"learning_rate": 8.187766053728554e-06,
|
|
"loss": 2.1905364990234375,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 1.0576576576576577,
|
|
"grad_norm": 17.452604277084465,
|
|
"learning_rate": 8.183726427125302e-06,
|
|
"loss": 1.7908490896224976,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 1.0585585585585586,
|
|
"grad_norm": 8.187973593415926,
|
|
"learning_rate": 8.179683302182486e-06,
|
|
"loss": 1.3162143230438232,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 1.0594594594594595,
|
|
"grad_norm": 14.952205890351422,
|
|
"learning_rate": 8.175636683342763e-06,
|
|
"loss": 1.6705520153045654,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 1.0603603603603604,
|
|
"grad_norm": 12.44885358568132,
|
|
"learning_rate": 8.17158657505264e-06,
|
|
"loss": 1.8957538604736328,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 1.0612612612612613,
|
|
"grad_norm": 9.069270431390361,
|
|
"learning_rate": 8.16753298176245e-06,
|
|
"loss": 0.9111911654472351,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 1.0621621621621622,
|
|
"grad_norm": 10.191977733913982,
|
|
"learning_rate": 8.16347590792636e-06,
|
|
"loss": 1.4045283794403076,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 1.063063063063063,
|
|
"grad_norm": 13.00032871093983,
|
|
"learning_rate": 8.159415358002361e-06,
|
|
"loss": 1.5373706817626953,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 1.063963963963964,
|
|
"grad_norm": 14.124441412264169,
|
|
"learning_rate": 8.155351336452263e-06,
|
|
"loss": 1.0758987665176392,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 1.0648648648648649,
|
|
"grad_norm": 12.441316798050691,
|
|
"learning_rate": 8.151283847741691e-06,
|
|
"loss": 1.4989533424377441,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 1.0657657657657658,
|
|
"grad_norm": 10.51543842939082,
|
|
"learning_rate": 8.14721289634008e-06,
|
|
"loss": 1.4637765884399414,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 1.0666666666666667,
|
|
"grad_norm": 13.602448520620147,
|
|
"learning_rate": 8.143138486720667e-06,
|
|
"loss": 1.3450312614440918,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 1.0675675675675675,
|
|
"grad_norm": 8.280529775846368,
|
|
"learning_rate": 8.139060623360494e-06,
|
|
"loss": 1.489940881729126,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 1.0684684684684684,
|
|
"grad_norm": 10.46426651432547,
|
|
"learning_rate": 8.134979310740395e-06,
|
|
"loss": 1.5905389785766602,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 1.0693693693693693,
|
|
"grad_norm": 13.358266883594808,
|
|
"learning_rate": 8.13089455334499e-06,
|
|
"loss": 1.2072575092315674,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 1.0702702702702702,
|
|
"grad_norm": 16.330825739721572,
|
|
"learning_rate": 8.126806355662693e-06,
|
|
"loss": 1.7562267780303955,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 1.071171171171171,
|
|
"grad_norm": 12.195167613218787,
|
|
"learning_rate": 8.122714722185696e-06,
|
|
"loss": 1.0812071561813354,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 1.072072072072072,
|
|
"grad_norm": 15.670715731924602,
|
|
"learning_rate": 8.118619657409959e-06,
|
|
"loss": 1.0835292339324951,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 1.072972972972973,
|
|
"grad_norm": 9.573100215659544,
|
|
"learning_rate": 8.114521165835221e-06,
|
|
"loss": 1.1943262815475464,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 1.0738738738738738,
|
|
"grad_norm": 7.5920072132170375,
|
|
"learning_rate": 8.11041925196498e-06,
|
|
"loss": 1.1824040412902832,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 1.0747747747747747,
|
|
"grad_norm": 14.720889174028226,
|
|
"learning_rate": 8.106313920306503e-06,
|
|
"loss": 1.6713793277740479,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 1.0756756756756758,
|
|
"grad_norm": 14.203867713126108,
|
|
"learning_rate": 8.102205175370801e-06,
|
|
"loss": 2.0247316360473633,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 1.0765765765765765,
|
|
"grad_norm": 8.406718147766572,
|
|
"learning_rate": 8.098093021672645e-06,
|
|
"loss": 1.42876398563385,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 1.0774774774774776,
|
|
"grad_norm": 9.914578355617206,
|
|
"learning_rate": 8.093977463730546e-06,
|
|
"loss": 1.242950201034546,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 1.0783783783783785,
|
|
"grad_norm": 12.82859235072137,
|
|
"learning_rate": 8.089858506066762e-06,
|
|
"loss": 2.044468402862549,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 1.0792792792792794,
|
|
"grad_norm": 15.21115282821868,
|
|
"learning_rate": 8.085736153207277e-06,
|
|
"loss": 1.8516826629638672,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 1.0801801801801802,
|
|
"grad_norm": 10.97731431398425,
|
|
"learning_rate": 8.081610409681815e-06,
|
|
"loss": 1.2195425033569336,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 1.0810810810810811,
|
|
"grad_norm": 10.051010569598152,
|
|
"learning_rate": 8.077481280023822e-06,
|
|
"loss": 1.627333402633667,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 1.081981981981982,
|
|
"grad_norm": 12.896898338302256,
|
|
"learning_rate": 8.073348768770463e-06,
|
|
"loss": 1.999898910522461,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 1.082882882882883,
|
|
"grad_norm": 12.658839336316985,
|
|
"learning_rate": 8.06921288046262e-06,
|
|
"loss": 1.5643589496612549,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 1.0837837837837838,
|
|
"grad_norm": 10.956247301531295,
|
|
"learning_rate": 8.06507361964489e-06,
|
|
"loss": 1.6658892631530762,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 1.0846846846846847,
|
|
"grad_norm": 12.298519860263355,
|
|
"learning_rate": 8.060930990865569e-06,
|
|
"loss": 1.4011785984039307,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 1.0855855855855856,
|
|
"grad_norm": 9.72709077172854,
|
|
"learning_rate": 8.056784998676656e-06,
|
|
"loss": 2.0497689247131348,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 1.0864864864864865,
|
|
"grad_norm": 7.466528196092187,
|
|
"learning_rate": 8.05263564763385e-06,
|
|
"loss": 2.2291030883789062,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 1.0873873873873874,
|
|
"grad_norm": 12.515884561780549,
|
|
"learning_rate": 8.048482942296535e-06,
|
|
"loss": 1.490678071975708,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 1.0882882882882883,
|
|
"grad_norm": 10.468667146864716,
|
|
"learning_rate": 8.044326887227784e-06,
|
|
"loss": 1.6769261360168457,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 1.0891891891891892,
|
|
"grad_norm": 14.319507357911029,
|
|
"learning_rate": 8.040167486994349e-06,
|
|
"loss": 1.0405046939849854,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 1.09009009009009,
|
|
"grad_norm": 7.171034569727635,
|
|
"learning_rate": 8.03600474616666e-06,
|
|
"loss": 1.6920535564422607,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 1.090990990990991,
|
|
"grad_norm": 9.878017463906483,
|
|
"learning_rate": 8.031838669318815e-06,
|
|
"loss": 1.6081115007400513,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 1.0918918918918918,
|
|
"grad_norm": 14.076104282243213,
|
|
"learning_rate": 8.02766926102858e-06,
|
|
"loss": 1.0227457284927368,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 1.0927927927927927,
|
|
"grad_norm": 9.853681408844322,
|
|
"learning_rate": 8.023496525877377e-06,
|
|
"loss": 1.634497880935669,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 1.0936936936936936,
|
|
"grad_norm": 15.624806973123826,
|
|
"learning_rate": 8.019320468450293e-06,
|
|
"loss": 2.02766752243042,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 1.0945945945945945,
|
|
"grad_norm": 11.477751200975193,
|
|
"learning_rate": 8.015141093336059e-06,
|
|
"loss": 1.8287404775619507,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 1.0954954954954954,
|
|
"grad_norm": 57.919064943020636,
|
|
"learning_rate": 8.010958405127048e-06,
|
|
"loss": 2.305783271789551,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 1.0963963963963963,
|
|
"grad_norm": 11.707293625051278,
|
|
"learning_rate": 8.006772408419281e-06,
|
|
"loss": 1.1302621364593506,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 1.0972972972972972,
|
|
"grad_norm": 9.560840226906693,
|
|
"learning_rate": 8.002583107812414e-06,
|
|
"loss": 1.7443042993545532,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 1.0981981981981983,
|
|
"grad_norm": 8.637141874105088,
|
|
"learning_rate": 7.998390507909724e-06,
|
|
"loss": 1.2046616077423096,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 1.0990990990990992,
|
|
"grad_norm": 13.253483776227233,
|
|
"learning_rate": 7.994194613318126e-06,
|
|
"loss": 1.3849682807922363,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"grad_norm": 12.414811146075584,
|
|
"learning_rate": 7.989995428648148e-06,
|
|
"loss": 1.4627125263214111,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 1.100900900900901,
|
|
"grad_norm": 11.292520077449975,
|
|
"learning_rate": 7.985792958513932e-06,
|
|
"loss": 2.078278064727783,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 1.1018018018018019,
|
|
"grad_norm": 9.253922726492476,
|
|
"learning_rate": 7.981587207533234e-06,
|
|
"loss": 1.5167481899261475,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 1.1027027027027028,
|
|
"grad_norm": 21.46976466629096,
|
|
"learning_rate": 7.977378180327415e-06,
|
|
"loss": 2.4628076553344727,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 1.1036036036036037,
|
|
"grad_norm": 9.756905499452317,
|
|
"learning_rate": 7.973165881521435e-06,
|
|
"loss": 1.5625048875808716,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 1.1045045045045045,
|
|
"grad_norm": 12.77253396194263,
|
|
"learning_rate": 7.968950315743845e-06,
|
|
"loss": 1.2804774045944214,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 1.1054054054054054,
|
|
"grad_norm": 13.17580429811146,
|
|
"learning_rate": 7.964731487626793e-06,
|
|
"loss": 1.7434399127960205,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 1.1063063063063063,
|
|
"grad_norm": 8.8860333522632,
|
|
"learning_rate": 7.960509401806007e-06,
|
|
"loss": 1.3496845960617065,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 1.1072072072072072,
|
|
"grad_norm": 9.689757737587119,
|
|
"learning_rate": 7.956284062920795e-06,
|
|
"loss": 1.2241935729980469,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 1.1081081081081081,
|
|
"grad_norm": 9.189552358817172,
|
|
"learning_rate": 7.952055475614041e-06,
|
|
"loss": 1.6663897037506104,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 1.109009009009009,
|
|
"grad_norm": 8.79050806300742,
|
|
"learning_rate": 7.947823644532198e-06,
|
|
"loss": 1.8955342769622803,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 1.10990990990991,
|
|
"grad_norm": 11.433091564916111,
|
|
"learning_rate": 7.943588574325283e-06,
|
|
"loss": 2.5440666675567627,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 1.1108108108108108,
|
|
"grad_norm": 9.45792054858781,
|
|
"learning_rate": 7.939350269646871e-06,
|
|
"loss": 1.8544093370437622,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 1.1117117117117117,
|
|
"grad_norm": 15.295838121016162,
|
|
"learning_rate": 7.935108735154093e-06,
|
|
"loss": 1.7091851234436035,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 1.1126126126126126,
|
|
"grad_norm": 9.185738743837135,
|
|
"learning_rate": 7.93086397550763e-06,
|
|
"loss": 1.4154603481292725,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 1.1135135135135135,
|
|
"grad_norm": 19.610794074775608,
|
|
"learning_rate": 7.926615995371704e-06,
|
|
"loss": 0.9731454849243164,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 1.1144144144144144,
|
|
"grad_norm": 35.18486509243029,
|
|
"learning_rate": 7.922364799414075e-06,
|
|
"loss": 1.8865573406219482,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 1.1153153153153152,
|
|
"grad_norm": 10.786773415690652,
|
|
"learning_rate": 7.918110392306042e-06,
|
|
"loss": 1.138608694076538,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 1.1162162162162161,
|
|
"grad_norm": 8.28684544934627,
|
|
"learning_rate": 7.913852778722426e-06,
|
|
"loss": 1.8452454805374146,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 1.117117117117117,
|
|
"grad_norm": 7.506354201537919,
|
|
"learning_rate": 7.909591963341576e-06,
|
|
"loss": 1.302584171295166,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 1.118018018018018,
|
|
"grad_norm": 13.755149715721302,
|
|
"learning_rate": 7.905327950845357e-06,
|
|
"loss": 1.8079793453216553,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 1.118918918918919,
|
|
"grad_norm": 13.330989665217649,
|
|
"learning_rate": 7.901060745919148e-06,
|
|
"loss": 1.2016383409500122,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 1.1198198198198197,
|
|
"grad_norm": 9.829354825068858,
|
|
"learning_rate": 7.896790353251836e-06,
|
|
"loss": 1.247478723526001,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 1.1207207207207208,
|
|
"grad_norm": 16.22052628865822,
|
|
"learning_rate": 7.892516777535808e-06,
|
|
"loss": 2.252026081085205,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 1.1216216216216217,
|
|
"grad_norm": 10.917816174993641,
|
|
"learning_rate": 7.888240023466952e-06,
|
|
"loss": 1.7726813554763794,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 1.1225225225225226,
|
|
"grad_norm": 9.743573351476616,
|
|
"learning_rate": 7.883960095744649e-06,
|
|
"loss": 1.7089396715164185,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 1.1234234234234235,
|
|
"grad_norm": 10.385925486216097,
|
|
"learning_rate": 7.879676999071764e-06,
|
|
"loss": 1.327872633934021,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 1.1243243243243244,
|
|
"grad_norm": 14.640452121020735,
|
|
"learning_rate": 7.875390738154645e-06,
|
|
"loss": 1.3367271423339844,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 1.1252252252252253,
|
|
"grad_norm": 14.336590579499099,
|
|
"learning_rate": 7.871101317703118e-06,
|
|
"loss": 1.280564546585083,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 1.1261261261261262,
|
|
"grad_norm": 10.789650982585174,
|
|
"learning_rate": 7.866808742430481e-06,
|
|
"loss": 1.9063369035720825,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 1.127027027027027,
|
|
"grad_norm": 15.38404039592624,
|
|
"learning_rate": 7.8625130170535e-06,
|
|
"loss": 1.8157051801681519,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 1.127927927927928,
|
|
"grad_norm": 15.262532261695428,
|
|
"learning_rate": 7.858214146292394e-06,
|
|
"loss": 1.309761643409729,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 1.1288288288288288,
|
|
"grad_norm": 17.302922232288402,
|
|
"learning_rate": 7.853912134870851e-06,
|
|
"loss": 1.5196702480316162,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 1.1297297297297297,
|
|
"grad_norm": 14.852904225740026,
|
|
"learning_rate": 7.849606987516e-06,
|
|
"loss": 1.4707289934158325,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 1.1306306306306306,
|
|
"grad_norm": 20.72461967356017,
|
|
"learning_rate": 7.84529870895842e-06,
|
|
"loss": 1.5276254415512085,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 1.1315315315315315,
|
|
"grad_norm": 13.611472477748729,
|
|
"learning_rate": 7.840987303932131e-06,
|
|
"loss": 2.18794584274292,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 1.1324324324324324,
|
|
"grad_norm": 8.085616722923268,
|
|
"learning_rate": 7.836672777174585e-06,
|
|
"loss": 1.352491855621338,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 1.1333333333333333,
|
|
"grad_norm": 16.132324263932222,
|
|
"learning_rate": 7.83235513342667e-06,
|
|
"loss": 1.7365800142288208,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 1.1342342342342342,
|
|
"grad_norm": 11.86482454877073,
|
|
"learning_rate": 7.828034377432694e-06,
|
|
"loss": 1.753848910331726,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 1.135135135135135,
|
|
"grad_norm": 7.956056959211617,
|
|
"learning_rate": 7.823710513940385e-06,
|
|
"loss": 1.3149350881576538,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 1.136036036036036,
|
|
"grad_norm": 25.654039284216676,
|
|
"learning_rate": 7.819383547700889e-06,
|
|
"loss": 1.264143705368042,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 1.1369369369369369,
|
|
"grad_norm": 6.991141902419393,
|
|
"learning_rate": 7.81505348346876e-06,
|
|
"loss": 1.4038636684417725,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 1.1378378378378378,
|
|
"grad_norm": 8.25379690380056,
|
|
"learning_rate": 7.810720326001954e-06,
|
|
"loss": 1.7839691638946533,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 1.1387387387387387,
|
|
"grad_norm": 9.646297674995921,
|
|
"learning_rate": 7.806384080061827e-06,
|
|
"loss": 1.5281214714050293,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 1.1396396396396395,
|
|
"grad_norm": 8.434120353270666,
|
|
"learning_rate": 7.802044750413128e-06,
|
|
"loss": 1.055334448814392,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 1.1405405405405404,
|
|
"grad_norm": 13.563465933944338,
|
|
"learning_rate": 7.797702341824e-06,
|
|
"loss": 1.307537317276001,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 1.1414414414414416,
|
|
"grad_norm": 13.948444094045099,
|
|
"learning_rate": 7.793356859065962e-06,
|
|
"loss": 1.3452039957046509,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 1.1423423423423422,
|
|
"grad_norm": 13.969006681460863,
|
|
"learning_rate": 7.789008306913911e-06,
|
|
"loss": 1.434963583946228,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 1.1432432432432433,
|
|
"grad_norm": 11.92492065608128,
|
|
"learning_rate": 7.784656690146125e-06,
|
|
"loss": 1.6941349506378174,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 1.1441441441441442,
|
|
"grad_norm": 9.786806235171635,
|
|
"learning_rate": 7.78030201354424e-06,
|
|
"loss": 1.5654044151306152,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 1.1450450450450451,
|
|
"grad_norm": 16.528398727541727,
|
|
"learning_rate": 7.775944281893258e-06,
|
|
"loss": 1.3948932886123657,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 1.145945945945946,
|
|
"grad_norm": 21.470940029536713,
|
|
"learning_rate": 7.771583499981538e-06,
|
|
"loss": 2.152101516723633,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 1.146846846846847,
|
|
"grad_norm": 13.235891849465588,
|
|
"learning_rate": 7.767219672600794e-06,
|
|
"loss": 1.002134084701538,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 1.1477477477477478,
|
|
"grad_norm": 11.354407284936569,
|
|
"learning_rate": 7.76285280454608e-06,
|
|
"loss": 1.4057799577713013,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 1.1486486486486487,
|
|
"grad_norm": 11.048595701802194,
|
|
"learning_rate": 7.758482900615794e-06,
|
|
"loss": 1.1977487802505493,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.1495495495495496,
|
|
"grad_norm": 12.227212627101522,
|
|
"learning_rate": 7.75410996561167e-06,
|
|
"loss": 1.527909517288208,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 1.1504504504504505,
|
|
"grad_norm": 15.173327014778602,
|
|
"learning_rate": 7.749734004338777e-06,
|
|
"loss": 1.143022060394287,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 1.1513513513513514,
|
|
"grad_norm": 9.146107789902002,
|
|
"learning_rate": 7.745355021605499e-06,
|
|
"loss": 1.2634553909301758,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 1.1522522522522523,
|
|
"grad_norm": 12.249337106978563,
|
|
"learning_rate": 7.74097302222355e-06,
|
|
"loss": 2.0868749618530273,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 1.1531531531531531,
|
|
"grad_norm": 10.95442874403436,
|
|
"learning_rate": 7.736588011007952e-06,
|
|
"loss": 1.8694771528244019,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 1.154054054054054,
|
|
"grad_norm": 9.634759373981339,
|
|
"learning_rate": 7.732199992777045e-06,
|
|
"loss": 1.4333633184432983,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 1.154954954954955,
|
|
"grad_norm": 14.418215297755733,
|
|
"learning_rate": 7.72780897235246e-06,
|
|
"loss": 1.240595817565918,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 1.1558558558558558,
|
|
"grad_norm": 17.359398926703346,
|
|
"learning_rate": 7.72341495455914e-06,
|
|
"loss": 1.9707403182983398,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 1.1567567567567567,
|
|
"grad_norm": 17.892415832475763,
|
|
"learning_rate": 7.71901794422531e-06,
|
|
"loss": 1.1688909530639648,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 1.1576576576576576,
|
|
"grad_norm": 26.730124348758526,
|
|
"learning_rate": 7.714617946182498e-06,
|
|
"loss": 1.3510494232177734,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 1.1585585585585585,
|
|
"grad_norm": 15.56918791780566,
|
|
"learning_rate": 7.710214965265499e-06,
|
|
"loss": 1.6069289445877075,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 1.1594594594594594,
|
|
"grad_norm": 8.474185793071808,
|
|
"learning_rate": 7.705809006312394e-06,
|
|
"loss": 1.445816993713379,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 1.1603603603603603,
|
|
"grad_norm": 16.54501389107988,
|
|
"learning_rate": 7.701400074164535e-06,
|
|
"loss": 1.384445309638977,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 1.1612612612612612,
|
|
"grad_norm": 13.98831684862548,
|
|
"learning_rate": 7.696988173666545e-06,
|
|
"loss": 1.0784920454025269,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 1.1621621621621623,
|
|
"grad_norm": 12.039360431390216,
|
|
"learning_rate": 7.692573309666298e-06,
|
|
"loss": 1.702589988708496,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 1.163063063063063,
|
|
"grad_norm": 13.378061008808606,
|
|
"learning_rate": 7.688155487014936e-06,
|
|
"loss": 1.2335466146469116,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 1.163963963963964,
|
|
"grad_norm": 11.01659306250313,
|
|
"learning_rate": 7.683734710566848e-06,
|
|
"loss": 1.6434353590011597,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 1.164864864864865,
|
|
"grad_norm": 12.310872183382594,
|
|
"learning_rate": 7.679310985179664e-06,
|
|
"loss": 1.2276870012283325,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 1.1657657657657658,
|
|
"grad_norm": 11.84406149723134,
|
|
"learning_rate": 7.67488431571426e-06,
|
|
"loss": 1.0436772108078003,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 1.1666666666666667,
|
|
"grad_norm": 11.693643942171297,
|
|
"learning_rate": 7.670454707034745e-06,
|
|
"loss": 1.523186206817627,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 1.1675675675675676,
|
|
"grad_norm": 8.07150386900118,
|
|
"learning_rate": 7.666022164008458e-06,
|
|
"loss": 1.8224475383758545,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 1.1684684684684685,
|
|
"grad_norm": 11.398184522360536,
|
|
"learning_rate": 7.661586691505961e-06,
|
|
"loss": 2.4738755226135254,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 1.1693693693693694,
|
|
"grad_norm": 8.745404391664705,
|
|
"learning_rate": 7.657148294401037e-06,
|
|
"loss": 1.1814496517181396,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 1.1702702702702703,
|
|
"grad_norm": 9.302437223698567,
|
|
"learning_rate": 7.652706977570682e-06,
|
|
"loss": 1.6256625652313232,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 1.1711711711711712,
|
|
"grad_norm": 10.333906575252291,
|
|
"learning_rate": 7.648262745895103e-06,
|
|
"loss": 2.1055374145507812,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.172072072072072,
|
|
"grad_norm": 11.315946775683122,
|
|
"learning_rate": 7.643815604257703e-06,
|
|
"loss": 1.6647669076919556,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 1.172972972972973,
|
|
"grad_norm": 19.541790861158102,
|
|
"learning_rate": 7.639365557545085e-06,
|
|
"loss": 2.452302932739258,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 1.1738738738738739,
|
|
"grad_norm": 9.99620076555469,
|
|
"learning_rate": 7.63491261064705e-06,
|
|
"loss": 1.2176405191421509,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 1.1747747747747748,
|
|
"grad_norm": 20.37284495024169,
|
|
"learning_rate": 7.630456768456578e-06,
|
|
"loss": 1.5443081855773926,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 1.1756756756756757,
|
|
"grad_norm": 9.514029041847198,
|
|
"learning_rate": 7.625998035869833e-06,
|
|
"loss": 1.2930316925048828,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 1.1765765765765765,
|
|
"grad_norm": 12.776789687380294,
|
|
"learning_rate": 7.621536417786159e-06,
|
|
"loss": 1.2133829593658447,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 1.1774774774774774,
|
|
"grad_norm": 14.137936897976031,
|
|
"learning_rate": 7.617071919108066e-06,
|
|
"loss": 1.6285301446914673,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 1.1783783783783783,
|
|
"grad_norm": 9.750814868572935,
|
|
"learning_rate": 7.612604544741231e-06,
|
|
"loss": 1.6151678562164307,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 1.1792792792792792,
|
|
"grad_norm": 11.124079331189733,
|
|
"learning_rate": 7.608134299594489e-06,
|
|
"loss": 0.9893984198570251,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 1.1801801801801801,
|
|
"grad_norm": 14.341680894143462,
|
|
"learning_rate": 7.603661188579834e-06,
|
|
"loss": 1.4867775440216064,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 1.181081081081081,
|
|
"grad_norm": 10.389613013976772,
|
|
"learning_rate": 7.599185216612404e-06,
|
|
"loss": 2.4986953735351562,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 1.181981981981982,
|
|
"grad_norm": 15.031980879964772,
|
|
"learning_rate": 7.59470638861048e-06,
|
|
"loss": 1.6477925777435303,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 1.1828828828828828,
|
|
"grad_norm": 12.672810633719594,
|
|
"learning_rate": 7.590224709495488e-06,
|
|
"loss": 1.228175401687622,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 1.1837837837837837,
|
|
"grad_norm": 8.678480242174444,
|
|
"learning_rate": 7.585740184191983e-06,
|
|
"loss": 1.1139668226242065,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 1.1846846846846848,
|
|
"grad_norm": 11.693803772702594,
|
|
"learning_rate": 7.581252817627645e-06,
|
|
"loss": 1.8523716926574707,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 1.1855855855855855,
|
|
"grad_norm": 15.84584428770457,
|
|
"learning_rate": 7.576762614733278e-06,
|
|
"loss": 1.498417854309082,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 1.1864864864864866,
|
|
"grad_norm": 8.044145709378215,
|
|
"learning_rate": 7.572269580442806e-06,
|
|
"loss": 1.3055856227874756,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 1.1873873873873875,
|
|
"grad_norm": 11.3776637027518,
|
|
"learning_rate": 7.567773719693259e-06,
|
|
"loss": 1.3935675621032715,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 1.1882882882882884,
|
|
"grad_norm": 11.392851172846493,
|
|
"learning_rate": 7.563275037424775e-06,
|
|
"loss": 1.1069931983947754,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 1.1891891891891893,
|
|
"grad_norm": 8.397596012134446,
|
|
"learning_rate": 7.558773538580593e-06,
|
|
"loss": 1.5372304916381836,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 1.1900900900900901,
|
|
"grad_norm": 14.191071446322592,
|
|
"learning_rate": 7.554269228107044e-06,
|
|
"loss": 1.5797395706176758,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 1.190990990990991,
|
|
"grad_norm": 13.358608945649758,
|
|
"learning_rate": 7.549762110953553e-06,
|
|
"loss": 1.3508579730987549,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 1.191891891891892,
|
|
"grad_norm": 16.107248339446517,
|
|
"learning_rate": 7.545252192072625e-06,
|
|
"loss": 1.4219568967819214,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 1.1927927927927928,
|
|
"grad_norm": 15.949458931646804,
|
|
"learning_rate": 7.540739476419847e-06,
|
|
"loss": 1.5413322448730469,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 1.1936936936936937,
|
|
"grad_norm": 7.25570395373709,
|
|
"learning_rate": 7.5362239689538765e-06,
|
|
"loss": 1.5617890357971191,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 1.1945945945945946,
|
|
"grad_norm": 5.507534271918394,
|
|
"learning_rate": 7.531705674636439e-06,
|
|
"loss": 0.9302718639373779,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 1.1954954954954955,
|
|
"grad_norm": 11.704751357096594,
|
|
"learning_rate": 7.527184598432322e-06,
|
|
"loss": 1.9931082725524902,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 1.1963963963963964,
|
|
"grad_norm": 9.933944417942092,
|
|
"learning_rate": 7.522660745309375e-06,
|
|
"loss": 1.3197085857391357,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 1.1972972972972973,
|
|
"grad_norm": 7.720230920801659,
|
|
"learning_rate": 7.518134120238489e-06,
|
|
"loss": 1.283144474029541,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 1.1981981981981982,
|
|
"grad_norm": 13.160022270225245,
|
|
"learning_rate": 7.5136047281936094e-06,
|
|
"loss": 1.2439384460449219,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 1.199099099099099,
|
|
"grad_norm": 9.588431958829952,
|
|
"learning_rate": 7.509072574151719e-06,
|
|
"loss": 0.9278205633163452,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"grad_norm": 10.694717205816724,
|
|
"learning_rate": 7.504537663092834e-06,
|
|
"loss": 1.0412869453430176,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 1.2009009009009008,
|
|
"grad_norm": 12.597064505650591,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 1.7085771560668945,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 1.2018018018018017,
|
|
"grad_norm": 9.980346507290404,
|
|
"learning_rate": 7.495459589859293e-06,
|
|
"loss": 0.9375530481338501,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 1.2027027027027026,
|
|
"grad_norm": 25.49173748694432,
|
|
"learning_rate": 7.490916437659799e-06,
|
|
"loss": 1.4595293998718262,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 1.2036036036036035,
|
|
"grad_norm": 10.509646074519262,
|
|
"learning_rate": 7.486370548393621e-06,
|
|
"loss": 1.883044719696045,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 1.2045045045045044,
|
|
"grad_norm": 10.135246780041918,
|
|
"learning_rate": 7.48182192705587e-06,
|
|
"loss": 1.252532958984375,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 1.2054054054054055,
|
|
"grad_norm": 14.784925489162314,
|
|
"learning_rate": 7.47727057864466e-06,
|
|
"loss": 1.6863490343093872,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 1.2063063063063062,
|
|
"grad_norm": 9.426808553510034,
|
|
"learning_rate": 7.472716508161095e-06,
|
|
"loss": 1.2537802457809448,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 1.2072072072072073,
|
|
"grad_norm": 11.36536859739626,
|
|
"learning_rate": 7.468159720609278e-06,
|
|
"loss": 1.567642331123352,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 1.2081081081081082,
|
|
"grad_norm": 13.907149635693166,
|
|
"learning_rate": 7.463600220996294e-06,
|
|
"loss": 1.0958294868469238,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 1.209009009009009,
|
|
"grad_norm": 14.923315595776977,
|
|
"learning_rate": 7.45903801433221e-06,
|
|
"loss": 1.4967621564865112,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 1.20990990990991,
|
|
"grad_norm": 12.484070207598085,
|
|
"learning_rate": 7.454473105630063e-06,
|
|
"loss": 0.9661339521408081,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 1.2108108108108109,
|
|
"grad_norm": 13.667218559643285,
|
|
"learning_rate": 7.449905499905863e-06,
|
|
"loss": 1.088149905204773,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 1.2117117117117118,
|
|
"grad_norm": 10.615603407838263,
|
|
"learning_rate": 7.445335202178585e-06,
|
|
"loss": 1.7580902576446533,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 1.2126126126126127,
|
|
"grad_norm": 9.87829841243034,
|
|
"learning_rate": 7.440762217470156e-06,
|
|
"loss": 1.0903618335723877,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 1.2135135135135136,
|
|
"grad_norm": 13.23634609532938,
|
|
"learning_rate": 7.4361865508054646e-06,
|
|
"loss": 1.5855700969696045,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 1.2144144144144144,
|
|
"grad_norm": 15.17742491666185,
|
|
"learning_rate": 7.431608207212335e-06,
|
|
"loss": 1.5372284650802612,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 1.2153153153153153,
|
|
"grad_norm": 11.916694769113313,
|
|
"learning_rate": 7.427027191721541e-06,
|
|
"loss": 1.5875383615493774,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 1.2162162162162162,
|
|
"grad_norm": 100.75380099015622,
|
|
"learning_rate": 7.422443509366791e-06,
|
|
"loss": 2.580488681793213,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.2171171171171171,
|
|
"grad_norm": 18.528713273542575,
|
|
"learning_rate": 7.417857165184723e-06,
|
|
"loss": 1.1066110134124756,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 1.218018018018018,
|
|
"grad_norm": 8.831968227387177,
|
|
"learning_rate": 7.413268164214898e-06,
|
|
"loss": 1.5199337005615234,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 1.218918918918919,
|
|
"grad_norm": 9.685783038261286,
|
|
"learning_rate": 7.4086765114998e-06,
|
|
"loss": 1.211181640625,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 1.2198198198198198,
|
|
"grad_norm": 14.46493643753685,
|
|
"learning_rate": 7.404082212084825e-06,
|
|
"loss": 1.3475542068481445,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 1.2207207207207207,
|
|
"grad_norm": 7.827895772435491,
|
|
"learning_rate": 7.399485271018275e-06,
|
|
"loss": 1.2011393308639526,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 1.2216216216216216,
|
|
"grad_norm": 12.112899033982343,
|
|
"learning_rate": 7.394885693351359e-06,
|
|
"loss": 1.5029023885726929,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 1.2225225225225225,
|
|
"grad_norm": 19.16374374542783,
|
|
"learning_rate": 7.390283484138181e-06,
|
|
"loss": 1.1819264888763428,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 1.2234234234234234,
|
|
"grad_norm": 15.837163637802899,
|
|
"learning_rate": 7.385678648435736e-06,
|
|
"loss": 1.7538678646087646,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 1.2243243243243243,
|
|
"grad_norm": 8.816568931447835,
|
|
"learning_rate": 7.381071191303905e-06,
|
|
"loss": 1.7851725816726685,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 1.2252252252252251,
|
|
"grad_norm": 12.425085747807193,
|
|
"learning_rate": 7.37646111780545e-06,
|
|
"loss": 1.6314969062805176,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 1.226126126126126,
|
|
"grad_norm": 11.622292873388071,
|
|
"learning_rate": 7.371848433006011e-06,
|
|
"loss": 1.8315403461456299,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 1.227027027027027,
|
|
"grad_norm": 11.700004162311084,
|
|
"learning_rate": 7.367233141974091e-06,
|
|
"loss": 1.2369434833526611,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 1.227927927927928,
|
|
"grad_norm": 7.913797704821526,
|
|
"learning_rate": 7.36261524978106e-06,
|
|
"loss": 1.6064811944961548,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 1.2288288288288287,
|
|
"grad_norm": 9.524045204852513,
|
|
"learning_rate": 7.357994761501148e-06,
|
|
"loss": 0.9757938981056213,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 1.2297297297297298,
|
|
"grad_norm": 7.515508698675772,
|
|
"learning_rate": 7.353371682211439e-06,
|
|
"loss": 1.7672373056411743,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 1.2306306306306307,
|
|
"grad_norm": 11.170188493390835,
|
|
"learning_rate": 7.348746016991855e-06,
|
|
"loss": 1.468174934387207,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 1.2315315315315316,
|
|
"grad_norm": 11.147007367575577,
|
|
"learning_rate": 7.344117770925169e-06,
|
|
"loss": 1.455219030380249,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 1.2324324324324325,
|
|
"grad_norm": 12.241165739692947,
|
|
"learning_rate": 7.339486949096986e-06,
|
|
"loss": 1.9719338417053223,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 1.2333333333333334,
|
|
"grad_norm": 8.88542917374653,
|
|
"learning_rate": 7.3348535565957415e-06,
|
|
"loss": 1.3688957691192627,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 1.2342342342342343,
|
|
"grad_norm": 13.03109391063374,
|
|
"learning_rate": 7.330217598512696e-06,
|
|
"loss": 1.9144933223724365,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 1.2351351351351352,
|
|
"grad_norm": 11.356009632577601,
|
|
"learning_rate": 7.3255790799419276e-06,
|
|
"loss": 2.976274251937866,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 1.236036036036036,
|
|
"grad_norm": 16.20171708391758,
|
|
"learning_rate": 7.320938005980331e-06,
|
|
"loss": 2.230113983154297,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 1.236936936936937,
|
|
"grad_norm": 17.420836190283396,
|
|
"learning_rate": 7.316294381727607e-06,
|
|
"loss": 2.2218849658966064,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 1.2378378378378379,
|
|
"grad_norm": 21.68831769625887,
|
|
"learning_rate": 7.311648212286259e-06,
|
|
"loss": 1.731313943862915,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 1.2387387387387387,
|
|
"grad_norm": 9.670390727557972,
|
|
"learning_rate": 7.3069995027615866e-06,
|
|
"loss": 1.6031734943389893,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.2396396396396396,
|
|
"grad_norm": 17.7257502941489,
|
|
"learning_rate": 7.302348258261681e-06,
|
|
"loss": 1.090880274772644,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 1.2405405405405405,
|
|
"grad_norm": 12.158862132711823,
|
|
"learning_rate": 7.297694483897419e-06,
|
|
"loss": 1.7922217845916748,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 1.2414414414414414,
|
|
"grad_norm": 10.343322201737845,
|
|
"learning_rate": 7.293038184782455e-06,
|
|
"loss": 1.5444504022598267,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 1.2423423423423423,
|
|
"grad_norm": 15.600510863887093,
|
|
"learning_rate": 7.288379366033222e-06,
|
|
"loss": 1.8387994766235352,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 1.2432432432432432,
|
|
"grad_norm": 17.805275666565176,
|
|
"learning_rate": 7.283718032768918e-06,
|
|
"loss": 2.0836944580078125,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.244144144144144,
|
|
"grad_norm": 15.290226624511453,
|
|
"learning_rate": 7.279054190111506e-06,
|
|
"loss": 1.8654720783233643,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 1.245045045045045,
|
|
"grad_norm": 11.358877730376452,
|
|
"learning_rate": 7.274387843185706e-06,
|
|
"loss": 0.9758589267730713,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 1.2459459459459459,
|
|
"grad_norm": 10.315239518582793,
|
|
"learning_rate": 7.269718997118989e-06,
|
|
"loss": 1.3522006273269653,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 1.2468468468468468,
|
|
"grad_norm": 11.204012591868468,
|
|
"learning_rate": 7.265047657041572e-06,
|
|
"loss": 2.102362632751465,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 1.2477477477477477,
|
|
"grad_norm": 11.042702044425438,
|
|
"learning_rate": 7.260373828086412e-06,
|
|
"loss": 1.0497840642929077,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 1.2486486486486488,
|
|
"grad_norm": 10.28616439594604,
|
|
"learning_rate": 7.2556975153892026e-06,
|
|
"loss": 1.5571216344833374,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 1.2495495495495494,
|
|
"grad_norm": 13.383199442545159,
|
|
"learning_rate": 7.251018724088367e-06,
|
|
"loss": 1.8421202898025513,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 1.2504504504504506,
|
|
"grad_norm": 12.244421177234683,
|
|
"learning_rate": 7.24633745932505e-06,
|
|
"loss": 1.5307228565216064,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 1.2513513513513512,
|
|
"grad_norm": 8.821148614606216,
|
|
"learning_rate": 7.241653726243114e-06,
|
|
"loss": 1.2365405559539795,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 1.2522522522522523,
|
|
"grad_norm": 10.320916195803836,
|
|
"learning_rate": 7.236967529989135e-06,
|
|
"loss": 2.1349010467529297,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 1.2531531531531532,
|
|
"grad_norm": 41.31115369427396,
|
|
"learning_rate": 7.232278875712396e-06,
|
|
"loss": 3.4286715984344482,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 1.2540540540540541,
|
|
"grad_norm": 8.296025650924667,
|
|
"learning_rate": 7.22758776856488e-06,
|
|
"loss": 1.4277901649475098,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 1.254954954954955,
|
|
"grad_norm": 10.533961574576528,
|
|
"learning_rate": 7.222894213701264e-06,
|
|
"loss": 1.6853958368301392,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 1.255855855855856,
|
|
"grad_norm": 13.35248993703874,
|
|
"learning_rate": 7.218198216278918e-06,
|
|
"loss": 2.1888997554779053,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 1.2567567567567568,
|
|
"grad_norm": 12.990859083080924,
|
|
"learning_rate": 7.213499781457891e-06,
|
|
"loss": 1.1632167100906372,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 1.2576576576576577,
|
|
"grad_norm": 11.893579892206938,
|
|
"learning_rate": 7.208798914400916e-06,
|
|
"loss": 1.218822717666626,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 1.2585585585585586,
|
|
"grad_norm": 11.106466441833884,
|
|
"learning_rate": 7.204095620273395e-06,
|
|
"loss": 1.7315642833709717,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 1.2594594594594595,
|
|
"grad_norm": 8.767909217390986,
|
|
"learning_rate": 7.1993899042433965e-06,
|
|
"loss": 0.9117752313613892,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 1.2603603603603604,
|
|
"grad_norm": 9.32030423453937,
|
|
"learning_rate": 7.194681771481653e-06,
|
|
"loss": 1.5483365058898926,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 1.2612612612612613,
|
|
"grad_norm": 10.922927319662296,
|
|
"learning_rate": 7.189971227161551e-06,
|
|
"loss": 1.449894905090332,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 1.2621621621621621,
|
|
"grad_norm": 8.460395029652267,
|
|
"learning_rate": 7.185258276459125e-06,
|
|
"loss": 1.7599258422851562,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 1.263063063063063,
|
|
"grad_norm": 17.449187812870015,
|
|
"learning_rate": 7.180542924553055e-06,
|
|
"loss": 1.0262142419815063,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 1.263963963963964,
|
|
"grad_norm": 14.96280750597402,
|
|
"learning_rate": 7.175825176624665e-06,
|
|
"loss": 2.000757932662964,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 1.2648648648648648,
|
|
"grad_norm": 8.965402157080927,
|
|
"learning_rate": 7.171105037857901e-06,
|
|
"loss": 1.717637300491333,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 1.2657657657657657,
|
|
"grad_norm": 16.005789341830745,
|
|
"learning_rate": 7.166382513439344e-06,
|
|
"loss": 1.4934778213500977,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 1.2666666666666666,
|
|
"grad_norm": 29.960182603102517,
|
|
"learning_rate": 7.161657608558195e-06,
|
|
"loss": 2.0391616821289062,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 1.2675675675675675,
|
|
"grad_norm": 13.301955309440645,
|
|
"learning_rate": 7.156930328406268e-06,
|
|
"loss": 3.2663180828094482,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 1.2684684684684684,
|
|
"grad_norm": 9.317719860301414,
|
|
"learning_rate": 7.152200678177992e-06,
|
|
"loss": 1.3521085977554321,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 1.2693693693693695,
|
|
"grad_norm": 15.01321760720811,
|
|
"learning_rate": 7.147468663070394e-06,
|
|
"loss": 1.457066297531128,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 1.2702702702702702,
|
|
"grad_norm": 64.91707114599426,
|
|
"learning_rate": 7.142734288283104e-06,
|
|
"loss": 3.4249014854431152,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 1.2711711711711713,
|
|
"grad_norm": 8.851525925579665,
|
|
"learning_rate": 7.137997559018347e-06,
|
|
"loss": 1.3285980224609375,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 1.272072072072072,
|
|
"grad_norm": 12.594045557885265,
|
|
"learning_rate": 7.133258480480927e-06,
|
|
"loss": 1.5793802738189697,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 1.272972972972973,
|
|
"grad_norm": 13.593476027500934,
|
|
"learning_rate": 7.128517057878236e-06,
|
|
"loss": 1.9223592281341553,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 1.2738738738738737,
|
|
"grad_norm": 8.690744191150314,
|
|
"learning_rate": 7.12377329642024e-06,
|
|
"loss": 1.2703932523727417,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 1.2747747747747749,
|
|
"grad_norm": 12.402794101289746,
|
|
"learning_rate": 7.119027201319475e-06,
|
|
"loss": 1.16603684425354,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 1.2756756756756757,
|
|
"grad_norm": 9.367974071505143,
|
|
"learning_rate": 7.114278777791041e-06,
|
|
"loss": 1.7819464206695557,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 1.2765765765765766,
|
|
"grad_norm": 7.8478231909280325,
|
|
"learning_rate": 7.109528031052597e-06,
|
|
"loss": 1.3226629495620728,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 1.2774774774774775,
|
|
"grad_norm": 9.661943223100234,
|
|
"learning_rate": 7.1047749663243545e-06,
|
|
"loss": 1.7379398345947266,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 1.2783783783783784,
|
|
"grad_norm": 10.490058757861664,
|
|
"learning_rate": 7.1000195888290726e-06,
|
|
"loss": 1.9525641202926636,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 1.2792792792792793,
|
|
"grad_norm": 29.87284270496952,
|
|
"learning_rate": 7.09526190379205e-06,
|
|
"loss": 0.845831573009491,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 1.2801801801801802,
|
|
"grad_norm": 11.91578853488134,
|
|
"learning_rate": 7.090501916441124e-06,
|
|
"loss": 1.4141738414764404,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 1.281081081081081,
|
|
"grad_norm": 11.098002980381764,
|
|
"learning_rate": 7.085739632006656e-06,
|
|
"loss": 0.9975290894508362,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 1.281981981981982,
|
|
"grad_norm": 16.92866770300015,
|
|
"learning_rate": 7.0809750557215385e-06,
|
|
"loss": 1.3577886819839478,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 1.2828828828828829,
|
|
"grad_norm": 10.099739910797975,
|
|
"learning_rate": 7.076208192821179e-06,
|
|
"loss": 1.1366277933120728,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 1.2837837837837838,
|
|
"grad_norm": 14.743356708038405,
|
|
"learning_rate": 7.071439048543498e-06,
|
|
"loss": 1.587902545928955,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 1.2846846846846847,
|
|
"grad_norm": 13.55985735794094,
|
|
"learning_rate": 7.0666676281289205e-06,
|
|
"loss": 1.235689401626587,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 1.2855855855855856,
|
|
"grad_norm": 10.195364678567254,
|
|
"learning_rate": 7.061893936820376e-06,
|
|
"loss": 1.7189230918884277,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 1.2864864864864864,
|
|
"grad_norm": 11.420024715459206,
|
|
"learning_rate": 7.057117979863288e-06,
|
|
"loss": 1.6788554191589355,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 1.2873873873873873,
|
|
"grad_norm": 7.899561912850803,
|
|
"learning_rate": 7.05233976250557e-06,
|
|
"loss": 1.0105758905410767,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 1.2882882882882882,
|
|
"grad_norm": 16.941219293922575,
|
|
"learning_rate": 7.047559289997618e-06,
|
|
"loss": 1.7400261163711548,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 1.2891891891891891,
|
|
"grad_norm": 9.56288562596186,
|
|
"learning_rate": 7.0427765675923055e-06,
|
|
"loss": 1.8979640007019043,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 1.29009009009009,
|
|
"grad_norm": 12.510087641070497,
|
|
"learning_rate": 7.037991600544982e-06,
|
|
"loss": 1.7491884231567383,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 1.290990990990991,
|
|
"grad_norm": 9.667832761107086,
|
|
"learning_rate": 7.0332043941134595e-06,
|
|
"loss": 1.933447003364563,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 1.291891891891892,
|
|
"grad_norm": 10.3254616942883,
|
|
"learning_rate": 7.028414953558012e-06,
|
|
"loss": 0.959897518157959,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 1.2927927927927927,
|
|
"grad_norm": 9.966037197858935,
|
|
"learning_rate": 7.023623284141369e-06,
|
|
"loss": 1.429056167602539,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 1.2936936936936938,
|
|
"grad_norm": 10.193568731979536,
|
|
"learning_rate": 7.0188293911287075e-06,
|
|
"loss": 1.8352670669555664,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 1.2945945945945945,
|
|
"grad_norm": 14.231366838619765,
|
|
"learning_rate": 7.014033279787651e-06,
|
|
"loss": 3.6025497913360596,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 1.2954954954954956,
|
|
"grad_norm": 17.102283011638892,
|
|
"learning_rate": 7.009234955388257e-06,
|
|
"loss": 1.7527354955673218,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 1.2963963963963965,
|
|
"grad_norm": 11.049771073776762,
|
|
"learning_rate": 7.004434423203016e-06,
|
|
"loss": 1.2662566900253296,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 1.2972972972972974,
|
|
"grad_norm": 12.530549988649303,
|
|
"learning_rate": 6.999631688506846e-06,
|
|
"loss": 0.9092813730239868,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 1.2981981981981983,
|
|
"grad_norm": 15.15531879332505,
|
|
"learning_rate": 6.994826756577082e-06,
|
|
"loss": 2.4031832218170166,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 1.2990990990990992,
|
|
"grad_norm": 10.56692184348743,
|
|
"learning_rate": 6.990019632693478e-06,
|
|
"loss": 1.1668978929519653,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"grad_norm": 11.039725683520837,
|
|
"learning_rate": 6.985210322138191e-06,
|
|
"loss": 1.592980146408081,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 1.300900900900901,
|
|
"grad_norm": 11.074299403387734,
|
|
"learning_rate": 6.980398830195785e-06,
|
|
"loss": 1.2689815759658813,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 1.3018018018018018,
|
|
"grad_norm": 7.922665314187741,
|
|
"learning_rate": 6.975585162153218e-06,
|
|
"loss": 1.483189582824707,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 1.3027027027027027,
|
|
"grad_norm": 12.352596563820562,
|
|
"learning_rate": 6.970769323299843e-06,
|
|
"loss": 1.1730546951293945,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 1.3036036036036036,
|
|
"grad_norm": 8.647036918170205,
|
|
"learning_rate": 6.965951318927395e-06,
|
|
"loss": 1.3135360479354858,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 1.3045045045045045,
|
|
"grad_norm": 19.76918698112762,
|
|
"learning_rate": 6.96113115432999e-06,
|
|
"loss": 2.7980329990386963,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 1.3054054054054054,
|
|
"grad_norm": 10.59361494593598,
|
|
"learning_rate": 6.956308834804115e-06,
|
|
"loss": 1.9804593324661255,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 1.3063063063063063,
|
|
"grad_norm": 12.670842435395594,
|
|
"learning_rate": 6.951484365648628e-06,
|
|
"loss": 1.292311191558838,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 1.3072072072072072,
|
|
"grad_norm": 14.579133809409123,
|
|
"learning_rate": 6.9466577521647496e-06,
|
|
"loss": 1.7021632194519043,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 1.308108108108108,
|
|
"grad_norm": 11.659110578279638,
|
|
"learning_rate": 6.941828999656054e-06,
|
|
"loss": 2.4892637729644775,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 1.309009009009009,
|
|
"grad_norm": 11.723701997771665,
|
|
"learning_rate": 6.936998113428466e-06,
|
|
"loss": 2.369381904602051,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 1.3099099099099099,
|
|
"grad_norm": 15.520099051937635,
|
|
"learning_rate": 6.932165098790257e-06,
|
|
"loss": 2.2101807594299316,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 1.3108108108108107,
|
|
"grad_norm": 6.329220230845185,
|
|
"learning_rate": 6.927329961052036e-06,
|
|
"loss": 1.4462792873382568,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 1.3117117117117116,
|
|
"grad_norm": 8.577589949788242,
|
|
"learning_rate": 6.9224927055267456e-06,
|
|
"loss": 1.212193489074707,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 1.3126126126126128,
|
|
"grad_norm": 10.300198588278217,
|
|
"learning_rate": 6.917653337529655e-06,
|
|
"loss": 1.342195749282837,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 1.3135135135135134,
|
|
"grad_norm": 11.523689425446618,
|
|
"learning_rate": 6.912811862378353e-06,
|
|
"loss": 1.1294045448303223,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 1.3144144144144145,
|
|
"grad_norm": 7.504283966414375,
|
|
"learning_rate": 6.9079682853927436e-06,
|
|
"loss": 1.299343466758728,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 1.3153153153153152,
|
|
"grad_norm": 18.030183928050544,
|
|
"learning_rate": 6.9031226118950445e-06,
|
|
"loss": 1.2723416090011597,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 1.3162162162162163,
|
|
"grad_norm": 9.79491788344939,
|
|
"learning_rate": 6.898274847209775e-06,
|
|
"loss": 1.2439301013946533,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 1.317117117117117,
|
|
"grad_norm": 16.072963433739307,
|
|
"learning_rate": 6.8934249966637515e-06,
|
|
"loss": 0.9168739318847656,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 1.318018018018018,
|
|
"grad_norm": 6.646130498815386,
|
|
"learning_rate": 6.88857306558608e-06,
|
|
"loss": 1.6370880603790283,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 1.318918918918919,
|
|
"grad_norm": 13.474685631393964,
|
|
"learning_rate": 6.8837190593081595e-06,
|
|
"loss": 1.6939034461975098,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 1.3198198198198199,
|
|
"grad_norm": 13.255101591637459,
|
|
"learning_rate": 6.878862983163661e-06,
|
|
"loss": 0.566031277179718,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 1.3207207207207208,
|
|
"grad_norm": 23.61069042071233,
|
|
"learning_rate": 6.874004842488537e-06,
|
|
"loss": 1.9992173910140991,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 1.3216216216216217,
|
|
"grad_norm": 16.17865447072103,
|
|
"learning_rate": 6.869144642621006e-06,
|
|
"loss": 2.696247100830078,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 1.3225225225225226,
|
|
"grad_norm": 30.219320393157133,
|
|
"learning_rate": 6.864282388901544e-06,
|
|
"loss": 2.0518383979797363,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 1.3234234234234235,
|
|
"grad_norm": 10.251150920416583,
|
|
"learning_rate": 6.859418086672892e-06,
|
|
"loss": 1.741684913635254,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 1.3243243243243243,
|
|
"grad_norm": 12.22188532170062,
|
|
"learning_rate": 6.854551741280039e-06,
|
|
"loss": 1.4471855163574219,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 1.3252252252252252,
|
|
"grad_norm": 8.464858435050514,
|
|
"learning_rate": 6.849683358070217e-06,
|
|
"loss": 2.0512211322784424,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 1.3261261261261261,
|
|
"grad_norm": 9.663271857650159,
|
|
"learning_rate": 6.844812942392899e-06,
|
|
"loss": 1.8038698434829712,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 1.327027027027027,
|
|
"grad_norm": 7.5145310553659135,
|
|
"learning_rate": 6.839940499599791e-06,
|
|
"loss": 1.251605749130249,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 1.327927927927928,
|
|
"grad_norm": 9.654822014222745,
|
|
"learning_rate": 6.835066035044827e-06,
|
|
"loss": 1.6228208541870117,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 1.3288288288288288,
|
|
"grad_norm": 12.073003036682927,
|
|
"learning_rate": 6.830189554084162e-06,
|
|
"loss": 1.4056490659713745,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 1.3297297297297297,
|
|
"grad_norm": 9.09359100958607,
|
|
"learning_rate": 6.825311062076166e-06,
|
|
"loss": 1.4476819038391113,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 1.3306306306306306,
|
|
"grad_norm": 8.974308806871944,
|
|
"learning_rate": 6.820430564381419e-06,
|
|
"loss": 1.0044509172439575,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 1.3315315315315315,
|
|
"grad_norm": 11.560229844922576,
|
|
"learning_rate": 6.815548066362707e-06,
|
|
"loss": 1.3208290338516235,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 1.3324324324324324,
|
|
"grad_norm": 27.433320472132237,
|
|
"learning_rate": 6.810663573385013e-06,
|
|
"loss": 1.8785879611968994,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 1.3333333333333333,
|
|
"grad_norm": 7.34239456788459,
|
|
"learning_rate": 6.805777090815506e-06,
|
|
"loss": 1.3099184036254883,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 1.3342342342342342,
|
|
"grad_norm": 13.785305743855641,
|
|
"learning_rate": 6.800888624023552e-06,
|
|
"loss": 1.6740031242370605,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 1.3351351351351353,
|
|
"grad_norm": 13.478577023871006,
|
|
"learning_rate": 6.79599817838069e-06,
|
|
"loss": 1.2225863933563232,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 1.336036036036036,
|
|
"grad_norm": 8.949648729074358,
|
|
"learning_rate": 6.791105759260637e-06,
|
|
"loss": 1.498471975326538,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 1.336936936936937,
|
|
"grad_norm": 10.935949380255698,
|
|
"learning_rate": 6.786211372039277e-06,
|
|
"loss": 2.295103073120117,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 1.3378378378378377,
|
|
"grad_norm": 9.925065969678082,
|
|
"learning_rate": 6.781315022094652e-06,
|
|
"loss": 1.3988707065582275,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 1.3387387387387388,
|
|
"grad_norm": 29.961737390518984,
|
|
"learning_rate": 6.7764167148069695e-06,
|
|
"loss": 1.797900915145874,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 1.3396396396396395,
|
|
"grad_norm": 23.355601544653386,
|
|
"learning_rate": 6.771516455558581e-06,
|
|
"loss": 1.6941187381744385,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 1.3405405405405406,
|
|
"grad_norm": 8.925186199314515,
|
|
"learning_rate": 6.766614249733986e-06,
|
|
"loss": 1.1308190822601318,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 1.3414414414414415,
|
|
"grad_norm": 77.27501005877426,
|
|
"learning_rate": 6.761710102719823e-06,
|
|
"loss": 1.5973894596099854,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 1.3423423423423424,
|
|
"grad_norm": 11.444046265690886,
|
|
"learning_rate": 6.7568040199048604e-06,
|
|
"loss": 1.4958088397979736,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 1.3432432432432433,
|
|
"grad_norm": 10.019881082111748,
|
|
"learning_rate": 6.751896006679999e-06,
|
|
"loss": 1.3319631814956665,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 1.3441441441441442,
|
|
"grad_norm": 17.6714685880708,
|
|
"learning_rate": 6.746986068438255e-06,
|
|
"loss": 2.3347811698913574,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 1.345045045045045,
|
|
"grad_norm": 8.951677689395087,
|
|
"learning_rate": 6.742074210574764e-06,
|
|
"loss": 1.483781337738037,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 1.345945945945946,
|
|
"grad_norm": 9.509507922198216,
|
|
"learning_rate": 6.737160438486771e-06,
|
|
"loss": 1.783625841140747,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 1.3468468468468469,
|
|
"grad_norm": 14.358783693076733,
|
|
"learning_rate": 6.732244757573619e-06,
|
|
"loss": 1.4702600240707397,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 1.3477477477477477,
|
|
"grad_norm": 11.278813813202781,
|
|
"learning_rate": 6.727327173236755e-06,
|
|
"loss": 1.2078044414520264,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 1.3486486486486486,
|
|
"grad_norm": 13.923544729865053,
|
|
"learning_rate": 6.722407690879713e-06,
|
|
"loss": 1.5310215950012207,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 1.3495495495495495,
|
|
"grad_norm": 12.384659652404792,
|
|
"learning_rate": 6.717486315908117e-06,
|
|
"loss": 1.6496665477752686,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 1.3504504504504504,
|
|
"grad_norm": 16.531447348895114,
|
|
"learning_rate": 6.712563053729666e-06,
|
|
"loss": 1.783576250076294,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 1.3513513513513513,
|
|
"grad_norm": 12.340681437779834,
|
|
"learning_rate": 6.707637909754136e-06,
|
|
"loss": 1.4855138063430786,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 1.3522522522522522,
|
|
"grad_norm": 12.840687199470583,
|
|
"learning_rate": 6.702710889393369e-06,
|
|
"loss": 1.4109737873077393,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 1.353153153153153,
|
|
"grad_norm": 11.793260271316448,
|
|
"learning_rate": 6.697781998061269e-06,
|
|
"loss": 1.1371577978134155,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 1.354054054054054,
|
|
"grad_norm": 8.953952530348078,
|
|
"learning_rate": 6.692851241173796e-06,
|
|
"loss": 1.495855450630188,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 1.3549549549549549,
|
|
"grad_norm": 11.357007680099965,
|
|
"learning_rate": 6.687918624148963e-06,
|
|
"loss": 1.0094865560531616,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 1.3558558558558558,
|
|
"grad_norm": 13.866286980242856,
|
|
"learning_rate": 6.682984152406819e-06,
|
|
"loss": 2.112832546234131,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 1.3567567567567567,
|
|
"grad_norm": 11.31135397643678,
|
|
"learning_rate": 6.6780478313694595e-06,
|
|
"loss": 1.3318833112716675,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 1.3576576576576578,
|
|
"grad_norm": 7.261488334927335,
|
|
"learning_rate": 6.6731096664610085e-06,
|
|
"loss": 1.0667122602462769,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 1.3585585585585584,
|
|
"grad_norm": 17.226149858340406,
|
|
"learning_rate": 6.668169663107614e-06,
|
|
"loss": 1.4053834676742554,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 1.3594594594594596,
|
|
"grad_norm": 15.457783991296136,
|
|
"learning_rate": 6.663227826737448e-06,
|
|
"loss": 1.7283596992492676,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 1.3603603603603602,
|
|
"grad_norm": 14.47847833402991,
|
|
"learning_rate": 6.658284162780696e-06,
|
|
"loss": 1.3663225173950195,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 1.3612612612612613,
|
|
"grad_norm": 11.76769315410328,
|
|
"learning_rate": 6.653338676669549e-06,
|
|
"loss": 1.1600167751312256,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 1.3621621621621622,
|
|
"grad_norm": 11.287613650548183,
|
|
"learning_rate": 6.6483913738382015e-06,
|
|
"loss": 2.11704158782959,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 1.3630630630630631,
|
|
"grad_norm": 8.769643345190612,
|
|
"learning_rate": 6.643442259722845e-06,
|
|
"loss": 2.3525846004486084,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 1.363963963963964,
|
|
"grad_norm": 10.057037063527666,
|
|
"learning_rate": 6.6384913397616614e-06,
|
|
"loss": 1.8541146516799927,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 1.364864864864865,
|
|
"grad_norm": 16.84125796871969,
|
|
"learning_rate": 6.633538619394817e-06,
|
|
"loss": 2.212017059326172,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 1.3657657657657658,
|
|
"grad_norm": 15.014498400819402,
|
|
"learning_rate": 6.628584104064454e-06,
|
|
"loss": 1.8785572052001953,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 1.3666666666666667,
|
|
"grad_norm": 7.8208850480783445,
|
|
"learning_rate": 6.623627799214689e-06,
|
|
"loss": 1.8144475221633911,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 1.3675675675675676,
|
|
"grad_norm": 8.796725097213452,
|
|
"learning_rate": 6.618669710291607e-06,
|
|
"loss": 1.1300932168960571,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 1.3684684684684685,
|
|
"grad_norm": 16.96085567967029,
|
|
"learning_rate": 6.613709842743247e-06,
|
|
"loss": 1.8939905166625977,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 1.3693693693693694,
|
|
"grad_norm": 15.890620069538523,
|
|
"learning_rate": 6.608748202019609e-06,
|
|
"loss": 2.296168565750122,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 1.3702702702702703,
|
|
"grad_norm": 12.897079381014754,
|
|
"learning_rate": 6.60378479357264e-06,
|
|
"loss": 1.373428225517273,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 1.3711711711711712,
|
|
"grad_norm": 72.08633517003211,
|
|
"learning_rate": 6.598819622856227e-06,
|
|
"loss": 2.1196229457855225,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 1.372072072072072,
|
|
"grad_norm": 11.863094670122011,
|
|
"learning_rate": 6.593852695326195e-06,
|
|
"loss": 1.8780111074447632,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 1.372972972972973,
|
|
"grad_norm": 12.40630514355168,
|
|
"learning_rate": 6.588884016440296e-06,
|
|
"loss": 2.2452330589294434,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 1.3738738738738738,
|
|
"grad_norm": 10.698082427109062,
|
|
"learning_rate": 6.583913591658215e-06,
|
|
"loss": 1.7420361042022705,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 1.3747747747747747,
|
|
"grad_norm": 11.45374027254298,
|
|
"learning_rate": 6.5789414264415455e-06,
|
|
"loss": 1.486595869064331,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 1.3756756756756756,
|
|
"grad_norm": 9.960417384966648,
|
|
"learning_rate": 6.573967526253799e-06,
|
|
"loss": 2.052097797393799,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 1.3765765765765765,
|
|
"grad_norm": 9.398298485798593,
|
|
"learning_rate": 6.568991896560394e-06,
|
|
"loss": 1.6156516075134277,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 1.3774774774774774,
|
|
"grad_norm": 7.991873232786982,
|
|
"learning_rate": 6.564014542828645e-06,
|
|
"loss": 1.7775673866271973,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 1.3783783783783785,
|
|
"grad_norm": 12.990266393680315,
|
|
"learning_rate": 6.559035470527766e-06,
|
|
"loss": 1.3804233074188232,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 1.3792792792792792,
|
|
"grad_norm": 19.1123845290699,
|
|
"learning_rate": 6.554054685128857e-06,
|
|
"loss": 2.075040578842163,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 1.3801801801801803,
|
|
"grad_norm": 11.649539270034651,
|
|
"learning_rate": 6.549072192104899e-06,
|
|
"loss": 1.1930575370788574,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 1.381081081081081,
|
|
"grad_norm": 10.849947510650079,
|
|
"learning_rate": 6.54408799693075e-06,
|
|
"loss": 1.5370773077011108,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 1.381981981981982,
|
|
"grad_norm": 14.00986862839,
|
|
"learning_rate": 6.539102105083139e-06,
|
|
"loss": 1.4853053092956543,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 1.3828828828828827,
|
|
"grad_norm": 11.43709366826303,
|
|
"learning_rate": 6.5341145220406624e-06,
|
|
"loss": 1.607257604598999,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 1.3837837837837839,
|
|
"grad_norm": 9.335255223305646,
|
|
"learning_rate": 6.52912525328377e-06,
|
|
"loss": 1.058199167251587,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 1.3846846846846848,
|
|
"grad_norm": 8.65597380645358,
|
|
"learning_rate": 6.524134304294767e-06,
|
|
"loss": 1.581141710281372,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 1.3855855855855856,
|
|
"grad_norm": 8.905658798603659,
|
|
"learning_rate": 6.519141680557801e-06,
|
|
"loss": 1.7198735475540161,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 1.3864864864864865,
|
|
"grad_norm": 13.866845671291236,
|
|
"learning_rate": 6.514147387558866e-06,
|
|
"loss": 1.1386839151382446,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 1.3873873873873874,
|
|
"grad_norm": 7.183340151141346,
|
|
"learning_rate": 6.509151430785785e-06,
|
|
"loss": 1.55452299118042,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 1.3882882882882883,
|
|
"grad_norm": 14.031047648797244,
|
|
"learning_rate": 6.5041538157282105e-06,
|
|
"loss": 1.459531307220459,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 1.3891891891891892,
|
|
"grad_norm": 9.921835400261099,
|
|
"learning_rate": 6.4991545478776175e-06,
|
|
"loss": 1.7958147525787354,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 1.39009009009009,
|
|
"grad_norm": 14.997502932067247,
|
|
"learning_rate": 6.494153632727299e-06,
|
|
"loss": 1.7099924087524414,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 1.390990990990991,
|
|
"grad_norm": 15.02260259254844,
|
|
"learning_rate": 6.489151075772355e-06,
|
|
"loss": 2.1933164596557617,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 1.3918918918918919,
|
|
"grad_norm": 10.2867303895181,
|
|
"learning_rate": 6.484146882509692e-06,
|
|
"loss": 1.3097925186157227,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 1.3927927927927928,
|
|
"grad_norm": 9.534133003704742,
|
|
"learning_rate": 6.4791410584380134e-06,
|
|
"loss": 1.562878131866455,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 1.3936936936936937,
|
|
"grad_norm": 8.408041594397528,
|
|
"learning_rate": 6.474133609057812e-06,
|
|
"loss": 1.8736263513565063,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 1.3945945945945946,
|
|
"grad_norm": 13.757239562523456,
|
|
"learning_rate": 6.469124539871372e-06,
|
|
"loss": 1.7695668935775757,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 1.3954954954954955,
|
|
"grad_norm": 11.411530144244546,
|
|
"learning_rate": 6.464113856382752e-06,
|
|
"loss": 3.1489667892456055,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 1.3963963963963963,
|
|
"grad_norm": 16.402962216303628,
|
|
"learning_rate": 6.45910156409779e-06,
|
|
"loss": 1.3440314531326294,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 1.3972972972972972,
|
|
"grad_norm": 13.418126658463782,
|
|
"learning_rate": 6.4540876685240876e-06,
|
|
"loss": 1.2822811603546143,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 1.3981981981981981,
|
|
"grad_norm": 11.034343836938302,
|
|
"learning_rate": 6.449072175171008e-06,
|
|
"loss": 1.186071753501892,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 1.399099099099099,
|
|
"grad_norm": 18.777461199501083,
|
|
"learning_rate": 6.44405508954967e-06,
|
|
"loss": 1.8270515203475952,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"grad_norm": 7.822655272651996,
|
|
"learning_rate": 6.439036417172948e-06,
|
|
"loss": 1.5740070343017578,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 1.400900900900901,
|
|
"grad_norm": 12.887351504893184,
|
|
"learning_rate": 6.434016163555452e-06,
|
|
"loss": 1.8109557628631592,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 1.4018018018018017,
|
|
"grad_norm": 16.52740736826598,
|
|
"learning_rate": 6.428994334213533e-06,
|
|
"loss": 3.0481677055358887,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 1.4027027027027028,
|
|
"grad_norm": 10.743501044416927,
|
|
"learning_rate": 6.423970934665275e-06,
|
|
"loss": 1.4444117546081543,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 1.4036036036036035,
|
|
"grad_norm": 11.938361621219478,
|
|
"learning_rate": 6.418945970430486e-06,
|
|
"loss": 1.5727248191833496,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 1.4045045045045046,
|
|
"grad_norm": 13.699446290887359,
|
|
"learning_rate": 6.4139194470306885e-06,
|
|
"loss": 1.1469335556030273,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 1.4054054054054055,
|
|
"grad_norm": 11.087383650013706,
|
|
"learning_rate": 6.408891369989128e-06,
|
|
"loss": 1.6197885274887085,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 1.4063063063063064,
|
|
"grad_norm": 10.275757877334277,
|
|
"learning_rate": 6.403861744830749e-06,
|
|
"loss": 2.283778667449951,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 1.4072072072072073,
|
|
"grad_norm": 8.899213704142253,
|
|
"learning_rate": 6.398830577082198e-06,
|
|
"loss": 1.2480144500732422,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 1.4081081081081082,
|
|
"grad_norm": 8.514196767597555,
|
|
"learning_rate": 6.393797872271823e-06,
|
|
"loss": 1.2771308422088623,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 1.409009009009009,
|
|
"grad_norm": 15.00421976405728,
|
|
"learning_rate": 6.3887636359296534e-06,
|
|
"loss": 1.398680329322815,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 1.40990990990991,
|
|
"grad_norm": 9.213733227110374,
|
|
"learning_rate": 6.383727873587406e-06,
|
|
"loss": 1.3736463785171509,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 1.4108108108108108,
|
|
"grad_norm": 12.995105041138832,
|
|
"learning_rate": 6.378690590778471e-06,
|
|
"loss": 1.017199158668518,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 1.4117117117117117,
|
|
"grad_norm": 11.874571646596916,
|
|
"learning_rate": 6.373651793037916e-06,
|
|
"loss": 0.8746930360794067,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 1.4126126126126126,
|
|
"grad_norm": 7.435370378254893,
|
|
"learning_rate": 6.368611485902463e-06,
|
|
"loss": 1.9025697708129883,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 1.4135135135135135,
|
|
"grad_norm": 9.285030912488718,
|
|
"learning_rate": 6.363569674910499e-06,
|
|
"loss": 2.2031970024108887,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 1.4144144144144144,
|
|
"grad_norm": 11.609358703290049,
|
|
"learning_rate": 6.358526365602064e-06,
|
|
"loss": 1.5108566284179688,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 1.4153153153153153,
|
|
"grad_norm": 10.638422004709088,
|
|
"learning_rate": 6.353481563518842e-06,
|
|
"loss": 1.4841814041137695,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 1.4162162162162162,
|
|
"grad_norm": 14.190019820334548,
|
|
"learning_rate": 6.3484352742041586e-06,
|
|
"loss": 1.2413240671157837,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 1.417117117117117,
|
|
"grad_norm": 10.124847187016426,
|
|
"learning_rate": 6.343387503202974e-06,
|
|
"loss": 1.3165152072906494,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 1.418018018018018,
|
|
"grad_norm": 21.978059431121792,
|
|
"learning_rate": 6.338338256061873e-06,
|
|
"loss": 1.9939208030700684,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 1.4189189189189189,
|
|
"grad_norm": 16.402579483713858,
|
|
"learning_rate": 6.333287538329067e-06,
|
|
"loss": 1.8033710718154907,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 1.4198198198198198,
|
|
"grad_norm": 8.44207105176216,
|
|
"learning_rate": 6.328235355554382e-06,
|
|
"loss": 1.7890293598175049,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 1.4207207207207206,
|
|
"grad_norm": 11.354134019653287,
|
|
"learning_rate": 6.323181713289252e-06,
|
|
"loss": 1.3599138259887695,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 1.4216216216216218,
|
|
"grad_norm": 9.123051520886271,
|
|
"learning_rate": 6.318126617086715e-06,
|
|
"loss": 1.515425205230713,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 1.4225225225225224,
|
|
"grad_norm": 15.343909798586768,
|
|
"learning_rate": 6.31307007250141e-06,
|
|
"loss": 1.4394266605377197,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 1.4234234234234235,
|
|
"grad_norm": 8.212165320846351,
|
|
"learning_rate": 6.308012085089563e-06,
|
|
"loss": 1.6644560098648071,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 1.4243243243243242,
|
|
"grad_norm": 9.281393071269752,
|
|
"learning_rate": 6.3029526604089884e-06,
|
|
"loss": 1.6146223545074463,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 1.4252252252252253,
|
|
"grad_norm": 9.012711145286287,
|
|
"learning_rate": 6.297891804019078e-06,
|
|
"loss": 1.294966697692871,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 1.426126126126126,
|
|
"grad_norm": 9.725957470939198,
|
|
"learning_rate": 6.292829521480799e-06,
|
|
"loss": 1.305307149887085,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 1.427027027027027,
|
|
"grad_norm": 9.957884015738744,
|
|
"learning_rate": 6.2877658183566835e-06,
|
|
"loss": 1.8277273178100586,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 1.427927927927928,
|
|
"grad_norm": 14.176997988886253,
|
|
"learning_rate": 6.282700700210826e-06,
|
|
"loss": 2.1664233207702637,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 1.428828828828829,
|
|
"grad_norm": 11.210158890912192,
|
|
"learning_rate": 6.277634172608875e-06,
|
|
"loss": 1.5417143106460571,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 1.4297297297297298,
|
|
"grad_norm": 43.360585840995725,
|
|
"learning_rate": 6.272566241118028e-06,
|
|
"loss": 1.2948834896087646,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 1.4306306306306307,
|
|
"grad_norm": 23.179384362830724,
|
|
"learning_rate": 6.267496911307025e-06,
|
|
"loss": 2.2447755336761475,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 1.4315315315315316,
|
|
"grad_norm": 13.150370235034188,
|
|
"learning_rate": 6.262426188746142e-06,
|
|
"loss": 1.6491607427597046,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 1.4324324324324325,
|
|
"grad_norm": 16.607971986636734,
|
|
"learning_rate": 6.257354079007188e-06,
|
|
"loss": 1.8326668739318848,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 1.4333333333333333,
|
|
"grad_norm": 12.860466881669609,
|
|
"learning_rate": 6.252280587663493e-06,
|
|
"loss": 1.7672603130340576,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 1.4342342342342342,
|
|
"grad_norm": 13.821510643759613,
|
|
"learning_rate": 6.247205720289907e-06,
|
|
"loss": 1.5929617881774902,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 1.4351351351351351,
|
|
"grad_norm": 16.62571737411264,
|
|
"learning_rate": 6.242129482462791e-06,
|
|
"loss": 1.1646729707717896,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 1.436036036036036,
|
|
"grad_norm": 10.361866409910958,
|
|
"learning_rate": 6.2370518797600134e-06,
|
|
"loss": 1.9066646099090576,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 1.436936936936937,
|
|
"grad_norm": 17.129189452143006,
|
|
"learning_rate": 6.2319729177609385e-06,
|
|
"loss": 1.6732923984527588,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 1.4378378378378378,
|
|
"grad_norm": 8.787234499239403,
|
|
"learning_rate": 6.226892602046431e-06,
|
|
"loss": 1.8658177852630615,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 1.4387387387387387,
|
|
"grad_norm": 11.615649470253091,
|
|
"learning_rate": 6.221810938198836e-06,
|
|
"loss": 1.6445609331130981,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 1.4396396396396396,
|
|
"grad_norm": 15.692881856944984,
|
|
"learning_rate": 6.216727931801983e-06,
|
|
"loss": 1.9909805059432983,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 1.4405405405405405,
|
|
"grad_norm": 14.21867844260476,
|
|
"learning_rate": 6.21164358844118e-06,
|
|
"loss": 1.9550414085388184,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 1.4414414414414414,
|
|
"grad_norm": 10.660692370132626,
|
|
"learning_rate": 6.206557913703196e-06,
|
|
"loss": 1.4929118156433105,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 1.4423423423423423,
|
|
"grad_norm": 10.216387245363482,
|
|
"learning_rate": 6.201470913176273e-06,
|
|
"loss": 1.3300130367279053,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 1.4432432432432432,
|
|
"grad_norm": 21.373978171219317,
|
|
"learning_rate": 6.196382592450101e-06,
|
|
"loss": 1.9349777698516846,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 1.4441441441441443,
|
|
"grad_norm": 14.959338680440988,
|
|
"learning_rate": 6.191292957115825e-06,
|
|
"loss": 1.6885695457458496,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 1.445045045045045,
|
|
"grad_norm": 10.201541049752565,
|
|
"learning_rate": 6.186202012766036e-06,
|
|
"loss": 1.5974383354187012,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 1.445945945945946,
|
|
"grad_norm": 13.33819723535737,
|
|
"learning_rate": 6.1811097649947574e-06,
|
|
"loss": 1.3800368309020996,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 1.4468468468468467,
|
|
"grad_norm": 9.264941433832085,
|
|
"learning_rate": 6.176016219397452e-06,
|
|
"loss": 1.209047555923462,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 1.4477477477477478,
|
|
"grad_norm": 16.113363796632687,
|
|
"learning_rate": 6.170921381571002e-06,
|
|
"loss": 1.006246566772461,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 1.4486486486486487,
|
|
"grad_norm": 16.770842222995086,
|
|
"learning_rate": 6.165825257113713e-06,
|
|
"loss": 1.6659287214279175,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 1.4495495495495496,
|
|
"grad_norm": 16.039169570521924,
|
|
"learning_rate": 6.160727851625307e-06,
|
|
"loss": 3.2486722469329834,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 1.4504504504504505,
|
|
"grad_norm": 17.357556570755342,
|
|
"learning_rate": 6.155629170706911e-06,
|
|
"loss": 2.889737606048584,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 1.4513513513513514,
|
|
"grad_norm": 12.05780156243574,
|
|
"learning_rate": 6.150529219961051e-06,
|
|
"loss": 1.5399055480957031,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 1.4522522522522523,
|
|
"grad_norm": 12.821440883999509,
|
|
"learning_rate": 6.14542800499165e-06,
|
|
"loss": 0.9202550649642944,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 1.4531531531531532,
|
|
"grad_norm": 9.525863966614185,
|
|
"learning_rate": 6.1403255314040236e-06,
|
|
"loss": 1.334446668624878,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 1.454054054054054,
|
|
"grad_norm": 11.119332552840138,
|
|
"learning_rate": 6.135221804804865e-06,
|
|
"loss": 1.0869536399841309,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 1.454954954954955,
|
|
"grad_norm": 9.457639599285084,
|
|
"learning_rate": 6.130116830802246e-06,
|
|
"loss": 1.2879786491394043,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 1.4558558558558559,
|
|
"grad_norm": 11.840952811412075,
|
|
"learning_rate": 6.125010615005612e-06,
|
|
"loss": 1.6483588218688965,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 1.4567567567567568,
|
|
"grad_norm": 12.597190006246704,
|
|
"learning_rate": 6.11990316302577e-06,
|
|
"loss": 1.1935402154922485,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 1.4576576576576576,
|
|
"grad_norm": 14.775593935385816,
|
|
"learning_rate": 6.114794480474886e-06,
|
|
"loss": 1.4351227283477783,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 1.4585585585585585,
|
|
"grad_norm": 25.85373262150229,
|
|
"learning_rate": 6.109684572966479e-06,
|
|
"loss": 1.638490915298462,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 1.4594594594594594,
|
|
"grad_norm": 7.955022606363988,
|
|
"learning_rate": 6.104573446115411e-06,
|
|
"loss": 1.7206358909606934,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 1.4603603603603603,
|
|
"grad_norm": 8.620128522921192,
|
|
"learning_rate": 6.099461105537889e-06,
|
|
"loss": 2.0019052028656006,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 1.4612612612612612,
|
|
"grad_norm": 13.946066909423006,
|
|
"learning_rate": 6.094347556851449e-06,
|
|
"loss": 1.5832326412200928,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 1.462162162162162,
|
|
"grad_norm": 12.133350625112662,
|
|
"learning_rate": 6.089232805674956e-06,
|
|
"loss": 1.5703575611114502,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 1.463063063063063,
|
|
"grad_norm": 9.00483359712496,
|
|
"learning_rate": 6.084116857628597e-06,
|
|
"loss": 1.1276874542236328,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 1.4639639639639639,
|
|
"grad_norm": 9.763789195084014,
|
|
"learning_rate": 6.078999718333873e-06,
|
|
"loss": 2.4685914516448975,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 1.464864864864865,
|
|
"grad_norm": 9.700531000079286,
|
|
"learning_rate": 6.073881393413596e-06,
|
|
"loss": 1.3841001987457275,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 1.4657657657657657,
|
|
"grad_norm": 16.588302386398713,
|
|
"learning_rate": 6.068761888491879e-06,
|
|
"loss": 2.0603816509246826,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 1.4666666666666668,
|
|
"grad_norm": 14.757717618867021,
|
|
"learning_rate": 6.063641209194132e-06,
|
|
"loss": 1.2781075239181519,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 1.4675675675675675,
|
|
"grad_norm": 40.19571124648031,
|
|
"learning_rate": 6.058519361147055e-06,
|
|
"loss": 1.8549585342407227,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 1.4684684684684686,
|
|
"grad_norm": 7.224974582225779,
|
|
"learning_rate": 6.053396349978632e-06,
|
|
"loss": 1.322721004486084,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 1.4693693693693692,
|
|
"grad_norm": 17.844463774704433,
|
|
"learning_rate": 6.048272181318128e-06,
|
|
"loss": 1.6630491018295288,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 1.4702702702702704,
|
|
"grad_norm": 11.434248056957271,
|
|
"learning_rate": 6.043146860796076e-06,
|
|
"loss": 1.1549161672592163,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 1.4711711711711712,
|
|
"grad_norm": 11.432998465665783,
|
|
"learning_rate": 6.0380203940442775e-06,
|
|
"loss": 0.9584408402442932,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 1.4720720720720721,
|
|
"grad_norm": 8.388264777564174,
|
|
"learning_rate": 6.032892786695791e-06,
|
|
"loss": 2.0273547172546387,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 1.472972972972973,
|
|
"grad_norm": 10.0987785197442,
|
|
"learning_rate": 6.0277640443849304e-06,
|
|
"loss": 1.3959040641784668,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 1.473873873873874,
|
|
"grad_norm": 15.271438862832918,
|
|
"learning_rate": 6.022634172747256e-06,
|
|
"loss": 1.2520636320114136,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 1.4747747747747748,
|
|
"grad_norm": 14.927005011233385,
|
|
"learning_rate": 6.017503177419567e-06,
|
|
"loss": 2.620509147644043,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 1.4756756756756757,
|
|
"grad_norm": 11.735603116312422,
|
|
"learning_rate": 6.012371064039902e-06,
|
|
"loss": 1.672066330909729,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 1.4765765765765766,
|
|
"grad_norm": 13.62705118948423,
|
|
"learning_rate": 6.007237838247526e-06,
|
|
"loss": 2.0264129638671875,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 1.4774774774774775,
|
|
"grad_norm": 8.554462912863533,
|
|
"learning_rate": 6.0021035056829245e-06,
|
|
"loss": 1.2257013320922852,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 1.4783783783783784,
|
|
"grad_norm": 8.449478832103793,
|
|
"learning_rate": 5.9969680719878e-06,
|
|
"loss": 1.8023260831832886,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 1.4792792792792793,
|
|
"grad_norm": 8.959188230712469,
|
|
"learning_rate": 5.991831542805065e-06,
|
|
"loss": 1.2051575183868408,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 1.4801801801801802,
|
|
"grad_norm": 15.777369325858878,
|
|
"learning_rate": 5.986693923778838e-06,
|
|
"loss": 1.8904740810394287,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 1.481081081081081,
|
|
"grad_norm": 10.411842713882923,
|
|
"learning_rate": 5.9815552205544316e-06,
|
|
"loss": 0.7278385162353516,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 1.481981981981982,
|
|
"grad_norm": 8.146928013269534,
|
|
"learning_rate": 5.97641543877835e-06,
|
|
"loss": 1.5132724046707153,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 1.4828828828828828,
|
|
"grad_norm": 11.882997439370762,
|
|
"learning_rate": 5.971274584098288e-06,
|
|
"loss": 1.256619930267334,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 1.4837837837837837,
|
|
"grad_norm": 11.595953910475629,
|
|
"learning_rate": 5.966132662163111e-06,
|
|
"loss": 1.0904524326324463,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 1.4846846846846846,
|
|
"grad_norm": 10.305489803373794,
|
|
"learning_rate": 5.960989678622865e-06,
|
|
"loss": 1.3187847137451172,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 1.4855855855855855,
|
|
"grad_norm": 13.548112746313338,
|
|
"learning_rate": 5.955845639128756e-06,
|
|
"loss": 1.4859611988067627,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 1.4864864864864864,
|
|
"grad_norm": 10.724179999334005,
|
|
"learning_rate": 5.950700549333155e-06,
|
|
"loss": 1.4655499458312988,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 1.4873873873873875,
|
|
"grad_norm": 12.19615947777353,
|
|
"learning_rate": 5.945554414889583e-06,
|
|
"loss": 1.3756601810455322,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 1.4882882882882882,
|
|
"grad_norm": 13.17831608172685,
|
|
"learning_rate": 5.940407241452711e-06,
|
|
"loss": 2.211275339126587,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 1.4891891891891893,
|
|
"grad_norm": 18.11415047074662,
|
|
"learning_rate": 5.935259034678355e-06,
|
|
"loss": 0.9074662923812866,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 1.49009009009009,
|
|
"grad_norm": 11.868427755788677,
|
|
"learning_rate": 5.93010980022346e-06,
|
|
"loss": 1.8241913318634033,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 1.490990990990991,
|
|
"grad_norm": 11.664353534766478,
|
|
"learning_rate": 5.924959543746106e-06,
|
|
"loss": 2.207322120666504,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 1.491891891891892,
|
|
"grad_norm": 8.330443411504472,
|
|
"learning_rate": 5.919808270905492e-06,
|
|
"loss": 1.806227684020996,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 1.4927927927927929,
|
|
"grad_norm": 8.941231059731615,
|
|
"learning_rate": 5.914655987361934e-06,
|
|
"loss": 1.4988905191421509,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 1.4936936936936938,
|
|
"grad_norm": 7.830759914268425,
|
|
"learning_rate": 5.909502698776862e-06,
|
|
"loss": 1.8874337673187256,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 1.4945945945945946,
|
|
"grad_norm": 10.5751037090603,
|
|
"learning_rate": 5.9043484108128065e-06,
|
|
"loss": 1.1740607023239136,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 1.4954954954954955,
|
|
"grad_norm": 10.412303621365872,
|
|
"learning_rate": 5.8991931291334e-06,
|
|
"loss": 2.1098806858062744,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 1.4963963963963964,
|
|
"grad_norm": 9.858650119398847,
|
|
"learning_rate": 5.894036859403363e-06,
|
|
"loss": 1.1361764669418335,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 1.4972972972972973,
|
|
"grad_norm": 10.787153898457976,
|
|
"learning_rate": 5.8888796072885035e-06,
|
|
"loss": 1.571702480316162,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 1.4981981981981982,
|
|
"grad_norm": 12.97077720565757,
|
|
"learning_rate": 5.883721378455709e-06,
|
|
"loss": 1.1731760501861572,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 1.499099099099099,
|
|
"grad_norm": 8.890718278014416,
|
|
"learning_rate": 5.8785621785729404e-06,
|
|
"loss": 1.6960201263427734,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 9.177425634084365,
|
|
"learning_rate": 5.873402013309226e-06,
|
|
"loss": 2.0487327575683594,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 1.500900900900901,
|
|
"grad_norm": 19.481958445940464,
|
|
"learning_rate": 5.8682408883346535e-06,
|
|
"loss": 1.5003571510314941,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 1.5018018018018018,
|
|
"grad_norm": 7.910706063031221,
|
|
"learning_rate": 5.863078809320364e-06,
|
|
"loss": 1.8180335760116577,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 1.5027027027027027,
|
|
"grad_norm": 9.026031805006543,
|
|
"learning_rate": 5.857915781938552e-06,
|
|
"loss": 0.5815849900245667,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 1.5036036036036036,
|
|
"grad_norm": 9.194787009241589,
|
|
"learning_rate": 5.85275181186245e-06,
|
|
"loss": 2.0344133377075195,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 1.5045045045045045,
|
|
"grad_norm": 15.441711483190371,
|
|
"learning_rate": 5.847586904766326e-06,
|
|
"loss": 1.4995135068893433,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 1.5054054054054054,
|
|
"grad_norm": 19.405524715570145,
|
|
"learning_rate": 5.8424210663254785e-06,
|
|
"loss": 1.014390468597412,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 1.5063063063063065,
|
|
"grad_norm": 7.728607689108665,
|
|
"learning_rate": 5.837254302216232e-06,
|
|
"loss": 1.178196668624878,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 1.5072072072072071,
|
|
"grad_norm": 7.933918814442345,
|
|
"learning_rate": 5.832086618115924e-06,
|
|
"loss": 1.8295114040374756,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 1.5081081081081082,
|
|
"grad_norm": 14.836941228728008,
|
|
"learning_rate": 5.8269180197029055e-06,
|
|
"loss": 1.8722667694091797,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 1.509009009009009,
|
|
"grad_norm": 11.861924285254538,
|
|
"learning_rate": 5.821748512656531e-06,
|
|
"loss": 1.7150132656097412,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 1.50990990990991,
|
|
"grad_norm": 7.914972670958925,
|
|
"learning_rate": 5.816578102657154e-06,
|
|
"loss": 2.0199174880981445,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 1.5108108108108107,
|
|
"grad_norm": 14.704452033368009,
|
|
"learning_rate": 5.811406795386122e-06,
|
|
"loss": 2.478482961654663,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 1.5117117117117118,
|
|
"grad_norm": 20.401386884909293,
|
|
"learning_rate": 5.806234596525763e-06,
|
|
"loss": 2.1431708335876465,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 1.5126126126126125,
|
|
"grad_norm": 9.657768229659336,
|
|
"learning_rate": 5.80106151175939e-06,
|
|
"loss": 1.2024478912353516,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 1.5135135135135136,
|
|
"grad_norm": 11.154050310367278,
|
|
"learning_rate": 5.795887546771286e-06,
|
|
"loss": 1.7211270332336426,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 1.5144144144144143,
|
|
"grad_norm": 27.025931308202402,
|
|
"learning_rate": 5.790712707246705e-06,
|
|
"loss": 1.498245358467102,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 1.5153153153153154,
|
|
"grad_norm": 8.568964534712551,
|
|
"learning_rate": 5.785536998871858e-06,
|
|
"loss": 1.4259952306747437,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 1.516216216216216,
|
|
"grad_norm": 9.352484629253393,
|
|
"learning_rate": 5.780360427333915e-06,
|
|
"loss": 1.5320580005645752,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 1.5171171171171172,
|
|
"grad_norm": 19.846026084647807,
|
|
"learning_rate": 5.77518299832099e-06,
|
|
"loss": 1.1129813194274902,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 1.518018018018018,
|
|
"grad_norm": 9.143607370757177,
|
|
"learning_rate": 5.770004717522141e-06,
|
|
"loss": 2.056257486343384,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 1.518918918918919,
|
|
"grad_norm": 16.3647108204785,
|
|
"learning_rate": 5.764825590627362e-06,
|
|
"loss": 1.675489902496338,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 1.5198198198198198,
|
|
"grad_norm": 8.620761350410266,
|
|
"learning_rate": 5.75964562332758e-06,
|
|
"loss": 1.0735529661178589,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 1.5207207207207207,
|
|
"grad_norm": 11.949137784819353,
|
|
"learning_rate": 5.754464821314637e-06,
|
|
"loss": 1.6738760471343994,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 1.5216216216216216,
|
|
"grad_norm": 13.20388841691068,
|
|
"learning_rate": 5.749283190281301e-06,
|
|
"loss": 2.174321413040161,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 1.5225225225225225,
|
|
"grad_norm": 8.30531785385007,
|
|
"learning_rate": 5.744100735921245e-06,
|
|
"loss": 1.2775280475616455,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 1.5234234234234234,
|
|
"grad_norm": 9.575544371832327,
|
|
"learning_rate": 5.7389174639290526e-06,
|
|
"loss": 1.6569616794586182,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 1.5243243243243243,
|
|
"grad_norm": 11.02059585703722,
|
|
"learning_rate": 5.733733380000199e-06,
|
|
"loss": 1.7098231315612793,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 1.5252252252252252,
|
|
"grad_norm": 20.77680238959511,
|
|
"learning_rate": 5.728548489831057e-06,
|
|
"loss": 1.6244301795959473,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 1.526126126126126,
|
|
"grad_norm": 13.085006557615154,
|
|
"learning_rate": 5.723362799118883e-06,
|
|
"loss": 1.1960186958312988,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 1.527027027027027,
|
|
"grad_norm": 16.9436732795327,
|
|
"learning_rate": 5.718176313561812e-06,
|
|
"loss": 1.2833318710327148,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 1.5279279279279279,
|
|
"grad_norm": 11.56067198397842,
|
|
"learning_rate": 5.712989038858855e-06,
|
|
"loss": 1.3736417293548584,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 1.528828828828829,
|
|
"grad_norm": 11.914661268488837,
|
|
"learning_rate": 5.707800980709888e-06,
|
|
"loss": 1.7185921669006348,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 1.5297297297297296,
|
|
"grad_norm": 12.478648162885634,
|
|
"learning_rate": 5.702612144815648e-06,
|
|
"loss": 0.889504075050354,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 1.5306306306306308,
|
|
"grad_norm": 12.015993636472965,
|
|
"learning_rate": 5.697422536877728e-06,
|
|
"loss": 1.2465369701385498,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 1.5315315315315314,
|
|
"grad_norm": 8.569076120163366,
|
|
"learning_rate": 5.69223216259857e-06,
|
|
"loss": 1.6955018043518066,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 1.5324324324324325,
|
|
"grad_norm": 11.079823892196782,
|
|
"learning_rate": 5.687041027681455e-06,
|
|
"loss": 2.3626818656921387,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 1.5333333333333332,
|
|
"grad_norm": 14.626377664270034,
|
|
"learning_rate": 5.681849137830501e-06,
|
|
"loss": 1.3092429637908936,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 1.5342342342342343,
|
|
"grad_norm": 13.003938549380914,
|
|
"learning_rate": 5.6766564987506564e-06,
|
|
"loss": 1.6559178829193115,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 1.535135135135135,
|
|
"grad_norm": 30.983859642167147,
|
|
"learning_rate": 5.671463116147693e-06,
|
|
"loss": 1.196112871170044,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 1.5360360360360361,
|
|
"grad_norm": 11.7516729348981,
|
|
"learning_rate": 5.666268995728199e-06,
|
|
"loss": 1.7353311777114868,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 1.5369369369369368,
|
|
"grad_norm": 8.027133091016674,
|
|
"learning_rate": 5.661074143199574e-06,
|
|
"loss": 0.9091062545776367,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 1.537837837837838,
|
|
"grad_norm": 16.1951342231858,
|
|
"learning_rate": 5.65587856427002e-06,
|
|
"loss": 0.7852965593338013,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 1.5387387387387388,
|
|
"grad_norm": 20.435391009280067,
|
|
"learning_rate": 5.650682264648539e-06,
|
|
"loss": 0.6935000419616699,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 1.5396396396396397,
|
|
"grad_norm": 12.942865389701538,
|
|
"learning_rate": 5.645485250044925e-06,
|
|
"loss": 1.6129305362701416,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 1.5405405405405406,
|
|
"grad_norm": 10.064534723487958,
|
|
"learning_rate": 5.640287526169758e-06,
|
|
"loss": 1.5475406646728516,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 1.5414414414414415,
|
|
"grad_norm": 10.937035308927271,
|
|
"learning_rate": 5.635089098734394e-06,
|
|
"loss": 1.6922842264175415,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 1.5423423423423424,
|
|
"grad_norm": 11.282532754995433,
|
|
"learning_rate": 5.629889973450967e-06,
|
|
"loss": 1.1134803295135498,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 1.5432432432432432,
|
|
"grad_norm": 20.10974367652297,
|
|
"learning_rate": 5.624690156032375e-06,
|
|
"loss": 1.522965908050537,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 1.5441441441441441,
|
|
"grad_norm": 31.445859279573188,
|
|
"learning_rate": 5.619489652192277e-06,
|
|
"loss": 1.5033540725708008,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 1.545045045045045,
|
|
"grad_norm": 26.19661678772121,
|
|
"learning_rate": 5.614288467645085e-06,
|
|
"loss": 1.8068926334381104,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 1.545945945945946,
|
|
"grad_norm": 13.689355970616518,
|
|
"learning_rate": 5.60908660810596e-06,
|
|
"loss": 1.1300309896469116,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 1.5468468468468468,
|
|
"grad_norm": 17.717202462674102,
|
|
"learning_rate": 5.603884079290807e-06,
|
|
"loss": 1.168104648590088,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 1.5477477477477477,
|
|
"grad_norm": 13.78333679043539,
|
|
"learning_rate": 5.598680886916262e-06,
|
|
"loss": 2.086900234222412,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 1.5486486486486486,
|
|
"grad_norm": 11.61939517847149,
|
|
"learning_rate": 5.593477036699694e-06,
|
|
"loss": 0.9420620203018188,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 1.5495495495495497,
|
|
"grad_norm": 8.566983109466861,
|
|
"learning_rate": 5.588272534359193e-06,
|
|
"loss": 1.6835405826568604,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 1.5504504504504504,
|
|
"grad_norm": 20.272241725615217,
|
|
"learning_rate": 5.583067385613565e-06,
|
|
"loss": 1.3769150972366333,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 1.5513513513513515,
|
|
"grad_norm": 13.152434110198287,
|
|
"learning_rate": 5.577861596182329e-06,
|
|
"loss": 1.4473698139190674,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 1.5522522522522522,
|
|
"grad_norm": 16.03627561622673,
|
|
"learning_rate": 5.572655171785706e-06,
|
|
"loss": 1.2154319286346436,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 1.5531531531531533,
|
|
"grad_norm": 8.941137328941675,
|
|
"learning_rate": 5.567448118144612e-06,
|
|
"loss": 2.1888017654418945,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 1.554054054054054,
|
|
"grad_norm": 8.394871606465358,
|
|
"learning_rate": 5.56224044098066e-06,
|
|
"loss": 1.4928936958312988,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 1.554954954954955,
|
|
"grad_norm": 9.725576143990676,
|
|
"learning_rate": 5.557032146016142e-06,
|
|
"loss": 1.499352216720581,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 1.5558558558558557,
|
|
"grad_norm": 11.049766302756778,
|
|
"learning_rate": 5.551823238974036e-06,
|
|
"loss": 1.7239288091659546,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 1.5567567567567568,
|
|
"grad_norm": 11.428954415639115,
|
|
"learning_rate": 5.5466137255779874e-06,
|
|
"loss": 1.3868229389190674,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 1.5576576576576575,
|
|
"grad_norm": 20.396513511623166,
|
|
"learning_rate": 5.541403611552309e-06,
|
|
"loss": 2.211121082305908,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 1.5585585585585586,
|
|
"grad_norm": 13.881234123571051,
|
|
"learning_rate": 5.536192902621975e-06,
|
|
"loss": 1.5474152565002441,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 1.5594594594594593,
|
|
"grad_norm": 9.726536291578002,
|
|
"learning_rate": 5.530981604512612e-06,
|
|
"loss": 2.0104358196258545,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 1.5603603603603604,
|
|
"grad_norm": 12.36264130130095,
|
|
"learning_rate": 5.525769722950491e-06,
|
|
"loss": 1.5727704763412476,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 1.5612612612612613,
|
|
"grad_norm": 12.5047381018751,
|
|
"learning_rate": 5.520557263662533e-06,
|
|
"loss": 1.171445369720459,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 1.5621621621621622,
|
|
"grad_norm": 7.379902224066317,
|
|
"learning_rate": 5.515344232376283e-06,
|
|
"loss": 2.027517318725586,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 1.563063063063063,
|
|
"grad_norm": 7.6975595525575,
|
|
"learning_rate": 5.510130634819921e-06,
|
|
"loss": 2.2303547859191895,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 1.563963963963964,
|
|
"grad_norm": 8.677028985658822,
|
|
"learning_rate": 5.504916476722249e-06,
|
|
"loss": 1.9097020626068115,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 1.5648648648648649,
|
|
"grad_norm": 13.031580998989343,
|
|
"learning_rate": 5.499701763812684e-06,
|
|
"loss": 1.7067222595214844,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 1.5657657657657658,
|
|
"grad_norm": 9.184292289384498,
|
|
"learning_rate": 5.49448650182125e-06,
|
|
"loss": 1.273280382156372,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 1.5666666666666667,
|
|
"grad_norm": 11.550196038454393,
|
|
"learning_rate": 5.489270696478578e-06,
|
|
"loss": 1.4262604713439941,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 1.5675675675675675,
|
|
"grad_norm": 7.769261047923687,
|
|
"learning_rate": 5.484054353515896e-06,
|
|
"loss": 1.2348459959030151,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 1.5684684684684684,
|
|
"grad_norm": 13.107939324830204,
|
|
"learning_rate": 5.478837478665021e-06,
|
|
"loss": 1.8473243713378906,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 1.5693693693693693,
|
|
"grad_norm": 10.401820528121625,
|
|
"learning_rate": 5.473620077658353e-06,
|
|
"loss": 1.4473576545715332,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 1.5702702702702702,
|
|
"grad_norm": 9.423733960457518,
|
|
"learning_rate": 5.468402156228875e-06,
|
|
"loss": 1.2356266975402832,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 1.571171171171171,
|
|
"grad_norm": 10.585209187451408,
|
|
"learning_rate": 5.463183720110138e-06,
|
|
"loss": 1.097558617591858,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 1.5720720720720722,
|
|
"grad_norm": 8.721091264497147,
|
|
"learning_rate": 5.457964775036259e-06,
|
|
"loss": 1.1964499950408936,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 1.572972972972973,
|
|
"grad_norm": 14.234017672895392,
|
|
"learning_rate": 5.452745326741914e-06,
|
|
"loss": 2.836690664291382,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 1.573873873873874,
|
|
"grad_norm": 14.692751474492763,
|
|
"learning_rate": 5.447525380962334e-06,
|
|
"loss": 1.6723198890686035,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 1.5747747747747747,
|
|
"grad_norm": 9.03843708877778,
|
|
"learning_rate": 5.442304943433294e-06,
|
|
"loss": 1.8948137760162354,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 1.5756756756756758,
|
|
"grad_norm": 10.540186751581782,
|
|
"learning_rate": 5.437084019891113e-06,
|
|
"loss": 1.5536619424819946,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 1.5765765765765765,
|
|
"grad_norm": 9.353422818160505,
|
|
"learning_rate": 5.431862616072643e-06,
|
|
"loss": 1.621275544166565,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 1.5774774774774776,
|
|
"grad_norm": 7.226380065522959,
|
|
"learning_rate": 5.426640737715259e-06,
|
|
"loss": 1.4333561658859253,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 1.5783783783783782,
|
|
"grad_norm": 8.905206873635436,
|
|
"learning_rate": 5.421418390556861e-06,
|
|
"loss": 1.806396245956421,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 1.5792792792792794,
|
|
"grad_norm": 14.590205105832894,
|
|
"learning_rate": 5.416195580335864e-06,
|
|
"loss": 2.2460813522338867,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 1.58018018018018,
|
|
"grad_norm": 12.896655028517138,
|
|
"learning_rate": 5.410972312791196e-06,
|
|
"loss": 1.7828409671783447,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 1.5810810810810811,
|
|
"grad_norm": 10.13319642764654,
|
|
"learning_rate": 5.4057485936622774e-06,
|
|
"loss": 1.285484790802002,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 1.581981981981982,
|
|
"grad_norm": 12.578810943595805,
|
|
"learning_rate": 5.400524428689035e-06,
|
|
"loss": 1.4974174499511719,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 1.582882882882883,
|
|
"grad_norm": 39.19894799894435,
|
|
"learning_rate": 5.395299823611881e-06,
|
|
"loss": 1.149336814880371,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 1.5837837837837838,
|
|
"grad_norm": 8.933910764324592,
|
|
"learning_rate": 5.390074784171711e-06,
|
|
"loss": 2.2303504943847656,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 1.5846846846846847,
|
|
"grad_norm": 8.087996858180993,
|
|
"learning_rate": 5.384849316109897e-06,
|
|
"loss": 1.6556181907653809,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 1.5855855855855856,
|
|
"grad_norm": 15.824654305234416,
|
|
"learning_rate": 5.379623425168287e-06,
|
|
"loss": 1.2944085597991943,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 1.5864864864864865,
|
|
"grad_norm": 10.838593459559144,
|
|
"learning_rate": 5.374397117089185e-06,
|
|
"loss": 0.8539993762969971,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 1.5873873873873874,
|
|
"grad_norm": 13.54095066315367,
|
|
"learning_rate": 5.369170397615361e-06,
|
|
"loss": 1.1020762920379639,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 1.5882882882882883,
|
|
"grad_norm": 8.534711679875066,
|
|
"learning_rate": 5.363943272490034e-06,
|
|
"loss": 1.6411093473434448,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 1.5891891891891892,
|
|
"grad_norm": 6.1730102866116,
|
|
"learning_rate": 5.358715747456871e-06,
|
|
"loss": 1.6978633403778076,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 1.59009009009009,
|
|
"grad_norm": 8.041044716644684,
|
|
"learning_rate": 5.353487828259973e-06,
|
|
"loss": 1.5700498819351196,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 1.590990990990991,
|
|
"grad_norm": 11.102138549576248,
|
|
"learning_rate": 5.348259520643883e-06,
|
|
"loss": 1.0377918481826782,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 1.5918918918918918,
|
|
"grad_norm": 12.738014608736016,
|
|
"learning_rate": 5.343030830353561e-06,
|
|
"loss": 1.8865203857421875,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 1.592792792792793,
|
|
"grad_norm": 11.457612778203872,
|
|
"learning_rate": 5.3378017631343925e-06,
|
|
"loss": 1.6433229446411133,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 1.5936936936936936,
|
|
"grad_norm": 14.71488254333933,
|
|
"learning_rate": 5.332572324732178e-06,
|
|
"loss": 1.7563650608062744,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 1.5945945945945947,
|
|
"grad_norm": 17.721491557671634,
|
|
"learning_rate": 5.327342520893125e-06,
|
|
"loss": 1.1539019346237183,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 1.5954954954954954,
|
|
"grad_norm": 8.71741568280385,
|
|
"learning_rate": 5.322112357363841e-06,
|
|
"loss": 2.0637054443359375,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 1.5963963963963965,
|
|
"grad_norm": 12.56926543285426,
|
|
"learning_rate": 5.31688183989133e-06,
|
|
"loss": 1.3117108345031738,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 1.5972972972972972,
|
|
"grad_norm": 8.865456653148819,
|
|
"learning_rate": 5.311650974222986e-06,
|
|
"loss": 1.6243171691894531,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 1.5981981981981983,
|
|
"grad_norm": 12.345358106267252,
|
|
"learning_rate": 5.306419766106582e-06,
|
|
"loss": 0.6512848734855652,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 1.599099099099099,
|
|
"grad_norm": 10.705630687496493,
|
|
"learning_rate": 5.301188221290272e-06,
|
|
"loss": 1.4087820053100586,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"grad_norm": 10.741620683648545,
|
|
"learning_rate": 5.295956345522576e-06,
|
|
"loss": 1.1886143684387207,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 1.6009009009009008,
|
|
"grad_norm": 8.543286773620101,
|
|
"learning_rate": 5.290724144552379e-06,
|
|
"loss": 1.4079172611236572,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 1.6018018018018019,
|
|
"grad_norm": 13.272800580786697,
|
|
"learning_rate": 5.285491624128927e-06,
|
|
"loss": 1.1488702297210693,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 1.6027027027027025,
|
|
"grad_norm": 14.426102880294756,
|
|
"learning_rate": 5.280258790001809e-06,
|
|
"loss": 0.9321128129959106,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 1.6036036036036037,
|
|
"grad_norm": 9.548974634623296,
|
|
"learning_rate": 5.275025647920966e-06,
|
|
"loss": 1.1412519216537476,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 1.6045045045045045,
|
|
"grad_norm": 20.524347880108646,
|
|
"learning_rate": 5.2697922036366746e-06,
|
|
"loss": 1.6211285591125488,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 1.6054054054054054,
|
|
"grad_norm": 33.479578225974755,
|
|
"learning_rate": 5.264558462899543e-06,
|
|
"loss": 1.5719397068023682,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 1.6063063063063063,
|
|
"grad_norm": 19.53752839452626,
|
|
"learning_rate": 5.259324431460506e-06,
|
|
"loss": 1.844597339630127,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 1.6072072072072072,
|
|
"grad_norm": 16.542751722494142,
|
|
"learning_rate": 5.254090115070818e-06,
|
|
"loss": 1.5311559438705444,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 1.6081081081081081,
|
|
"grad_norm": 8.724197779311208,
|
|
"learning_rate": 5.248855519482043e-06,
|
|
"loss": 1.4715955257415771,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 1.609009009009009,
|
|
"grad_norm": 11.2231172158559,
|
|
"learning_rate": 5.2436206504460605e-06,
|
|
"loss": 1.6918704509735107,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 1.60990990990991,
|
|
"grad_norm": 5.892152936833488,
|
|
"learning_rate": 5.238385513715043e-06,
|
|
"loss": 1.3249701261520386,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 1.6108108108108108,
|
|
"grad_norm": 17.068558495770255,
|
|
"learning_rate": 5.233150115041455e-06,
|
|
"loss": 0.935366690158844,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 1.6117117117117117,
|
|
"grad_norm": 14.047495217762876,
|
|
"learning_rate": 5.227914460178057e-06,
|
|
"loss": 1.8201367855072021,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 1.6126126126126126,
|
|
"grad_norm": 11.293164291285848,
|
|
"learning_rate": 5.222678554877886e-06,
|
|
"loss": 2.1987149715423584,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 1.6135135135135135,
|
|
"grad_norm": 22.676416411920346,
|
|
"learning_rate": 5.217442404894254e-06,
|
|
"loss": 1.5156608819961548,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 1.6144144144144144,
|
|
"grad_norm": 7.958578598404144,
|
|
"learning_rate": 5.212206015980742e-06,
|
|
"loss": 1.5375053882598877,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 1.6153153153153155,
|
|
"grad_norm": 9.514909529564704,
|
|
"learning_rate": 5.206969393891197e-06,
|
|
"loss": 1.2336030006408691,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 1.6162162162162161,
|
|
"grad_norm": 12.967977313463441,
|
|
"learning_rate": 5.201732544379718e-06,
|
|
"loss": 1.5091674327850342,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 1.6171171171171173,
|
|
"grad_norm": 35.3412053116069,
|
|
"learning_rate": 5.196495473200656e-06,
|
|
"loss": 2.655040979385376,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 1.618018018018018,
|
|
"grad_norm": 10.153236749107771,
|
|
"learning_rate": 5.191258186108608e-06,
|
|
"loss": 2.105454921722412,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 1.618918918918919,
|
|
"grad_norm": 9.554663494178893,
|
|
"learning_rate": 5.1860206888584e-06,
|
|
"loss": 1.0198447704315186,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 1.6198198198198197,
|
|
"grad_norm": 14.959304262963258,
|
|
"learning_rate": 5.180782987205096e-06,
|
|
"loss": 1.8990309238433838,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 1.6207207207207208,
|
|
"grad_norm": 7.995055189206548,
|
|
"learning_rate": 5.175545086903985e-06,
|
|
"loss": 1.9752475023269653,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 1.6216216216216215,
|
|
"grad_norm": 7.132154343339352,
|
|
"learning_rate": 5.170306993710569e-06,
|
|
"loss": 1.1116163730621338,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 1.6225225225225226,
|
|
"grad_norm": 20.544392234808946,
|
|
"learning_rate": 5.165068713380568e-06,
|
|
"loss": 2.2245161533355713,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 1.6234234234234233,
|
|
"grad_norm": 8.74079077333362,
|
|
"learning_rate": 5.159830251669904e-06,
|
|
"loss": 1.6176211833953857,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 1.6243243243243244,
|
|
"grad_norm": 17.127210113345892,
|
|
"learning_rate": 5.154591614334698e-06,
|
|
"loss": 1.2219548225402832,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 1.6252252252252253,
|
|
"grad_norm": 6.946111506813401,
|
|
"learning_rate": 5.149352807131266e-06,
|
|
"loss": 1.9225727319717407,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 1.6261261261261262,
|
|
"grad_norm": 11.802021809675379,
|
|
"learning_rate": 5.14411383581611e-06,
|
|
"loss": 1.5231215953826904,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 1.627027027027027,
|
|
"grad_norm": 11.43398591743184,
|
|
"learning_rate": 5.138874706145912e-06,
|
|
"loss": 2.293747663497925,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 1.627927927927928,
|
|
"grad_norm": 12.066185236189112,
|
|
"learning_rate": 5.133635423877524e-06,
|
|
"loss": 1.5310258865356445,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 1.6288288288288288,
|
|
"grad_norm": 8.440505711955788,
|
|
"learning_rate": 5.128395994767976e-06,
|
|
"loss": 0.7817059755325317,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 1.6297297297297297,
|
|
"grad_norm": 10.759503137406137,
|
|
"learning_rate": 5.123156424574449e-06,
|
|
"loss": 1.421698808670044,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 1.6306306306306306,
|
|
"grad_norm": 11.512030401380272,
|
|
"learning_rate": 5.117916719054285e-06,
|
|
"loss": 1.6906330585479736,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 1.6315315315315315,
|
|
"grad_norm": 13.917023003366065,
|
|
"learning_rate": 5.112676883964972e-06,
|
|
"loss": 2.2094297409057617,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 1.6324324324324324,
|
|
"grad_norm": 13.373602393330966,
|
|
"learning_rate": 5.107436925064141e-06,
|
|
"loss": 1.8133379220962524,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 1.6333333333333333,
|
|
"grad_norm": 8.79091168804635,
|
|
"learning_rate": 5.102196848109558e-06,
|
|
"loss": 1.3521404266357422,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 1.6342342342342342,
|
|
"grad_norm": 9.774103988395167,
|
|
"learning_rate": 5.096956658859122e-06,
|
|
"loss": 1.5047627687454224,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 1.635135135135135,
|
|
"grad_norm": 9.749121232105614,
|
|
"learning_rate": 5.0917163630708535e-06,
|
|
"loss": 0.7041552066802979,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 1.6360360360360362,
|
|
"grad_norm": 9.645488273598785,
|
|
"learning_rate": 5.0864759665028875e-06,
|
|
"loss": 2.0276386737823486,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 1.6369369369369369,
|
|
"grad_norm": 16.155605642009423,
|
|
"learning_rate": 5.081235474913474e-06,
|
|
"loss": 1.315154790878296,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 1.637837837837838,
|
|
"grad_norm": 10.45011539448401,
|
|
"learning_rate": 5.075994894060965e-06,
|
|
"loss": 1.2299561500549316,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 1.6387387387387387,
|
|
"grad_norm": 6.78701649907053,
|
|
"learning_rate": 5.070754229703811e-06,
|
|
"loss": 1.5950348377227783,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 1.6396396396396398,
|
|
"grad_norm": 7.831513199875119,
|
|
"learning_rate": 5.065513487600555e-06,
|
|
"loss": 0.42251333594322205,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 1.6405405405405404,
|
|
"grad_norm": 9.441732636387309,
|
|
"learning_rate": 5.060272673509824e-06,
|
|
"loss": 1.9003760814666748,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 1.6414414414414416,
|
|
"grad_norm": 13.226111228686847,
|
|
"learning_rate": 5.0550317931903236e-06,
|
|
"loss": 1.4831870794296265,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 1.6423423423423422,
|
|
"grad_norm": 21.681457173780473,
|
|
"learning_rate": 5.049790852400837e-06,
|
|
"loss": 2.1507914066314697,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 1.6432432432432433,
|
|
"grad_norm": 9.197443881739524,
|
|
"learning_rate": 5.044549856900207e-06,
|
|
"loss": 0.9067004919052124,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 1.644144144144144,
|
|
"grad_norm": 6.586339834129427,
|
|
"learning_rate": 5.039308812447342e-06,
|
|
"loss": 2.0299181938171387,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 1.6450450450450451,
|
|
"grad_norm": 7.470865346958301,
|
|
"learning_rate": 5.0340677248012e-06,
|
|
"loss": 1.4668736457824707,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 1.6459459459459458,
|
|
"grad_norm": 11.646958846660729,
|
|
"learning_rate": 5.028826599720791e-06,
|
|
"loss": 1.1238493919372559,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 1.646846846846847,
|
|
"grad_norm": 12.009980530239458,
|
|
"learning_rate": 5.023585442965162e-06,
|
|
"loss": 1.087951421737671,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 1.6477477477477478,
|
|
"grad_norm": 13.671737196875641,
|
|
"learning_rate": 5.018344260293394e-06,
|
|
"loss": 1.4537632465362549,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 1.6486486486486487,
|
|
"grad_norm": 12.275631728640773,
|
|
"learning_rate": 5.013103057464604e-06,
|
|
"loss": 1.1609238386154175,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 1.6495495495495496,
|
|
"grad_norm": 13.909632331811517,
|
|
"learning_rate": 5.0078618402379235e-06,
|
|
"loss": 0.9822266697883606,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 1.6504504504504505,
|
|
"grad_norm": 12.335022091123836,
|
|
"learning_rate": 5.002620614372502e-06,
|
|
"loss": 1.4582247734069824,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 1.6513513513513514,
|
|
"grad_norm": 15.567399091027983,
|
|
"learning_rate": 4.997379385627499e-06,
|
|
"loss": 1.7853538990020752,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 1.6522522522522523,
|
|
"grad_norm": 8.448600924161148,
|
|
"learning_rate": 4.992138159762077e-06,
|
|
"loss": 1.6177634000778198,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 1.6531531531531531,
|
|
"grad_norm": 18.989389160269997,
|
|
"learning_rate": 4.986896942535397e-06,
|
|
"loss": 1.8286075592041016,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 1.654054054054054,
|
|
"grad_norm": 14.777730822153895,
|
|
"learning_rate": 4.981655739706606e-06,
|
|
"loss": 2.3525032997131348,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 1.654954954954955,
|
|
"grad_norm": 12.034893940187724,
|
|
"learning_rate": 4.97641455703484e-06,
|
|
"loss": 1.6599555015563965,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 1.6558558558558558,
|
|
"grad_norm": 11.424236746033749,
|
|
"learning_rate": 4.971173400279211e-06,
|
|
"loss": 1.7991001605987549,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 1.6567567567567567,
|
|
"grad_norm": 17.93729333887496,
|
|
"learning_rate": 4.965932275198801e-06,
|
|
"loss": 1.4123523235321045,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 1.6576576576576576,
|
|
"grad_norm": 14.405695155183214,
|
|
"learning_rate": 4.9606911875526595e-06,
|
|
"loss": 1.6130017042160034,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 1.6585585585585587,
|
|
"grad_norm": 8.550653806048487,
|
|
"learning_rate": 4.9554501430997935e-06,
|
|
"loss": 1.1157453060150146,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 1.6594594594594594,
|
|
"grad_norm": 13.963548010924033,
|
|
"learning_rate": 4.950209147599164e-06,
|
|
"loss": 1.7340162992477417,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 1.6603603603603605,
|
|
"grad_norm": 12.618807401986361,
|
|
"learning_rate": 4.944968206809678e-06,
|
|
"loss": 1.807329535484314,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 1.6612612612612612,
|
|
"grad_norm": 9.469283753876082,
|
|
"learning_rate": 4.939727326490179e-06,
|
|
"loss": 1.7297239303588867,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 1.6621621621621623,
|
|
"grad_norm": 12.69181008288232,
|
|
"learning_rate": 4.934486512399448e-06,
|
|
"loss": 1.4359819889068604,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 1.663063063063063,
|
|
"grad_norm": 10.506967926904066,
|
|
"learning_rate": 4.929245770296191e-06,
|
|
"loss": 0.88924640417099,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 1.663963963963964,
|
|
"grad_norm": 12.951834717347314,
|
|
"learning_rate": 4.924005105939037e-06,
|
|
"loss": 2.525228500366211,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 1.6648648648648647,
|
|
"grad_norm": 11.919812373158688,
|
|
"learning_rate": 4.918764525086526e-06,
|
|
"loss": 2.649840831756592,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 1.6657657657657658,
|
|
"grad_norm": 12.59000885635357,
|
|
"learning_rate": 4.9135240334971125e-06,
|
|
"loss": 1.0599262714385986,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 13.510155903609554,
|
|
"learning_rate": 4.908283636929148e-06,
|
|
"loss": 1.9727380275726318,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 1.6675675675675676,
|
|
"grad_norm": 10.023354144746676,
|
|
"learning_rate": 4.903043341140879e-06,
|
|
"loss": 1.5703070163726807,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 1.6684684684684683,
|
|
"grad_norm": 12.25730601159229,
|
|
"learning_rate": 4.8978031518904426e-06,
|
|
"loss": 2.0800280570983887,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 1.6693693693693694,
|
|
"grad_norm": 13.031520450060414,
|
|
"learning_rate": 4.892563074935861e-06,
|
|
"loss": 1.8855412006378174,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 1.6702702702702703,
|
|
"grad_norm": 12.694316598911586,
|
|
"learning_rate": 4.88732311603503e-06,
|
|
"loss": 1.389702320098877,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 1.6711711711711712,
|
|
"grad_norm": 13.196528378231587,
|
|
"learning_rate": 4.882083280945716e-06,
|
|
"loss": 1.3638392686843872,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 1.672072072072072,
|
|
"grad_norm": 36.57858549033672,
|
|
"learning_rate": 4.876843575425552e-06,
|
|
"loss": 1.2863314151763916,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 1.672972972972973,
|
|
"grad_norm": 11.214173760175132,
|
|
"learning_rate": 4.871604005232025e-06,
|
|
"loss": 2.1213583946228027,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 1.6738738738738739,
|
|
"grad_norm": 15.26985886525236,
|
|
"learning_rate": 4.866364576122477e-06,
|
|
"loss": 1.1910454034805298,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 1.6747747747747748,
|
|
"grad_norm": 7.650950317476867,
|
|
"learning_rate": 4.8611252938540905e-06,
|
|
"loss": 1.617013931274414,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 1.6756756756756757,
|
|
"grad_norm": 6.697009447144261,
|
|
"learning_rate": 4.8558861641838914e-06,
|
|
"loss": 1.7851452827453613,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 1.6765765765765765,
|
|
"grad_norm": 36.276983437220125,
|
|
"learning_rate": 4.8506471928687355e-06,
|
|
"loss": 0.9960858821868896,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 1.6774774774774774,
|
|
"grad_norm": 12.587915346906154,
|
|
"learning_rate": 4.845408385665304e-06,
|
|
"loss": 1.7082439661026,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 1.6783783783783783,
|
|
"grad_norm": 9.952914402871658,
|
|
"learning_rate": 4.840169748330096e-06,
|
|
"loss": 2.0728540420532227,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 1.6792792792792792,
|
|
"grad_norm": 8.780047498997055,
|
|
"learning_rate": 4.834931286619432e-06,
|
|
"loss": 1.2778596878051758,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 1.6801801801801801,
|
|
"grad_norm": 12.42477262833344,
|
|
"learning_rate": 4.829693006289431e-06,
|
|
"loss": 1.4588884115219116,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 1.6810810810810812,
|
|
"grad_norm": 23.60159068391552,
|
|
"learning_rate": 4.824454913096017e-06,
|
|
"loss": 1.6938081979751587,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 1.681981981981982,
|
|
"grad_norm": 10.090972321761608,
|
|
"learning_rate": 4.819217012794905e-06,
|
|
"loss": 1.3040815591812134,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 1.682882882882883,
|
|
"grad_norm": 14.158069020228154,
|
|
"learning_rate": 4.813979311141602e-06,
|
|
"loss": 1.5980745553970337,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 1.6837837837837837,
|
|
"grad_norm": 13.735311953873568,
|
|
"learning_rate": 4.808741813891394e-06,
|
|
"loss": 1.662085771560669,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 1.6846846846846848,
|
|
"grad_norm": 6.729855693752189,
|
|
"learning_rate": 4.8035045267993445e-06,
|
|
"loss": 1.634281873703003,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 1.6855855855855855,
|
|
"grad_norm": 22.744365791952895,
|
|
"learning_rate": 4.798267455620283e-06,
|
|
"loss": 1.6238887310028076,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 1.6864864864864866,
|
|
"grad_norm": 18.216565260056957,
|
|
"learning_rate": 4.793030606108805e-06,
|
|
"loss": 2.6606647968292236,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 1.6873873873873872,
|
|
"grad_norm": 9.77827469578083,
|
|
"learning_rate": 4.78779398401926e-06,
|
|
"loss": 1.3882408142089844,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 1.6882882882882884,
|
|
"grad_norm": 12.694831394117665,
|
|
"learning_rate": 4.782557595105749e-06,
|
|
"loss": 1.601452112197876,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 1.689189189189189,
|
|
"grad_norm": 10.809963035776216,
|
|
"learning_rate": 4.7773214451221165e-06,
|
|
"loss": 1.9865121841430664,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 1.6900900900900901,
|
|
"grad_norm": 10.18731847385368,
|
|
"learning_rate": 4.772085539821945e-06,
|
|
"loss": 2.2083988189697266,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 1.690990990990991,
|
|
"grad_norm": 30.780898769798974,
|
|
"learning_rate": 4.766849884958546e-06,
|
|
"loss": 1.2041118144989014,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 1.691891891891892,
|
|
"grad_norm": 10.945051063745945,
|
|
"learning_rate": 4.7616144862849585e-06,
|
|
"loss": 1.120283603668213,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 1.6927927927927928,
|
|
"grad_norm": 12.9577090634596,
|
|
"learning_rate": 4.7563793495539395e-06,
|
|
"loss": 1.7876710891723633,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 1.6936936936936937,
|
|
"grad_norm": 10.344690917084307,
|
|
"learning_rate": 4.751144480517956e-06,
|
|
"loss": 1.6284270286560059,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 1.6945945945945946,
|
|
"grad_norm": 22.650389896351047,
|
|
"learning_rate": 4.745909884929184e-06,
|
|
"loss": 1.6889030933380127,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 1.6954954954954955,
|
|
"grad_norm": 8.62342726183916,
|
|
"learning_rate": 4.740675568539495e-06,
|
|
"loss": 1.451640009880066,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 1.6963963963963964,
|
|
"grad_norm": 10.835080585069939,
|
|
"learning_rate": 4.735441537100458e-06,
|
|
"loss": 1.3494681119918823,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 1.6972972972972973,
|
|
"grad_norm": 9.526831764008596,
|
|
"learning_rate": 4.730207796363327e-06,
|
|
"loss": 1.5760974884033203,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 1.6981981981981982,
|
|
"grad_norm": 10.205558172881307,
|
|
"learning_rate": 4.724974352079036e-06,
|
|
"loss": 1.8778183460235596,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 1.699099099099099,
|
|
"grad_norm": 13.132706909089642,
|
|
"learning_rate": 4.719741209998192e-06,
|
|
"loss": 1.5913633108139038,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"grad_norm": 8.838642913832645,
|
|
"learning_rate": 4.714508375871075e-06,
|
|
"loss": 2.1396777629852295,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 1.7009009009009008,
|
|
"grad_norm": 10.790780937843527,
|
|
"learning_rate": 4.7092758554476215e-06,
|
|
"loss": 1.3998993635177612,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 1.701801801801802,
|
|
"grad_norm": 13.017536027213275,
|
|
"learning_rate": 4.704043654477426e-06,
|
|
"loss": 1.4358144998550415,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 1.7027027027027026,
|
|
"grad_norm": 11.746019198220758,
|
|
"learning_rate": 4.6988117787097306e-06,
|
|
"loss": 1.917391061782837,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 1.7036036036036037,
|
|
"grad_norm": 8.085372419455812,
|
|
"learning_rate": 4.69358023389342e-06,
|
|
"loss": 1.343592882156372,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 1.7045045045045044,
|
|
"grad_norm": 14.492111970042975,
|
|
"learning_rate": 4.688349025777015e-06,
|
|
"loss": 1.737886905670166,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 1.7054054054054055,
|
|
"grad_norm": 11.989231765241929,
|
|
"learning_rate": 4.683118160108669e-06,
|
|
"loss": 1.2686560153961182,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 1.7063063063063062,
|
|
"grad_norm": 10.070388036081532,
|
|
"learning_rate": 4.6778876426361594e-06,
|
|
"loss": 1.584463357925415,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 1.7072072072072073,
|
|
"grad_norm": 11.768188931183234,
|
|
"learning_rate": 4.672657479106875e-06,
|
|
"loss": 1.7363247871398926,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 1.708108108108108,
|
|
"grad_norm": 23.170647656149796,
|
|
"learning_rate": 4.667427675267823e-06,
|
|
"loss": 1.6051385402679443,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 1.709009009009009,
|
|
"grad_norm": 6.505681805310561,
|
|
"learning_rate": 4.662198236865609e-06,
|
|
"loss": 1.6003258228302002,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 1.7099099099099098,
|
|
"grad_norm": 15.029094086005635,
|
|
"learning_rate": 4.656969169646441e-06,
|
|
"loss": 0.6877315044403076,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 1.7108108108108109,
|
|
"grad_norm": 12.380703102012152,
|
|
"learning_rate": 4.65174047935612e-06,
|
|
"loss": 1.3788490295410156,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 1.7117117117117115,
|
|
"grad_norm": 11.386064056229571,
|
|
"learning_rate": 4.646512171740028e-06,
|
|
"loss": 2.066871404647827,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 1.7126126126126127,
|
|
"grad_norm": 17.705891829662622,
|
|
"learning_rate": 4.641284252543131e-06,
|
|
"loss": 1.9048995971679688,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 1.7135135135135136,
|
|
"grad_norm": 9.36166670759488,
|
|
"learning_rate": 4.636056727509968e-06,
|
|
"loss": 1.6398744583129883,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 1.7144144144144144,
|
|
"grad_norm": 9.126165176019333,
|
|
"learning_rate": 4.630829602384641e-06,
|
|
"loss": 1.5823161602020264,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 1.7153153153153153,
|
|
"grad_norm": 34.45379460464936,
|
|
"learning_rate": 4.625602882910818e-06,
|
|
"loss": 1.6188974380493164,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 1.7162162162162162,
|
|
"grad_norm": 9.611917464319482,
|
|
"learning_rate": 4.620376574831717e-06,
|
|
"loss": 1.4678549766540527,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 1.7171171171171171,
|
|
"grad_norm": 13.379680968485829,
|
|
"learning_rate": 4.615150683890105e-06,
|
|
"loss": 1.2325408458709717,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 1.718018018018018,
|
|
"grad_norm": 9.333383096793895,
|
|
"learning_rate": 4.60992521582829e-06,
|
|
"loss": 1.7048616409301758,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 1.718918918918919,
|
|
"grad_norm": 8.377997409792348,
|
|
"learning_rate": 4.604700176388119e-06,
|
|
"loss": 1.4476325511932373,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 1.7198198198198198,
|
|
"grad_norm": 15.18463966415058,
|
|
"learning_rate": 4.599475571310965e-06,
|
|
"loss": 2.0498039722442627,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 1.7207207207207207,
|
|
"grad_norm": 13.635293999015182,
|
|
"learning_rate": 4.594251406337723e-06,
|
|
"loss": 1.41183340549469,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 1.7216216216216216,
|
|
"grad_norm": 9.619050169190075,
|
|
"learning_rate": 4.589027687208806e-06,
|
|
"loss": 1.4390678405761719,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 1.7225225225225225,
|
|
"grad_norm": 8.691323448200201,
|
|
"learning_rate": 4.583804419664137e-06,
|
|
"loss": 1.912278175354004,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 1.7234234234234234,
|
|
"grad_norm": 13.907079897704138,
|
|
"learning_rate": 4.578581609443141e-06,
|
|
"loss": 1.561707615852356,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 1.7243243243243245,
|
|
"grad_norm": 12.471220009886467,
|
|
"learning_rate": 4.573359262284744e-06,
|
|
"loss": 1.1934659481048584,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 1.7252252252252251,
|
|
"grad_norm": 9.836124267121887,
|
|
"learning_rate": 4.568137383927359e-06,
|
|
"loss": 1.3664239645004272,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 1.7261261261261263,
|
|
"grad_norm": 13.055513206884156,
|
|
"learning_rate": 4.562915980108888e-06,
|
|
"loss": 1.6567761898040771,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 1.727027027027027,
|
|
"grad_norm": 12.097454766477828,
|
|
"learning_rate": 4.557695056566707e-06,
|
|
"loss": 1.1626321077346802,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 1.727927927927928,
|
|
"grad_norm": 23.041165401436277,
|
|
"learning_rate": 4.552474619037669e-06,
|
|
"loss": 2.7990541458129883,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 1.7288288288288287,
|
|
"grad_norm": 7.122789479043114,
|
|
"learning_rate": 4.547254673258089e-06,
|
|
"loss": 1.6740970611572266,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 1.7297297297297298,
|
|
"grad_norm": 9.542192387325457,
|
|
"learning_rate": 4.5420352249637445e-06,
|
|
"loss": 1.7170947790145874,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 1.7306306306306305,
|
|
"grad_norm": 8.482009364764991,
|
|
"learning_rate": 4.5368162798898655e-06,
|
|
"loss": 0.8922778964042664,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 1.7315315315315316,
|
|
"grad_norm": 18.77987322013907,
|
|
"learning_rate": 4.531597843771125e-06,
|
|
"loss": 1.7073520421981812,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 1.7324324324324323,
|
|
"grad_norm": 11.291633682693858,
|
|
"learning_rate": 4.5263799223416476e-06,
|
|
"loss": 0.8407334089279175,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 1.7333333333333334,
|
|
"grad_norm": 8.443523222421167,
|
|
"learning_rate": 4.521162521334981e-06,
|
|
"loss": 1.8258352279663086,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 1.7342342342342343,
|
|
"grad_norm": 12.141582208802781,
|
|
"learning_rate": 4.515945646484105e-06,
|
|
"loss": 2.1571366786956787,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 1.7351351351351352,
|
|
"grad_norm": 8.941945890884975,
|
|
"learning_rate": 4.5107293035214224e-06,
|
|
"loss": 1.1934655904769897,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 1.736036036036036,
|
|
"grad_norm": 14.470333982533512,
|
|
"learning_rate": 4.505513498178752e-06,
|
|
"loss": 1.6931910514831543,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 1.736936936936937,
|
|
"grad_norm": 11.323491588803767,
|
|
"learning_rate": 4.500298236187318e-06,
|
|
"loss": 1.9419209957122803,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 1.7378378378378379,
|
|
"grad_norm": 10.616763433886799,
|
|
"learning_rate": 4.495083523277752e-06,
|
|
"loss": 1.0829826593399048,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 1.7387387387387387,
|
|
"grad_norm": 12.848320162302201,
|
|
"learning_rate": 4.48986936518008e-06,
|
|
"loss": 1.0987441539764404,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 1.7396396396396396,
|
|
"grad_norm": 13.297450232626861,
|
|
"learning_rate": 4.484655767623719e-06,
|
|
"loss": 1.629490852355957,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 1.7405405405405405,
|
|
"grad_norm": 12.042664808161364,
|
|
"learning_rate": 4.47944273633747e-06,
|
|
"loss": 1.637264370918274,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 1.7414414414414414,
|
|
"grad_norm": 13.081268204915583,
|
|
"learning_rate": 4.47423027704951e-06,
|
|
"loss": 1.182558536529541,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 1.7423423423423423,
|
|
"grad_norm": 11.812617882184464,
|
|
"learning_rate": 4.46901839548739e-06,
|
|
"loss": 0.9834614992141724,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 1.7432432432432432,
|
|
"grad_norm": 8.428541833076737,
|
|
"learning_rate": 4.463807097378026e-06,
|
|
"loss": 1.5438402891159058,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 1.744144144144144,
|
|
"grad_norm": 15.459648349861814,
|
|
"learning_rate": 4.458596388447691e-06,
|
|
"loss": 2.0940542221069336,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 1.7450450450450452,
|
|
"grad_norm": 10.376271715098074,
|
|
"learning_rate": 4.453386274422013e-06,
|
|
"loss": 2.4595947265625,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 1.7459459459459459,
|
|
"grad_norm": 12.982535008277809,
|
|
"learning_rate": 4.448176761025964e-06,
|
|
"loss": 1.204673409461975,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 1.746846846846847,
|
|
"grad_norm": 8.878620923656074,
|
|
"learning_rate": 4.442967853983858e-06,
|
|
"loss": 1.2832019329071045,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 1.7477477477477477,
|
|
"grad_norm": 18.68623231270244,
|
|
"learning_rate": 4.4377595590193425e-06,
|
|
"loss": 1.9358878135681152,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 1.7486486486486488,
|
|
"grad_norm": 7.264887516302695,
|
|
"learning_rate": 4.432551881855389e-06,
|
|
"loss": 1.9682033061981201,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 1.7495495495495494,
|
|
"grad_norm": 9.547635344931772,
|
|
"learning_rate": 4.4273448282142955e-06,
|
|
"loss": 1.5022704601287842,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 1.7504504504504506,
|
|
"grad_norm": 9.1275832195882,
|
|
"learning_rate": 4.4221384038176715e-06,
|
|
"loss": 1.5394527912139893,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 1.7513513513513512,
|
|
"grad_norm": 9.38133220802807,
|
|
"learning_rate": 4.416932614386436e-06,
|
|
"loss": 2.5494089126586914,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 1.7522522522522523,
|
|
"grad_norm": 11.591914168149954,
|
|
"learning_rate": 4.411727465640808e-06,
|
|
"loss": 0.8944557905197144,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 1.753153153153153,
|
|
"grad_norm": 22.8172078054111,
|
|
"learning_rate": 4.4065229633003075e-06,
|
|
"loss": 1.611649751663208,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 1.7540540540540541,
|
|
"grad_norm": 11.109839607950518,
|
|
"learning_rate": 4.401319113083739e-06,
|
|
"loss": 1.329512119293213,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 1.7549549549549548,
|
|
"grad_norm": 21.985675185072974,
|
|
"learning_rate": 4.3961159207091956e-06,
|
|
"loss": 1.5959854125976562,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 1.755855855855856,
|
|
"grad_norm": 17.05162633790248,
|
|
"learning_rate": 4.390913391894042e-06,
|
|
"loss": 1.1783095598220825,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 1.7567567567567568,
|
|
"grad_norm": 16.460328325388797,
|
|
"learning_rate": 4.385711532354918e-06,
|
|
"loss": 1.4390449523925781,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 1.7576576576576577,
|
|
"grad_norm": 16.566363080592808,
|
|
"learning_rate": 4.380510347807725e-06,
|
|
"loss": 1.1258951425552368,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 1.7585585585585586,
|
|
"grad_norm": 12.633480932184735,
|
|
"learning_rate": 4.375309843967626e-06,
|
|
"loss": 1.5847431421279907,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 1.7594594594594595,
|
|
"grad_norm": 10.331704659363535,
|
|
"learning_rate": 4.370110026549034e-06,
|
|
"loss": 1.314765214920044,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 1.7603603603603604,
|
|
"grad_norm": 11.237550533850884,
|
|
"learning_rate": 4.364910901265607e-06,
|
|
"loss": 1.1468602418899536,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 1.7612612612612613,
|
|
"grad_norm": 24.912977361024677,
|
|
"learning_rate": 4.359712473830243e-06,
|
|
"loss": 1.5914771556854248,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 1.7621621621621621,
|
|
"grad_norm": 10.381393773513075,
|
|
"learning_rate": 4.354514749955076e-06,
|
|
"loss": 1.424102544784546,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 1.763063063063063,
|
|
"grad_norm": 9.357062157493605,
|
|
"learning_rate": 4.3493177353514624e-06,
|
|
"loss": 1.5307369232177734,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 1.763963963963964,
|
|
"grad_norm": 12.328032504587473,
|
|
"learning_rate": 4.344121435729982e-06,
|
|
"loss": 2.0486297607421875,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 1.7648648648648648,
|
|
"grad_norm": 10.662234297040511,
|
|
"learning_rate": 4.338925856800427e-06,
|
|
"loss": 1.174267292022705,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 1.7657657657657657,
|
|
"grad_norm": 17.990451738218127,
|
|
"learning_rate": 4.333731004271802e-06,
|
|
"loss": 1.854561448097229,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 1.7666666666666666,
|
|
"grad_norm": 9.265543688096725,
|
|
"learning_rate": 4.328536883852308e-06,
|
|
"loss": 1.2113375663757324,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 1.7675675675675677,
|
|
"grad_norm": 10.947719937058705,
|
|
"learning_rate": 4.323343501249346e-06,
|
|
"loss": 1.1589584350585938,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 1.7684684684684684,
|
|
"grad_norm": 10.753799580953297,
|
|
"learning_rate": 4.318150862169503e-06,
|
|
"loss": 1.3148674964904785,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 1.7693693693693695,
|
|
"grad_norm": 12.437389422374402,
|
|
"learning_rate": 4.312958972318549e-06,
|
|
"loss": 1.6055564880371094,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 1.7702702702702702,
|
|
"grad_norm": 9.17029071274015,
|
|
"learning_rate": 4.307767837401432e-06,
|
|
"loss": 1.801576018333435,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 1.7711711711711713,
|
|
"grad_norm": 9.802356077955512,
|
|
"learning_rate": 4.302577463122272e-06,
|
|
"loss": 1.3246842622756958,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 1.772072072072072,
|
|
"grad_norm": 10.63612762092608,
|
|
"learning_rate": 4.297387855184353e-06,
|
|
"loss": 1.4052033424377441,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 1.772972972972973,
|
|
"grad_norm": 9.098823159090228,
|
|
"learning_rate": 4.292199019290113e-06,
|
|
"loss": 1.206218957901001,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 1.7738738738738737,
|
|
"grad_norm": 11.419309891598209,
|
|
"learning_rate": 4.287010961141146e-06,
|
|
"loss": 1.620813012123108,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 1.7747747747747749,
|
|
"grad_norm": 24.34857001055044,
|
|
"learning_rate": 4.281823686438189e-06,
|
|
"loss": 0.8904290199279785,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 1.7756756756756755,
|
|
"grad_norm": 22.417586388160636,
|
|
"learning_rate": 4.2766372008811185e-06,
|
|
"loss": 0.9551545977592468,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 1.7765765765765766,
|
|
"grad_norm": 15.312906156689655,
|
|
"learning_rate": 4.2714515101689434e-06,
|
|
"loss": 1.7072341442108154,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 1.7774774774774775,
|
|
"grad_norm": 7.434001990764085,
|
|
"learning_rate": 4.2662666199998015e-06,
|
|
"loss": 1.5110739469528198,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 1.7783783783783784,
|
|
"grad_norm": 13.199118475520589,
|
|
"learning_rate": 4.261082536070949e-06,
|
|
"loss": 1.3560364246368408,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 1.7792792792792793,
|
|
"grad_norm": 14.73411806424218,
|
|
"learning_rate": 4.255899264078756e-06,
|
|
"loss": 2.025747299194336,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 1.7801801801801802,
|
|
"grad_norm": 7.606460147575554,
|
|
"learning_rate": 4.250716809718702e-06,
|
|
"loss": 1.3578187227249146,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 1.781081081081081,
|
|
"grad_norm": 10.633263492608826,
|
|
"learning_rate": 4.245535178685365e-06,
|
|
"loss": 1.8711591958999634,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 1.781981981981982,
|
|
"grad_norm": 12.640152634057468,
|
|
"learning_rate": 4.240354376672423e-06,
|
|
"loss": 1.2626956701278687,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 1.7828828828828829,
|
|
"grad_norm": 8.48581873036355,
|
|
"learning_rate": 4.235174409372639e-06,
|
|
"loss": 1.4237213134765625,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 1.7837837837837838,
|
|
"grad_norm": 9.10623029922968,
|
|
"learning_rate": 4.229995282477861e-06,
|
|
"loss": 1.2612204551696777,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 1.7846846846846847,
|
|
"grad_norm": 12.122076224231822,
|
|
"learning_rate": 4.224817001679011e-06,
|
|
"loss": 2.193499803543091,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 1.7855855855855856,
|
|
"grad_norm": 16.09150960239607,
|
|
"learning_rate": 4.219639572666086e-06,
|
|
"loss": 1.6633172035217285,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 1.7864864864864864,
|
|
"grad_norm": 12.738883623853681,
|
|
"learning_rate": 4.214463001128142e-06,
|
|
"loss": 1.3485755920410156,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 1.7873873873873873,
|
|
"grad_norm": 9.290550916969964,
|
|
"learning_rate": 4.209287292753296e-06,
|
|
"loss": 1.4145152568817139,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 1.7882882882882885,
|
|
"grad_norm": 18.95130486017209,
|
|
"learning_rate": 4.2041124532287144e-06,
|
|
"loss": 2.6751809120178223,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 1.7891891891891891,
|
|
"grad_norm": 8.393607675732447,
|
|
"learning_rate": 4.198938488240612e-06,
|
|
"loss": 1.5532233715057373,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 1.7900900900900902,
|
|
"grad_norm": 11.174186055406079,
|
|
"learning_rate": 4.193765403474239e-06,
|
|
"loss": 2.401888370513916,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 1.790990990990991,
|
|
"grad_norm": 17.38838180555356,
|
|
"learning_rate": 4.18859320461388e-06,
|
|
"loss": 2.247199058532715,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 1.791891891891892,
|
|
"grad_norm": 13.023152953286782,
|
|
"learning_rate": 4.183421897342847e-06,
|
|
"loss": 0.7688824534416199,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 1.7927927927927927,
|
|
"grad_norm": 12.317225228851253,
|
|
"learning_rate": 4.178251487343471e-06,
|
|
"loss": 1.072916030883789,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 1.7936936936936938,
|
|
"grad_norm": 25.611755204557735,
|
|
"learning_rate": 4.173081980297097e-06,
|
|
"loss": 1.5963506698608398,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 1.7945945945945945,
|
|
"grad_norm": 9.788119112625036,
|
|
"learning_rate": 4.167913381884078e-06,
|
|
"loss": 2.787838935852051,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 1.7954954954954956,
|
|
"grad_norm": 11.369520756068557,
|
|
"learning_rate": 4.162745697783771e-06,
|
|
"loss": 1.4114885330200195,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 1.7963963963963963,
|
|
"grad_norm": 14.440209012246697,
|
|
"learning_rate": 4.157578933674523e-06,
|
|
"loss": 1.4251551628112793,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 1.7972972972972974,
|
|
"grad_norm": 11.441179344455747,
|
|
"learning_rate": 4.152413095233675e-06,
|
|
"loss": 1.057724118232727,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 1.798198198198198,
|
|
"grad_norm": 8.968340436397817,
|
|
"learning_rate": 4.147248188137552e-06,
|
|
"loss": 1.0680241584777832,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 1.7990990990990992,
|
|
"grad_norm": 8.745257607417827,
|
|
"learning_rate": 4.142084218061449e-06,
|
|
"loss": 1.6237149238586426,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"grad_norm": 10.871073066257322,
|
|
"learning_rate": 4.1369211906796365e-06,
|
|
"loss": 1.2855372428894043,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 1.800900900900901,
|
|
"grad_norm": 10.979385256727682,
|
|
"learning_rate": 4.131759111665349e-06,
|
|
"loss": 1.7986273765563965,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 1.8018018018018018,
|
|
"grad_norm": 14.11274565981479,
|
|
"learning_rate": 4.126597986690775e-06,
|
|
"loss": 1.5659823417663574,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 1.8027027027027027,
|
|
"grad_norm": 16.204666886335296,
|
|
"learning_rate": 4.12143782142706e-06,
|
|
"loss": 1.4156365394592285,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 1.8036036036036036,
|
|
"grad_norm": 9.815542164364134,
|
|
"learning_rate": 4.1162786215442925e-06,
|
|
"loss": 1.0431914329528809,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 1.8045045045045045,
|
|
"grad_norm": 10.84105100773819,
|
|
"learning_rate": 4.111120392711498e-06,
|
|
"loss": 2.210622787475586,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 1.8054054054054054,
|
|
"grad_norm": 9.559989900443458,
|
|
"learning_rate": 4.105963140596639e-06,
|
|
"loss": 1.242011308670044,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 1.8063063063063063,
|
|
"grad_norm": 8.568770279944586,
|
|
"learning_rate": 4.1008068708666014e-06,
|
|
"loss": 1.619928002357483,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 1.8072072072072072,
|
|
"grad_norm": 14.442304362577028,
|
|
"learning_rate": 4.095651589187194e-06,
|
|
"loss": 1.4707170724868774,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 1.808108108108108,
|
|
"grad_norm": 14.493957616165693,
|
|
"learning_rate": 4.090497301223139e-06,
|
|
"loss": 1.3726030588150024,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 1.809009009009009,
|
|
"grad_norm": 11.818396980596093,
|
|
"learning_rate": 4.085344012638067e-06,
|
|
"loss": 1.193473219871521,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 1.8099099099099099,
|
|
"grad_norm": 11.66989118748204,
|
|
"learning_rate": 4.080191729094511e-06,
|
|
"loss": 2.0238192081451416,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 1.810810810810811,
|
|
"grad_norm": 9.628899081637774,
|
|
"learning_rate": 4.075040456253895e-06,
|
|
"loss": 0.8842580318450928,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 1.8117117117117116,
|
|
"grad_norm": 12.899685441367888,
|
|
"learning_rate": 4.06989019977654e-06,
|
|
"loss": 1.4153761863708496,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 1.8126126126126128,
|
|
"grad_norm": 10.384908596450975,
|
|
"learning_rate": 4.064740965321645e-06,
|
|
"loss": 1.5833828449249268,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 1.8135135135135134,
|
|
"grad_norm": 11.176020026024228,
|
|
"learning_rate": 4.059592758547289e-06,
|
|
"loss": 1.6319794654846191,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 1.8144144144144145,
|
|
"grad_norm": 17.27729200701665,
|
|
"learning_rate": 4.054445585110418e-06,
|
|
"loss": 1.8185272216796875,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 1.8153153153153152,
|
|
"grad_norm": 8.923348346894493,
|
|
"learning_rate": 4.049299450666847e-06,
|
|
"loss": 1.4681587219238281,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 1.8162162162162163,
|
|
"grad_norm": 19.601648325047385,
|
|
"learning_rate": 4.044154360871246e-06,
|
|
"loss": 1.7325223684310913,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 1.817117117117117,
|
|
"grad_norm": 24.610158924892566,
|
|
"learning_rate": 4.039010321377137e-06,
|
|
"loss": 1.5184123516082764,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 1.818018018018018,
|
|
"grad_norm": 14.186394154039885,
|
|
"learning_rate": 4.03386733783689e-06,
|
|
"loss": 1.391357183456421,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 1.8189189189189188,
|
|
"grad_norm": 11.364046322795115,
|
|
"learning_rate": 4.028725415901714e-06,
|
|
"loss": 1.3705822229385376,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 1.8198198198198199,
|
|
"grad_norm": 7.1758672636868255,
|
|
"learning_rate": 4.023584561221651e-06,
|
|
"loss": 1.6241412162780762,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 1.8207207207207208,
|
|
"grad_norm": 19.287585618713152,
|
|
"learning_rate": 4.018444779445571e-06,
|
|
"loss": 1.5234278440475464,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 1.8216216216216217,
|
|
"grad_norm": 12.373016754651946,
|
|
"learning_rate": 4.013306076221164e-06,
|
|
"loss": 1.373924732208252,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 1.8225225225225226,
|
|
"grad_norm": 8.830996176619,
|
|
"learning_rate": 4.008168457194937e-06,
|
|
"loss": 2.0185816287994385,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 1.8234234234234235,
|
|
"grad_norm": 16.200118685216125,
|
|
"learning_rate": 4.003031928012202e-06,
|
|
"loss": 1.0929310321807861,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 1.8243243243243243,
|
|
"grad_norm": 13.89448554252347,
|
|
"learning_rate": 3.997896494317076e-06,
|
|
"loss": 1.8505454063415527,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 1.8252252252252252,
|
|
"grad_norm": 7.941034603458671,
|
|
"learning_rate": 3.992762161752474e-06,
|
|
"loss": 1.5198599100112915,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 1.8261261261261261,
|
|
"grad_norm": 13.682362887924588,
|
|
"learning_rate": 3.987628935960098e-06,
|
|
"loss": 0.6701310873031616,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 1.827027027027027,
|
|
"grad_norm": 56.10692524433349,
|
|
"learning_rate": 3.982496822580434e-06,
|
|
"loss": 1.7380964756011963,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 1.827927927927928,
|
|
"grad_norm": 13.889223460128088,
|
|
"learning_rate": 3.977365827252746e-06,
|
|
"loss": 1.376246690750122,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 1.8288288288288288,
|
|
"grad_norm": 11.281727205358694,
|
|
"learning_rate": 3.972235955615071e-06,
|
|
"loss": 1.7691125869750977,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 1.8297297297297297,
|
|
"grad_norm": 13.021620717233445,
|
|
"learning_rate": 3.9671072133042105e-06,
|
|
"loss": 1.7032856941223145,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 1.8306306306306306,
|
|
"grad_norm": 18.866824844356582,
|
|
"learning_rate": 3.961979605955724e-06,
|
|
"loss": 0.9565985798835754,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 1.8315315315315317,
|
|
"grad_norm": 12.209220860886418,
|
|
"learning_rate": 3.956853139203925e-06,
|
|
"loss": 2.214951992034912,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 1.8324324324324324,
|
|
"grad_norm": 11.197802721466198,
|
|
"learning_rate": 3.951727818681873e-06,
|
|
"loss": 1.6807488203048706,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 1.8333333333333335,
|
|
"grad_norm": 7.408176024204171,
|
|
"learning_rate": 3.94660365002137e-06,
|
|
"loss": 1.29439115524292,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 1.8342342342342342,
|
|
"grad_norm": 11.600022628467501,
|
|
"learning_rate": 3.941480638852948e-06,
|
|
"loss": 1.2479655742645264,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 1.8351351351351353,
|
|
"grad_norm": 17.70228851131073,
|
|
"learning_rate": 3.936358790805871e-06,
|
|
"loss": 1.2770302295684814,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 1.836036036036036,
|
|
"grad_norm": 11.067481542528773,
|
|
"learning_rate": 3.931238111508124e-06,
|
|
"loss": 1.8459141254425049,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 1.836936936936937,
|
|
"grad_norm": 8.970115289359327,
|
|
"learning_rate": 3.926118606586406e-06,
|
|
"loss": 1.5907042026519775,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 1.8378378378378377,
|
|
"grad_norm": 10.064834908224393,
|
|
"learning_rate": 3.921000281666127e-06,
|
|
"loss": 0.8717818260192871,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 1.8387387387387388,
|
|
"grad_norm": 12.644321201888557,
|
|
"learning_rate": 3.915883142371404e-06,
|
|
"loss": 1.409450650215149,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 1.8396396396396395,
|
|
"grad_norm": 10.237275872132916,
|
|
"learning_rate": 3.910767194325045e-06,
|
|
"loss": 1.5621337890625,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 1.8405405405405406,
|
|
"grad_norm": 11.149022032267956,
|
|
"learning_rate": 3.905652443148553e-06,
|
|
"loss": 1.0682541131973267,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 1.8414414414414413,
|
|
"grad_norm": 18.164644817812203,
|
|
"learning_rate": 3.900538894462112e-06,
|
|
"loss": 1.3991050720214844,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 1.8423423423423424,
|
|
"grad_norm": 10.057496316068189,
|
|
"learning_rate": 3.89542655388459e-06,
|
|
"loss": 2.2244114875793457,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 1.8432432432432433,
|
|
"grad_norm": 11.730127903497804,
|
|
"learning_rate": 3.890315427033522e-06,
|
|
"loss": 2.156032085418701,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 1.8441441441441442,
|
|
"grad_norm": 8.810198754812639,
|
|
"learning_rate": 3.8852055195251146e-06,
|
|
"loss": 1.881744384765625,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 1.845045045045045,
|
|
"grad_norm": 14.642924662376911,
|
|
"learning_rate": 3.8800968369742305e-06,
|
|
"loss": 1.666663408279419,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 1.845945945945946,
|
|
"grad_norm": 11.472829693779188,
|
|
"learning_rate": 3.874989384994389e-06,
|
|
"loss": 1.7958859205245972,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 1.8468468468468469,
|
|
"grad_norm": 10.161389492280458,
|
|
"learning_rate": 3.869883169197755e-06,
|
|
"loss": 1.6539031267166138,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 1.8477477477477477,
|
|
"grad_norm": 7.353611430960883,
|
|
"learning_rate": 3.864778195195138e-06,
|
|
"loss": 1.7106571197509766,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 1.8486486486486486,
|
|
"grad_norm": 20.54870601764021,
|
|
"learning_rate": 3.859674468595979e-06,
|
|
"loss": 1.6156736612319946,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 1.8495495495495495,
|
|
"grad_norm": 9.524807801494127,
|
|
"learning_rate": 3.854571995008351e-06,
|
|
"loss": 2.0353589057922363,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 1.8504504504504504,
|
|
"grad_norm": 12.56311071345327,
|
|
"learning_rate": 3.84947078003895e-06,
|
|
"loss": 1.525078535079956,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 1.8513513513513513,
|
|
"grad_norm": 10.92868839801951,
|
|
"learning_rate": 3.8443708292930894e-06,
|
|
"loss": 1.5466748476028442,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 1.8522522522522522,
|
|
"grad_norm": 14.1266404726735,
|
|
"learning_rate": 3.839272148374692e-06,
|
|
"loss": 1.6088988780975342,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 1.853153153153153,
|
|
"grad_norm": 10.38242638563608,
|
|
"learning_rate": 3.834174742886287e-06,
|
|
"loss": 1.7105833292007446,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 1.8540540540540542,
|
|
"grad_norm": 17.121790212262344,
|
|
"learning_rate": 3.829078618429e-06,
|
|
"loss": 1.685943603515625,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 1.8549549549549549,
|
|
"grad_norm": 9.579254857244152,
|
|
"learning_rate": 3.823983780602551e-06,
|
|
"loss": 1.669414758682251,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 1.855855855855856,
|
|
"grad_norm": 10.041877543818236,
|
|
"learning_rate": 3.818890235005243e-06,
|
|
"loss": 2.121381998062134,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 1.8567567567567567,
|
|
"grad_norm": 11.146971685571359,
|
|
"learning_rate": 3.813797987233965e-06,
|
|
"loss": 1.9388622045516968,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 1.8576576576576578,
|
|
"grad_norm": 14.236058720958708,
|
|
"learning_rate": 3.808707042884176e-06,
|
|
"loss": 1.1108877658843994,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 1.8585585585585584,
|
|
"grad_norm": 15.905826190064678,
|
|
"learning_rate": 3.803617407549901e-06,
|
|
"loss": 1.5356963872909546,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 1.8594594594594596,
|
|
"grad_norm": 12.655561573381654,
|
|
"learning_rate": 3.798529086823729e-06,
|
|
"loss": 1.0193653106689453,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 1.8603603603603602,
|
|
"grad_norm": 10.153514091883679,
|
|
"learning_rate": 3.7934420862968045e-06,
|
|
"loss": 1.2517184019088745,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 1.8612612612612613,
|
|
"grad_norm": 8.058012080834468,
|
|
"learning_rate": 3.7883564115588223e-06,
|
|
"loss": 1.4862089157104492,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 1.862162162162162,
|
|
"grad_norm": 18.065678174274783,
|
|
"learning_rate": 3.7832720681980183e-06,
|
|
"loss": 0.9980199337005615,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 1.8630630630630631,
|
|
"grad_norm": 24.62683147181859,
|
|
"learning_rate": 3.7781890618011667e-06,
|
|
"loss": 1.6335004568099976,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 1.8639639639639638,
|
|
"grad_norm": 8.125913454982724,
|
|
"learning_rate": 3.7731073979535706e-06,
|
|
"loss": 1.5620460510253906,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 1.864864864864865,
|
|
"grad_norm": 10.208284037925765,
|
|
"learning_rate": 3.768027082239062e-06,
|
|
"loss": 1.3698681592941284,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 1.8657657657657658,
|
|
"grad_norm": 15.872653901224046,
|
|
"learning_rate": 3.7629481202399886e-06,
|
|
"loss": 1.7767051458358765,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 1.8666666666666667,
|
|
"grad_norm": 11.434399437622693,
|
|
"learning_rate": 3.75787051753721e-06,
|
|
"loss": 1.7132368087768555,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 1.8675675675675676,
|
|
"grad_norm": 11.78079658204757,
|
|
"learning_rate": 3.752794279710094e-06,
|
|
"loss": 2.404029130935669,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 1.8684684684684685,
|
|
"grad_norm": 16.2721496234969,
|
|
"learning_rate": 3.747719412336508e-06,
|
|
"loss": 1.6003530025482178,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 1.8693693693693694,
|
|
"grad_norm": 8.750799125851948,
|
|
"learning_rate": 3.7426459209928133e-06,
|
|
"loss": 1.8089443445205688,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 1.8702702702702703,
|
|
"grad_norm": 7.842869294288517,
|
|
"learning_rate": 3.737573811253859e-06,
|
|
"loss": 1.1806695461273193,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 1.8711711711711712,
|
|
"grad_norm": 8.460636971047276,
|
|
"learning_rate": 3.7325030886929767e-06,
|
|
"loss": 1.5709047317504883,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 1.872072072072072,
|
|
"grad_norm": 10.385083814761153,
|
|
"learning_rate": 3.7274337588819743e-06,
|
|
"loss": 1.1126965284347534,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 1.872972972972973,
|
|
"grad_norm": 12.998565648430477,
|
|
"learning_rate": 3.7223658273911267e-06,
|
|
"loss": 1.9489595890045166,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 1.8738738738738738,
|
|
"grad_norm": 13.096941955859592,
|
|
"learning_rate": 3.7172992997891756e-06,
|
|
"loss": 1.5732641220092773,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 1.8747747747747747,
|
|
"grad_norm": 15.28789120739809,
|
|
"learning_rate": 3.7122341816433173e-06,
|
|
"loss": 1.1677942276000977,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 1.8756756756756756,
|
|
"grad_norm": 9.603294742851899,
|
|
"learning_rate": 3.707170478519203e-06,
|
|
"loss": 1.341094970703125,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 1.8765765765765767,
|
|
"grad_norm": 9.288471831260694,
|
|
"learning_rate": 3.7021081959809237e-06,
|
|
"loss": 1.9096192121505737,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 1.8774774774774774,
|
|
"grad_norm": 8.025780045255638,
|
|
"learning_rate": 3.6970473395910115e-06,
|
|
"loss": 1.5327026844024658,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 1.8783783783783785,
|
|
"grad_norm": 10.867053119082584,
|
|
"learning_rate": 3.691987914910437e-06,
|
|
"loss": 1.304749608039856,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 1.8792792792792792,
|
|
"grad_norm": 12.023574933479532,
|
|
"learning_rate": 3.6869299274985903e-06,
|
|
"loss": 1.155665636062622,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 1.8801801801801803,
|
|
"grad_norm": 11.74997341729003,
|
|
"learning_rate": 3.6818733829132845e-06,
|
|
"loss": 1.6288031339645386,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 1.881081081081081,
|
|
"grad_norm": 9.644983289206198,
|
|
"learning_rate": 3.6768182867107485e-06,
|
|
"loss": 1.62825608253479,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 1.881981981981982,
|
|
"grad_norm": 12.911515113377188,
|
|
"learning_rate": 3.6717646444456196e-06,
|
|
"loss": 1.0108637809753418,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 1.8828828828828827,
|
|
"grad_norm": 11.753309993592506,
|
|
"learning_rate": 3.6667124616709337e-06,
|
|
"loss": 1.2084550857543945,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 1.8837837837837839,
|
|
"grad_norm": 13.616855367290347,
|
|
"learning_rate": 3.6616617439381286e-06,
|
|
"loss": 1.5335865020751953,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 1.8846846846846845,
|
|
"grad_norm": 9.235423436766794,
|
|
"learning_rate": 3.6566124967970286e-06,
|
|
"loss": 1.4122389554977417,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 1.8855855855855856,
|
|
"grad_norm": 10.201106705652542,
|
|
"learning_rate": 3.651564725795843e-06,
|
|
"loss": 1.2618002891540527,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 1.8864864864864865,
|
|
"grad_norm": 15.52679894691076,
|
|
"learning_rate": 3.64651843648116e-06,
|
|
"loss": 1.1889724731445312,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 1.8873873873873874,
|
|
"grad_norm": 10.143947076783371,
|
|
"learning_rate": 3.6414736343979383e-06,
|
|
"loss": 1.6448466777801514,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 1.8882882882882883,
|
|
"grad_norm": 9.168651128851902,
|
|
"learning_rate": 3.6364303250895032e-06,
|
|
"loss": 1.091215968132019,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 1.8891891891891892,
|
|
"grad_norm": 10.798141475360113,
|
|
"learning_rate": 3.63138851409754e-06,
|
|
"loss": 2.1353302001953125,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 1.89009009009009,
|
|
"grad_norm": 8.01749766654077,
|
|
"learning_rate": 3.626348206962087e-06,
|
|
"loss": 1.6346862316131592,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 1.890990990990991,
|
|
"grad_norm": 8.849643010393113,
|
|
"learning_rate": 3.6213094092215284e-06,
|
|
"loss": 1.44803786277771,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 1.8918918918918919,
|
|
"grad_norm": 12.374021630104043,
|
|
"learning_rate": 3.6162721264125943e-06,
|
|
"loss": 1.6346116065979004,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 1.8927927927927928,
|
|
"grad_norm": 9.596398515770996,
|
|
"learning_rate": 3.6112363640703474e-06,
|
|
"loss": 1.6425296068191528,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 1.8936936936936937,
|
|
"grad_norm": 12.020509416799094,
|
|
"learning_rate": 3.606202127728178e-06,
|
|
"loss": 1.595349669456482,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 1.8945945945945946,
|
|
"grad_norm": 12.07767233630599,
|
|
"learning_rate": 3.6011694229178027e-06,
|
|
"loss": 1.664405107498169,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 1.8954954954954955,
|
|
"grad_norm": 6.388417918849233,
|
|
"learning_rate": 3.596138255169254e-06,
|
|
"loss": 1.658945083618164,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 1.8963963963963963,
|
|
"grad_norm": 16.91008304211875,
|
|
"learning_rate": 3.591108630010874e-06,
|
|
"loss": 1.4404268264770508,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 1.8972972972972975,
|
|
"grad_norm": 13.151558828904543,
|
|
"learning_rate": 3.586080552969312e-06,
|
|
"loss": 2.126494884490967,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 1.8981981981981981,
|
|
"grad_norm": 15.00565265407829,
|
|
"learning_rate": 3.581054029569516e-06,
|
|
"loss": 1.6209547519683838,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 1.8990990990990992,
|
|
"grad_norm": 13.23640949470636,
|
|
"learning_rate": 3.576029065334725e-06,
|
|
"loss": 2.09350323677063,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"grad_norm": 10.747731590654224,
|
|
"learning_rate": 3.5710056657864683e-06,
|
|
"loss": 1.1772202253341675,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 1.900900900900901,
|
|
"grad_norm": 9.279118499789343,
|
|
"learning_rate": 3.5659838364445505e-06,
|
|
"loss": 1.4582226276397705,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 1.9018018018018017,
|
|
"grad_norm": 6.78203843235967,
|
|
"learning_rate": 3.5609635828270545e-06,
|
|
"loss": 1.1998049020767212,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 1.9027027027027028,
|
|
"grad_norm": 14.825188149461917,
|
|
"learning_rate": 3.555944910450332e-06,
|
|
"loss": 1.1677322387695312,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 1.9036036036036035,
|
|
"grad_norm": 15.56932293596776,
|
|
"learning_rate": 3.5509278248289957e-06,
|
|
"loss": 2.0692138671875,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 1.9045045045045046,
|
|
"grad_norm": 12.284534694391617,
|
|
"learning_rate": 3.5459123314759137e-06,
|
|
"loss": 1.833665370941162,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 1.9054054054054053,
|
|
"grad_norm": 12.717321831126291,
|
|
"learning_rate": 3.54089843590221e-06,
|
|
"loss": 1.417661190032959,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 1.9063063063063064,
|
|
"grad_norm": 12.746155447547558,
|
|
"learning_rate": 3.5358861436172487e-06,
|
|
"loss": 1.0046995878219604,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 1.907207207207207,
|
|
"grad_norm": 16.910773646485442,
|
|
"learning_rate": 3.53087546012863e-06,
|
|
"loss": 1.0042613744735718,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 1.9081081081081082,
|
|
"grad_norm": 14.210173017449977,
|
|
"learning_rate": 3.5258663909421893e-06,
|
|
"loss": 1.5375925302505493,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 1.909009009009009,
|
|
"grad_norm": 11.302587028164004,
|
|
"learning_rate": 3.5208589415619886e-06,
|
|
"loss": 0.9769325256347656,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 1.90990990990991,
|
|
"grad_norm": 7.415677960684316,
|
|
"learning_rate": 3.5158531174903086e-06,
|
|
"loss": 1.5886321067810059,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 1.9108108108108108,
|
|
"grad_norm": 14.621743259705289,
|
|
"learning_rate": 3.5108489242276455e-06,
|
|
"loss": 1.2349965572357178,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 1.9117117117117117,
|
|
"grad_norm": 10.825543596160664,
|
|
"learning_rate": 3.5058463672727015e-06,
|
|
"loss": 1.5445003509521484,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 1.9126126126126126,
|
|
"grad_norm": 8.560089551257851,
|
|
"learning_rate": 3.5008454521223833e-06,
|
|
"loss": 1.0274848937988281,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 1.9135135135135135,
|
|
"grad_norm": 16.895379816187045,
|
|
"learning_rate": 3.4958461842717916e-06,
|
|
"loss": 1.7597068548202515,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 1.9144144144144144,
|
|
"grad_norm": 9.569345852519174,
|
|
"learning_rate": 3.4908485692142167e-06,
|
|
"loss": 0.9353008270263672,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 1.9153153153153153,
|
|
"grad_norm": 14.676927795457946,
|
|
"learning_rate": 3.4858526124411356e-06,
|
|
"loss": 1.1858336925506592,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 1.9162162162162162,
|
|
"grad_norm": 12.758919264221314,
|
|
"learning_rate": 3.4808583194421996e-06,
|
|
"loss": 0.9151158332824707,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 1.917117117117117,
|
|
"grad_norm": 16.25087984986083,
|
|
"learning_rate": 3.475865695705234e-06,
|
|
"loss": 1.7295141220092773,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 1.918018018018018,
|
|
"grad_norm": 9.315357505018591,
|
|
"learning_rate": 3.47087474671623e-06,
|
|
"loss": 1.395228385925293,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 1.9189189189189189,
|
|
"grad_norm": 14.907889654172648,
|
|
"learning_rate": 3.4658854779593375e-06,
|
|
"loss": 1.025083303451538,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 1.91981981981982,
|
|
"grad_norm": 27.159363933495012,
|
|
"learning_rate": 3.4608978949168615e-06,
|
|
"loss": 1.0848424434661865,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 1.9207207207207206,
|
|
"grad_norm": 7.798929240714956,
|
|
"learning_rate": 3.4559120030692516e-06,
|
|
"loss": 1.5797157287597656,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 1.9216216216216218,
|
|
"grad_norm": 11.79739917942566,
|
|
"learning_rate": 3.450927807895103e-06,
|
|
"loss": 1.4331327676773071,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 1.9225225225225224,
|
|
"grad_norm": 16.084261449585295,
|
|
"learning_rate": 3.4459453148711443e-06,
|
|
"loss": 2.5673320293426514,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 1.9234234234234235,
|
|
"grad_norm": 9.48737229453046,
|
|
"learning_rate": 3.440964529472235e-06,
|
|
"loss": 1.3175442218780518,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 1.9243243243243242,
|
|
"grad_norm": 12.449238180735826,
|
|
"learning_rate": 3.435985457171356e-06,
|
|
"loss": 1.2205897569656372,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 1.9252252252252253,
|
|
"grad_norm": 10.017944527452725,
|
|
"learning_rate": 3.431008103439608e-06,
|
|
"loss": 1.3516299724578857,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 1.926126126126126,
|
|
"grad_norm": 9.489999545183824,
|
|
"learning_rate": 3.4260324737462024e-06,
|
|
"loss": 2.0574636459350586,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 1.927027027027027,
|
|
"grad_norm": 8.714596265903568,
|
|
"learning_rate": 3.4210585735584566e-06,
|
|
"loss": 1.8997373580932617,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 1.9279279279279278,
|
|
"grad_norm": 9.410757997128773,
|
|
"learning_rate": 3.4160864083417876e-06,
|
|
"loss": 1.4841136932373047,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 1.928828828828829,
|
|
"grad_norm": 7.0051548060462325,
|
|
"learning_rate": 3.4111159835597053e-06,
|
|
"loss": 1.2358179092407227,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 1.9297297297297298,
|
|
"grad_norm": 7.660808937674649,
|
|
"learning_rate": 3.406147304673808e-06,
|
|
"loss": 1.7413294315338135,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 1.9306306306306307,
|
|
"grad_norm": 10.034783141698217,
|
|
"learning_rate": 3.401180377143774e-06,
|
|
"loss": 1.3427538871765137,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 1.9315315315315316,
|
|
"grad_norm": 11.602028892577103,
|
|
"learning_rate": 3.39621520642736e-06,
|
|
"loss": 2.106019973754883,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 1.9324324324324325,
|
|
"grad_norm": 13.051085698353752,
|
|
"learning_rate": 3.391251797980391e-06,
|
|
"loss": 1.3242912292480469,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 1.9333333333333333,
|
|
"grad_norm": 11.717082233526542,
|
|
"learning_rate": 3.386290157256754e-06,
|
|
"loss": 1.0651776790618896,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 1.9342342342342342,
|
|
"grad_norm": 12.567908857251185,
|
|
"learning_rate": 3.3813302897083955e-06,
|
|
"loss": 1.1179988384246826,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 1.9351351351351351,
|
|
"grad_norm": 12.175470882803538,
|
|
"learning_rate": 3.376372200785312e-06,
|
|
"loss": 1.5827984809875488,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 1.936036036036036,
|
|
"grad_norm": 9.893188916383231,
|
|
"learning_rate": 3.371415895935548e-06,
|
|
"loss": 1.4257056713104248,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 1.936936936936937,
|
|
"grad_norm": 12.599640421518274,
|
|
"learning_rate": 3.366461380605185e-06,
|
|
"loss": 1.3703809976577759,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 1.9378378378378378,
|
|
"grad_norm": 9.390541515646184,
|
|
"learning_rate": 3.3615086602383394e-06,
|
|
"loss": 1.428502082824707,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 1.9387387387387387,
|
|
"grad_norm": 11.01785269445174,
|
|
"learning_rate": 3.3565577402771566e-06,
|
|
"loss": 1.135480284690857,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 1.9396396396396396,
|
|
"grad_norm": 18.738315054280818,
|
|
"learning_rate": 3.3516086261618e-06,
|
|
"loss": 1.8752212524414062,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 1.9405405405405407,
|
|
"grad_norm": 7.545214626155076,
|
|
"learning_rate": 3.346661323330453e-06,
|
|
"loss": 2.4570391178131104,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 1.9414414414414414,
|
|
"grad_norm": 6.884765031860276,
|
|
"learning_rate": 3.3417158372193064e-06,
|
|
"loss": 1.3177688121795654,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 1.9423423423423425,
|
|
"grad_norm": 20.957711722167538,
|
|
"learning_rate": 3.3367721732625537e-06,
|
|
"loss": 1.6490633487701416,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 1.9432432432432432,
|
|
"grad_norm": 15.566743430273707,
|
|
"learning_rate": 3.331830336892388e-06,
|
|
"loss": 1.9409370422363281,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 1.9441441441441443,
|
|
"grad_norm": 10.7281788973491,
|
|
"learning_rate": 3.3268903335389923e-06,
|
|
"loss": 1.3840687274932861,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 1.945045045045045,
|
|
"grad_norm": 16.113661929909828,
|
|
"learning_rate": 3.3219521686305413e-06,
|
|
"loss": 1.8886069059371948,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 1.945945945945946,
|
|
"grad_norm": 11.2864377632836,
|
|
"learning_rate": 3.317015847593181e-06,
|
|
"loss": 1.6758947372436523,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 1.9468468468468467,
|
|
"grad_norm": 9.345236184007552,
|
|
"learning_rate": 3.3120813758510385e-06,
|
|
"loss": 1.097003698348999,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 1.9477477477477478,
|
|
"grad_norm": 18.376686260812527,
|
|
"learning_rate": 3.3071487588262045e-06,
|
|
"loss": 2.1929831504821777,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 1.9486486486486485,
|
|
"grad_norm": 7.93695872362548,
|
|
"learning_rate": 3.302218001938732e-06,
|
|
"loss": 1.305293083190918,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 1.9495495495495496,
|
|
"grad_norm": 7.1626307957024435,
|
|
"learning_rate": 3.297289110606633e-06,
|
|
"loss": 1.2249070405960083,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 1.9504504504504503,
|
|
"grad_norm": 14.021793433378013,
|
|
"learning_rate": 3.2923620902458652e-06,
|
|
"loss": 1.3577923774719238,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 1.9513513513513514,
|
|
"grad_norm": 10.25590694676422,
|
|
"learning_rate": 3.2874369462703353e-06,
|
|
"loss": 1.4521949291229248,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 1.9522522522522523,
|
|
"grad_norm": 12.249003629414416,
|
|
"learning_rate": 3.282513684091885e-06,
|
|
"loss": 1.2066313028335571,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 1.9531531531531532,
|
|
"grad_norm": 15.955002113763394,
|
|
"learning_rate": 3.277592309120289e-06,
|
|
"loss": 2.387820243835449,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 1.954054054054054,
|
|
"grad_norm": 14.008301907388773,
|
|
"learning_rate": 3.2726728267632478e-06,
|
|
"loss": 1.7124426364898682,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 1.954954954954955,
|
|
"grad_norm": 12.527364105021245,
|
|
"learning_rate": 3.2677552424263836e-06,
|
|
"loss": 2.0390710830688477,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 1.9558558558558559,
|
|
"grad_norm": 11.774335869558978,
|
|
"learning_rate": 3.262839561513232e-06,
|
|
"loss": 1.5456371307373047,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 1.9567567567567568,
|
|
"grad_norm": 10.339122054581102,
|
|
"learning_rate": 3.257925789425237e-06,
|
|
"loss": 1.7115483283996582,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 1.9576576576576576,
|
|
"grad_norm": 16.063537164982147,
|
|
"learning_rate": 3.2530139315617457e-06,
|
|
"loss": 1.4859074354171753,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 1.9585585585585585,
|
|
"grad_norm": 13.855946479261425,
|
|
"learning_rate": 3.248103993320002e-06,
|
|
"loss": 0.8990108370780945,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 1.9594594594594594,
|
|
"grad_norm": 16.241122011618927,
|
|
"learning_rate": 3.24319598009514e-06,
|
|
"loss": 2.6057894229888916,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 1.9603603603603603,
|
|
"grad_norm": 12.743938722701444,
|
|
"learning_rate": 3.2382898972801787e-06,
|
|
"loss": 1.8687529563903809,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 1.9612612612612612,
|
|
"grad_norm": 12.14750333699258,
|
|
"learning_rate": 3.233385750266015e-06,
|
|
"loss": 1.365946888923645,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 1.962162162162162,
|
|
"grad_norm": 25.643814303577955,
|
|
"learning_rate": 3.2284835444414203e-06,
|
|
"loss": 1.5597602128982544,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 1.9630630630630632,
|
|
"grad_norm": 10.678586368174491,
|
|
"learning_rate": 3.2235832851930322e-06,
|
|
"loss": 1.0392903089523315,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 1.9639639639639639,
|
|
"grad_norm": 12.384699252942765,
|
|
"learning_rate": 3.2186849779053493e-06,
|
|
"loss": 1.0315731763839722,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 1.964864864864865,
|
|
"grad_norm": 10.355602715542794,
|
|
"learning_rate": 3.213788627960725e-06,
|
|
"loss": 1.3578027486801147,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 1.9657657657657657,
|
|
"grad_norm": 9.43903416775586,
|
|
"learning_rate": 3.2088942407393642e-06,
|
|
"loss": 1.8253684043884277,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 1.9666666666666668,
|
|
"grad_norm": 8.013148187820535,
|
|
"learning_rate": 3.20400182161931e-06,
|
|
"loss": 1.3881949186325073,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 1.9675675675675675,
|
|
"grad_norm": 10.28255390921646,
|
|
"learning_rate": 3.1991113759764493e-06,
|
|
"loss": 1.4869511127471924,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 1.9684684684684686,
|
|
"grad_norm": 9.249386883195186,
|
|
"learning_rate": 3.1942229091844955e-06,
|
|
"loss": 1.9359171390533447,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 1.9693693693693692,
|
|
"grad_norm": 6.886190591874404,
|
|
"learning_rate": 3.1893364266149907e-06,
|
|
"loss": 1.3109782934188843,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 1.9702702702702704,
|
|
"grad_norm": 10.028840789202924,
|
|
"learning_rate": 3.1844519336372925e-06,
|
|
"loss": 1.5913902521133423,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 1.971171171171171,
|
|
"grad_norm": 9.587699091912263,
|
|
"learning_rate": 3.1795694356185803e-06,
|
|
"loss": 1.4712733030319214,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 1.9720720720720721,
|
|
"grad_norm": 10.581855803545576,
|
|
"learning_rate": 3.1746889379238354e-06,
|
|
"loss": 1.532201886177063,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 1.972972972972973,
|
|
"grad_norm": 10.449874477114864,
|
|
"learning_rate": 3.169810445915839e-06,
|
|
"loss": 1.3963842391967773,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 1.973873873873874,
|
|
"grad_norm": 15.404997460341296,
|
|
"learning_rate": 3.1649339649551736e-06,
|
|
"loss": 0.887641429901123,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 1.9747747747747748,
|
|
"grad_norm": 7.810739451336037,
|
|
"learning_rate": 3.16005950040021e-06,
|
|
"loss": 1.6909675598144531,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 1.9756756756756757,
|
|
"grad_norm": 9.349427591093411,
|
|
"learning_rate": 3.155187057607102e-06,
|
|
"loss": 1.4221792221069336,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 1.9765765765765766,
|
|
"grad_norm": 11.823817597071073,
|
|
"learning_rate": 3.150316641929785e-06,
|
|
"loss": 1.5854582786560059,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 1.9774774774774775,
|
|
"grad_norm": 10.292856583888586,
|
|
"learning_rate": 3.1454482587199627e-06,
|
|
"loss": 2.2465217113494873,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 1.9783783783783784,
|
|
"grad_norm": 8.958204490058126,
|
|
"learning_rate": 3.140581913327109e-06,
|
|
"loss": 1.9419772624969482,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 1.9792792792792793,
|
|
"grad_norm": 10.313590809109224,
|
|
"learning_rate": 3.1357176110984578e-06,
|
|
"loss": 1.76797616481781,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 1.9801801801801802,
|
|
"grad_norm": 15.937345081257261,
|
|
"learning_rate": 3.130855357378997e-06,
|
|
"loss": 1.9342007637023926,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 1.981081081081081,
|
|
"grad_norm": 11.655201390613755,
|
|
"learning_rate": 3.125995157511464e-06,
|
|
"loss": 1.3997079133987427,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 1.981981981981982,
|
|
"grad_norm": 15.250413697057784,
|
|
"learning_rate": 3.1211370168363397e-06,
|
|
"loss": 0.9736800193786621,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 1.9828828828828828,
|
|
"grad_norm": 15.229979109688355,
|
|
"learning_rate": 3.116280940691843e-06,
|
|
"loss": 1.5279369354248047,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 1.983783783783784,
|
|
"grad_norm": 9.31071979938882,
|
|
"learning_rate": 3.1114269344139196e-06,
|
|
"loss": 1.9810595512390137,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 1.9846846846846846,
|
|
"grad_norm": 9.441805416816392,
|
|
"learning_rate": 3.1065750033362497e-06,
|
|
"loss": 1.2492730617523193,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 1.9855855855855857,
|
|
"grad_norm": 11.741858941472692,
|
|
"learning_rate": 3.1017251527902255e-06,
|
|
"loss": 1.3416674137115479,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 1.9864864864864864,
|
|
"grad_norm": 10.130905276857453,
|
|
"learning_rate": 3.096877388104956e-06,
|
|
"loss": 1.4120872020721436,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 1.9873873873873875,
|
|
"grad_norm": 8.173921131163244,
|
|
"learning_rate": 3.0920317146072577e-06,
|
|
"loss": 1.6116995811462402,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 1.9882882882882882,
|
|
"grad_norm": 7.992464699997041,
|
|
"learning_rate": 3.0871881376216497e-06,
|
|
"loss": 1.5697150230407715,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 1.9891891891891893,
|
|
"grad_norm": 9.383321545011542,
|
|
"learning_rate": 3.082346662470347e-06,
|
|
"loss": 1.4324170351028442,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 1.99009009009009,
|
|
"grad_norm": 9.371028759509135,
|
|
"learning_rate": 3.0775072944732553e-06,
|
|
"loss": 1.3041894435882568,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 1.990990990990991,
|
|
"grad_norm": 15.416080366857384,
|
|
"learning_rate": 3.0726700389479647e-06,
|
|
"loss": 1.6367117166519165,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 1.9918918918918918,
|
|
"grad_norm": 12.165896647533119,
|
|
"learning_rate": 3.067834901209744e-06,
|
|
"loss": 1.6322283744812012,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 1.9927927927927929,
|
|
"grad_norm": 10.085776784278893,
|
|
"learning_rate": 3.063001886571536e-06,
|
|
"loss": 1.5652577877044678,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 1.9936936936936935,
|
|
"grad_norm": 12.6253638540615,
|
|
"learning_rate": 3.0581710003439484e-06,
|
|
"loss": 1.5706499814987183,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 1.9945945945945946,
|
|
"grad_norm": 12.404191542392223,
|
|
"learning_rate": 3.0533422478352525e-06,
|
|
"loss": 1.4519004821777344,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 1.9954954954954955,
|
|
"grad_norm": 11.307118564755154,
|
|
"learning_rate": 3.0485156343513733e-06,
|
|
"loss": 2.0829999446868896,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 1.9963963963963964,
|
|
"grad_norm": 15.119484663636769,
|
|
"learning_rate": 3.043691165195887e-06,
|
|
"loss": 1.5040124654769897,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 1.9972972972972973,
|
|
"grad_norm": 11.039823353111718,
|
|
"learning_rate": 3.0388688456700117e-06,
|
|
"loss": 1.6364296674728394,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 1.9981981981981982,
|
|
"grad_norm": 10.258552856771084,
|
|
"learning_rate": 3.0340486810726055e-06,
|
|
"loss": 1.7006560564041138,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 1.999099099099099,
|
|
"grad_norm": 10.452562391291803,
|
|
"learning_rate": 3.029230676700157e-06,
|
|
"loss": 1.1743242740631104,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 8.519149823556289,
|
|
"learning_rate": 3.024414837846782e-06,
|
|
"loss": 0.8965996503829956,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 2.000900900900901,
|
|
"grad_norm": 14.116860160303586,
|
|
"learning_rate": 3.019601169804216e-06,
|
|
"loss": 0.5894768238067627,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 2.001801801801802,
|
|
"grad_norm": 8.801877484178394,
|
|
"learning_rate": 3.0147896778618103e-06,
|
|
"loss": 1.0011879205703735,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 2.002702702702703,
|
|
"grad_norm": 7.8630344967600685,
|
|
"learning_rate": 3.0099803673065235e-06,
|
|
"loss": 0.34475117921829224,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 2.0036036036036036,
|
|
"grad_norm": 13.092072196546898,
|
|
"learning_rate": 3.0051732434229185e-06,
|
|
"loss": 1.7805842161178589,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 2.0045045045045047,
|
|
"grad_norm": 9.695686016646027,
|
|
"learning_rate": 3.0003683114931557e-06,
|
|
"loss": 0.5842613577842712,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 2.0054054054054054,
|
|
"grad_norm": 9.629312273647948,
|
|
"learning_rate": 2.9955655767969854e-06,
|
|
"loss": 0.5329399108886719,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 2.0063063063063065,
|
|
"grad_norm": 8.255293186613402,
|
|
"learning_rate": 2.9907650446117446e-06,
|
|
"loss": 0.24428929388523102,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 2.007207207207207,
|
|
"grad_norm": 16.23335526618716,
|
|
"learning_rate": 2.9859667202123514e-06,
|
|
"loss": 0.8648971319198608,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 2.0081081081081082,
|
|
"grad_norm": 6.750088414031487,
|
|
"learning_rate": 2.9811706088712946e-06,
|
|
"loss": 0.22527940571308136,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 2.009009009009009,
|
|
"grad_norm": 13.697241509002817,
|
|
"learning_rate": 2.9763767158586343e-06,
|
|
"loss": 0.7499334812164307,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 2.00990990990991,
|
|
"grad_norm": 8.598969444244288,
|
|
"learning_rate": 2.9715850464419905e-06,
|
|
"loss": 1.1645452976226807,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 2.0108108108108107,
|
|
"grad_norm": 9.372572117775338,
|
|
"learning_rate": 2.966795605886541e-06,
|
|
"loss": 0.2751765847206116,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 2.011711711711712,
|
|
"grad_norm": 11.278482227681451,
|
|
"learning_rate": 2.9620083994550187e-06,
|
|
"loss": 0.341278612613678,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 2.0126126126126125,
|
|
"grad_norm": 13.289995082559464,
|
|
"learning_rate": 2.9572234324076944e-06,
|
|
"loss": 0.52141273021698,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 2.0135135135135136,
|
|
"grad_norm": 11.311949250244826,
|
|
"learning_rate": 2.952440710002384e-06,
|
|
"loss": 0.41022777557373047,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 2.0144144144144143,
|
|
"grad_norm": 12.825290538173615,
|
|
"learning_rate": 2.947660237494432e-06,
|
|
"loss": 0.3243304193019867,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 2.0153153153153154,
|
|
"grad_norm": 12.675229879172527,
|
|
"learning_rate": 2.942882020136713e-06,
|
|
"loss": 0.6357255578041077,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 2.016216216216216,
|
|
"grad_norm": 13.204185864484606,
|
|
"learning_rate": 2.9381060631796256e-06,
|
|
"loss": 0.9667474031448364,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 2.017117117117117,
|
|
"grad_norm": 8.490586888118822,
|
|
"learning_rate": 2.933332371871081e-06,
|
|
"loss": 0.49755173921585083,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 2.018018018018018,
|
|
"grad_norm": 11.33330600264706,
|
|
"learning_rate": 2.928560951456504e-06,
|
|
"loss": 0.7523829936981201,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 2.018918918918919,
|
|
"grad_norm": 16.910769932644783,
|
|
"learning_rate": 2.9237918071788217e-06,
|
|
"loss": 0.4810546934604645,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 2.0198198198198196,
|
|
"grad_norm": 8.234150024101922,
|
|
"learning_rate": 2.9190249442784623e-06,
|
|
"loss": 0.1807146668434143,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 2.0207207207207207,
|
|
"grad_norm": 8.8698820322946,
|
|
"learning_rate": 2.9142603679933466e-06,
|
|
"loss": 0.6779142022132874,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 2.0216216216216214,
|
|
"grad_norm": 10.512189441751989,
|
|
"learning_rate": 2.909498083558879e-06,
|
|
"loss": 0.616098165512085,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 2.0225225225225225,
|
|
"grad_norm": 10.0793341178136,
|
|
"learning_rate": 2.9047380962079525e-06,
|
|
"loss": 0.28480350971221924,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 2.0234234234234236,
|
|
"grad_norm": 11.802225328280493,
|
|
"learning_rate": 2.899980411170927e-06,
|
|
"loss": 0.3633064329624176,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 2.0243243243243243,
|
|
"grad_norm": 12.639280973121657,
|
|
"learning_rate": 2.8952250336756455e-06,
|
|
"loss": 1.3999977111816406,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 2.0252252252252254,
|
|
"grad_norm": 17.626132922762903,
|
|
"learning_rate": 2.8904719689474026e-06,
|
|
"loss": 1.0454301834106445,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 2.026126126126126,
|
|
"grad_norm": 9.016040345520308,
|
|
"learning_rate": 2.885721222208959e-06,
|
|
"loss": 0.2405223548412323,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 2.027027027027027,
|
|
"grad_norm": 11.25056348026538,
|
|
"learning_rate": 2.880972798680527e-06,
|
|
"loss": 0.3542478680610657,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 2.027927927927928,
|
|
"grad_norm": 9.950745388942867,
|
|
"learning_rate": 2.8762267035797607e-06,
|
|
"loss": 0.23745930194854736,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 2.028828828828829,
|
|
"grad_norm": 16.22337232432947,
|
|
"learning_rate": 2.871482942121766e-06,
|
|
"loss": 0.83488929271698,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 2.0297297297297296,
|
|
"grad_norm": 11.399313632222292,
|
|
"learning_rate": 2.8667415195190745e-06,
|
|
"loss": 0.5515332221984863,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 2.0306306306306308,
|
|
"grad_norm": 11.004201434799734,
|
|
"learning_rate": 2.8620024409816555e-06,
|
|
"loss": 0.6944783329963684,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 2.0315315315315314,
|
|
"grad_norm": 4.012907996201843,
|
|
"learning_rate": 2.8572657117168956e-06,
|
|
"loss": 0.11357761919498444,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 2.0324324324324325,
|
|
"grad_norm": 5.943422057916982,
|
|
"learning_rate": 2.852531336929608e-06,
|
|
"loss": 0.10978563874959946,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 2.033333333333333,
|
|
"grad_norm": 10.8917138227732,
|
|
"learning_rate": 2.84779932182201e-06,
|
|
"loss": 0.23231631517410278,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 2.0342342342342343,
|
|
"grad_norm": 11.957463438051688,
|
|
"learning_rate": 2.843069671593734e-06,
|
|
"loss": 0.5335648059844971,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 2.035135135135135,
|
|
"grad_norm": 13.27800383619201,
|
|
"learning_rate": 2.8383423914418074e-06,
|
|
"loss": 0.3874744176864624,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 2.036036036036036,
|
|
"grad_norm": 11.29022345693228,
|
|
"learning_rate": 2.8336174865606587e-06,
|
|
"loss": 0.31073588132858276,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 2.036936936936937,
|
|
"grad_norm": 8.270076970900663,
|
|
"learning_rate": 2.8288949621421015e-06,
|
|
"loss": 0.22705663740634918,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 2.037837837837838,
|
|
"grad_norm": 22.317803802467708,
|
|
"learning_rate": 2.8241748233753362e-06,
|
|
"loss": 0.627781867980957,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 2.0387387387387386,
|
|
"grad_norm": 12.282119291554915,
|
|
"learning_rate": 2.819457075446945e-06,
|
|
"loss": 0.4912683963775635,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 2.0396396396396397,
|
|
"grad_norm": 17.042188440229676,
|
|
"learning_rate": 2.814741723540876e-06,
|
|
"loss": 0.5708624720573425,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 2.0405405405405403,
|
|
"grad_norm": 12.985899407068953,
|
|
"learning_rate": 2.8100287728384508e-06,
|
|
"loss": 0.5778407454490662,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 2.0414414414414415,
|
|
"grad_norm": 8.102473536840277,
|
|
"learning_rate": 2.8053182285183466e-06,
|
|
"loss": 0.396592378616333,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 2.042342342342342,
|
|
"grad_norm": 11.470603839558542,
|
|
"learning_rate": 2.800610095756604e-06,
|
|
"loss": 0.4478808641433716,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 2.0432432432432432,
|
|
"grad_norm": 8.865998347270581,
|
|
"learning_rate": 2.7959043797266074e-06,
|
|
"loss": 0.38251596689224243,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 2.0441441441441444,
|
|
"grad_norm": 9.78253761748689,
|
|
"learning_rate": 2.7912010855990845e-06,
|
|
"loss": 0.333384245634079,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 2.045045045045045,
|
|
"grad_norm": 10.835170947223752,
|
|
"learning_rate": 2.786500218542111e-06,
|
|
"loss": 0.3847394585609436,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 2.045945945945946,
|
|
"grad_norm": 15.716206208288712,
|
|
"learning_rate": 2.7818017837210842e-06,
|
|
"loss": 0.4596790671348572,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 2.046846846846847,
|
|
"grad_norm": 9.434308340402769,
|
|
"learning_rate": 2.7771057862987384e-06,
|
|
"loss": 0.42317095398902893,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 2.047747747747748,
|
|
"grad_norm": 10.941664955305084,
|
|
"learning_rate": 2.772412231435122e-06,
|
|
"loss": 0.8684549331665039,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 2.0486486486486486,
|
|
"grad_norm": 8.072982259982698,
|
|
"learning_rate": 2.7677211242876064e-06,
|
|
"loss": 0.3451632857322693,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 2.0495495495495497,
|
|
"grad_norm": 24.927221655778474,
|
|
"learning_rate": 2.7630324700108665e-06,
|
|
"loss": 1.1120212078094482,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 2.0504504504504504,
|
|
"grad_norm": 9.417859518363821,
|
|
"learning_rate": 2.7583462737568866e-06,
|
|
"loss": 0.31997615098953247,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 2.0513513513513515,
|
|
"grad_norm": 8.542657946567818,
|
|
"learning_rate": 2.753662540674952e-06,
|
|
"loss": 0.29756706953048706,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 2.052252252252252,
|
|
"grad_norm": 10.937563611004684,
|
|
"learning_rate": 2.748981275911633e-06,
|
|
"loss": 0.2215685397386551,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 2.0531531531531533,
|
|
"grad_norm": 11.318570631865057,
|
|
"learning_rate": 2.7443024846107987e-06,
|
|
"loss": 0.3994358479976654,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 2.054054054054054,
|
|
"grad_norm": 11.700050923938079,
|
|
"learning_rate": 2.739626171913589e-06,
|
|
"loss": 0.4751622974872589,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 2.054954954954955,
|
|
"grad_norm": 8.401848897882775,
|
|
"learning_rate": 2.7349523429584307e-06,
|
|
"loss": 0.5750836730003357,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 2.0558558558558557,
|
|
"grad_norm": 11.685087826628124,
|
|
"learning_rate": 2.7302810028810124e-06,
|
|
"loss": 0.5777537226676941,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 2.056756756756757,
|
|
"grad_norm": 9.849935676447956,
|
|
"learning_rate": 2.725612156814296e-06,
|
|
"loss": 0.5181484222412109,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 2.0576576576576575,
|
|
"grad_norm": 11.001241896604622,
|
|
"learning_rate": 2.720945809888494e-06,
|
|
"loss": 0.8288261294364929,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 2.0585585585585586,
|
|
"grad_norm": 11.29434899953877,
|
|
"learning_rate": 2.716281967231083e-06,
|
|
"loss": 0.36716341972351074,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 2.0594594594594593,
|
|
"grad_norm": 14.91314367439175,
|
|
"learning_rate": 2.711620633966778e-06,
|
|
"loss": 0.35981863737106323,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 2.0603603603603604,
|
|
"grad_norm": 10.481206407261784,
|
|
"learning_rate": 2.706961815217547e-06,
|
|
"loss": 0.43312573432922363,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 2.061261261261261,
|
|
"grad_norm": 18.697687947776096,
|
|
"learning_rate": 2.7023055161025846e-06,
|
|
"loss": 0.5223169922828674,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 2.062162162162162,
|
|
"grad_norm": 12.203868038942481,
|
|
"learning_rate": 2.6976517417383207e-06,
|
|
"loss": 0.9200209379196167,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 2.063063063063063,
|
|
"grad_norm": 22.045402305638788,
|
|
"learning_rate": 2.693000497238416e-06,
|
|
"loss": 0.523088812828064,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 2.063963963963964,
|
|
"grad_norm": 12.213912299040938,
|
|
"learning_rate": 2.6883517877137405e-06,
|
|
"loss": 0.6578672528266907,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 2.064864864864865,
|
|
"grad_norm": 10.268307278686262,
|
|
"learning_rate": 2.683705618272393e-06,
|
|
"loss": 0.5833814144134521,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 2.0657657657657658,
|
|
"grad_norm": 11.559777703090857,
|
|
"learning_rate": 2.679061994019669e-06,
|
|
"loss": 0.5228210687637329,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 2.066666666666667,
|
|
"grad_norm": 10.769271588186752,
|
|
"learning_rate": 2.674420920058074e-06,
|
|
"loss": 0.6461083292961121,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 2.0675675675675675,
|
|
"grad_norm": 9.509527098309293,
|
|
"learning_rate": 2.6697824014873076e-06,
|
|
"loss": 0.43987417221069336,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 2.0684684684684687,
|
|
"grad_norm": 13.96428443975603,
|
|
"learning_rate": 2.66514644340426e-06,
|
|
"loss": 0.8247560262680054,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 2.0693693693693693,
|
|
"grad_norm": 8.226336945205592,
|
|
"learning_rate": 2.660513050903016e-06,
|
|
"loss": 0.2795591354370117,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 2.0702702702702704,
|
|
"grad_norm": 13.102258837455198,
|
|
"learning_rate": 2.655882229074832e-06,
|
|
"loss": 0.37356728315353394,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 2.071171171171171,
|
|
"grad_norm": 12.490435999993819,
|
|
"learning_rate": 2.6512539830081476e-06,
|
|
"loss": 0.5807406306266785,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 2.0720720720720722,
|
|
"grad_norm": 6.102017937500685,
|
|
"learning_rate": 2.646628317788563e-06,
|
|
"loss": 0.19707541167736053,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 2.072972972972973,
|
|
"grad_norm": 8.814493990300647,
|
|
"learning_rate": 2.6420052384988524e-06,
|
|
"loss": 0.2348020076751709,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 2.073873873873874,
|
|
"grad_norm": 10.314523249976816,
|
|
"learning_rate": 2.637384750218941e-06,
|
|
"loss": 0.7949020266532898,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 2.0747747747747747,
|
|
"grad_norm": 6.35103823999349,
|
|
"learning_rate": 2.6327668580259123e-06,
|
|
"loss": 0.29874366521835327,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 2.075675675675676,
|
|
"grad_norm": 8.725783477885596,
|
|
"learning_rate": 2.628151566993991e-06,
|
|
"loss": 0.31836336851119995,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 2.0765765765765765,
|
|
"grad_norm": 12.662520675582003,
|
|
"learning_rate": 2.6235388821945497e-06,
|
|
"loss": 0.6175304055213928,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 2.0774774774774776,
|
|
"grad_norm": 15.111180942349304,
|
|
"learning_rate": 2.6189288086960967e-06,
|
|
"loss": 0.29739707708358765,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 2.0783783783783782,
|
|
"grad_norm": 8.652008298695993,
|
|
"learning_rate": 2.614321351564265e-06,
|
|
"loss": 0.5761679410934448,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 2.0792792792792794,
|
|
"grad_norm": 8.931196997633322,
|
|
"learning_rate": 2.6097165158618205e-06,
|
|
"loss": 0.6768096685409546,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 2.08018018018018,
|
|
"grad_norm": 8.950653312806862,
|
|
"learning_rate": 2.6051143066486407e-06,
|
|
"loss": 0.5083034634590149,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 2.081081081081081,
|
|
"grad_norm": 9.7095512236923,
|
|
"learning_rate": 2.6005147289817256e-06,
|
|
"loss": 0.3268805146217346,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 2.081981981981982,
|
|
"grad_norm": 10.69210399126161,
|
|
"learning_rate": 2.5959177879151757e-06,
|
|
"loss": 0.8243169784545898,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 2.082882882882883,
|
|
"grad_norm": 8.184778880258797,
|
|
"learning_rate": 2.5913234885002015e-06,
|
|
"loss": 0.1818707138299942,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 2.0837837837837836,
|
|
"grad_norm": 12.379077514532058,
|
|
"learning_rate": 2.5867318357851023e-06,
|
|
"loss": 0.38324978947639465,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 2.0846846846846847,
|
|
"grad_norm": 9.331217811685404,
|
|
"learning_rate": 2.582142834815279e-06,
|
|
"loss": 0.3204289376735687,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 2.0855855855855854,
|
|
"grad_norm": 8.261765748612266,
|
|
"learning_rate": 2.5775564906332114e-06,
|
|
"loss": 0.35750338435173035,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 2.0864864864864865,
|
|
"grad_norm": 15.324060478489404,
|
|
"learning_rate": 2.5729728082784606e-06,
|
|
"loss": 0.3415447771549225,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 2.0873873873873876,
|
|
"grad_norm": 13.385202003955543,
|
|
"learning_rate": 2.568391792787668e-06,
|
|
"loss": 1.0079660415649414,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 2.0882882882882883,
|
|
"grad_norm": 8.717041160105445,
|
|
"learning_rate": 2.5638134491945375e-06,
|
|
"loss": 0.38941991329193115,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 2.0891891891891894,
|
|
"grad_norm": 9.207171337601402,
|
|
"learning_rate": 2.5592377825298454e-06,
|
|
"loss": 0.4124867916107178,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 2.09009009009009,
|
|
"grad_norm": 10.13164632452567,
|
|
"learning_rate": 2.5546647978214144e-06,
|
|
"loss": 0.2182263880968094,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 2.090990990990991,
|
|
"grad_norm": 16.035761249089337,
|
|
"learning_rate": 2.550094500094137e-06,
|
|
"loss": 0.9349597692489624,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 2.091891891891892,
|
|
"grad_norm": 9.86133341431888,
|
|
"learning_rate": 2.545526894369939e-06,
|
|
"loss": 0.2269417643547058,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 2.092792792792793,
|
|
"grad_norm": 18.080941203665855,
|
|
"learning_rate": 2.5409619856677914e-06,
|
|
"loss": 0.5008091926574707,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 2.0936936936936936,
|
|
"grad_norm": 12.781689854501156,
|
|
"learning_rate": 2.5363997790037076e-06,
|
|
"loss": 0.37303346395492554,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 2.0945945945945947,
|
|
"grad_norm": 10.169949813847717,
|
|
"learning_rate": 2.5318402793907225e-06,
|
|
"loss": 0.2436217963695526,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 2.0954954954954954,
|
|
"grad_norm": 9.588900664652744,
|
|
"learning_rate": 2.5272834918389072e-06,
|
|
"loss": 0.5392529964447021,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 2.0963963963963965,
|
|
"grad_norm": 12.280038732928787,
|
|
"learning_rate": 2.522729421355342e-06,
|
|
"loss": 0.5733280181884766,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 2.097297297297297,
|
|
"grad_norm": 9.180285847143764,
|
|
"learning_rate": 2.5181780729441313e-06,
|
|
"loss": 0.42448052763938904,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 2.0981981981981983,
|
|
"grad_norm": 10.287915105781291,
|
|
"learning_rate": 2.5136294516063796e-06,
|
|
"loss": 0.46256181597709656,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 2.099099099099099,
|
|
"grad_norm": 12.602324598544971,
|
|
"learning_rate": 2.5090835623402033e-06,
|
|
"loss": 0.5333715677261353,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"grad_norm": 8.603536738831417,
|
|
"learning_rate": 2.504540410140708e-06,
|
|
"loss": 0.46317631006240845,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 2.1009009009009008,
|
|
"grad_norm": 15.252614102903824,
|
|
"learning_rate": 2.5000000000000015e-06,
|
|
"loss": 0.2843453884124756,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 2.101801801801802,
|
|
"grad_norm": 11.515043773530387,
|
|
"learning_rate": 2.495462336907168e-06,
|
|
"loss": 0.28911393880844116,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 2.1027027027027025,
|
|
"grad_norm": 7.601978267814978,
|
|
"learning_rate": 2.490927425848284e-06,
|
|
"loss": 0.2602662444114685,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 2.1036036036036037,
|
|
"grad_norm": 17.107273943657816,
|
|
"learning_rate": 2.486395271806392e-06,
|
|
"loss": 0.8394905924797058,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 2.1045045045045043,
|
|
"grad_norm": 11.895873989178591,
|
|
"learning_rate": 2.481865879761511e-06,
|
|
"loss": 0.6935086250305176,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 2.1054054054054054,
|
|
"grad_norm": 15.929921426123189,
|
|
"learning_rate": 2.4773392546906265e-06,
|
|
"loss": 0.45318153500556946,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 2.106306306306306,
|
|
"grad_norm": 12.85090334195187,
|
|
"learning_rate": 2.472815401567677e-06,
|
|
"loss": 0.6440577507019043,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 2.1072072072072072,
|
|
"grad_norm": 13.249989711629635,
|
|
"learning_rate": 2.468294325363562e-06,
|
|
"loss": 1.3613526821136475,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 2.108108108108108,
|
|
"grad_norm": 9.83621689947193,
|
|
"learning_rate": 2.463776031046124e-06,
|
|
"loss": 0.7230061292648315,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 2.109009009009009,
|
|
"grad_norm": 8.95457234014136,
|
|
"learning_rate": 2.4592605235801544e-06,
|
|
"loss": 0.49418580532073975,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 2.10990990990991,
|
|
"grad_norm": 9.937013635860467,
|
|
"learning_rate": 2.454747807927377e-06,
|
|
"loss": 0.23637472093105316,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 2.110810810810811,
|
|
"grad_norm": 8.702918528271233,
|
|
"learning_rate": 2.4502378890464483e-06,
|
|
"loss": 0.18265977501869202,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 2.111711711711712,
|
|
"grad_norm": 10.920778573170063,
|
|
"learning_rate": 2.4457307718929583e-06,
|
|
"loss": 0.46716073155403137,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 2.1126126126126126,
|
|
"grad_norm": 13.178911538717738,
|
|
"learning_rate": 2.4412264614194094e-06,
|
|
"loss": 0.37622249126434326,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 2.1135135135135137,
|
|
"grad_norm": 9.556232375847543,
|
|
"learning_rate": 2.4367249625752277e-06,
|
|
"loss": 0.3927081227302551,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 2.1144144144144144,
|
|
"grad_norm": 10.204043001488875,
|
|
"learning_rate": 2.4322262803067426e-06,
|
|
"loss": 0.43625408411026,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 2.1153153153153155,
|
|
"grad_norm": 9.301710826830018,
|
|
"learning_rate": 2.427730419557196e-06,
|
|
"loss": 0.19002637267112732,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 2.116216216216216,
|
|
"grad_norm": 11.528515744715996,
|
|
"learning_rate": 2.423237385266723e-06,
|
|
"loss": 0.4054466485977173,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 2.1171171171171173,
|
|
"grad_norm": 6.451910594086666,
|
|
"learning_rate": 2.4187471823723558e-06,
|
|
"loss": 0.26523804664611816,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 2.118018018018018,
|
|
"grad_norm": 8.509331181666335,
|
|
"learning_rate": 2.414259815808019e-06,
|
|
"loss": 0.24853050708770752,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 2.118918918918919,
|
|
"grad_norm": 10.073616000957662,
|
|
"learning_rate": 2.4097752905045124e-06,
|
|
"loss": 0.4062986969947815,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 2.1198198198198197,
|
|
"grad_norm": 14.787379079784657,
|
|
"learning_rate": 2.4052936113895215e-06,
|
|
"loss": 0.603924572467804,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 2.120720720720721,
|
|
"grad_norm": 10.804554462370408,
|
|
"learning_rate": 2.4008147833875984e-06,
|
|
"loss": 0.1669190526008606,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 2.1216216216216215,
|
|
"grad_norm": 22.251089583499112,
|
|
"learning_rate": 2.396338811420168e-06,
|
|
"loss": 0.5073699951171875,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 2.1225225225225226,
|
|
"grad_norm": 8.348405715573369,
|
|
"learning_rate": 2.391865700405511e-06,
|
|
"loss": 0.36968138813972473,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 2.1234234234234233,
|
|
"grad_norm": 12.40229749896702,
|
|
"learning_rate": 2.3873954552587706e-06,
|
|
"loss": 0.3594987392425537,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 2.1243243243243244,
|
|
"grad_norm": 11.519223455557784,
|
|
"learning_rate": 2.382928080891934e-06,
|
|
"loss": 0.7377095222473145,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 2.125225225225225,
|
|
"grad_norm": 8.814689830976269,
|
|
"learning_rate": 2.3784635822138424e-06,
|
|
"loss": 0.40884578227996826,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 2.126126126126126,
|
|
"grad_norm": 8.118374260597792,
|
|
"learning_rate": 2.3740019641301678e-06,
|
|
"loss": 0.7081960439682007,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 2.127027027027027,
|
|
"grad_norm": 8.206424945616865,
|
|
"learning_rate": 2.369543231543425e-06,
|
|
"loss": 0.39920416474342346,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 2.127927927927928,
|
|
"grad_norm": 9.865017992158648,
|
|
"learning_rate": 2.3650873893529543e-06,
|
|
"loss": 0.6542052030563354,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 2.1288288288288286,
|
|
"grad_norm": 9.835055140204027,
|
|
"learning_rate": 2.3606344424549165e-06,
|
|
"loss": 0.324113130569458,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 2.1297297297297297,
|
|
"grad_norm": 14.045904630933688,
|
|
"learning_rate": 2.356184395742299e-06,
|
|
"loss": 0.39475977420806885,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 2.1306306306306304,
|
|
"grad_norm": 8.889340169827602,
|
|
"learning_rate": 2.3517372541048967e-06,
|
|
"loss": 0.38433074951171875,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 2.1315315315315315,
|
|
"grad_norm": 10.743076400389564,
|
|
"learning_rate": 2.347293022429317e-06,
|
|
"loss": 0.33674681186676025,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 2.1324324324324326,
|
|
"grad_norm": 10.001442027595337,
|
|
"learning_rate": 2.342851705598962e-06,
|
|
"loss": 0.28496724367141724,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 2.1333333333333333,
|
|
"grad_norm": 11.78723436159302,
|
|
"learning_rate": 2.3384133084940404e-06,
|
|
"loss": 0.23608997464179993,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 2.1342342342342344,
|
|
"grad_norm": 8.147787293186816,
|
|
"learning_rate": 2.333977835991545e-06,
|
|
"loss": 0.4251982569694519,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 2.135135135135135,
|
|
"grad_norm": 37.97208785539425,
|
|
"learning_rate": 2.3295452929652566e-06,
|
|
"loss": 1.9654648303985596,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 2.136036036036036,
|
|
"grad_norm": 7.966149846560465,
|
|
"learning_rate": 2.325115684285743e-06,
|
|
"loss": 0.26858100295066833,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 2.136936936936937,
|
|
"grad_norm": 9.07230141009553,
|
|
"learning_rate": 2.320689014820338e-06,
|
|
"loss": 0.48784181475639343,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 2.137837837837838,
|
|
"grad_norm": 14.146368634764396,
|
|
"learning_rate": 2.316265289433155e-06,
|
|
"loss": 0.5792821049690247,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 2.1387387387387387,
|
|
"grad_norm": 17.39659726184654,
|
|
"learning_rate": 2.3118445129850643e-06,
|
|
"loss": 0.9232209920883179,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 2.1396396396396398,
|
|
"grad_norm": 9.256364427624968,
|
|
"learning_rate": 2.307426690333704e-06,
|
|
"loss": 0.34143272042274475,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 2.1405405405405404,
|
|
"grad_norm": 9.824285456445423,
|
|
"learning_rate": 2.303011826333458e-06,
|
|
"loss": 0.4122055470943451,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 2.1414414414414416,
|
|
"grad_norm": 7.0748215703635795,
|
|
"learning_rate": 2.2985999258354662e-06,
|
|
"loss": 0.11493399739265442,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 2.142342342342342,
|
|
"grad_norm": 9.884795345718247,
|
|
"learning_rate": 2.2941909936876076e-06,
|
|
"loss": 0.7939780950546265,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 2.1432432432432433,
|
|
"grad_norm": 17.440534655010104,
|
|
"learning_rate": 2.2897850347345023e-06,
|
|
"loss": 0.5796934366226196,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 2.144144144144144,
|
|
"grad_norm": 22.7012418504902,
|
|
"learning_rate": 2.285382053817504e-06,
|
|
"loss": 2.958590507507324,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 2.145045045045045,
|
|
"grad_norm": 11.634567805869871,
|
|
"learning_rate": 2.2809820557746888e-06,
|
|
"loss": 0.31657877564430237,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 2.145945945945946,
|
|
"grad_norm": 11.937121610028802,
|
|
"learning_rate": 2.2765850454408622e-06,
|
|
"loss": 0.471710741519928,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 2.146846846846847,
|
|
"grad_norm": 10.086431126572682,
|
|
"learning_rate": 2.27219102764754e-06,
|
|
"loss": 0.4611928164958954,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 2.1477477477477476,
|
|
"grad_norm": 12.466178277328451,
|
|
"learning_rate": 2.267800007222957e-06,
|
|
"loss": 0.8834566473960876,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 2.1486486486486487,
|
|
"grad_norm": 13.903990390370952,
|
|
"learning_rate": 2.2634119889920468e-06,
|
|
"loss": 0.31093859672546387,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 2.1495495495495494,
|
|
"grad_norm": 9.985430338321182,
|
|
"learning_rate": 2.2590269777764516e-06,
|
|
"loss": 0.4155240058898926,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 2.1504504504504505,
|
|
"grad_norm": 8.882716847913825,
|
|
"learning_rate": 2.2546449783945017e-06,
|
|
"loss": 1.355088472366333,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 2.1513513513513516,
|
|
"grad_norm": 7.520588110764399,
|
|
"learning_rate": 2.2502659956612255e-06,
|
|
"loss": 0.5796226263046265,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 2.1522522522522523,
|
|
"grad_norm": 9.52188001072593,
|
|
"learning_rate": 2.2458900343883316e-06,
|
|
"loss": 0.500851571559906,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 2.153153153153153,
|
|
"grad_norm": 28.729014518090274,
|
|
"learning_rate": 2.2415170993842086e-06,
|
|
"loss": 2.38733172416687,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 2.154054054054054,
|
|
"grad_norm": 10.034144108723407,
|
|
"learning_rate": 2.2371471954539236e-06,
|
|
"loss": 0.6553759574890137,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 2.154954954954955,
|
|
"grad_norm": 15.19632098007346,
|
|
"learning_rate": 2.2327803273992083e-06,
|
|
"loss": 0.6325008273124695,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 2.155855855855856,
|
|
"grad_norm": 7.364135092643524,
|
|
"learning_rate": 2.2284165000184643e-06,
|
|
"loss": 0.1823403388261795,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 2.156756756756757,
|
|
"grad_norm": 13.138761298931799,
|
|
"learning_rate": 2.2240557181067428e-06,
|
|
"loss": 0.4858241379261017,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 2.1576576576576576,
|
|
"grad_norm": 6.964027184356961,
|
|
"learning_rate": 2.2196979864557624e-06,
|
|
"loss": 0.20069685578346252,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 2.1585585585585587,
|
|
"grad_norm": 6.45942914944898,
|
|
"learning_rate": 2.2153433098538775e-06,
|
|
"loss": 0.17389008402824402,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 2.1594594594594594,
|
|
"grad_norm": 15.783068952470174,
|
|
"learning_rate": 2.2109916930860894e-06,
|
|
"loss": 0.6951287388801575,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 2.1603603603603605,
|
|
"grad_norm": 13.483242899827614,
|
|
"learning_rate": 2.2066431409340406e-06,
|
|
"loss": 1.5758002996444702,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 2.161261261261261,
|
|
"grad_norm": 7.773860546466419,
|
|
"learning_rate": 2.202297658176001e-06,
|
|
"loss": 0.2947731614112854,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 2.1621621621621623,
|
|
"grad_norm": 10.921931898275625,
|
|
"learning_rate": 2.197955249586873e-06,
|
|
"loss": 0.29223906993865967,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 2.163063063063063,
|
|
"grad_norm": 11.433754138024282,
|
|
"learning_rate": 2.1936159199381744e-06,
|
|
"loss": 0.3557126224040985,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 2.163963963963964,
|
|
"grad_norm": 12.581255246756,
|
|
"learning_rate": 2.189279673998048e-06,
|
|
"loss": 0.3648565411567688,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 2.1648648648648647,
|
|
"grad_norm": 8.992064868829692,
|
|
"learning_rate": 2.1849465165312406e-06,
|
|
"loss": 0.576091468334198,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 2.165765765765766,
|
|
"grad_norm": 9.631203893299503,
|
|
"learning_rate": 2.1806164522991118e-06,
|
|
"loss": 1.0459250211715698,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 2.1666666666666665,
|
|
"grad_norm": 11.29732154517533,
|
|
"learning_rate": 2.176289486059615e-06,
|
|
"loss": 0.18195028603076935,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 2.1675675675675676,
|
|
"grad_norm": 10.942530797224242,
|
|
"learning_rate": 2.171965622567308e-06,
|
|
"loss": 0.20159509778022766,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 2.1684684684684683,
|
|
"grad_norm": 10.717915076320711,
|
|
"learning_rate": 2.1676448665733326e-06,
|
|
"loss": 0.20560519397258759,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 2.1693693693693694,
|
|
"grad_norm": 10.880165426298795,
|
|
"learning_rate": 2.1633272228254166e-06,
|
|
"loss": 1.1594582796096802,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 2.17027027027027,
|
|
"grad_norm": 10.20619152670396,
|
|
"learning_rate": 2.159012696067871e-06,
|
|
"loss": 0.9054312109947205,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 2.171171171171171,
|
|
"grad_norm": 12.232009152826985,
|
|
"learning_rate": 2.1547012910415804e-06,
|
|
"loss": 0.3813351094722748,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 2.172072072072072,
|
|
"grad_norm": 9.682627987639773,
|
|
"learning_rate": 2.1503930124840017e-06,
|
|
"loss": 0.43170493841171265,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 2.172972972972973,
|
|
"grad_norm": 10.393687565580198,
|
|
"learning_rate": 2.14608786512915e-06,
|
|
"loss": 0.3984493017196655,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 2.173873873873874,
|
|
"grad_norm": 7.4258436830740795,
|
|
"learning_rate": 2.141785853707607e-06,
|
|
"loss": 0.35718607902526855,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 2.1747747747747748,
|
|
"grad_norm": 9.237375290222118,
|
|
"learning_rate": 2.1374869829465016e-06,
|
|
"loss": 0.6362316608428955,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 2.175675675675676,
|
|
"grad_norm": 36.668523947173654,
|
|
"learning_rate": 2.1331912575695197e-06,
|
|
"loss": 2.187713384628296,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 2.1765765765765765,
|
|
"grad_norm": 17.290448536633644,
|
|
"learning_rate": 2.128898682296884e-06,
|
|
"loss": 1.1021785736083984,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 2.1774774774774777,
|
|
"grad_norm": 10.308633095025803,
|
|
"learning_rate": 2.1246092618453562e-06,
|
|
"loss": 0.6403588056564331,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 2.1783783783783783,
|
|
"grad_norm": 8.007542686060276,
|
|
"learning_rate": 2.1203230009282383e-06,
|
|
"loss": 0.3731953501701355,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 2.1792792792792794,
|
|
"grad_norm": 17.30734817093238,
|
|
"learning_rate": 2.116039904255352e-06,
|
|
"loss": 0.32193267345428467,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 2.18018018018018,
|
|
"grad_norm": 34.71799983037904,
|
|
"learning_rate": 2.111759976533049e-06,
|
|
"loss": 0.6704785823822021,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 2.1810810810810812,
|
|
"grad_norm": 8.985538491621071,
|
|
"learning_rate": 2.107483222464193e-06,
|
|
"loss": 0.456855833530426,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 2.181981981981982,
|
|
"grad_norm": 9.028039853273526,
|
|
"learning_rate": 2.1032096467481665e-06,
|
|
"loss": 0.3212411403656006,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 2.182882882882883,
|
|
"grad_norm": 7.9969597536234724,
|
|
"learning_rate": 2.098939254080853e-06,
|
|
"loss": 0.2175375372171402,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 2.1837837837837837,
|
|
"grad_norm": 8.233360129183177,
|
|
"learning_rate": 2.094672049154643e-06,
|
|
"loss": 0.40003734827041626,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 2.184684684684685,
|
|
"grad_norm": 10.031265273306468,
|
|
"learning_rate": 2.0904080366584252e-06,
|
|
"loss": 0.6713770031929016,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 2.1855855855855855,
|
|
"grad_norm": 8.430225178171233,
|
|
"learning_rate": 2.086147221277574e-06,
|
|
"loss": 0.5254551768302917,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 2.1864864864864866,
|
|
"grad_norm": 8.961104515774558,
|
|
"learning_rate": 2.0818896076939597e-06,
|
|
"loss": 0.3879295587539673,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 2.1873873873873872,
|
|
"grad_norm": 17.770233303825282,
|
|
"learning_rate": 2.0776352005859253e-06,
|
|
"loss": 1.496819019317627,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 2.1882882882882884,
|
|
"grad_norm": 8.795502706383143,
|
|
"learning_rate": 2.0733840046282976e-06,
|
|
"loss": 0.17569570243358612,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 2.189189189189189,
|
|
"grad_norm": 9.95446266550367,
|
|
"learning_rate": 2.06913602449237e-06,
|
|
"loss": 0.5640667676925659,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 2.19009009009009,
|
|
"grad_norm": 7.81021561141362,
|
|
"learning_rate": 2.0648912648459072e-06,
|
|
"loss": 0.3175254762172699,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 2.190990990990991,
|
|
"grad_norm": 15.207805329851729,
|
|
"learning_rate": 2.0606497303531297e-06,
|
|
"loss": 0.40925803780555725,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 2.191891891891892,
|
|
"grad_norm": 7.485286956715557,
|
|
"learning_rate": 2.056411425674719e-06,
|
|
"loss": 0.16778123378753662,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 2.1927927927927926,
|
|
"grad_norm": 8.262220768520656,
|
|
"learning_rate": 2.0521763554678048e-06,
|
|
"loss": 0.7765547633171082,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 2.1936936936936937,
|
|
"grad_norm": 10.882030688721331,
|
|
"learning_rate": 2.0479445243859608e-06,
|
|
"loss": 0.33397090435028076,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 2.1945945945945944,
|
|
"grad_norm": 14.131737948130377,
|
|
"learning_rate": 2.0437159370792083e-06,
|
|
"loss": 0.49660491943359375,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 2.1954954954954955,
|
|
"grad_norm": 12.712321066414388,
|
|
"learning_rate": 2.0394905981939956e-06,
|
|
"loss": 0.31156596541404724,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 2.1963963963963966,
|
|
"grad_norm": 9.8263724714149,
|
|
"learning_rate": 2.035268512373208e-06,
|
|
"loss": 0.25942644476890564,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 2.1972972972972973,
|
|
"grad_norm": 13.133466660245679,
|
|
"learning_rate": 2.031049684256155e-06,
|
|
"loss": 0.3258337378501892,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 2.1981981981981984,
|
|
"grad_norm": 10.77065756877913,
|
|
"learning_rate": 2.0268341184785674e-06,
|
|
"loss": 0.4338672161102295,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 2.199099099099099,
|
|
"grad_norm": 14.090799988470666,
|
|
"learning_rate": 2.0226218196725865e-06,
|
|
"loss": 0.865012526512146,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"grad_norm": 7.940566930932142,
|
|
"learning_rate": 2.0184127924667667e-06,
|
|
"loss": 0.2968350350856781,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 2.200900900900901,
|
|
"grad_norm": 13.81979852268089,
|
|
"learning_rate": 2.0142070414860704e-06,
|
|
"loss": 0.9437194466590881,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 2.201801801801802,
|
|
"grad_norm": 7.361096080603361,
|
|
"learning_rate": 2.010004571351854e-06,
|
|
"loss": 0.6113213896751404,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 2.2027027027027026,
|
|
"grad_norm": 15.194571416317675,
|
|
"learning_rate": 2.0058053866818757e-06,
|
|
"loss": 0.6607286930084229,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 2.2036036036036037,
|
|
"grad_norm": 17.15512647139195,
|
|
"learning_rate": 2.001609492090276e-06,
|
|
"loss": 0.37218135595321655,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 2.2045045045045044,
|
|
"grad_norm": 13.243856861488625,
|
|
"learning_rate": 1.9974168921875886e-06,
|
|
"loss": 0.5706183910369873,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 2.2054054054054055,
|
|
"grad_norm": 9.72539519842245,
|
|
"learning_rate": 1.9932275915807187e-06,
|
|
"loss": 0.37227556109428406,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 2.206306306306306,
|
|
"grad_norm": 8.531348273807625,
|
|
"learning_rate": 1.9890415948729537e-06,
|
|
"loss": 0.4036311209201813,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 2.2072072072072073,
|
|
"grad_norm": 10.99128499408585,
|
|
"learning_rate": 1.984858906663943e-06,
|
|
"loss": 0.612502932548523,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 2.208108108108108,
|
|
"grad_norm": 12.58053319709825,
|
|
"learning_rate": 1.9806795315497078e-06,
|
|
"loss": 0.931442379951477,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 2.209009009009009,
|
|
"grad_norm": 11.15221590061521,
|
|
"learning_rate": 1.9765034741226234e-06,
|
|
"loss": 1.0826082229614258,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 2.2099099099099098,
|
|
"grad_norm": 9.658617398693465,
|
|
"learning_rate": 1.972330738971422e-06,
|
|
"loss": 0.4539331793785095,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 2.210810810810811,
|
|
"grad_norm": 10.455885997282982,
|
|
"learning_rate": 1.968161330681187e-06,
|
|
"loss": 0.7638237476348877,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 2.2117117117117115,
|
|
"grad_norm": 9.098237441832358,
|
|
"learning_rate": 1.9639952538333413e-06,
|
|
"loss": 0.4162856340408325,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 2.2126126126126127,
|
|
"grad_norm": 16.96984265892915,
|
|
"learning_rate": 1.959832513005652e-06,
|
|
"loss": 0.4629131555557251,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 2.2135135135135133,
|
|
"grad_norm": 9.889359503482789,
|
|
"learning_rate": 1.9556731127722167e-06,
|
|
"loss": 0.4515780806541443,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 2.2144144144144144,
|
|
"grad_norm": 14.105699173442542,
|
|
"learning_rate": 1.9515170577034657e-06,
|
|
"loss": 0.4987364411354065,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 2.215315315315315,
|
|
"grad_norm": 9.125740656362229,
|
|
"learning_rate": 1.94736435236615e-06,
|
|
"loss": 0.3197804093360901,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 2.2162162162162162,
|
|
"grad_norm": 13.964508580925338,
|
|
"learning_rate": 1.9432150013233442e-06,
|
|
"loss": 0.40308842062950134,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 2.217117117117117,
|
|
"grad_norm": 8.975587496434065,
|
|
"learning_rate": 1.9390690091344334e-06,
|
|
"loss": 0.3318456709384918,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 2.218018018018018,
|
|
"grad_norm": 12.20975301109764,
|
|
"learning_rate": 1.9349263803551112e-06,
|
|
"loss": 0.43268558382987976,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 2.218918918918919,
|
|
"grad_norm": 10.080429017858357,
|
|
"learning_rate": 1.930787119537381e-06,
|
|
"loss": 0.6931536793708801,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 2.21981981981982,
|
|
"grad_norm": 9.745510024467253,
|
|
"learning_rate": 1.9266512312295387e-06,
|
|
"loss": 0.27146437764167786,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 2.220720720720721,
|
|
"grad_norm": 8.48493869634266,
|
|
"learning_rate": 1.922518719976181e-06,
|
|
"loss": 0.4015350937843323,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 2.2216216216216216,
|
|
"grad_norm": 14.834625904722294,
|
|
"learning_rate": 1.9183895903181865e-06,
|
|
"loss": 0.6845652461051941,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 2.2225225225225227,
|
|
"grad_norm": 7.359554443415203,
|
|
"learning_rate": 1.9142638467927254e-06,
|
|
"loss": 0.2753450870513916,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 2.2234234234234234,
|
|
"grad_norm": 17.578800532609876,
|
|
"learning_rate": 1.9101414939332408e-06,
|
|
"loss": 0.5036535263061523,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 2.2243243243243245,
|
|
"grad_norm": 14.711787094540576,
|
|
"learning_rate": 1.9060225362694546e-06,
|
|
"loss": 0.5625418424606323,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 2.225225225225225,
|
|
"grad_norm": 16.115394126319416,
|
|
"learning_rate": 1.9019069783273575e-06,
|
|
"loss": 0.6296632289886475,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 2.2261261261261263,
|
|
"grad_norm": 6.62237801067677,
|
|
"learning_rate": 1.8977948246292e-06,
|
|
"loss": 0.5151411294937134,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 2.227027027027027,
|
|
"grad_norm": 10.63023502401902,
|
|
"learning_rate": 1.8936860796934997e-06,
|
|
"loss": 0.5274820923805237,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 2.227927927927928,
|
|
"grad_norm": 13.538455346956445,
|
|
"learning_rate": 1.8895807480350199e-06,
|
|
"loss": 0.23670229315757751,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 2.2288288288288287,
|
|
"grad_norm": 17.069500581244135,
|
|
"learning_rate": 1.8854788341647812e-06,
|
|
"loss": 0.9304606914520264,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 2.22972972972973,
|
|
"grad_norm": 8.322299236674336,
|
|
"learning_rate": 1.8813803425900418e-06,
|
|
"loss": 0.3520248234272003,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 2.2306306306306305,
|
|
"grad_norm": 14.283858524338202,
|
|
"learning_rate": 1.8772852778143064e-06,
|
|
"loss": 0.6242378950119019,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 2.2315315315315316,
|
|
"grad_norm": 13.279737764878936,
|
|
"learning_rate": 1.8731936443373067e-06,
|
|
"loss": 0.545856773853302,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 2.2324324324324323,
|
|
"grad_norm": 8.937322561874446,
|
|
"learning_rate": 1.8691054466550117e-06,
|
|
"loss": 0.38435298204421997,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 2.2333333333333334,
|
|
"grad_norm": 11.025142158736418,
|
|
"learning_rate": 1.8650206892596079e-06,
|
|
"loss": 0.5997890830039978,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 2.234234234234234,
|
|
"grad_norm": 9.921913825754329,
|
|
"learning_rate": 1.8609393766395083e-06,
|
|
"loss": 0.48749783635139465,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 2.235135135135135,
|
|
"grad_norm": 6.977825819869559,
|
|
"learning_rate": 1.8568615132793356e-06,
|
|
"loss": 0.22039154171943665,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 2.236036036036036,
|
|
"grad_norm": 13.279582479188377,
|
|
"learning_rate": 1.852787103659922e-06,
|
|
"loss": 0.3317052125930786,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 2.236936936936937,
|
|
"grad_norm": 27.894743328137633,
|
|
"learning_rate": 1.8487161522583092e-06,
|
|
"loss": 1.0018060207366943,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 2.237837837837838,
|
|
"grad_norm": 9.609542752211466,
|
|
"learning_rate": 1.844648663547736e-06,
|
|
"loss": 0.4543810784816742,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 2.2387387387387387,
|
|
"grad_norm": 11.633358601966219,
|
|
"learning_rate": 1.8405846419976397e-06,
|
|
"loss": 0.2692357301712036,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 2.2396396396396394,
|
|
"grad_norm": 10.667005608032856,
|
|
"learning_rate": 1.8365240920736405e-06,
|
|
"loss": 0.380027711391449,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 2.2405405405405405,
|
|
"grad_norm": 9.950352591242341,
|
|
"learning_rate": 1.8324670182375521e-06,
|
|
"loss": 0.3946274518966675,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 2.2414414414414416,
|
|
"grad_norm": 10.512109195252133,
|
|
"learning_rate": 1.8284134249473629e-06,
|
|
"loss": 0.5828955769538879,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 2.2423423423423423,
|
|
"grad_norm": 10.123779883062127,
|
|
"learning_rate": 1.8243633166572378e-06,
|
|
"loss": 0.9141032695770264,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 2.2432432432432434,
|
|
"grad_norm": 9.318644467876203,
|
|
"learning_rate": 1.8203166978175163e-06,
|
|
"loss": 0.4999552369117737,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 2.244144144144144,
|
|
"grad_norm": 14.020312178464735,
|
|
"learning_rate": 1.8162735728746978e-06,
|
|
"loss": 0.4719406068325043,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 2.245045045045045,
|
|
"grad_norm": 6.028506071219212,
|
|
"learning_rate": 1.8122339462714477e-06,
|
|
"loss": 0.23761707544326782,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 2.245945945945946,
|
|
"grad_norm": 11.139492296855032,
|
|
"learning_rate": 1.808197822446583e-06,
|
|
"loss": 1.1314280033111572,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 2.246846846846847,
|
|
"grad_norm": 13.521041085334653,
|
|
"learning_rate": 1.8041652058350768e-06,
|
|
"loss": 0.4234413504600525,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 2.2477477477477477,
|
|
"grad_norm": 10.977329354311351,
|
|
"learning_rate": 1.8001361008680413e-06,
|
|
"loss": 0.28913938999176025,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 2.2486486486486488,
|
|
"grad_norm": 8.074083512402291,
|
|
"learning_rate": 1.7961105119727385e-06,
|
|
"loss": 0.6953212022781372,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 2.2495495495495494,
|
|
"grad_norm": 12.551896231855528,
|
|
"learning_rate": 1.7920884435725594e-06,
|
|
"loss": 0.5405001044273376,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 2.2504504504504506,
|
|
"grad_norm": 13.524228680295014,
|
|
"learning_rate": 1.78806990008703e-06,
|
|
"loss": 0.3291250169277191,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 2.2513513513513512,
|
|
"grad_norm": 16.330699738586897,
|
|
"learning_rate": 1.7840548859318063e-06,
|
|
"loss": 0.6878585815429688,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 2.2522522522522523,
|
|
"grad_norm": 13.457245126255158,
|
|
"learning_rate": 1.7800434055186583e-06,
|
|
"loss": 0.419866681098938,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 2.253153153153153,
|
|
"grad_norm": 9.657317852558684,
|
|
"learning_rate": 1.776035463255481e-06,
|
|
"loss": 0.5788836479187012,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 2.254054054054054,
|
|
"grad_norm": 9.96528767047478,
|
|
"learning_rate": 1.7720310635462751e-06,
|
|
"loss": 0.3604414463043213,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 2.254954954954955,
|
|
"grad_norm": 8.826596167069177,
|
|
"learning_rate": 1.7680302107911546e-06,
|
|
"loss": 0.182212233543396,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 2.255855855855856,
|
|
"grad_norm": 11.671071130251336,
|
|
"learning_rate": 1.76403290938633e-06,
|
|
"loss": 1.0697684288024902,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 2.2567567567567566,
|
|
"grad_norm": 8.512611959151466,
|
|
"learning_rate": 1.760039163724116e-06,
|
|
"loss": 0.6069210171699524,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 2.2576576576576577,
|
|
"grad_norm": 8.335018038957672,
|
|
"learning_rate": 1.7560489781929135e-06,
|
|
"loss": 0.3821689486503601,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 2.2585585585585584,
|
|
"grad_norm": 9.372113231197151,
|
|
"learning_rate": 1.7520623571772182e-06,
|
|
"loss": 0.8013373017311096,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 2.2594594594594595,
|
|
"grad_norm": 8.966361160398138,
|
|
"learning_rate": 1.7480793050576034e-06,
|
|
"loss": 0.3552350699901581,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 2.2603603603603606,
|
|
"grad_norm": 17.49916424185179,
|
|
"learning_rate": 1.7440998262107223e-06,
|
|
"loss": 1.0693227052688599,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 2.2612612612612613,
|
|
"grad_norm": 13.105480325359768,
|
|
"learning_rate": 1.740123925009305e-06,
|
|
"loss": 1.0125956535339355,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 2.262162162162162,
|
|
"grad_norm": 13.524544588888908,
|
|
"learning_rate": 1.7361516058221445e-06,
|
|
"loss": 0.38627469539642334,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 2.263063063063063,
|
|
"grad_norm": 6.906671819286857,
|
|
"learning_rate": 1.7321828730141039e-06,
|
|
"loss": 0.3798505663871765,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 2.263963963963964,
|
|
"grad_norm": 6.949145715764155,
|
|
"learning_rate": 1.7282177309461019e-06,
|
|
"loss": 0.27201440930366516,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 2.264864864864865,
|
|
"grad_norm": 9.674123290954732,
|
|
"learning_rate": 1.7242561839751138e-06,
|
|
"loss": 0.8238488435745239,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 2.265765765765766,
|
|
"grad_norm": 11.49045505197047,
|
|
"learning_rate": 1.7202982364541626e-06,
|
|
"loss": 0.5696356296539307,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 2.2666666666666666,
|
|
"grad_norm": 12.344978291788385,
|
|
"learning_rate": 1.716343892732315e-06,
|
|
"loss": 0.5812622308731079,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 2.2675675675675677,
|
|
"grad_norm": 11.180196352054196,
|
|
"learning_rate": 1.7123931571546826e-06,
|
|
"loss": 0.522317111492157,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 2.2684684684684684,
|
|
"grad_norm": 11.142128684527036,
|
|
"learning_rate": 1.708446034062406e-06,
|
|
"loss": 0.23921099305152893,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 2.2693693693693695,
|
|
"grad_norm": 12.743258867382897,
|
|
"learning_rate": 1.7045025277926635e-06,
|
|
"loss": 0.5311107635498047,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 2.27027027027027,
|
|
"grad_norm": 8.283767953358945,
|
|
"learning_rate": 1.700562642678651e-06,
|
|
"loss": 0.43987730145454407,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 2.2711711711711713,
|
|
"grad_norm": 10.346603653226152,
|
|
"learning_rate": 1.6966263830495939e-06,
|
|
"loss": 0.7479614615440369,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 2.272072072072072,
|
|
"grad_norm": 8.60648393048622,
|
|
"learning_rate": 1.6926937532307259e-06,
|
|
"loss": 0.251346230506897,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 2.272972972972973,
|
|
"grad_norm": 6.609265596451475,
|
|
"learning_rate": 1.6887647575432992e-06,
|
|
"loss": 0.1455269753932953,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 2.2738738738738737,
|
|
"grad_norm": 13.412711990056366,
|
|
"learning_rate": 1.6848394003045671e-06,
|
|
"loss": 0.28844141960144043,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 2.274774774774775,
|
|
"grad_norm": 10.126437662126426,
|
|
"learning_rate": 1.6809176858277892e-06,
|
|
"loss": 0.6623111963272095,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 2.2756756756756755,
|
|
"grad_norm": 9.88890973645228,
|
|
"learning_rate": 1.676999618422218e-06,
|
|
"loss": 0.3788198232650757,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 2.2765765765765766,
|
|
"grad_norm": 10.061979823159149,
|
|
"learning_rate": 1.6730852023931027e-06,
|
|
"loss": 0.2634226083755493,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 2.2774774774774773,
|
|
"grad_norm": 11.74720974812679,
|
|
"learning_rate": 1.66917444204168e-06,
|
|
"loss": 0.5780117511749268,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 2.2783783783783784,
|
|
"grad_norm": 12.010340649651225,
|
|
"learning_rate": 1.665267341665165e-06,
|
|
"loss": 0.42022332549095154,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 2.279279279279279,
|
|
"grad_norm": 14.171202814097644,
|
|
"learning_rate": 1.6613639055567583e-06,
|
|
"loss": 0.5564107298851013,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 2.28018018018018,
|
|
"grad_norm": 10.818458942171763,
|
|
"learning_rate": 1.6574641380056255e-06,
|
|
"loss": 0.5242595672607422,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 2.281081081081081,
|
|
"grad_norm": 13.884314958956862,
|
|
"learning_rate": 1.6535680432969104e-06,
|
|
"loss": 0.39503562450408936,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 2.281981981981982,
|
|
"grad_norm": 8.675564235796271,
|
|
"learning_rate": 1.649675625711713e-06,
|
|
"loss": 0.49121540784835815,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 2.282882882882883,
|
|
"grad_norm": 9.267858274942872,
|
|
"learning_rate": 1.6457868895270995e-06,
|
|
"loss": 0.3930075466632843,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 2.2837837837837838,
|
|
"grad_norm": 15.418742829045566,
|
|
"learning_rate": 1.6419018390160857e-06,
|
|
"loss": 0.4331180453300476,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 2.2846846846846844,
|
|
"grad_norm": 12.741874420570895,
|
|
"learning_rate": 1.6380204784476383e-06,
|
|
"loss": 0.44317787885665894,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 2.2855855855855856,
|
|
"grad_norm": 13.591914075283237,
|
|
"learning_rate": 1.6341428120866737e-06,
|
|
"loss": 0.330650269985199,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 2.2864864864864867,
|
|
"grad_norm": 13.321801721382402,
|
|
"learning_rate": 1.630268844194043e-06,
|
|
"loss": 0.244010791182518,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 2.2873873873873873,
|
|
"grad_norm": 12.7555133729587,
|
|
"learning_rate": 1.6263985790265384e-06,
|
|
"loss": 0.6554090976715088,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 2.2882882882882885,
|
|
"grad_norm": 9.937192086504613,
|
|
"learning_rate": 1.62253202083688e-06,
|
|
"loss": 0.45852115750312805,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 2.289189189189189,
|
|
"grad_norm": 18.11730435228161,
|
|
"learning_rate": 1.6186691738737176e-06,
|
|
"loss": 0.8002996444702148,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 2.2900900900900902,
|
|
"grad_norm": 13.036139193085996,
|
|
"learning_rate": 1.6148100423816188e-06,
|
|
"loss": 0.39820799231529236,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 2.290990990990991,
|
|
"grad_norm": 13.64142883016874,
|
|
"learning_rate": 1.610954630601073e-06,
|
|
"loss": 0.3235979676246643,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 2.291891891891892,
|
|
"grad_norm": 8.455559823756069,
|
|
"learning_rate": 1.6071029427684826e-06,
|
|
"loss": 0.6476882696151733,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 2.2927927927927927,
|
|
"grad_norm": 10.194436555749911,
|
|
"learning_rate": 1.603254983116151e-06,
|
|
"loss": 0.45526450872421265,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 2.293693693693694,
|
|
"grad_norm": 22.08213491216057,
|
|
"learning_rate": 1.599410755872295e-06,
|
|
"loss": 1.0749083757400513,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 2.2945945945945945,
|
|
"grad_norm": 8.341265281719512,
|
|
"learning_rate": 1.5955702652610205e-06,
|
|
"loss": 0.35646599531173706,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 2.2954954954954956,
|
|
"grad_norm": 9.919880900975684,
|
|
"learning_rate": 1.5917335155023368e-06,
|
|
"loss": 0.47308945655822754,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 2.2963963963963963,
|
|
"grad_norm": 10.866637431216345,
|
|
"learning_rate": 1.587900510812133e-06,
|
|
"loss": 0.41832196712493896,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 2.2972972972972974,
|
|
"grad_norm": 8.53550263179792,
|
|
"learning_rate": 1.584071255402193e-06,
|
|
"loss": 0.4713675081729889,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 2.298198198198198,
|
|
"grad_norm": 22.36561781790583,
|
|
"learning_rate": 1.580245753480172e-06,
|
|
"loss": 0.6573231220245361,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 2.299099099099099,
|
|
"grad_norm": 13.552639250933119,
|
|
"learning_rate": 1.5764240092496075e-06,
|
|
"loss": 0.8595783710479736,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"grad_norm": 12.396325659130758,
|
|
"learning_rate": 1.572606026909903e-06,
|
|
"loss": 0.3619859516620636,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 2.300900900900901,
|
|
"grad_norm": 11.930996683693635,
|
|
"learning_rate": 1.5687918106563326e-06,
|
|
"loss": 0.3102957606315613,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 2.301801801801802,
|
|
"grad_norm": 9.093928475670925,
|
|
"learning_rate": 1.56498136468003e-06,
|
|
"loss": 0.31744280457496643,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 2.3027027027027027,
|
|
"grad_norm": 9.498696613955307,
|
|
"learning_rate": 1.5611746931679823e-06,
|
|
"loss": 0.2954990565776825,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 2.3036036036036034,
|
|
"grad_norm": 10.863801651131498,
|
|
"learning_rate": 1.557371800303039e-06,
|
|
"loss": 0.2831108868122101,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 2.3045045045045045,
|
|
"grad_norm": 6.796188005580747,
|
|
"learning_rate": 1.5535726902638881e-06,
|
|
"loss": 0.2826022505760193,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 2.3054054054054056,
|
|
"grad_norm": 10.086167587831634,
|
|
"learning_rate": 1.5497773672250665e-06,
|
|
"loss": 0.9375733137130737,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 2.3063063063063063,
|
|
"grad_norm": 7.873436154423617,
|
|
"learning_rate": 1.5459858353569446e-06,
|
|
"loss": 0.6757045984268188,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 2.3072072072072074,
|
|
"grad_norm": 9.980499820582452,
|
|
"learning_rate": 1.542198098825734e-06,
|
|
"loss": 0.27272501587867737,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 2.308108108108108,
|
|
"grad_norm": 12.057179868709511,
|
|
"learning_rate": 1.5384141617934706e-06,
|
|
"loss": 0.3203151226043701,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 2.309009009009009,
|
|
"grad_norm": 10.549196804146696,
|
|
"learning_rate": 1.534634028418015e-06,
|
|
"loss": 0.3926043212413788,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 2.30990990990991,
|
|
"grad_norm": 12.370776973760751,
|
|
"learning_rate": 1.530857702853053e-06,
|
|
"loss": 0.35921239852905273,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 2.310810810810811,
|
|
"grad_norm": 12.012741041762762,
|
|
"learning_rate": 1.5270851892480808e-06,
|
|
"loss": 1.428120493888855,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 2.3117117117117116,
|
|
"grad_norm": 10.442543731233824,
|
|
"learning_rate": 1.5233164917484117e-06,
|
|
"loss": 0.6229287385940552,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 2.3126126126126128,
|
|
"grad_norm": 10.440185732887818,
|
|
"learning_rate": 1.5195516144951594e-06,
|
|
"loss": 0.5179519653320312,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 2.3135135135135134,
|
|
"grad_norm": 9.965937114605188,
|
|
"learning_rate": 1.515790561625246e-06,
|
|
"loss": 0.4289321303367615,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 2.3144144144144145,
|
|
"grad_norm": 13.707154743862104,
|
|
"learning_rate": 1.5120333372713858e-06,
|
|
"loss": 0.8441285490989685,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 2.315315315315315,
|
|
"grad_norm": 9.826595994238435,
|
|
"learning_rate": 1.5082799455620917e-06,
|
|
"loss": 0.38468870520591736,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 2.3162162162162163,
|
|
"grad_norm": 8.11546553655381,
|
|
"learning_rate": 1.5045303906216596e-06,
|
|
"loss": 0.2075853943824768,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 2.317117117117117,
|
|
"grad_norm": 9.693915192091673,
|
|
"learning_rate": 1.5007846765701734e-06,
|
|
"loss": 0.9084070324897766,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 2.318018018018018,
|
|
"grad_norm": 13.014854980149707,
|
|
"learning_rate": 1.4970428075234966e-06,
|
|
"loss": 0.46323809027671814,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 2.3189189189189188,
|
|
"grad_norm": 10.01190665467692,
|
|
"learning_rate": 1.493304787593265e-06,
|
|
"loss": 0.48933979868888855,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 2.31981981981982,
|
|
"grad_norm": 11.709740034571546,
|
|
"learning_rate": 1.4895706208868876e-06,
|
|
"loss": 0.284282386302948,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 2.3207207207207206,
|
|
"grad_norm": 9.884925859468567,
|
|
"learning_rate": 1.485840311507537e-06,
|
|
"loss": 0.41380974650382996,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 2.3216216216216217,
|
|
"grad_norm": 8.315359936098643,
|
|
"learning_rate": 1.4821138635541505e-06,
|
|
"loss": 0.537804901599884,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 2.3225225225225223,
|
|
"grad_norm": 14.054287179309345,
|
|
"learning_rate": 1.4783912811214179e-06,
|
|
"loss": 0.37012577056884766,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 2.3234234234234235,
|
|
"grad_norm": 15.041886225994705,
|
|
"learning_rate": 1.474672568299787e-06,
|
|
"loss": 0.7804893851280212,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 2.3243243243243246,
|
|
"grad_norm": 12.958133604832721,
|
|
"learning_rate": 1.470957729175449e-06,
|
|
"loss": 0.8584966063499451,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 2.3252252252252252,
|
|
"grad_norm": 12.234597497681884,
|
|
"learning_rate": 1.4672467678303386e-06,
|
|
"loss": 1.5729799270629883,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 2.326126126126126,
|
|
"grad_norm": 12.290000307843565,
|
|
"learning_rate": 1.4635396883421348e-06,
|
|
"loss": 0.6885578036308289,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 2.327027027027027,
|
|
"grad_norm": 9.643336005698746,
|
|
"learning_rate": 1.459836494784244e-06,
|
|
"loss": 0.2617989182472229,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 2.327927927927928,
|
|
"grad_norm": 13.168974770375483,
|
|
"learning_rate": 1.45613719122581e-06,
|
|
"loss": 0.24258741736412048,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 2.328828828828829,
|
|
"grad_norm": 7.904785567343135,
|
|
"learning_rate": 1.4524417817316943e-06,
|
|
"loss": 0.31178921461105347,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 2.32972972972973,
|
|
"grad_norm": 10.8750654487972,
|
|
"learning_rate": 1.448750270362486e-06,
|
|
"loss": 0.8041453957557678,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 2.3306306306306306,
|
|
"grad_norm": 12.17876529259977,
|
|
"learning_rate": 1.4450626611744878e-06,
|
|
"loss": 1.0193281173706055,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 2.3315315315315317,
|
|
"grad_norm": 12.944920777841014,
|
|
"learning_rate": 1.4413789582197174e-06,
|
|
"loss": 0.7654247879981995,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 2.3324324324324324,
|
|
"grad_norm": 13.56035559244477,
|
|
"learning_rate": 1.437699165545896e-06,
|
|
"loss": 0.27251487970352173,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 2.3333333333333335,
|
|
"grad_norm": 7.364928049136203,
|
|
"learning_rate": 1.4340232871964494e-06,
|
|
"loss": 0.5466756224632263,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 2.334234234234234,
|
|
"grad_norm": 16.054428607299386,
|
|
"learning_rate": 1.4303513272105057e-06,
|
|
"loss": 0.7691991329193115,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 2.3351351351351353,
|
|
"grad_norm": 10.12932624025978,
|
|
"learning_rate": 1.426683289622882e-06,
|
|
"loss": 0.5401148796081543,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 2.336036036036036,
|
|
"grad_norm": 11.568363376502854,
|
|
"learning_rate": 1.4230191784640911e-06,
|
|
"loss": 0.434628427028656,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 2.336936936936937,
|
|
"grad_norm": 12.54318580685461,
|
|
"learning_rate": 1.4193589977603261e-06,
|
|
"loss": 1.3618683815002441,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 2.3378378378378377,
|
|
"grad_norm": 9.090731008706989,
|
|
"learning_rate": 1.415702751533466e-06,
|
|
"loss": 0.3896636366844177,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 2.338738738738739,
|
|
"grad_norm": 12.430186836267136,
|
|
"learning_rate": 1.4120504438010608e-06,
|
|
"loss": 0.7276833653450012,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 2.3396396396396395,
|
|
"grad_norm": 20.635176549077414,
|
|
"learning_rate": 1.4084020785763403e-06,
|
|
"loss": 1.3383803367614746,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 2.3405405405405406,
|
|
"grad_norm": 8.024414170177936,
|
|
"learning_rate": 1.4047576598681945e-06,
|
|
"loss": 0.5920839309692383,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 2.3414414414414413,
|
|
"grad_norm": 10.529672228076668,
|
|
"learning_rate": 1.4011171916811833e-06,
|
|
"loss": 0.7410644292831421,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 2.3423423423423424,
|
|
"grad_norm": 16.447855063474016,
|
|
"learning_rate": 1.397480678015522e-06,
|
|
"loss": 1.6644569635391235,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 2.343243243243243,
|
|
"grad_norm": 18.214932581055802,
|
|
"learning_rate": 1.3938481228670775e-06,
|
|
"loss": 0.43077927827835083,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 2.344144144144144,
|
|
"grad_norm": 19.84624411854799,
|
|
"learning_rate": 1.390219530227378e-06,
|
|
"loss": 0.26516780257225037,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 2.345045045045045,
|
|
"grad_norm": 7.62250931529997,
|
|
"learning_rate": 1.3865949040835851e-06,
|
|
"loss": 0.8737815618515015,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 2.345945945945946,
|
|
"grad_norm": 17.033630577429417,
|
|
"learning_rate": 1.382974248418511e-06,
|
|
"loss": 0.43705296516418457,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 2.346846846846847,
|
|
"grad_norm": 8.45217583005999,
|
|
"learning_rate": 1.3793575672105986e-06,
|
|
"loss": 0.24107986688613892,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 2.3477477477477477,
|
|
"grad_norm": 17.71263020332587,
|
|
"learning_rate": 1.3757448644339288e-06,
|
|
"loss": 0.3925956189632416,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 2.3486486486486484,
|
|
"grad_norm": 8.510473014540176,
|
|
"learning_rate": 1.372136144058208e-06,
|
|
"loss": 0.36841726303100586,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 2.3495495495495495,
|
|
"grad_norm": 11.87396863630054,
|
|
"learning_rate": 1.368531410048765e-06,
|
|
"loss": 0.9469922780990601,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 2.3504504504504506,
|
|
"grad_norm": 12.730564315864262,
|
|
"learning_rate": 1.364930666366554e-06,
|
|
"loss": 0.6587448120117188,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 2.3513513513513513,
|
|
"grad_norm": 11.508251706554734,
|
|
"learning_rate": 1.3613339169681377e-06,
|
|
"loss": 0.5975847840309143,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 2.3522522522522524,
|
|
"grad_norm": 12.133444595845356,
|
|
"learning_rate": 1.3577411658056965e-06,
|
|
"loss": 0.3198261857032776,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 2.353153153153153,
|
|
"grad_norm": 14.218660516269727,
|
|
"learning_rate": 1.3541524168270115e-06,
|
|
"loss": 0.30245745182037354,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 2.354054054054054,
|
|
"grad_norm": 13.287873155532413,
|
|
"learning_rate": 1.3505676739754715e-06,
|
|
"loss": 0.5945942401885986,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 2.354954954954955,
|
|
"grad_norm": 11.50578039641458,
|
|
"learning_rate": 1.3469869411900572e-06,
|
|
"loss": 0.33322668075561523,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 2.355855855855856,
|
|
"grad_norm": 7.968826858711949,
|
|
"learning_rate": 1.343410222405348e-06,
|
|
"loss": 0.9494017362594604,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 2.3567567567567567,
|
|
"grad_norm": 10.911644363403369,
|
|
"learning_rate": 1.339837521551513e-06,
|
|
"loss": 0.6252211928367615,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 2.357657657657658,
|
|
"grad_norm": 11.911146566416196,
|
|
"learning_rate": 1.3362688425543014e-06,
|
|
"loss": 0.6049370169639587,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 2.3585585585585584,
|
|
"grad_norm": 11.090868085078775,
|
|
"learning_rate": 1.332704189335048e-06,
|
|
"loss": 1.1241182088851929,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 2.3594594594594596,
|
|
"grad_norm": 15.578730816301656,
|
|
"learning_rate": 1.32914356581066e-06,
|
|
"loss": 0.2764025926589966,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 2.3603603603603602,
|
|
"grad_norm": 8.021171358061203,
|
|
"learning_rate": 1.3255869758936214e-06,
|
|
"loss": 0.45589882135391235,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 2.3612612612612613,
|
|
"grad_norm": 6.953521470957379,
|
|
"learning_rate": 1.322034423491978e-06,
|
|
"loss": 0.16293835639953613,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 2.362162162162162,
|
|
"grad_norm": 12.559698917511975,
|
|
"learning_rate": 1.3184859125093458e-06,
|
|
"loss": 1.8385086059570312,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 2.363063063063063,
|
|
"grad_norm": 11.707425053406114,
|
|
"learning_rate": 1.3149414468448934e-06,
|
|
"loss": 0.46540600061416626,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 2.363963963963964,
|
|
"grad_norm": 12.447645842698154,
|
|
"learning_rate": 1.311401030393351e-06,
|
|
"loss": 0.5401040315628052,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 2.364864864864865,
|
|
"grad_norm": 11.019357377234464,
|
|
"learning_rate": 1.307864667044993e-06,
|
|
"loss": 0.8212162256240845,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 2.3657657657657656,
|
|
"grad_norm": 10.156610792104589,
|
|
"learning_rate": 1.304332360685645e-06,
|
|
"loss": 0.20631080865859985,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 2.3666666666666667,
|
|
"grad_norm": 8.326111524727171,
|
|
"learning_rate": 1.3008041151966727e-06,
|
|
"loss": 0.23290196061134338,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 2.3675675675675674,
|
|
"grad_norm": 9.268162081954008,
|
|
"learning_rate": 1.297279934454978e-06,
|
|
"loss": 0.18848250806331635,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 2.3684684684684685,
|
|
"grad_norm": 10.031325843891333,
|
|
"learning_rate": 1.2937598223330006e-06,
|
|
"loss": 0.4422239065170288,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 2.3693693693693696,
|
|
"grad_norm": 11.211182341662486,
|
|
"learning_rate": 1.290243782698703e-06,
|
|
"loss": 0.7510225176811218,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 2.3702702702702703,
|
|
"grad_norm": 13.481796126843822,
|
|
"learning_rate": 1.2867318194155832e-06,
|
|
"loss": 0.4507480263710022,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 2.371171171171171,
|
|
"grad_norm": 12.15068939344174,
|
|
"learning_rate": 1.283223936342649e-06,
|
|
"loss": 0.5214511156082153,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 2.372072072072072,
|
|
"grad_norm": 10.949746757845631,
|
|
"learning_rate": 1.279720137334432e-06,
|
|
"loss": 0.24835021793842316,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 2.372972972972973,
|
|
"grad_norm": 12.252618985064036,
|
|
"learning_rate": 1.2762204262409728e-06,
|
|
"loss": 0.7337615489959717,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 2.373873873873874,
|
|
"grad_norm": 9.277229362366574,
|
|
"learning_rate": 1.2727248069078197e-06,
|
|
"loss": 0.5765893459320068,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 2.374774774774775,
|
|
"grad_norm": 10.925858500937643,
|
|
"learning_rate": 1.2692332831760278e-06,
|
|
"loss": 0.40767624974250793,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 2.3756756756756756,
|
|
"grad_norm": 10.538126233935106,
|
|
"learning_rate": 1.2657458588821485e-06,
|
|
"loss": 0.29305821657180786,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 2.3765765765765767,
|
|
"grad_norm": 8.258931318940817,
|
|
"learning_rate": 1.2622625378582332e-06,
|
|
"loss": 0.19992494583129883,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 2.3774774774774774,
|
|
"grad_norm": 7.827790881705758,
|
|
"learning_rate": 1.2587833239318187e-06,
|
|
"loss": 0.27060243487358093,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 2.3783783783783785,
|
|
"grad_norm": 23.502798506925522,
|
|
"learning_rate": 1.2553082209259343e-06,
|
|
"loss": 1.5089101791381836,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 2.379279279279279,
|
|
"grad_norm": 19.300407883234428,
|
|
"learning_rate": 1.2518372326590878e-06,
|
|
"loss": 0.9108871817588806,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 2.3801801801801803,
|
|
"grad_norm": 19.059018043051722,
|
|
"learning_rate": 1.2483703629452693e-06,
|
|
"loss": 0.7407408952713013,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 2.381081081081081,
|
|
"grad_norm": 13.05390812421338,
|
|
"learning_rate": 1.2449076155939398e-06,
|
|
"loss": 0.6685821413993835,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 2.381981981981982,
|
|
"grad_norm": 9.377555362163923,
|
|
"learning_rate": 1.241448994410035e-06,
|
|
"loss": 0.7205036878585815,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 2.3828828828828827,
|
|
"grad_norm": 12.141375001759368,
|
|
"learning_rate": 1.2379945031939505e-06,
|
|
"loss": 0.7298334836959839,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 2.383783783783784,
|
|
"grad_norm": 8.749528904058588,
|
|
"learning_rate": 1.2345441457415502e-06,
|
|
"loss": 0.613317608833313,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 2.3846846846846845,
|
|
"grad_norm": 10.962029488738938,
|
|
"learning_rate": 1.231097925844153e-06,
|
|
"loss": 0.26030468940734863,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 2.3855855855855856,
|
|
"grad_norm": 18.964485987080764,
|
|
"learning_rate": 1.2276558472885292e-06,
|
|
"loss": 0.3774060010910034,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 2.3864864864864863,
|
|
"grad_norm": 14.988039416352846,
|
|
"learning_rate": 1.2242179138569034e-06,
|
|
"loss": 1.1203635931015015,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 2.3873873873873874,
|
|
"grad_norm": 9.620256470059601,
|
|
"learning_rate": 1.2207841293269396e-06,
|
|
"loss": 0.40332576632499695,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 2.388288288288288,
|
|
"grad_norm": 10.415620579539851,
|
|
"learning_rate": 1.2173544974717495e-06,
|
|
"loss": 0.2817927896976471,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 2.389189189189189,
|
|
"grad_norm": 14.655852119082983,
|
|
"learning_rate": 1.2139290220598742e-06,
|
|
"loss": 0.28211984038352966,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 2.39009009009009,
|
|
"grad_norm": 14.218803553584383,
|
|
"learning_rate": 1.2105077068552956e-06,
|
|
"loss": 0.5699129700660706,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 2.390990990990991,
|
|
"grad_norm": 12.564004231447765,
|
|
"learning_rate": 1.207090555617419e-06,
|
|
"loss": 0.6292193531990051,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 2.391891891891892,
|
|
"grad_norm": 15.224793030912462,
|
|
"learning_rate": 1.2036775721010734e-06,
|
|
"loss": 0.6220686435699463,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 2.3927927927927928,
|
|
"grad_norm": 12.57642723926645,
|
|
"learning_rate": 1.2002687600565138e-06,
|
|
"loss": 0.4795664846897125,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 2.3936936936936934,
|
|
"grad_norm": 13.192313397119912,
|
|
"learning_rate": 1.1968641232294054e-06,
|
|
"loss": 0.4635518789291382,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 2.3945945945945946,
|
|
"grad_norm": 8.527948504974212,
|
|
"learning_rate": 1.1934636653608306e-06,
|
|
"loss": 0.2009531855583191,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 2.3954954954954957,
|
|
"grad_norm": 7.423618142413011,
|
|
"learning_rate": 1.1900673901872755e-06,
|
|
"loss": 0.30849915742874146,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 2.3963963963963963,
|
|
"grad_norm": 10.824429456393483,
|
|
"learning_rate": 1.186675301440633e-06,
|
|
"loss": 0.42854294180870056,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 2.3972972972972975,
|
|
"grad_norm": 10.154317664268815,
|
|
"learning_rate": 1.1832874028481978e-06,
|
|
"loss": 0.29515066742897034,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 2.398198198198198,
|
|
"grad_norm": 11.387489762098907,
|
|
"learning_rate": 1.1799036981326534e-06,
|
|
"loss": 0.4566013813018799,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 2.3990990990990992,
|
|
"grad_norm": 10.031786853252997,
|
|
"learning_rate": 1.1765241910120828e-06,
|
|
"loss": 0.2901766002178192,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"grad_norm": 10.77154888790233,
|
|
"learning_rate": 1.1731488851999513e-06,
|
|
"loss": 0.4467163681983948,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 2.400900900900901,
|
|
"grad_norm": 10.077876677732808,
|
|
"learning_rate": 1.1697777844051105e-06,
|
|
"loss": 0.316597044467926,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 2.4018018018018017,
|
|
"grad_norm": 8.624959871842753,
|
|
"learning_rate": 1.166410892331789e-06,
|
|
"loss": 0.49726009368896484,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 2.402702702702703,
|
|
"grad_norm": 11.80420666971692,
|
|
"learning_rate": 1.163048212679595e-06,
|
|
"loss": 0.8117353320121765,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 2.4036036036036035,
|
|
"grad_norm": 12.29945813337615,
|
|
"learning_rate": 1.1596897491435023e-06,
|
|
"loss": 0.3540371060371399,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 2.4045045045045046,
|
|
"grad_norm": 13.554583419112339,
|
|
"learning_rate": 1.1563355054138575e-06,
|
|
"loss": 0.9296278953552246,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 2.4054054054054053,
|
|
"grad_norm": 10.2306958358243,
|
|
"learning_rate": 1.152985485176365e-06,
|
|
"loss": 0.2750551700592041,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 2.4063063063063064,
|
|
"grad_norm": 12.343688400189615,
|
|
"learning_rate": 1.149639692112095e-06,
|
|
"loss": 0.5621803998947144,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 2.407207207207207,
|
|
"grad_norm": 8.874695614140608,
|
|
"learning_rate": 1.1462981298974651e-06,
|
|
"loss": 0.3487135171890259,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 2.408108108108108,
|
|
"grad_norm": 12.80302754130961,
|
|
"learning_rate": 1.142960802204251e-06,
|
|
"loss": 0.9279593229293823,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 2.409009009009009,
|
|
"grad_norm": 11.9350463131864,
|
|
"learning_rate": 1.1396277126995709e-06,
|
|
"loss": 0.3281620144844055,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 2.40990990990991,
|
|
"grad_norm": 13.01048361599438,
|
|
"learning_rate": 1.1362988650458845e-06,
|
|
"loss": 0.44163113832473755,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 2.410810810810811,
|
|
"grad_norm": 15.599219513642298,
|
|
"learning_rate": 1.1329742629009987e-06,
|
|
"loss": 0.6006256341934204,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 2.4117117117117117,
|
|
"grad_norm": 7.713236398177726,
|
|
"learning_rate": 1.1296539099180464e-06,
|
|
"loss": 0.25887003540992737,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 2.4126126126126124,
|
|
"grad_norm": 12.457655498223152,
|
|
"learning_rate": 1.126337809745498e-06,
|
|
"loss": 0.7419452667236328,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 2.4135135135135135,
|
|
"grad_norm": 20.065914991776893,
|
|
"learning_rate": 1.1230259660271443e-06,
|
|
"loss": 0.9115269184112549,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 2.4144144144144146,
|
|
"grad_norm": 18.767064528646536,
|
|
"learning_rate": 1.1197183824021052e-06,
|
|
"loss": 2.413602352142334,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 2.4153153153153153,
|
|
"grad_norm": 5.151645980162742,
|
|
"learning_rate": 1.1164150625048164e-06,
|
|
"loss": 0.21344856917858124,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 2.4162162162162164,
|
|
"grad_norm": 16.2651109314574,
|
|
"learning_rate": 1.113116009965028e-06,
|
|
"loss": 0.23965248465538025,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 2.417117117117117,
|
|
"grad_norm": 12.271976718031095,
|
|
"learning_rate": 1.1098212284078037e-06,
|
|
"loss": 0.47895270586013794,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 2.418018018018018,
|
|
"grad_norm": 13.982284238144109,
|
|
"learning_rate": 1.1065307214535104e-06,
|
|
"loss": 0.3592027425765991,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 2.418918918918919,
|
|
"grad_norm": 14.500962983533217,
|
|
"learning_rate": 1.1032444927178226e-06,
|
|
"loss": 1.103131651878357,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 2.41981981981982,
|
|
"grad_norm": 12.395740423285796,
|
|
"learning_rate": 1.0999625458117092e-06,
|
|
"loss": 0.6390451192855835,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 2.4207207207207206,
|
|
"grad_norm": 7.942341981552817,
|
|
"learning_rate": 1.0966848843414386e-06,
|
|
"loss": 0.24363839626312256,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 2.4216216216216218,
|
|
"grad_norm": 20.957466451914467,
|
|
"learning_rate": 1.0934115119085647e-06,
|
|
"loss": 0.8040348291397095,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 2.4225225225225224,
|
|
"grad_norm": 13.423648051815215,
|
|
"learning_rate": 1.0901424321099346e-06,
|
|
"loss": 0.47693151235580444,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 2.4234234234234235,
|
|
"grad_norm": 12.691923620714201,
|
|
"learning_rate": 1.0868776485376763e-06,
|
|
"loss": 0.7961431741714478,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 2.424324324324324,
|
|
"grad_norm": 8.736337475525781,
|
|
"learning_rate": 1.0836171647791938e-06,
|
|
"loss": 0.21177604794502258,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 2.4252252252252253,
|
|
"grad_norm": 9.383564788527963,
|
|
"learning_rate": 1.080360984417172e-06,
|
|
"loss": 0.18495744466781616,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 2.426126126126126,
|
|
"grad_norm": 12.149946860892875,
|
|
"learning_rate": 1.0771091110295612e-06,
|
|
"loss": 0.4451485276222229,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 2.427027027027027,
|
|
"grad_norm": 11.67811862675532,
|
|
"learning_rate": 1.0738615481895853e-06,
|
|
"loss": 0.5266054272651672,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 2.4279279279279278,
|
|
"grad_norm": 14.849514912812584,
|
|
"learning_rate": 1.0706182994657256e-06,
|
|
"loss": 0.7762129306793213,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 2.428828828828829,
|
|
"grad_norm": 11.60867834650254,
|
|
"learning_rate": 1.0673793684217287e-06,
|
|
"loss": 0.5322248339653015,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 2.4297297297297296,
|
|
"grad_norm": 11.84218162284018,
|
|
"learning_rate": 1.064144758616591e-06,
|
|
"loss": 0.3062615990638733,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 2.4306306306306307,
|
|
"grad_norm": 10.724465424230159,
|
|
"learning_rate": 1.0609144736045668e-06,
|
|
"loss": 0.8998652100563049,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 2.4315315315315313,
|
|
"grad_norm": 9.500583181441455,
|
|
"learning_rate": 1.0576885169351524e-06,
|
|
"loss": 0.49134063720703125,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 2.4324324324324325,
|
|
"grad_norm": 9.765274644223428,
|
|
"learning_rate": 1.0544668921530932e-06,
|
|
"loss": 0.40656042098999023,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 2.4333333333333336,
|
|
"grad_norm": 14.180153492991261,
|
|
"learning_rate": 1.0512496027983715e-06,
|
|
"loss": 0.3051440417766571,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 2.4342342342342342,
|
|
"grad_norm": 8.5215752388365,
|
|
"learning_rate": 1.0480366524062041e-06,
|
|
"loss": 0.3623253405094147,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 2.435135135135135,
|
|
"grad_norm": 12.102979273045491,
|
|
"learning_rate": 1.0448280445070458e-06,
|
|
"loss": 0.4855523407459259,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 2.436036036036036,
|
|
"grad_norm": 10.951566476655008,
|
|
"learning_rate": 1.0416237826265723e-06,
|
|
"loss": 0.3727246820926666,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 2.436936936936937,
|
|
"grad_norm": 16.248462865581125,
|
|
"learning_rate": 1.0384238702856935e-06,
|
|
"loss": 0.33596110343933105,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 2.437837837837838,
|
|
"grad_norm": 9.516995403990514,
|
|
"learning_rate": 1.0352283110005296e-06,
|
|
"loss": 0.21068738400936127,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 2.438738738738739,
|
|
"grad_norm": 13.136869192821923,
|
|
"learning_rate": 1.032037108282426e-06,
|
|
"loss": 0.636005163192749,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 2.4396396396396396,
|
|
"grad_norm": 13.883140995571818,
|
|
"learning_rate": 1.0288502656379351e-06,
|
|
"loss": 0.2538459897041321,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 2.4405405405405407,
|
|
"grad_norm": 9.42431338236059,
|
|
"learning_rate": 1.0256677865688197e-06,
|
|
"loss": 0.4209163188934326,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 2.4414414414414414,
|
|
"grad_norm": 11.444282007328898,
|
|
"learning_rate": 1.0224896745720513e-06,
|
|
"loss": 0.2533435821533203,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 2.4423423423423425,
|
|
"grad_norm": 9.01683150092805,
|
|
"learning_rate": 1.0193159331397977e-06,
|
|
"loss": 0.7453408241271973,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 2.443243243243243,
|
|
"grad_norm": 7.270793979304956,
|
|
"learning_rate": 1.0161465657594293e-06,
|
|
"loss": 0.2630300521850586,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 2.4441441441441443,
|
|
"grad_norm": 11.058542885628082,
|
|
"learning_rate": 1.0129815759135054e-06,
|
|
"loss": 0.5038114190101624,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 2.445045045045045,
|
|
"grad_norm": 11.35858670184428,
|
|
"learning_rate": 1.00982096707978e-06,
|
|
"loss": 0.5896291732788086,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 2.445945945945946,
|
|
"grad_norm": 13.015325694641724,
|
|
"learning_rate": 1.006664742731187e-06,
|
|
"loss": 0.466558575630188,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 2.4468468468468467,
|
|
"grad_norm": 13.1867455313214,
|
|
"learning_rate": 1.00351290633585e-06,
|
|
"loss": 0.5940424203872681,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 2.447747747747748,
|
|
"grad_norm": 10.949615598143545,
|
|
"learning_rate": 1.000365461357064e-06,
|
|
"loss": 0.3536610007286072,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 2.4486486486486485,
|
|
"grad_norm": 11.424826670440734,
|
|
"learning_rate": 9.972224112533046e-07,
|
|
"loss": 0.2909022867679596,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 2.4495495495495496,
|
|
"grad_norm": 11.051438094626672,
|
|
"learning_rate": 9.940837594782128e-07,
|
|
"loss": 0.6904971599578857,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 2.4504504504504503,
|
|
"grad_norm": 12.78015404531044,
|
|
"learning_rate": 9.90949509480601e-07,
|
|
"loss": 0.3737218976020813,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 2.4513513513513514,
|
|
"grad_norm": 8.983036615223945,
|
|
"learning_rate": 9.878196647044435e-07,
|
|
"loss": 0.2659727931022644,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 2.452252252252252,
|
|
"grad_norm": 9.558765853040109,
|
|
"learning_rate": 9.846942285888716e-07,
|
|
"loss": 0.9288277626037598,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 2.453153153153153,
|
|
"grad_norm": 8.456022072720478,
|
|
"learning_rate": 9.81573204568177e-07,
|
|
"loss": 0.5387973189353943,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 2.454054054054054,
|
|
"grad_norm": 11.695104597533478,
|
|
"learning_rate": 9.784565960717978e-07,
|
|
"loss": 0.20687323808670044,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 2.454954954954955,
|
|
"grad_norm": 10.83457751472315,
|
|
"learning_rate": 9.753444065243263e-07,
|
|
"loss": 0.29569950699806213,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 2.455855855855856,
|
|
"grad_norm": 11.602520717645529,
|
|
"learning_rate": 9.722366393454929e-07,
|
|
"loss": 0.41959670186042786,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 2.4567567567567568,
|
|
"grad_norm": 9.845789746597728,
|
|
"learning_rate": 9.691332979501738e-07,
|
|
"loss": 0.8155454397201538,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 2.4576576576576574,
|
|
"grad_norm": 12.3988207562276,
|
|
"learning_rate": 9.660343857483801e-07,
|
|
"loss": 0.25288331508636475,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 2.4585585585585585,
|
|
"grad_norm": 10.14823444046449,
|
|
"learning_rate": 9.629399061452533e-07,
|
|
"loss": 0.5517836809158325,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 2.4594594594594597,
|
|
"grad_norm": 8.837155572379947,
|
|
"learning_rate": 9.598498625410695e-07,
|
|
"loss": 0.310319185256958,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 2.4603603603603603,
|
|
"grad_norm": 7.122769074595653,
|
|
"learning_rate": 9.56764258331226e-07,
|
|
"loss": 0.13699156045913696,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 2.4612612612612614,
|
|
"grad_norm": 16.731880366793966,
|
|
"learning_rate": 9.536830969062456e-07,
|
|
"loss": 0.6171733140945435,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 2.462162162162162,
|
|
"grad_norm": 6.928365489035181,
|
|
"learning_rate": 9.506063816517652e-07,
|
|
"loss": 0.24341315031051636,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 2.463063063063063,
|
|
"grad_norm": 9.694267931522264,
|
|
"learning_rate": 9.475341159485396e-07,
|
|
"loss": 0.2506348490715027,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 2.463963963963964,
|
|
"grad_norm": 20.161890960014716,
|
|
"learning_rate": 9.444663031724349e-07,
|
|
"loss": 0.6052705645561218,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 2.464864864864865,
|
|
"grad_norm": 9.816328966461741,
|
|
"learning_rate": 9.414029466944196e-07,
|
|
"loss": 0.5910078883171082,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 2.4657657657657657,
|
|
"grad_norm": 13.501478278364509,
|
|
"learning_rate": 9.383440498805712e-07,
|
|
"loss": 0.4284709095954895,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 2.466666666666667,
|
|
"grad_norm": 14.078045085556997,
|
|
"learning_rate": 9.35289616092061e-07,
|
|
"loss": 0.4995902478694916,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 2.4675675675675675,
|
|
"grad_norm": 10.095295580834787,
|
|
"learning_rate": 9.322396486851626e-07,
|
|
"loss": 0.282894492149353,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 2.4684684684684686,
|
|
"grad_norm": 9.508335843182428,
|
|
"learning_rate": 9.291941510112362e-07,
|
|
"loss": 0.4157346785068512,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 2.4693693693693692,
|
|
"grad_norm": 12.648189574783698,
|
|
"learning_rate": 9.261531264167345e-07,
|
|
"loss": 0.28353965282440186,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 2.4702702702702704,
|
|
"grad_norm": 12.457127800370698,
|
|
"learning_rate": 9.231165782431916e-07,
|
|
"loss": 1.0120548009872437,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 2.471171171171171,
|
|
"grad_norm": 26.724546681563147,
|
|
"learning_rate": 9.200845098272276e-07,
|
|
"loss": 0.886185884475708,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 2.472072072072072,
|
|
"grad_norm": 12.488642280138567,
|
|
"learning_rate": 9.170569245005345e-07,
|
|
"loss": 0.41486501693725586,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 2.472972972972973,
|
|
"grad_norm": 9.79116171773031,
|
|
"learning_rate": 9.140338255898834e-07,
|
|
"loss": 0.2732367515563965,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 2.473873873873874,
|
|
"grad_norm": 15.016233353913933,
|
|
"learning_rate": 9.110152164171127e-07,
|
|
"loss": 0.9172552824020386,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 2.4747747747747746,
|
|
"grad_norm": 8.961640954986999,
|
|
"learning_rate": 9.080011002991257e-07,
|
|
"loss": 0.29976850748062134,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 2.4756756756756757,
|
|
"grad_norm": 13.086013721976759,
|
|
"learning_rate": 9.049914805478932e-07,
|
|
"loss": 0.8649406433105469,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 2.4765765765765764,
|
|
"grad_norm": 16.055503602676147,
|
|
"learning_rate": 9.019863604704421e-07,
|
|
"loss": 0.8113471865653992,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 2.4774774774774775,
|
|
"grad_norm": 9.10005654812221,
|
|
"learning_rate": 8.989857433688576e-07,
|
|
"loss": 0.3861403465270996,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 2.4783783783783786,
|
|
"grad_norm": 12.038481207473978,
|
|
"learning_rate": 8.959896325402728e-07,
|
|
"loss": 0.3215363025665283,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 2.4792792792792793,
|
|
"grad_norm": 10.720590684546965,
|
|
"learning_rate": 8.929980312768738e-07,
|
|
"loss": 0.5568872094154358,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 2.48018018018018,
|
|
"grad_norm": 12.123947318739067,
|
|
"learning_rate": 8.900109428658871e-07,
|
|
"loss": 0.44781431555747986,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 2.481081081081081,
|
|
"grad_norm": 9.79153280344517,
|
|
"learning_rate": 8.870283705895855e-07,
|
|
"loss": 0.3352649211883545,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 2.481981981981982,
|
|
"grad_norm": 10.474425766680636,
|
|
"learning_rate": 8.840503177252746e-07,
|
|
"loss": 0.9820655584335327,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 2.482882882882883,
|
|
"grad_norm": 13.449977009017616,
|
|
"learning_rate": 8.810767875452952e-07,
|
|
"loss": 0.8978846073150635,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 2.483783783783784,
|
|
"grad_norm": 6.85527356723595,
|
|
"learning_rate": 8.781077833170215e-07,
|
|
"loss": 0.2883230447769165,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 2.4846846846846846,
|
|
"grad_norm": 11.788202231972665,
|
|
"learning_rate": 8.751433083028493e-07,
|
|
"loss": 0.8722458481788635,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 2.4855855855855857,
|
|
"grad_norm": 7.477429404941512,
|
|
"learning_rate": 8.721833657602041e-07,
|
|
"loss": 0.446788489818573,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 2.4864864864864864,
|
|
"grad_norm": 10.823394975872683,
|
|
"learning_rate": 8.692279589415237e-07,
|
|
"loss": 0.6068504452705383,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 2.4873873873873875,
|
|
"grad_norm": 12.683265359244906,
|
|
"learning_rate": 8.662770910942691e-07,
|
|
"loss": 0.5869661569595337,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 2.488288288288288,
|
|
"grad_norm": 20.22563639414899,
|
|
"learning_rate": 8.633307654609074e-07,
|
|
"loss": 0.6758477687835693,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 2.4891891891891893,
|
|
"grad_norm": 8.320916830477627,
|
|
"learning_rate": 8.603889852789188e-07,
|
|
"loss": 0.4772491455078125,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 2.49009009009009,
|
|
"grad_norm": 15.250771793999402,
|
|
"learning_rate": 8.574517537807897e-07,
|
|
"loss": 0.7708054780960083,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 2.490990990990991,
|
|
"grad_norm": 10.554761116145748,
|
|
"learning_rate": 8.545190741940035e-07,
|
|
"loss": 0.5152315497398376,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 2.4918918918918918,
|
|
"grad_norm": 13.876390538075732,
|
|
"learning_rate": 8.515909497410463e-07,
|
|
"loss": 0.4591796398162842,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 2.492792792792793,
|
|
"grad_norm": 8.38313064008368,
|
|
"learning_rate": 8.48667383639396e-07,
|
|
"loss": 0.29356512427330017,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 2.4936936936936935,
|
|
"grad_norm": 12.98832797562063,
|
|
"learning_rate": 8.457483791015247e-07,
|
|
"loss": 0.4864840805530548,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 2.4945945945945946,
|
|
"grad_norm": 11.411779432463732,
|
|
"learning_rate": 8.428339393348889e-07,
|
|
"loss": 0.45541438460350037,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 2.4954954954954953,
|
|
"grad_norm": 7.802160511790263,
|
|
"learning_rate": 8.399240675419324e-07,
|
|
"loss": 0.5177165865898132,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 2.4963963963963964,
|
|
"grad_norm": 10.767724983891256,
|
|
"learning_rate": 8.370187669200763e-07,
|
|
"loss": 0.24317491054534912,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 2.4972972972972975,
|
|
"grad_norm": 8.393203214778845,
|
|
"learning_rate": 8.341180406617222e-07,
|
|
"loss": 0.3023187518119812,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 2.498198198198198,
|
|
"grad_norm": 20.047387496417723,
|
|
"learning_rate": 8.31221891954243e-07,
|
|
"loss": 0.5361258387565613,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 2.499099099099099,
|
|
"grad_norm": 12.704626402333194,
|
|
"learning_rate": 8.283303239799812e-07,
|
|
"loss": 0.41863399744033813,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 12.384659381413668,
|
|
"learning_rate": 8.254433399162493e-07,
|
|
"loss": 0.3420434594154358,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 2.500900900900901,
|
|
"grad_norm": 12.849105785920203,
|
|
"learning_rate": 8.225609429353187e-07,
|
|
"loss": 0.26572927832603455,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 2.501801801801802,
|
|
"grad_norm": 10.502186478945909,
|
|
"learning_rate": 8.196831362044239e-07,
|
|
"loss": 0.5782580971717834,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 2.5027027027027025,
|
|
"grad_norm": 10.156006592062825,
|
|
"learning_rate": 8.168099228857507e-07,
|
|
"loss": 0.36796340346336365,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 2.5036036036036036,
|
|
"grad_norm": 7.704635180486914,
|
|
"learning_rate": 8.139413061364465e-07,
|
|
"loss": 0.3754027485847473,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 2.5045045045045047,
|
|
"grad_norm": 18.06017832752059,
|
|
"learning_rate": 8.110772891085994e-07,
|
|
"loss": 1.1590102910995483,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 2.5054054054054054,
|
|
"grad_norm": 16.818311897507186,
|
|
"learning_rate": 8.082178749492447e-07,
|
|
"loss": 0.3621762692928314,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 2.5063063063063065,
|
|
"grad_norm": 13.018408238805641,
|
|
"learning_rate": 8.053630668003642e-07,
|
|
"loss": 0.5368507504463196,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 2.507207207207207,
|
|
"grad_norm": 9.370777511282409,
|
|
"learning_rate": 8.02512867798873e-07,
|
|
"loss": 0.6472790837287903,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 2.5081081081081082,
|
|
"grad_norm": 14.778353065331993,
|
|
"learning_rate": 7.996672810766271e-07,
|
|
"loss": 0.42966964840888977,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 2.509009009009009,
|
|
"grad_norm": 8.272991275208621,
|
|
"learning_rate": 7.968263097604095e-07,
|
|
"loss": 0.23913973569869995,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 2.50990990990991,
|
|
"grad_norm": 8.85106374765664,
|
|
"learning_rate": 7.939899569719356e-07,
|
|
"loss": 0.7383207678794861,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 2.5108108108108107,
|
|
"grad_norm": 12.949472758039786,
|
|
"learning_rate": 7.911582258278422e-07,
|
|
"loss": 0.7164812088012695,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 2.511711711711712,
|
|
"grad_norm": 10.196875911406964,
|
|
"learning_rate": 7.88331119439692e-07,
|
|
"loss": 0.5378541946411133,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 2.5126126126126125,
|
|
"grad_norm": 7.234108685746755,
|
|
"learning_rate": 7.855086409139612e-07,
|
|
"loss": 0.39583760499954224,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 2.5135135135135136,
|
|
"grad_norm": 11.629473673891297,
|
|
"learning_rate": 7.826907933520462e-07,
|
|
"loss": 0.23455587029457092,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 2.5144144144144143,
|
|
"grad_norm": 13.185689086343592,
|
|
"learning_rate": 7.798775798502484e-07,
|
|
"loss": 0.5355321764945984,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 2.5153153153153154,
|
|
"grad_norm": 8.547138181766927,
|
|
"learning_rate": 7.770690034997841e-07,
|
|
"loss": 0.24441684782505035,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 2.516216216216216,
|
|
"grad_norm": 6.980235224218154,
|
|
"learning_rate": 7.742650673867675e-07,
|
|
"loss": 0.22758543491363525,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 2.517117117117117,
|
|
"grad_norm": 13.01033463435246,
|
|
"learning_rate": 7.714657745922194e-07,
|
|
"loss": 1.046520709991455,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 2.518018018018018,
|
|
"grad_norm": 12.054871166483865,
|
|
"learning_rate": 7.686711281920567e-07,
|
|
"loss": 0.3230191767215729,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 2.518918918918919,
|
|
"grad_norm": 10.624917173731054,
|
|
"learning_rate": 7.658811312570885e-07,
|
|
"loss": 0.25653040409088135,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 2.51981981981982,
|
|
"grad_norm": 10.269390116412083,
|
|
"learning_rate": 7.630957868530193e-07,
|
|
"loss": 0.2573622763156891,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 2.5207207207207207,
|
|
"grad_norm": 7.930807764459055,
|
|
"learning_rate": 7.603150980404362e-07,
|
|
"loss": 0.3527355194091797,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 2.5216216216216214,
|
|
"grad_norm": 9.467168878249831,
|
|
"learning_rate": 7.575390678748157e-07,
|
|
"loss": 0.38093167543411255,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 2.5225225225225225,
|
|
"grad_norm": 10.378097052204929,
|
|
"learning_rate": 7.547676994065118e-07,
|
|
"loss": 0.2759009897708893,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 2.5234234234234236,
|
|
"grad_norm": 9.870493095046264,
|
|
"learning_rate": 7.520009956807561e-07,
|
|
"loss": 0.5271565914154053,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 2.5243243243243243,
|
|
"grad_norm": 9.136099438325527,
|
|
"learning_rate": 7.492389597376576e-07,
|
|
"loss": 0.29306572675704956,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 2.525225225225225,
|
|
"grad_norm": 28.319250158420076,
|
|
"learning_rate": 7.464815946121929e-07,
|
|
"loss": 0.5735443234443665,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 2.526126126126126,
|
|
"grad_norm": 16.142434433625084,
|
|
"learning_rate": 7.437289033342093e-07,
|
|
"loss": 0.533798098564148,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 2.527027027027027,
|
|
"grad_norm": 11.832880177795374,
|
|
"learning_rate": 7.409808889284143e-07,
|
|
"loss": 0.41538727283477783,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 2.527927927927928,
|
|
"grad_norm": 6.876405183638195,
|
|
"learning_rate": 7.382375544143811e-07,
|
|
"loss": 0.32754501700401306,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 2.528828828828829,
|
|
"grad_norm": 12.662910697067751,
|
|
"learning_rate": 7.354989028065357e-07,
|
|
"loss": 0.48083919286727905,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 2.5297297297297296,
|
|
"grad_norm": 8.905363883940831,
|
|
"learning_rate": 7.327649371141626e-07,
|
|
"loss": 0.22961552441120148,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 2.5306306306306308,
|
|
"grad_norm": 12.711584661950091,
|
|
"learning_rate": 7.300356603413966e-07,
|
|
"loss": 0.32955026626586914,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 2.5315315315315314,
|
|
"grad_norm": 11.819959369245261,
|
|
"learning_rate": 7.27311075487217e-07,
|
|
"loss": 0.3045678436756134,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 2.5324324324324325,
|
|
"grad_norm": 9.582711622109226,
|
|
"learning_rate": 7.245911855454524e-07,
|
|
"loss": 0.26437145471572876,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 2.533333333333333,
|
|
"grad_norm": 10.159482289942703,
|
|
"learning_rate": 7.218759935047665e-07,
|
|
"loss": 0.4841468334197998,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 2.5342342342342343,
|
|
"grad_norm": 13.78607407495029,
|
|
"learning_rate": 7.191655023486682e-07,
|
|
"loss": 0.5130227208137512,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 2.535135135135135,
|
|
"grad_norm": 17.69403701962981,
|
|
"learning_rate": 7.164597150554936e-07,
|
|
"loss": 0.3569299876689911,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 2.536036036036036,
|
|
"grad_norm": 12.797714777488547,
|
|
"learning_rate": 7.137586345984165e-07,
|
|
"loss": 0.454569548368454,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 2.536936936936937,
|
|
"grad_norm": 15.035730903363023,
|
|
"learning_rate": 7.110622639454335e-07,
|
|
"loss": 0.8996487259864807,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 2.537837837837838,
|
|
"grad_norm": 9.225561013089449,
|
|
"learning_rate": 7.083706060593704e-07,
|
|
"loss": 0.316983699798584,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 2.538738738738739,
|
|
"grad_norm": 6.9607596463470385,
|
|
"learning_rate": 7.056836638978698e-07,
|
|
"loss": 0.29841530323028564,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 2.5396396396396397,
|
|
"grad_norm": 10.171800807078554,
|
|
"learning_rate": 7.030014404133984e-07,
|
|
"loss": 0.263772189617157,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 2.5405405405405403,
|
|
"grad_norm": 13.953416938579434,
|
|
"learning_rate": 7.003239385532324e-07,
|
|
"loss": 0.643886148929596,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 2.5414414414414415,
|
|
"grad_norm": 12.628443446584198,
|
|
"learning_rate": 6.976511612594622e-07,
|
|
"loss": 0.7477445006370544,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 2.5423423423423426,
|
|
"grad_norm": 7.342700406531725,
|
|
"learning_rate": 6.94983111468987e-07,
|
|
"loss": 0.20469197630882263,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 2.5432432432432432,
|
|
"grad_norm": 10.276771013603613,
|
|
"learning_rate": 6.923197921135117e-07,
|
|
"loss": 0.4767210781574249,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 2.544144144144144,
|
|
"grad_norm": 16.75752980705756,
|
|
"learning_rate": 6.89661206119543e-07,
|
|
"loss": 0.38303235173225403,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 2.545045045045045,
|
|
"grad_norm": 12.627027923019437,
|
|
"learning_rate": 6.87007356408384e-07,
|
|
"loss": 0.7219441533088684,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 2.545945945945946,
|
|
"grad_norm": 16.181118085524965,
|
|
"learning_rate": 6.843582458961384e-07,
|
|
"loss": 0.30149808526039124,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 2.546846846846847,
|
|
"grad_norm": 12.478379524267625,
|
|
"learning_rate": 6.817138774936976e-07,
|
|
"loss": 0.4125371277332306,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 2.5477477477477475,
|
|
"grad_norm": 11.586766250938542,
|
|
"learning_rate": 6.790742541067441e-07,
|
|
"loss": 0.36932528018951416,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 2.5486486486486486,
|
|
"grad_norm": 8.164706687612089,
|
|
"learning_rate": 6.764393786357476e-07,
|
|
"loss": 0.44537636637687683,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 2.5495495495495497,
|
|
"grad_norm": 11.391787633194824,
|
|
"learning_rate": 6.738092539759589e-07,
|
|
"loss": 0.3408915400505066,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 2.5504504504504504,
|
|
"grad_norm": 12.578359600083925,
|
|
"learning_rate": 6.711838830174106e-07,
|
|
"loss": 0.316663920879364,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 2.5513513513513515,
|
|
"grad_norm": 9.511460694028267,
|
|
"learning_rate": 6.685632686449084e-07,
|
|
"loss": 0.25527459383010864,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 2.552252252252252,
|
|
"grad_norm": 7.891829504280663,
|
|
"learning_rate": 6.659474137380367e-07,
|
|
"loss": 0.2744280695915222,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 2.5531531531531533,
|
|
"grad_norm": 6.566574909703896,
|
|
"learning_rate": 6.633363211711435e-07,
|
|
"loss": 0.28016623854637146,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 2.554054054054054,
|
|
"grad_norm": 19.180555503186014,
|
|
"learning_rate": 6.607299938133499e-07,
|
|
"loss": 0.4957726299762726,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 2.554954954954955,
|
|
"grad_norm": 12.288713341044215,
|
|
"learning_rate": 6.581284345285371e-07,
|
|
"loss": 0.7940866351127625,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 2.5558558558558557,
|
|
"grad_norm": 8.82895512056585,
|
|
"learning_rate": 6.55531646175348e-07,
|
|
"loss": 0.45401430130004883,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 2.556756756756757,
|
|
"grad_norm": 11.756629631873064,
|
|
"learning_rate": 6.529396316071851e-07,
|
|
"loss": 0.5431941151618958,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 2.5576576576576575,
|
|
"grad_norm": 11.969502445434662,
|
|
"learning_rate": 6.503523936722017e-07,
|
|
"loss": 0.4053301215171814,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 2.5585585585585586,
|
|
"grad_norm": 9.058846918218157,
|
|
"learning_rate": 6.47769935213306e-07,
|
|
"loss": 0.16752083599567413,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 2.5594594594594593,
|
|
"grad_norm": 12.549421008138633,
|
|
"learning_rate": 6.451922590681509e-07,
|
|
"loss": 0.5353535413742065,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 2.5603603603603604,
|
|
"grad_norm": 9.233954840547739,
|
|
"learning_rate": 6.426193680691384e-07,
|
|
"loss": 0.6705703139305115,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 2.5612612612612615,
|
|
"grad_norm": 10.078066790506593,
|
|
"learning_rate": 6.400512650434082e-07,
|
|
"loss": 0.39536216855049133,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 2.562162162162162,
|
|
"grad_norm": 11.840546024997153,
|
|
"learning_rate": 6.374879528128441e-07,
|
|
"loss": 0.5658566355705261,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 2.563063063063063,
|
|
"grad_norm": 5.645452605399901,
|
|
"learning_rate": 6.349294341940593e-07,
|
|
"loss": 0.1515130251646042,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 2.563963963963964,
|
|
"grad_norm": 12.147964576938012,
|
|
"learning_rate": 6.323757119984053e-07,
|
|
"loss": 0.6295697093009949,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 2.564864864864865,
|
|
"grad_norm": 9.706790522003507,
|
|
"learning_rate": 6.29826789031961e-07,
|
|
"loss": 0.4312012791633606,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 2.5657657657657658,
|
|
"grad_norm": 9.446993445594122,
|
|
"learning_rate": 6.272826680955296e-07,
|
|
"loss": 0.43167930841445923,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 2.5666666666666664,
|
|
"grad_norm": 15.33390394523994,
|
|
"learning_rate": 6.247433519846424e-07,
|
|
"loss": 0.6086018085479736,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 2.5675675675675675,
|
|
"grad_norm": 11.20570689593158,
|
|
"learning_rate": 6.222088434895462e-07,
|
|
"loss": 0.7297212481498718,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 2.5684684684684687,
|
|
"grad_norm": 9.884279470721058,
|
|
"learning_rate": 6.1967914539521e-07,
|
|
"loss": 0.667428731918335,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 2.5693693693693693,
|
|
"grad_norm": 12.952756668389576,
|
|
"learning_rate": 6.171542604813113e-07,
|
|
"loss": 0.4452500343322754,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 2.57027027027027,
|
|
"grad_norm": 11.905149875696582,
|
|
"learning_rate": 6.146341915222459e-07,
|
|
"loss": 0.6105617880821228,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 2.571171171171171,
|
|
"grad_norm": 11.820755527856914,
|
|
"learning_rate": 6.12118941287112e-07,
|
|
"loss": 0.5909096002578735,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 2.5720720720720722,
|
|
"grad_norm": 12.628751523129457,
|
|
"learning_rate": 6.096085125397138e-07,
|
|
"loss": 0.7401934862136841,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 2.572972972972973,
|
|
"grad_norm": 8.191129931939585,
|
|
"learning_rate": 6.071029080385604e-07,
|
|
"loss": 0.4276942014694214,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 2.573873873873874,
|
|
"grad_norm": 8.703721858390045,
|
|
"learning_rate": 6.046021305368554e-07,
|
|
"loss": 0.30004197359085083,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 2.5747747747747747,
|
|
"grad_norm": 8.553950762759087,
|
|
"learning_rate": 6.021061827825042e-07,
|
|
"loss": 0.5719914436340332,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 2.575675675675676,
|
|
"grad_norm": 14.224655598442842,
|
|
"learning_rate": 5.99615067518099e-07,
|
|
"loss": 0.43226099014282227,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 2.5765765765765765,
|
|
"grad_norm": 14.871823642636507,
|
|
"learning_rate": 5.971287874809273e-07,
|
|
"loss": 0.5740262269973755,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 2.5774774774774776,
|
|
"grad_norm": 8.099573713687539,
|
|
"learning_rate": 5.946473454029594e-07,
|
|
"loss": 0.2974473237991333,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 2.5783783783783782,
|
|
"grad_norm": 12.63806722855833,
|
|
"learning_rate": 5.921707440108526e-07,
|
|
"loss": 1.7352497577667236,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 2.5792792792792794,
|
|
"grad_norm": 6.300345224350456,
|
|
"learning_rate": 5.896989860259433e-07,
|
|
"loss": 0.3095148205757141,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 2.58018018018018,
|
|
"grad_norm": 15.20115945583773,
|
|
"learning_rate": 5.872320741642474e-07,
|
|
"loss": 0.36338961124420166,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 2.581081081081081,
|
|
"grad_norm": 14.771699131375906,
|
|
"learning_rate": 5.847700111364529e-07,
|
|
"loss": 0.4657498300075531,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 2.581981981981982,
|
|
"grad_norm": 9.717327746058759,
|
|
"learning_rate": 5.823127996479233e-07,
|
|
"loss": 0.5524632930755615,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 2.582882882882883,
|
|
"grad_norm": 13.80679440350738,
|
|
"learning_rate": 5.798604423986909e-07,
|
|
"loss": 0.7932208180427551,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 2.583783783783784,
|
|
"grad_norm": 13.23848968111488,
|
|
"learning_rate": 5.774129420834501e-07,
|
|
"loss": 0.547808051109314,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 2.5846846846846847,
|
|
"grad_norm": 11.49878569960777,
|
|
"learning_rate": 5.749703013915631e-07,
|
|
"loss": 0.23563973605632782,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 2.5855855855855854,
|
|
"grad_norm": 15.194198862846598,
|
|
"learning_rate": 5.725325230070488e-07,
|
|
"loss": 0.247900128364563,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 2.5864864864864865,
|
|
"grad_norm": 13.752781534722335,
|
|
"learning_rate": 5.70099609608587e-07,
|
|
"loss": 0.8496720194816589,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 2.5873873873873876,
|
|
"grad_norm": 10.879228284947253,
|
|
"learning_rate": 5.676715638695063e-07,
|
|
"loss": 0.3789510726928711,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 2.5882882882882883,
|
|
"grad_norm": 9.108727839012266,
|
|
"learning_rate": 5.65248388457793e-07,
|
|
"loss": 0.5383734107017517,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 2.589189189189189,
|
|
"grad_norm": 18.22308185678221,
|
|
"learning_rate": 5.628300860360775e-07,
|
|
"loss": 0.888434112071991,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 2.59009009009009,
|
|
"grad_norm": 16.69431853086682,
|
|
"learning_rate": 5.604166592616356e-07,
|
|
"loss": 0.8922913670539856,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 2.590990990990991,
|
|
"grad_norm": 9.724091768622573,
|
|
"learning_rate": 5.580081107863883e-07,
|
|
"loss": 0.29118427634239197,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 2.591891891891892,
|
|
"grad_norm": 11.289451576422394,
|
|
"learning_rate": 5.556044432568936e-07,
|
|
"loss": 0.47553423047065735,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 2.592792792792793,
|
|
"grad_norm": 7.330711679101707,
|
|
"learning_rate": 5.532056593143492e-07,
|
|
"loss": 0.12005828320980072,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 2.5936936936936936,
|
|
"grad_norm": 10.72402962786972,
|
|
"learning_rate": 5.508117615945829e-07,
|
|
"loss": 0.29493868350982666,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 2.5945945945945947,
|
|
"grad_norm": 11.130972531541053,
|
|
"learning_rate": 5.484227527280572e-07,
|
|
"loss": 0.5804529190063477,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 2.5954954954954954,
|
|
"grad_norm": 9.753123234270712,
|
|
"learning_rate": 5.460386353398583e-07,
|
|
"loss": 0.3161405026912689,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 2.5963963963963965,
|
|
"grad_norm": 10.072435072281797,
|
|
"learning_rate": 5.436594120497024e-07,
|
|
"loss": 0.6472926735877991,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 2.597297297297297,
|
|
"grad_norm": 13.20651579999806,
|
|
"learning_rate": 5.412850854719254e-07,
|
|
"loss": 0.3501310348510742,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 2.5981981981981983,
|
|
"grad_norm": 11.833623558460557,
|
|
"learning_rate": 5.389156582154808e-07,
|
|
"loss": 0.6468371748924255,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 2.599099099099099,
|
|
"grad_norm": 17.387581772165536,
|
|
"learning_rate": 5.365511328839434e-07,
|
|
"loss": 0.7932006120681763,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"grad_norm": 6.793615575187419,
|
|
"learning_rate": 5.34191512075497e-07,
|
|
"loss": 0.6898642778396606,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 2.6009009009009008,
|
|
"grad_norm": 18.576504492980916,
|
|
"learning_rate": 5.318367983829393e-07,
|
|
"loss": 1.0162630081176758,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 2.601801801801802,
|
|
"grad_norm": 5.625984073417594,
|
|
"learning_rate": 5.294869943936731e-07,
|
|
"loss": 0.19983269274234772,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 2.6027027027027025,
|
|
"grad_norm": 7.496120354594705,
|
|
"learning_rate": 5.2714210268971e-07,
|
|
"loss": 0.36101967096328735,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 2.6036036036036037,
|
|
"grad_norm": 7.873168721581297,
|
|
"learning_rate": 5.248021258476604e-07,
|
|
"loss": 0.5061078667640686,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 2.6045045045045043,
|
|
"grad_norm": 11.624768233702522,
|
|
"learning_rate": 5.224670664387372e-07,
|
|
"loss": 0.3834763765335083,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 2.6054054054054054,
|
|
"grad_norm": 9.911151099128082,
|
|
"learning_rate": 5.201369270287465e-07,
|
|
"loss": 0.5326143503189087,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 2.6063063063063066,
|
|
"grad_norm": 7.820904391932334,
|
|
"learning_rate": 5.178117101780916e-07,
|
|
"loss": 0.3475406765937805,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 2.6072072072072072,
|
|
"grad_norm": 16.50750543368978,
|
|
"learning_rate": 5.154914184417653e-07,
|
|
"loss": 0.5961655974388123,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 2.608108108108108,
|
|
"grad_norm": 12.496019832747427,
|
|
"learning_rate": 5.13176054369347e-07,
|
|
"loss": 0.40796059370040894,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 2.609009009009009,
|
|
"grad_norm": 22.26962261514545,
|
|
"learning_rate": 5.108656205050044e-07,
|
|
"loss": 1.1192822456359863,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 2.60990990990991,
|
|
"grad_norm": 13.324383607685071,
|
|
"learning_rate": 5.085601193874862e-07,
|
|
"loss": 0.4706074595451355,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 2.610810810810811,
|
|
"grad_norm": 15.002986098962861,
|
|
"learning_rate": 5.062595535501219e-07,
|
|
"loss": 0.3952459990978241,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 2.6117117117117115,
|
|
"grad_norm": 10.50665689705243,
|
|
"learning_rate": 5.039639255208156e-07,
|
|
"loss": 0.5983648300170898,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 2.6126126126126126,
|
|
"grad_norm": 9.743087865938486,
|
|
"learning_rate": 5.016732378220496e-07,
|
|
"loss": 0.3142485022544861,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 2.6135135135135137,
|
|
"grad_norm": 10.900750755523788,
|
|
"learning_rate": 4.993874929708742e-07,
|
|
"loss": 0.6305020451545715,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 2.6144144144144144,
|
|
"grad_norm": 11.325325838620994,
|
|
"learning_rate": 4.971066934789082e-07,
|
|
"loss": 0.39126256108283997,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 2.6153153153153155,
|
|
"grad_norm": 9.195635130603092,
|
|
"learning_rate": 4.948308418523406e-07,
|
|
"loss": 0.6813149452209473,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 2.616216216216216,
|
|
"grad_norm": 7.970211430341625,
|
|
"learning_rate": 4.925599405919185e-07,
|
|
"loss": 0.20607680082321167,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 2.6171171171171173,
|
|
"grad_norm": 14.374879080672446,
|
|
"learning_rate": 4.902939921929528e-07,
|
|
"loss": 1.5283896923065186,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 2.618018018018018,
|
|
"grad_norm": 12.673012709285532,
|
|
"learning_rate": 4.880329991453103e-07,
|
|
"loss": 0.6433181166648865,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 2.618918918918919,
|
|
"grad_norm": 6.616142823623153,
|
|
"learning_rate": 4.857769639334143e-07,
|
|
"loss": 0.31344592571258545,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 2.6198198198198197,
|
|
"grad_norm": 11.189778461418907,
|
|
"learning_rate": 4.835258890362387e-07,
|
|
"loss": 0.17635640501976013,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 2.620720720720721,
|
|
"grad_norm": 10.10800041744702,
|
|
"learning_rate": 4.812797769273087e-07,
|
|
"loss": 0.450918048620224,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 2.6216216216216215,
|
|
"grad_norm": 11.70342646609071,
|
|
"learning_rate": 4.790386300746935e-07,
|
|
"loss": 0.6094970107078552,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 2.6225225225225226,
|
|
"grad_norm": 10.22947360088361,
|
|
"learning_rate": 4.7680245094100964e-07,
|
|
"loss": 0.6853034496307373,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 2.6234234234234233,
|
|
"grad_norm": 12.839932056774703,
|
|
"learning_rate": 4.7457124198341366e-07,
|
|
"loss": 0.3796372413635254,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 2.6243243243243244,
|
|
"grad_norm": 13.780762966229505,
|
|
"learning_rate": 4.7234500565359995e-07,
|
|
"loss": 0.5920535326004028,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 2.6252252252252255,
|
|
"grad_norm": 10.240242886503744,
|
|
"learning_rate": 4.701237443978007e-07,
|
|
"loss": 0.5691888928413391,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 2.626126126126126,
|
|
"grad_norm": 12.114205411324416,
|
|
"learning_rate": 4.679074606567785e-07,
|
|
"loss": 0.5729956030845642,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 2.627027027027027,
|
|
"grad_norm": 11.1676128406666,
|
|
"learning_rate": 4.656961568658308e-07,
|
|
"loss": 0.7371394038200378,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 2.627927927927928,
|
|
"grad_norm": 13.772638871336252,
|
|
"learning_rate": 4.634898354547779e-07,
|
|
"loss": 0.78377366065979,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 2.628828828828829,
|
|
"grad_norm": 17.795301472662988,
|
|
"learning_rate": 4.6128849884797043e-07,
|
|
"loss": 0.3983089029788971,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 2.6297297297297297,
|
|
"grad_norm": 9.779365655263382,
|
|
"learning_rate": 4.5909214946427806e-07,
|
|
"loss": 0.3358728885650635,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 2.6306306306306304,
|
|
"grad_norm": 11.477954073981119,
|
|
"learning_rate": 4.569007897170907e-07,
|
|
"loss": 0.6650623679161072,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 2.6315315315315315,
|
|
"grad_norm": 11.574699250695376,
|
|
"learning_rate": 4.547144220143185e-07,
|
|
"loss": 0.31445077061653137,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 2.6324324324324326,
|
|
"grad_norm": 23.500686708610274,
|
|
"learning_rate": 4.5253304875838177e-07,
|
|
"loss": 0.9121463298797607,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 2.6333333333333333,
|
|
"grad_norm": 11.436808050811093,
|
|
"learning_rate": 4.5035667234621716e-07,
|
|
"loss": 0.45456385612487793,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 2.634234234234234,
|
|
"grad_norm": 13.467816478645442,
|
|
"learning_rate": 4.481852951692672e-07,
|
|
"loss": 0.40558144450187683,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 2.635135135135135,
|
|
"grad_norm": 15.985868983989086,
|
|
"learning_rate": 4.4601891961348454e-07,
|
|
"loss": 0.8153447508811951,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 2.636036036036036,
|
|
"grad_norm": 20.91003276858005,
|
|
"learning_rate": 4.43857548059321e-07,
|
|
"loss": 0.4715864062309265,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 2.636936936936937,
|
|
"grad_norm": 11.525057441277907,
|
|
"learning_rate": 4.4170118288173694e-07,
|
|
"loss": 0.753389835357666,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 2.637837837837838,
|
|
"grad_norm": 10.303849555361593,
|
|
"learning_rate": 4.395498264501863e-07,
|
|
"loss": 0.4432196021080017,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 2.6387387387387387,
|
|
"grad_norm": 9.203487422445022,
|
|
"learning_rate": 4.374034811286193e-07,
|
|
"loss": 0.7571563720703125,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 2.6396396396396398,
|
|
"grad_norm": 14.315069407156413,
|
|
"learning_rate": 4.352621492754833e-07,
|
|
"loss": 0.29805177450180054,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 2.6405405405405404,
|
|
"grad_norm": 9.050108256681167,
|
|
"learning_rate": 4.331258332437127e-07,
|
|
"loss": 0.6004000902175903,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 2.6414414414414416,
|
|
"grad_norm": 10.959545782986904,
|
|
"learning_rate": 4.3099453538073433e-07,
|
|
"loss": 0.3260193467140198,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 2.642342342342342,
|
|
"grad_norm": 14.515970073269813,
|
|
"learning_rate": 4.2886825802845754e-07,
|
|
"loss": 0.5870916247367859,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 2.6432432432432433,
|
|
"grad_norm": 13.144185376047663,
|
|
"learning_rate": 4.267470035232785e-07,
|
|
"loss": 0.678016185760498,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 2.644144144144144,
|
|
"grad_norm": 9.631680019422237,
|
|
"learning_rate": 4.2463077419606977e-07,
|
|
"loss": 0.31092676520347595,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 2.645045045045045,
|
|
"grad_norm": 9.079015859517357,
|
|
"learning_rate": 4.22519572372187e-07,
|
|
"loss": 0.24442800879478455,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 2.645945945945946,
|
|
"grad_norm": 14.122301869657322,
|
|
"learning_rate": 4.204134003714577e-07,
|
|
"loss": 0.9151749610900879,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 2.646846846846847,
|
|
"grad_norm": 19.03327722407373,
|
|
"learning_rate": 4.183122605081852e-07,
|
|
"loss": 0.5897693037986755,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 2.647747747747748,
|
|
"grad_norm": 16.441082240151502,
|
|
"learning_rate": 4.162161550911414e-07,
|
|
"loss": 0.4502699077129364,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 2.6486486486486487,
|
|
"grad_norm": 22.301106051469862,
|
|
"learning_rate": 4.1412508642356574e-07,
|
|
"loss": 0.4282999634742737,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 2.6495495495495494,
|
|
"grad_norm": 7.801578059242751,
|
|
"learning_rate": 4.120390568031674e-07,
|
|
"loss": 0.44312307238578796,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 2.6504504504504505,
|
|
"grad_norm": 14.570842876967822,
|
|
"learning_rate": 4.0995806852211384e-07,
|
|
"loss": 0.5986523032188416,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 2.6513513513513516,
|
|
"grad_norm": 27.648638258523697,
|
|
"learning_rate": 4.078821238670355e-07,
|
|
"loss": 0.642958402633667,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 2.6522522522522523,
|
|
"grad_norm": 9.833657333706556,
|
|
"learning_rate": 4.0581122511901935e-07,
|
|
"loss": 0.24922549724578857,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 2.653153153153153,
|
|
"grad_norm": 11.07686680706659,
|
|
"learning_rate": 4.037453745536102e-07,
|
|
"loss": 0.5340144038200378,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 2.654054054054054,
|
|
"grad_norm": 12.06915123034913,
|
|
"learning_rate": 4.0168457444080267e-07,
|
|
"loss": 0.5442618727684021,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 2.654954954954955,
|
|
"grad_norm": 16.12520800633065,
|
|
"learning_rate": 3.996288270450438e-07,
|
|
"loss": 0.4101296365261078,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 2.655855855855856,
|
|
"grad_norm": 40.56171989302629,
|
|
"learning_rate": 3.975781346252283e-07,
|
|
"loss": 0.5544003844261169,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 2.6567567567567565,
|
|
"grad_norm": 10.706278418644738,
|
|
"learning_rate": 3.955324994346954e-07,
|
|
"loss": 0.2974967062473297,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 2.6576576576576576,
|
|
"grad_norm": 9.782226221682107,
|
|
"learning_rate": 3.9349192372123034e-07,
|
|
"loss": 0.8130560517311096,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 2.6585585585585587,
|
|
"grad_norm": 9.37768439068461,
|
|
"learning_rate": 3.914564097270546e-07,
|
|
"loss": 0.3785146474838257,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 2.6594594594594594,
|
|
"grad_norm": 17.206005574217432,
|
|
"learning_rate": 3.8942595968883167e-07,
|
|
"loss": 1.1182104349136353,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 2.6603603603603605,
|
|
"grad_norm": 20.771815609474533,
|
|
"learning_rate": 3.87400575837657e-07,
|
|
"loss": 0.4129304587841034,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 2.661261261261261,
|
|
"grad_norm": 11.277415853457976,
|
|
"learning_rate": 3.8538026039906307e-07,
|
|
"loss": 0.2300529181957245,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 2.6621621621621623,
|
|
"grad_norm": 9.654022712563158,
|
|
"learning_rate": 3.8336501559300967e-07,
|
|
"loss": 0.8957171440124512,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 2.663063063063063,
|
|
"grad_norm": 12.797325451540969,
|
|
"learning_rate": 3.8135484363388706e-07,
|
|
"loss": 0.6313111782073975,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 2.663963963963964,
|
|
"grad_norm": 15.339600675956936,
|
|
"learning_rate": 3.793497467305113e-07,
|
|
"loss": 0.7425114512443542,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 2.6648648648648647,
|
|
"grad_norm": 9.97783864849821,
|
|
"learning_rate": 3.7734972708612037e-07,
|
|
"loss": 0.5404260158538818,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 2.665765765765766,
|
|
"grad_norm": 9.59150288921644,
|
|
"learning_rate": 3.7535478689837477e-07,
|
|
"loss": 0.2231852412223816,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 2.6666666666666665,
|
|
"grad_norm": 8.70809862679508,
|
|
"learning_rate": 3.733649283593521e-07,
|
|
"loss": 0.18978221714496613,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 2.6675675675675676,
|
|
"grad_norm": 11.210466186650416,
|
|
"learning_rate": 3.7138015365554834e-07,
|
|
"loss": 1.2090866565704346,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 2.6684684684684683,
|
|
"grad_norm": 13.860488258268921,
|
|
"learning_rate": 3.694004649678706e-07,
|
|
"loss": 0.3982599377632141,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 2.6693693693693694,
|
|
"grad_norm": 12.25531176790016,
|
|
"learning_rate": 3.6742586447164e-07,
|
|
"loss": 0.6313212513923645,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 2.6702702702702705,
|
|
"grad_norm": 10.876511676239046,
|
|
"learning_rate": 3.654563543365836e-07,
|
|
"loss": 0.2990618348121643,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 2.671171171171171,
|
|
"grad_norm": 6.791956414843501,
|
|
"learning_rate": 3.6349193672683856e-07,
|
|
"loss": 0.2878454923629761,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 2.672072072072072,
|
|
"grad_norm": 11.29455290884701,
|
|
"learning_rate": 3.615326138009445e-07,
|
|
"loss": 0.639564573764801,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 2.672972972972973,
|
|
"grad_norm": 12.46761612333896,
|
|
"learning_rate": 3.5957838771184137e-07,
|
|
"loss": 0.274543434381485,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 2.673873873873874,
|
|
"grad_norm": 14.973325720741578,
|
|
"learning_rate": 3.576292606068721e-07,
|
|
"loss": 0.330290824174881,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 2.6747747747747748,
|
|
"grad_norm": 11.74660551912347,
|
|
"learning_rate": 3.556852346277734e-07,
|
|
"loss": 0.5368125438690186,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 2.6756756756756754,
|
|
"grad_norm": 9.198014550280483,
|
|
"learning_rate": 3.5374631191067875e-07,
|
|
"loss": 0.4176695942878723,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 2.6765765765765765,
|
|
"grad_norm": 7.897810927600474,
|
|
"learning_rate": 3.51812494586114e-07,
|
|
"loss": 0.2113216370344162,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 2.6774774774774777,
|
|
"grad_norm": 9.424805129671286,
|
|
"learning_rate": 3.498837847789949e-07,
|
|
"loss": 0.550119161605835,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 2.6783783783783783,
|
|
"grad_norm": 7.847503068149666,
|
|
"learning_rate": 3.4796018460862444e-07,
|
|
"loss": 0.628257155418396,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 2.679279279279279,
|
|
"grad_norm": 8.062500342777113,
|
|
"learning_rate": 3.460416961886898e-07,
|
|
"loss": 0.5177410840988159,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 2.68018018018018,
|
|
"grad_norm": 11.597691603877141,
|
|
"learning_rate": 3.4412832162726506e-07,
|
|
"loss": 0.4858509302139282,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 2.6810810810810812,
|
|
"grad_norm": 19.929904139237365,
|
|
"learning_rate": 3.422200630268013e-07,
|
|
"loss": 0.7135697603225708,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 2.681981981981982,
|
|
"grad_norm": 15.940704926590058,
|
|
"learning_rate": 3.403169224841307e-07,
|
|
"loss": 0.9417455792427063,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 2.682882882882883,
|
|
"grad_norm": 12.173019301492538,
|
|
"learning_rate": 3.3841890209045933e-07,
|
|
"loss": 0.14253586530685425,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 2.6837837837837837,
|
|
"grad_norm": 9.487205575782996,
|
|
"learning_rate": 3.3652600393137e-07,
|
|
"loss": 1.2050479650497437,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 2.684684684684685,
|
|
"grad_norm": 19.108920534477047,
|
|
"learning_rate": 3.346382300868134e-07,
|
|
"loss": 0.6063281297683716,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 2.6855855855855855,
|
|
"grad_norm": 12.042226841213884,
|
|
"learning_rate": 3.3275558263111354e-07,
|
|
"loss": 0.4792482852935791,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 2.6864864864864866,
|
|
"grad_norm": 15.098029466037605,
|
|
"learning_rate": 3.3087806363295783e-07,
|
|
"loss": 0.40536242723464966,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 2.6873873873873872,
|
|
"grad_norm": 8.906903068783857,
|
|
"learning_rate": 3.2900567515540163e-07,
|
|
"loss": 0.5243141055107117,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 2.6882882882882884,
|
|
"grad_norm": 18.434403089069676,
|
|
"learning_rate": 3.2713841925585963e-07,
|
|
"loss": 0.4433700144290924,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 2.689189189189189,
|
|
"grad_norm": 11.302154592289067,
|
|
"learning_rate": 3.2527629798610906e-07,
|
|
"loss": 0.2879910469055176,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 2.69009009009009,
|
|
"grad_norm": 12.569851224235085,
|
|
"learning_rate": 3.234193133922858e-07,
|
|
"loss": 0.5801317691802979,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 2.690990990990991,
|
|
"grad_norm": 9.530874455846417,
|
|
"learning_rate": 3.215674675148778e-07,
|
|
"loss": 0.68318110704422,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 2.691891891891892,
|
|
"grad_norm": 8.053056998882271,
|
|
"learning_rate": 3.1972076238873107e-07,
|
|
"loss": 0.4892258644104004,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 2.692792792792793,
|
|
"grad_norm": 13.914923960879545,
|
|
"learning_rate": 3.1787920004303806e-07,
|
|
"loss": 0.5541627407073975,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 2.6936936936936937,
|
|
"grad_norm": 21.508437543644284,
|
|
"learning_rate": 3.1604278250134514e-07,
|
|
"loss": 1.266975998878479,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 2.6945945945945944,
|
|
"grad_norm": 12.186755003576165,
|
|
"learning_rate": 3.142115117815414e-07,
|
|
"loss": 0.31859084963798523,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 2.6954954954954955,
|
|
"grad_norm": 15.816992189418007,
|
|
"learning_rate": 3.1238538989586287e-07,
|
|
"loss": 1.1375571489334106,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 2.6963963963963966,
|
|
"grad_norm": 12.044527763238884,
|
|
"learning_rate": 3.105644188508877e-07,
|
|
"loss": 0.32023024559020996,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 2.6972972972972973,
|
|
"grad_norm": 10.930969892517487,
|
|
"learning_rate": 3.087486006475321e-07,
|
|
"loss": 0.5635913610458374,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 2.698198198198198,
|
|
"grad_norm": 11.19352392870248,
|
|
"learning_rate": 3.069379372810544e-07,
|
|
"loss": 0.2701873779296875,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 2.699099099099099,
|
|
"grad_norm": 12.458216828465993,
|
|
"learning_rate": 3.0513243074104303e-07,
|
|
"loss": 0.41960519552230835,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"grad_norm": 14.87043863301729,
|
|
"learning_rate": 3.0333208301142615e-07,
|
|
"loss": 0.29608380794525146,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 2.700900900900901,
|
|
"grad_norm": 14.340352075710008,
|
|
"learning_rate": 3.015368960704584e-07,
|
|
"loss": 0.4806203544139862,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 2.701801801801802,
|
|
"grad_norm": 11.118175080345315,
|
|
"learning_rate": 2.997468718907259e-07,
|
|
"loss": 0.2992958426475525,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 2.7027027027027026,
|
|
"grad_norm": 9.049822187405406,
|
|
"learning_rate": 2.97962012439143e-07,
|
|
"loss": 0.25708407163619995,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 2.7036036036036037,
|
|
"grad_norm": 8.944507727197477,
|
|
"learning_rate": 2.9618231967694533e-07,
|
|
"loss": 0.4808061122894287,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 2.7045045045045044,
|
|
"grad_norm": 9.718239368961745,
|
|
"learning_rate": 2.944077955596947e-07,
|
|
"loss": 0.5168577432632446,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 2.7054054054054055,
|
|
"grad_norm": 12.317380466134502,
|
|
"learning_rate": 2.926384420372713e-07,
|
|
"loss": 0.35036972165107727,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 2.706306306306306,
|
|
"grad_norm": 8.014069226924516,
|
|
"learning_rate": 2.908742610538762e-07,
|
|
"loss": 0.3752833902835846,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 2.7072072072072073,
|
|
"grad_norm": 12.7383092954234,
|
|
"learning_rate": 2.8911525454802304e-07,
|
|
"loss": 0.8151978850364685,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 2.708108108108108,
|
|
"grad_norm": 9.209115269320733,
|
|
"learning_rate": 2.873614244525436e-07,
|
|
"loss": 0.22708743810653687,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 2.709009009009009,
|
|
"grad_norm": 16.32057044884385,
|
|
"learning_rate": 2.85612772694579e-07,
|
|
"loss": 0.4021984338760376,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 2.7099099099099098,
|
|
"grad_norm": 10.00615274011178,
|
|
"learning_rate": 2.838693011955823e-07,
|
|
"loss": 0.7005878686904907,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 2.710810810810811,
|
|
"grad_norm": 10.060717699823652,
|
|
"learning_rate": 2.821310118713122e-07,
|
|
"loss": 0.6117244362831116,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 2.7117117117117115,
|
|
"grad_norm": 9.917230152814122,
|
|
"learning_rate": 2.8039790663183573e-07,
|
|
"loss": 0.3839304447174072,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 2.7126126126126127,
|
|
"grad_norm": 7.84963929859655,
|
|
"learning_rate": 2.7866998738152016e-07,
|
|
"loss": 0.3038249611854553,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 2.7135135135135133,
|
|
"grad_norm": 11.237512908364222,
|
|
"learning_rate": 2.769472560190384e-07,
|
|
"loss": 0.8849109411239624,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 2.7144144144144144,
|
|
"grad_norm": 11.635626435222667,
|
|
"learning_rate": 2.7522971443735946e-07,
|
|
"loss": 0.4839766025543213,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 2.7153153153153156,
|
|
"grad_norm": 7.328928901326618,
|
|
"learning_rate": 2.735173645237493e-07,
|
|
"loss": 0.4564918279647827,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 2.7162162162162162,
|
|
"grad_norm": 15.957568190247981,
|
|
"learning_rate": 2.71810208159774e-07,
|
|
"loss": 1.339203119277954,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 2.717117117117117,
|
|
"grad_norm": 7.631599104932898,
|
|
"learning_rate": 2.701082472212879e-07,
|
|
"loss": 0.36641496419906616,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 2.718018018018018,
|
|
"grad_norm": 11.386224175739468,
|
|
"learning_rate": 2.6841148357843905e-07,
|
|
"loss": 0.8879693150520325,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 2.718918918918919,
|
|
"grad_norm": 8.528432535758734,
|
|
"learning_rate": 2.6671991909566265e-07,
|
|
"loss": 0.18705230951309204,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 2.71981981981982,
|
|
"grad_norm": 18.3906209804001,
|
|
"learning_rate": 2.650335556316835e-07,
|
|
"loss": 0.4086385667324066,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 2.7207207207207205,
|
|
"grad_norm": 9.512586159138896,
|
|
"learning_rate": 2.6335239503951006e-07,
|
|
"loss": 0.2790507674217224,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 2.7216216216216216,
|
|
"grad_norm": 7.546803365202142,
|
|
"learning_rate": 2.616764391664317e-07,
|
|
"loss": 0.23840834200382233,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 2.7225225225225227,
|
|
"grad_norm": 7.975201178673251,
|
|
"learning_rate": 2.600056898540232e-07,
|
|
"loss": 0.2528843879699707,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 2.7234234234234234,
|
|
"grad_norm": 8.548652521131835,
|
|
"learning_rate": 2.5834014893813486e-07,
|
|
"loss": 0.35694175958633423,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 2.7243243243243245,
|
|
"grad_norm": 12.896325190744951,
|
|
"learning_rate": 2.56679818248895e-07,
|
|
"loss": 0.35105961561203003,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 2.725225225225225,
|
|
"grad_norm": 10.067003688832774,
|
|
"learning_rate": 2.5502469961070643e-07,
|
|
"loss": 0.338885098695755,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 2.7261261261261263,
|
|
"grad_norm": 9.104931644949936,
|
|
"learning_rate": 2.533747948422466e-07,
|
|
"loss": 0.3766016364097595,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 2.727027027027027,
|
|
"grad_norm": 10.795857671933486,
|
|
"learning_rate": 2.517301057564603e-07,
|
|
"loss": 0.2562897801399231,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 2.727927927927928,
|
|
"grad_norm": 7.405613649365451,
|
|
"learning_rate": 2.500906341605652e-07,
|
|
"loss": 0.5181165933609009,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 2.7288288288288287,
|
|
"grad_norm": 9.171006696531848,
|
|
"learning_rate": 2.4845638185604314e-07,
|
|
"loss": 0.3681427240371704,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 2.72972972972973,
|
|
"grad_norm": 18.192644826948797,
|
|
"learning_rate": 2.4682735063864205e-07,
|
|
"loss": 0.40039005875587463,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 2.7306306306306305,
|
|
"grad_norm": 7.724440284318915,
|
|
"learning_rate": 2.452035422983734e-07,
|
|
"loss": 0.37891077995300293,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 2.7315315315315316,
|
|
"grad_norm": 11.685556863073284,
|
|
"learning_rate": 2.435849586195077e-07,
|
|
"loss": 0.2748810946941376,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 2.7324324324324323,
|
|
"grad_norm": 9.146797818479726,
|
|
"learning_rate": 2.4197160138057675e-07,
|
|
"loss": 0.7849463820457458,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 2.7333333333333334,
|
|
"grad_norm": 8.77265614001756,
|
|
"learning_rate": 2.403634723543674e-07,
|
|
"loss": 0.4742673337459564,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 2.7342342342342345,
|
|
"grad_norm": 6.532636672073548,
|
|
"learning_rate": 2.3876057330792344e-07,
|
|
"loss": 0.1301393061876297,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 2.735135135135135,
|
|
"grad_norm": 12.16544161167629,
|
|
"learning_rate": 2.3716290600254043e-07,
|
|
"loss": 0.1819266974925995,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 2.736036036036036,
|
|
"grad_norm": 8.98831106384767,
|
|
"learning_rate": 2.3557047219376628e-07,
|
|
"loss": 0.17506417632102966,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 2.736936936936937,
|
|
"grad_norm": 10.835020726650052,
|
|
"learning_rate": 2.3398327363139739e-07,
|
|
"loss": 0.23951488733291626,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 2.737837837837838,
|
|
"grad_norm": 7.661414503867859,
|
|
"learning_rate": 2.3240131205947814e-07,
|
|
"loss": 0.3445315957069397,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 2.7387387387387387,
|
|
"grad_norm": 6.431788788385182,
|
|
"learning_rate": 2.3082458921629857e-07,
|
|
"loss": 0.22054603695869446,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 2.7396396396396394,
|
|
"grad_norm": 8.864951894862525,
|
|
"learning_rate": 2.2925310683439062e-07,
|
|
"loss": 0.18980057537555695,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 2.7405405405405405,
|
|
"grad_norm": 8.253867273417365,
|
|
"learning_rate": 2.2768686664053074e-07,
|
|
"loss": 0.39419782161712646,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 2.7414414414414416,
|
|
"grad_norm": 14.513183317428417,
|
|
"learning_rate": 2.261258703557323e-07,
|
|
"loss": 0.3947722315788269,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 2.7423423423423423,
|
|
"grad_norm": 7.339659464691865,
|
|
"learning_rate": 2.2457011969524879e-07,
|
|
"loss": 0.3583824038505554,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 2.743243243243243,
|
|
"grad_norm": 10.915913341596568,
|
|
"learning_rate": 2.2301961636856884e-07,
|
|
"loss": 0.4267234802246094,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 2.744144144144144,
|
|
"grad_norm": 11.527013447134877,
|
|
"learning_rate": 2.214743620794152e-07,
|
|
"loss": 0.7158124446868896,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 2.745045045045045,
|
|
"grad_norm": 20.27771534058546,
|
|
"learning_rate": 2.1993435852574297e-07,
|
|
"loss": 0.3993403911590576,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 2.745945945945946,
|
|
"grad_norm": 9.160585240600767,
|
|
"learning_rate": 2.1839960739973687e-07,
|
|
"loss": 0.377150297164917,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 2.746846846846847,
|
|
"grad_norm": 7.85243737608308,
|
|
"learning_rate": 2.168701103878118e-07,
|
|
"loss": 0.35824477672576904,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 2.7477477477477477,
|
|
"grad_norm": 11.30277465334945,
|
|
"learning_rate": 2.1534586917060673e-07,
|
|
"loss": 0.48433351516723633,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 2.7486486486486488,
|
|
"grad_norm": 11.06453851381117,
|
|
"learning_rate": 2.1382688542298912e-07,
|
|
"loss": 0.24319399893283844,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 2.7495495495495494,
|
|
"grad_norm": 14.001568751247557,
|
|
"learning_rate": 2.1231316081404552e-07,
|
|
"loss": 0.7487995028495789,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 2.7504504504504506,
|
|
"grad_norm": 13.81950197404454,
|
|
"learning_rate": 2.108046970070876e-07,
|
|
"loss": 0.6276721358299255,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 2.7513513513513512,
|
|
"grad_norm": 7.978721613829254,
|
|
"learning_rate": 2.093014956596423e-07,
|
|
"loss": 0.3058184087276459,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 2.7522522522522523,
|
|
"grad_norm": 5.464143163192235,
|
|
"learning_rate": 2.0780355842345722e-07,
|
|
"loss": 0.20870988070964813,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 2.753153153153153,
|
|
"grad_norm": 11.223152122559126,
|
|
"learning_rate": 2.0631088694449352e-07,
|
|
"loss": 0.33823296427726746,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 2.754054054054054,
|
|
"grad_norm": 9.854687827814896,
|
|
"learning_rate": 2.0482348286292864e-07,
|
|
"loss": 0.4668673574924469,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 2.754954954954955,
|
|
"grad_norm": 9.436338510663326,
|
|
"learning_rate": 2.0334134781314907e-07,
|
|
"loss": 0.5126863121986389,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 2.755855855855856,
|
|
"grad_norm": 18.530337066924343,
|
|
"learning_rate": 2.018644834237543e-07,
|
|
"loss": 0.6312925815582275,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 2.756756756756757,
|
|
"grad_norm": 9.128267070096916,
|
|
"learning_rate": 2.0039289131755124e-07,
|
|
"loss": 0.350289523601532,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 2.7576576576576577,
|
|
"grad_norm": 10.091471025669732,
|
|
"learning_rate": 1.989265731115525e-07,
|
|
"loss": 0.6063660383224487,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 2.7585585585585584,
|
|
"grad_norm": 9.75504633193664,
|
|
"learning_rate": 1.9746553041697758e-07,
|
|
"loss": 0.3913068175315857,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 2.7594594594594595,
|
|
"grad_norm": 8.800586125231272,
|
|
"learning_rate": 1.9600976483924782e-07,
|
|
"loss": 0.33195608854293823,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 2.7603603603603606,
|
|
"grad_norm": 8.337439084226812,
|
|
"learning_rate": 1.9455927797798645e-07,
|
|
"loss": 0.4687657058238983,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 2.7612612612612613,
|
|
"grad_norm": 10.79715324778288,
|
|
"learning_rate": 1.9311407142701576e-07,
|
|
"loss": 0.5919493436813354,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 2.762162162162162,
|
|
"grad_norm": 9.985293943064102,
|
|
"learning_rate": 1.916741467743566e-07,
|
|
"loss": 0.28231510519981384,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 2.763063063063063,
|
|
"grad_norm": 9.314398130051678,
|
|
"learning_rate": 1.902395056022266e-07,
|
|
"loss": 0.3249053955078125,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 2.763963963963964,
|
|
"grad_norm": 9.18327770307208,
|
|
"learning_rate": 1.8881014948703536e-07,
|
|
"loss": 0.40570229291915894,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 2.764864864864865,
|
|
"grad_norm": 8.896525489631102,
|
|
"learning_rate": 1.8738607999938818e-07,
|
|
"loss": 0.18493056297302246,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 2.7657657657657655,
|
|
"grad_norm": 8.969722816637464,
|
|
"learning_rate": 1.8596729870407836e-07,
|
|
"loss": 0.7123985290527344,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 2.7666666666666666,
|
|
"grad_norm": 10.679117370967804,
|
|
"learning_rate": 1.8455380716009162e-07,
|
|
"loss": 0.26059746742248535,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 2.7675675675675677,
|
|
"grad_norm": 10.097263761653606,
|
|
"learning_rate": 1.8314560692059836e-07,
|
|
"loss": 0.6941015720367432,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 2.7684684684684684,
|
|
"grad_norm": 6.585430257572345,
|
|
"learning_rate": 1.8174269953295631e-07,
|
|
"loss": 0.35611647367477417,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 2.7693693693693695,
|
|
"grad_norm": 7.595614278486373,
|
|
"learning_rate": 1.8034508653870796e-07,
|
|
"loss": 0.6131374835968018,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 2.77027027027027,
|
|
"grad_norm": 9.468174216358069,
|
|
"learning_rate": 1.7895276947357542e-07,
|
|
"loss": 0.5632179975509644,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 2.7711711711711713,
|
|
"grad_norm": 8.469459034669931,
|
|
"learning_rate": 1.775657498674649e-07,
|
|
"loss": 0.39638811349868774,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 2.772072072072072,
|
|
"grad_norm": 9.257610469982092,
|
|
"learning_rate": 1.7618402924445944e-07,
|
|
"loss": 0.3628331124782562,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 2.772972972972973,
|
|
"grad_norm": 9.385884073498866,
|
|
"learning_rate": 1.7480760912282015e-07,
|
|
"loss": 0.8038603067398071,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 2.7738738738738737,
|
|
"grad_norm": 9.072494003276503,
|
|
"learning_rate": 1.7343649101498327e-07,
|
|
"loss": 0.3333243131637573,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 2.774774774774775,
|
|
"grad_norm": 11.359674102953054,
|
|
"learning_rate": 1.7207067642756092e-07,
|
|
"loss": 0.6656917333602905,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 2.7756756756756755,
|
|
"grad_norm": 8.296166497045828,
|
|
"learning_rate": 1.707101668613348e-07,
|
|
"loss": 0.4607012867927551,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 2.7765765765765766,
|
|
"grad_norm": 7.216294213414764,
|
|
"learning_rate": 1.693549638112607e-07,
|
|
"loss": 0.4505841135978699,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 2.7774774774774773,
|
|
"grad_norm": 8.30811616772895,
|
|
"learning_rate": 1.6800506876645972e-07,
|
|
"loss": 0.317621648311615,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 2.7783783783783784,
|
|
"grad_norm": 12.855704246507058,
|
|
"learning_rate": 1.6666048321022367e-07,
|
|
"loss": 0.5960338115692139,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 2.7792792792792795,
|
|
"grad_norm": 14.831071328132793,
|
|
"learning_rate": 1.65321208620009e-07,
|
|
"loss": 1.1567168235778809,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 2.78018018018018,
|
|
"grad_norm": 6.972786870459175,
|
|
"learning_rate": 1.6398724646743525e-07,
|
|
"loss": 0.4468737840652466,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 2.781081081081081,
|
|
"grad_norm": 7.818059401640994,
|
|
"learning_rate": 1.6265859821828656e-07,
|
|
"loss": 0.5314226150512695,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 2.781981981981982,
|
|
"grad_norm": 9.666906330053216,
|
|
"learning_rate": 1.6133526533250566e-07,
|
|
"loss": 0.18195509910583496,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 2.782882882882883,
|
|
"grad_norm": 13.45187665837708,
|
|
"learning_rate": 1.6001724926419826e-07,
|
|
"loss": 0.3830515146255493,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 2.7837837837837838,
|
|
"grad_norm": 18.07292873051059,
|
|
"learning_rate": 1.5870455146162367e-07,
|
|
"loss": 0.5667542815208435,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 2.7846846846846844,
|
|
"grad_norm": 7.150461444462728,
|
|
"learning_rate": 1.5739717336720084e-07,
|
|
"loss": 0.1438254415988922,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 2.7855855855855856,
|
|
"grad_norm": 9.567340392432493,
|
|
"learning_rate": 1.5609511641750118e-07,
|
|
"loss": 0.2774874269962311,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 2.7864864864864867,
|
|
"grad_norm": 11.980751288504987,
|
|
"learning_rate": 1.5479838204324915e-07,
|
|
"loss": 0.3390207886695862,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 2.7873873873873873,
|
|
"grad_norm": 11.66756062576352,
|
|
"learning_rate": 1.535069716693227e-07,
|
|
"loss": 0.5234507918357849,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 2.7882882882882885,
|
|
"grad_norm": 9.71211202192653,
|
|
"learning_rate": 1.5222088671474732e-07,
|
|
"loss": 0.7736992239952087,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 2.789189189189189,
|
|
"grad_norm": 10.281658736805602,
|
|
"learning_rate": 1.5094012859269814e-07,
|
|
"loss": 0.4092180132865906,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 2.7900900900900902,
|
|
"grad_norm": 9.823131199230207,
|
|
"learning_rate": 1.4966469871049605e-07,
|
|
"loss": 0.3046525716781616,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 2.790990990990991,
|
|
"grad_norm": 11.885545549588448,
|
|
"learning_rate": 1.4839459846960946e-07,
|
|
"loss": 0.31213539838790894,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 2.791891891891892,
|
|
"grad_norm": 12.373019318273954,
|
|
"learning_rate": 1.4712982926564646e-07,
|
|
"loss": 0.3035721480846405,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 2.7927927927927927,
|
|
"grad_norm": 9.854784746700393,
|
|
"learning_rate": 1.45870392488362e-07,
|
|
"loss": 0.533233106136322,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 2.793693693693694,
|
|
"grad_norm": 13.07465983745449,
|
|
"learning_rate": 1.446162895216474e-07,
|
|
"loss": 0.49974358081817627,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 2.7945945945945945,
|
|
"grad_norm": 9.560357311500448,
|
|
"learning_rate": 1.4336752174353706e-07,
|
|
"loss": 0.3893500864505768,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 2.7954954954954956,
|
|
"grad_norm": 8.622650578947777,
|
|
"learning_rate": 1.421240905261989e-07,
|
|
"loss": 0.25431448221206665,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 2.7963963963963963,
|
|
"grad_norm": 27.539509816072687,
|
|
"learning_rate": 1.4088599723594e-07,
|
|
"loss": 1.0530831813812256,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 2.7972972972972974,
|
|
"grad_norm": 8.544234342637827,
|
|
"learning_rate": 1.3965324323320095e-07,
|
|
"loss": 0.3481456935405731,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 2.798198198198198,
|
|
"grad_norm": 17.30398356731005,
|
|
"learning_rate": 1.3842582987255494e-07,
|
|
"loss": 0.36002570390701294,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 2.799099099099099,
|
|
"grad_norm": 17.35792289018547,
|
|
"learning_rate": 1.3720375850270806e-07,
|
|
"loss": 0.8561835289001465,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"grad_norm": 9.690736973795328,
|
|
"learning_rate": 1.3598703046649507e-07,
|
|
"loss": 0.5120982527732849,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 2.800900900900901,
|
|
"grad_norm": 6.887583688878893,
|
|
"learning_rate": 1.3477564710088097e-07,
|
|
"loss": 0.7675758600234985,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 2.801801801801802,
|
|
"grad_norm": 9.776529585323216,
|
|
"learning_rate": 1.3356960973695544e-07,
|
|
"loss": 0.3310229778289795,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 2.8027027027027027,
|
|
"grad_norm": 8.184489373151052,
|
|
"learning_rate": 1.3236891969993727e-07,
|
|
"loss": 0.4742078185081482,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 2.8036036036036034,
|
|
"grad_norm": 9.172924808590428,
|
|
"learning_rate": 1.311735783091661e-07,
|
|
"loss": 0.9775221943855286,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 2.8045045045045045,
|
|
"grad_norm": 19.61258494026224,
|
|
"learning_rate": 1.2998358687810687e-07,
|
|
"loss": 0.722926139831543,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 2.8054054054054056,
|
|
"grad_norm": 11.216368934493955,
|
|
"learning_rate": 1.2879894671434468e-07,
|
|
"loss": 0.3220384418964386,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 2.8063063063063063,
|
|
"grad_norm": 14.424103129637146,
|
|
"learning_rate": 1.2761965911958385e-07,
|
|
"loss": 0.3698599338531494,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 2.807207207207207,
|
|
"grad_norm": 10.036482837996969,
|
|
"learning_rate": 1.2644572538965006e-07,
|
|
"loss": 0.3106807768344879,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 2.808108108108108,
|
|
"grad_norm": 11.541431598800086,
|
|
"learning_rate": 1.2527714681448255e-07,
|
|
"loss": 0.7433834075927734,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 2.809009009009009,
|
|
"grad_norm": 10.832494397814555,
|
|
"learning_rate": 1.2411392467813922e-07,
|
|
"loss": 0.3309785723686218,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 2.80990990990991,
|
|
"grad_norm": 17.445178429968113,
|
|
"learning_rate": 1.229560602587898e-07,
|
|
"loss": 0.4417427182197571,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 2.810810810810811,
|
|
"grad_norm": 6.774341516784999,
|
|
"learning_rate": 1.2180355482871831e-07,
|
|
"loss": 0.1555105745792389,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 2.8117117117117116,
|
|
"grad_norm": 9.737932385587456,
|
|
"learning_rate": 1.2065640965432003e-07,
|
|
"loss": 0.4465380012989044,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 2.8126126126126128,
|
|
"grad_norm": 10.501743399687943,
|
|
"learning_rate": 1.1951462599609887e-07,
|
|
"loss": 0.2202143371105194,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 2.8135135135135134,
|
|
"grad_norm": 11.45244426375706,
|
|
"learning_rate": 1.1837820510867015e-07,
|
|
"loss": 0.7149019837379456,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 2.8144144144144145,
|
|
"grad_norm": 9.244790397934926,
|
|
"learning_rate": 1.1724714824075334e-07,
|
|
"loss": 0.2830231785774231,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 2.815315315315315,
|
|
"grad_norm": 12.698000526181215,
|
|
"learning_rate": 1.1612145663517705e-07,
|
|
"loss": 0.8680525422096252,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 2.8162162162162163,
|
|
"grad_norm": 10.379207079081237,
|
|
"learning_rate": 1.1500113152887126e-07,
|
|
"loss": 0.22102048993110657,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 2.817117117117117,
|
|
"grad_norm": 14.270298170762834,
|
|
"learning_rate": 1.138861741528724e-07,
|
|
"loss": 0.9436591267585754,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 2.818018018018018,
|
|
"grad_norm": 13.26213957882597,
|
|
"learning_rate": 1.1277658573231488e-07,
|
|
"loss": 0.3559871017932892,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 2.8189189189189188,
|
|
"grad_norm": 12.621166444214905,
|
|
"learning_rate": 1.1167236748643729e-07,
|
|
"loss": 0.5587183237075806,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 2.81981981981982,
|
|
"grad_norm": 6.363494593930729,
|
|
"learning_rate": 1.1057352062857463e-07,
|
|
"loss": 0.16196994483470917,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 2.820720720720721,
|
|
"grad_norm": 16.66892027792195,
|
|
"learning_rate": 1.0948004636616216e-07,
|
|
"loss": 0.39959704875946045,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 2.8216216216216217,
|
|
"grad_norm": 10.70662559558524,
|
|
"learning_rate": 1.0839194590072932e-07,
|
|
"loss": 0.6919280290603638,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 2.8225225225225223,
|
|
"grad_norm": 16.56026046820519,
|
|
"learning_rate": 1.0730922042790192e-07,
|
|
"loss": 1.7580801248550415,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 2.8234234234234235,
|
|
"grad_norm": 14.565322998042095,
|
|
"learning_rate": 1.0623187113739997e-07,
|
|
"loss": 0.5585309267044067,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 2.8243243243243246,
|
|
"grad_norm": 11.67670268575054,
|
|
"learning_rate": 1.0515989921303427e-07,
|
|
"loss": 0.7268367409706116,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 2.8252252252252252,
|
|
"grad_norm": 7.28452322565033,
|
|
"learning_rate": 1.0409330583271037e-07,
|
|
"loss": 0.3579501807689667,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 2.826126126126126,
|
|
"grad_norm": 13.967372160846384,
|
|
"learning_rate": 1.0303209216841914e-07,
|
|
"loss": 0.3260246515274048,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 2.827027027027027,
|
|
"grad_norm": 8.193213648838276,
|
|
"learning_rate": 1.0197625938624389e-07,
|
|
"loss": 0.4330894947052002,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 2.827927927927928,
|
|
"grad_norm": 7.1412374000814705,
|
|
"learning_rate": 1.0092580864635326e-07,
|
|
"loss": 0.4103037714958191,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 2.828828828828829,
|
|
"grad_norm": 11.615310464679258,
|
|
"learning_rate": 9.988074110300228e-08,
|
|
"loss": 0.48591896891593933,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 2.8297297297297295,
|
|
"grad_norm": 10.031412041495873,
|
|
"learning_rate": 9.884105790453236e-08,
|
|
"loss": 0.4669106602668762,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 2.8306306306306306,
|
|
"grad_norm": 14.658941383654822,
|
|
"learning_rate": 9.780676019336632e-08,
|
|
"loss": 0.6931451559066772,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 2.8315315315315317,
|
|
"grad_norm": 9.597417748980858,
|
|
"learning_rate": 9.677784910601118e-08,
|
|
"loss": 0.3629925549030304,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 2.8324324324324324,
|
|
"grad_norm": 8.526941774340733,
|
|
"learning_rate": 9.57543257730531e-08,
|
|
"loss": 0.2318694144487381,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 2.8333333333333335,
|
|
"grad_norm": 14.408431177296317,
|
|
"learning_rate": 9.473619131916023e-08,
|
|
"loss": 0.23618540167808533,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 2.834234234234234,
|
|
"grad_norm": 18.225393040529905,
|
|
"learning_rate": 9.372344686307655e-08,
|
|
"loss": 0.3627060055732727,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 2.8351351351351353,
|
|
"grad_norm": 9.946450643827884,
|
|
"learning_rate": 9.271609351762689e-08,
|
|
"loss": 0.5038033127784729,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 2.836036036036036,
|
|
"grad_norm": 17.986414138042953,
|
|
"learning_rate": 9.171413238970972e-08,
|
|
"loss": 0.7872669696807861,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 2.836936936936937,
|
|
"grad_norm": 15.127977317489515,
|
|
"learning_rate": 9.071756458029823e-08,
|
|
"loss": 0.351871132850647,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 2.8378378378378377,
|
|
"grad_norm": 11.755022226996932,
|
|
"learning_rate": 8.972639118444204e-08,
|
|
"loss": 0.5332342386245728,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 2.838738738738739,
|
|
"grad_norm": 9.076796300001938,
|
|
"learning_rate": 8.874061329125939e-08,
|
|
"loss": 0.41797953844070435,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 2.8396396396396395,
|
|
"grad_norm": 9.904316253921376,
|
|
"learning_rate": 8.776023198394378e-08,
|
|
"loss": 0.40943342447280884,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 2.8405405405405406,
|
|
"grad_norm": 11.498471545204916,
|
|
"learning_rate": 8.678524833975522e-08,
|
|
"loss": 0.980539083480835,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 2.8414414414414413,
|
|
"grad_norm": 12.455088370462938,
|
|
"learning_rate": 8.581566343002612e-08,
|
|
"loss": 0.31767308712005615,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 2.8423423423423424,
|
|
"grad_norm": 19.403465996640033,
|
|
"learning_rate": 8.485147832015373e-08,
|
|
"loss": 0.49638694524765015,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 2.8432432432432435,
|
|
"grad_norm": 10.368178731883894,
|
|
"learning_rate": 8.389269406960387e-08,
|
|
"loss": 0.43598970770835876,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 2.844144144144144,
|
|
"grad_norm": 8.295404932278744,
|
|
"learning_rate": 8.293931173190661e-08,
|
|
"loss": 0.3116639256477356,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 2.845045045045045,
|
|
"grad_norm": 6.306775114970428,
|
|
"learning_rate": 8.199133235465673e-08,
|
|
"loss": 0.4269426465034485,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 2.845945945945946,
|
|
"grad_norm": 19.210554105758707,
|
|
"learning_rate": 8.104875697951209e-08,
|
|
"loss": 1.2830239534378052,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 2.846846846846847,
|
|
"grad_norm": 8.454835654551928,
|
|
"learning_rate": 8.011158664219254e-08,
|
|
"loss": 0.45161306858062744,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 2.8477477477477477,
|
|
"grad_norm": 14.186434196907499,
|
|
"learning_rate": 7.917982237247934e-08,
|
|
"loss": 0.6510294675827026,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 2.8486486486486484,
|
|
"grad_norm": 11.205340813121374,
|
|
"learning_rate": 7.825346519421184e-08,
|
|
"loss": 0.6806632280349731,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 2.8495495495495495,
|
|
"grad_norm": 9.945073864497756,
|
|
"learning_rate": 7.733251612529024e-08,
|
|
"loss": 0.5039267539978027,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 2.8504504504504506,
|
|
"grad_norm": 11.927579615893958,
|
|
"learning_rate": 7.641697617767008e-08,
|
|
"loss": 0.295940101146698,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 2.8513513513513513,
|
|
"grad_norm": 8.153347724856607,
|
|
"learning_rate": 7.550684635736493e-08,
|
|
"loss": 0.4894489645957947,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 2.852252252252252,
|
|
"grad_norm": 7.8067446075246165,
|
|
"learning_rate": 7.460212766444264e-08,
|
|
"loss": 0.24146123230457306,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 2.853153153153153,
|
|
"grad_norm": 11.148370226951927,
|
|
"learning_rate": 7.370282109302518e-08,
|
|
"loss": 0.5095897912979126,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 2.854054054054054,
|
|
"grad_norm": 16.32831093514919,
|
|
"learning_rate": 7.280892763128766e-08,
|
|
"loss": 0.377160906791687,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 2.854954954954955,
|
|
"grad_norm": 15.46169225419276,
|
|
"learning_rate": 7.192044826145772e-08,
|
|
"loss": 0.8302932381629944,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 2.855855855855856,
|
|
"grad_norm": 6.0030695746135825,
|
|
"learning_rate": 7.103738395981385e-08,
|
|
"loss": 0.32825881242752075,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 2.8567567567567567,
|
|
"grad_norm": 13.482668870269196,
|
|
"learning_rate": 7.015973569668322e-08,
|
|
"loss": 0.3736622631549835,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 2.857657657657658,
|
|
"grad_norm": 11.395520680124969,
|
|
"learning_rate": 6.928750443644272e-08,
|
|
"loss": 0.5651583075523376,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 2.8585585585585584,
|
|
"grad_norm": 14.640898599397087,
|
|
"learning_rate": 6.842069113751737e-08,
|
|
"loss": 0.6570605039596558,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 2.8594594594594596,
|
|
"grad_norm": 10.251298650104035,
|
|
"learning_rate": 6.75592967523775e-08,
|
|
"loss": 0.7141177654266357,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 2.8603603603603602,
|
|
"grad_norm": 9.714012028774386,
|
|
"learning_rate": 6.670332222753984e-08,
|
|
"loss": 0.2499598264694214,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 2.8612612612612613,
|
|
"grad_norm": 12.450561908834207,
|
|
"learning_rate": 6.585276850356648e-08,
|
|
"loss": 0.3667965531349182,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 2.862162162162162,
|
|
"grad_norm": 10.371921540417889,
|
|
"learning_rate": 6.500763651506092e-08,
|
|
"loss": 0.553224503993988,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 2.863063063063063,
|
|
"grad_norm": 11.27330409284051,
|
|
"learning_rate": 6.416792719067144e-08,
|
|
"loss": 0.5520440340042114,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 2.863963963963964,
|
|
"grad_norm": 8.474832985083667,
|
|
"learning_rate": 6.333364145308607e-08,
|
|
"loss": 0.2104385495185852,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 2.864864864864865,
|
|
"grad_norm": 12.020900380707227,
|
|
"learning_rate": 6.250478021903372e-08,
|
|
"loss": 0.4151121973991394,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 2.865765765765766,
|
|
"grad_norm": 15.056271582278818,
|
|
"learning_rate": 6.168134439928364e-08,
|
|
"loss": 0.3605121970176697,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 2.8666666666666667,
|
|
"grad_norm": 9.830821405315397,
|
|
"learning_rate": 6.08633348986426e-08,
|
|
"loss": 0.40905314683914185,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 2.8675675675675674,
|
|
"grad_norm": 9.854758247754067,
|
|
"learning_rate": 6.005075261595495e-08,
|
|
"loss": 0.6086558103561401,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 2.8684684684684685,
|
|
"grad_norm": 7.051560198748801,
|
|
"learning_rate": 5.924359844410199e-08,
|
|
"loss": 0.28362664580345154,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 2.8693693693693696,
|
|
"grad_norm": 10.133311638483319,
|
|
"learning_rate": 5.844187327000039e-08,
|
|
"loss": 0.6039377450942993,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 2.8702702702702703,
|
|
"grad_norm": 15.78844002760415,
|
|
"learning_rate": 5.764557797460046e-08,
|
|
"loss": 0.29750943183898926,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 2.871171171171171,
|
|
"grad_norm": 14.254120783836019,
|
|
"learning_rate": 5.685471343288673e-08,
|
|
"loss": 0.5924072861671448,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 2.872072072072072,
|
|
"grad_norm": 14.95662317760345,
|
|
"learning_rate": 5.606928051387683e-08,
|
|
"loss": 0.813556432723999,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 2.872972972972973,
|
|
"grad_norm": 10.654281488495068,
|
|
"learning_rate": 5.528928008061929e-08,
|
|
"loss": 0.3268616795539856,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 2.873873873873874,
|
|
"grad_norm": 12.154146024935937,
|
|
"learning_rate": 5.451471299019351e-08,
|
|
"loss": 0.29414835572242737,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 2.8747747747747745,
|
|
"grad_norm": 6.0078985583140225,
|
|
"learning_rate": 5.374558009370812e-08,
|
|
"loss": 0.32455912232398987,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 2.8756756756756756,
|
|
"grad_norm": 14.454379249339306,
|
|
"learning_rate": 5.2981882236302075e-08,
|
|
"loss": 0.5916678309440613,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 2.8765765765765767,
|
|
"grad_norm": 14.42246258374093,
|
|
"learning_rate": 5.222362025714078e-08,
|
|
"loss": 0.45917463302612305,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 2.8774774774774774,
|
|
"grad_norm": 9.503167642063966,
|
|
"learning_rate": 5.1470794989416094e-08,
|
|
"loss": 0.37499675154685974,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 2.8783783783783785,
|
|
"grad_norm": 12.330288370609928,
|
|
"learning_rate": 5.0723407260348524e-08,
|
|
"loss": 0.7628058791160583,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 2.879279279279279,
|
|
"grad_norm": 10.40154826672831,
|
|
"learning_rate": 4.998145789118114e-08,
|
|
"loss": 0.886831521987915,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 2.8801801801801803,
|
|
"grad_norm": 11.827194363221171,
|
|
"learning_rate": 4.92449476971818e-08,
|
|
"loss": 0.3936125636100769,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 2.881081081081081,
|
|
"grad_norm": 9.102660423404513,
|
|
"learning_rate": 4.851387748764258e-08,
|
|
"loss": 0.5932949185371399,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 2.881981981981982,
|
|
"grad_norm": 16.516755542278325,
|
|
"learning_rate": 4.7788248065877005e-08,
|
|
"loss": 0.39385271072387695,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 2.8828828828828827,
|
|
"grad_norm": 8.202026157088671,
|
|
"learning_rate": 4.7068060229221165e-08,
|
|
"loss": 0.25490644574165344,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 2.883783783783784,
|
|
"grad_norm": 14.390882359360264,
|
|
"learning_rate": 4.635331476903093e-08,
|
|
"loss": 0.6286637187004089,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 2.8846846846846845,
|
|
"grad_norm": 7.805575653302741,
|
|
"learning_rate": 4.56440124706814e-08,
|
|
"loss": 0.3353833854198456,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 2.8855855855855856,
|
|
"grad_norm": 10.56625474443465,
|
|
"learning_rate": 4.494015411356911e-08,
|
|
"loss": 0.6304891109466553,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 2.8864864864864863,
|
|
"grad_norm": 17.0211055096181,
|
|
"learning_rate": 4.4241740471105964e-08,
|
|
"loss": 0.7264276742935181,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 2.8873873873873874,
|
|
"grad_norm": 9.271331154934623,
|
|
"learning_rate": 4.3548772310723073e-08,
|
|
"loss": 0.3691805303096771,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 2.8882882882882885,
|
|
"grad_norm": 9.185473061048938,
|
|
"learning_rate": 4.2861250393866346e-08,
|
|
"loss": 0.3754459321498871,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 2.889189189189189,
|
|
"grad_norm": 11.97298458517804,
|
|
"learning_rate": 4.217917547599815e-08,
|
|
"loss": 0.47409266233444214,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 2.89009009009009,
|
|
"grad_norm": 7.687386462005143,
|
|
"learning_rate": 4.150254830659617e-08,
|
|
"loss": 0.31540459394454956,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 2.890990990990991,
|
|
"grad_norm": 11.90742204866117,
|
|
"learning_rate": 4.083136962915069e-08,
|
|
"loss": 0.3772083818912506,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 2.891891891891892,
|
|
"grad_norm": 9.649776961840816,
|
|
"learning_rate": 4.0165640181165645e-08,
|
|
"loss": 0.2596743106842041,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 2.8927927927927928,
|
|
"grad_norm": 17.218222829814035,
|
|
"learning_rate": 3.950536069415756e-08,
|
|
"loss": 0.3328412175178528,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 2.8936936936936934,
|
|
"grad_norm": 12.705058714692216,
|
|
"learning_rate": 3.8850531893654396e-08,
|
|
"loss": 0.5901011228561401,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 2.8945945945945946,
|
|
"grad_norm": 11.454782903587885,
|
|
"learning_rate": 3.8201154499193926e-08,
|
|
"loss": 0.8860027194023132,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 2.8954954954954957,
|
|
"grad_norm": 14.386825711705143,
|
|
"learning_rate": 3.755722922432481e-08,
|
|
"loss": 0.5607985854148865,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 2.8963963963963963,
|
|
"grad_norm": 16.020674920081223,
|
|
"learning_rate": 3.6918756776604947e-08,
|
|
"loss": 0.8824067115783691,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 2.8972972972972975,
|
|
"grad_norm": 15.152606697875525,
|
|
"learning_rate": 3.628573785759926e-08,
|
|
"loss": 1.8139207363128662,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 2.898198198198198,
|
|
"grad_norm": 10.61658714741093,
|
|
"learning_rate": 3.565817316288134e-08,
|
|
"loss": 0.7253017425537109,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 2.8990990990990992,
|
|
"grad_norm": 10.366162346838676,
|
|
"learning_rate": 3.503606338203125e-08,
|
|
"loss": 0.6917277574539185,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"grad_norm": 8.260677970686553,
|
|
"learning_rate": 3.441940919863551e-08,
|
|
"loss": 0.27983352541923523,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 2.900900900900901,
|
|
"grad_norm": 11.270135771023659,
|
|
"learning_rate": 3.3808211290284886e-08,
|
|
"loss": 0.40894824266433716,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 2.9018018018018017,
|
|
"grad_norm": 12.724887344162209,
|
|
"learning_rate": 3.3202470328576044e-08,
|
|
"loss": 0.42095816135406494,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 2.902702702702703,
|
|
"grad_norm": 11.13563960252325,
|
|
"learning_rate": 3.260218697910877e-08,
|
|
"loss": 0.2819526791572571,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 2.9036036036036035,
|
|
"grad_norm": 11.215550017411921,
|
|
"learning_rate": 3.2007361901485455e-08,
|
|
"loss": 0.7610554695129395,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 2.9045045045045046,
|
|
"grad_norm": 12.903076807253631,
|
|
"learning_rate": 3.141799574931104e-08,
|
|
"loss": 0.3334919810295105,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 2.9054054054054053,
|
|
"grad_norm": 10.354816353326154,
|
|
"learning_rate": 3.0834089170193035e-08,
|
|
"loss": 0.29180341958999634,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 2.9063063063063064,
|
|
"grad_norm": 7.847481672257274,
|
|
"learning_rate": 3.025564280573878e-08,
|
|
"loss": 0.17764884233474731,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 2.907207207207207,
|
|
"grad_norm": 11.399254349729706,
|
|
"learning_rate": 2.968265729155595e-08,
|
|
"loss": 0.4697573482990265,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 2.908108108108108,
|
|
"grad_norm": 11.498447581785861,
|
|
"learning_rate": 2.9115133257253127e-08,
|
|
"loss": 0.28961923718452454,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 2.909009009009009,
|
|
"grad_norm": 14.312939853206586,
|
|
"learning_rate": 2.8553071326435368e-08,
|
|
"loss": 0.7388343811035156,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 2.90990990990991,
|
|
"grad_norm": 10.45193006078936,
|
|
"learning_rate": 2.7996472116707528e-08,
|
|
"loss": 0.42354726791381836,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 2.910810810810811,
|
|
"grad_norm": 10.57611073120087,
|
|
"learning_rate": 2.7445336239671493e-08,
|
|
"loss": 0.22124624252319336,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 2.9117117117117117,
|
|
"grad_norm": 13.386066094937002,
|
|
"learning_rate": 2.689966430092561e-08,
|
|
"loss": 0.37491780519485474,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 2.9126126126126124,
|
|
"grad_norm": 6.936665276055443,
|
|
"learning_rate": 2.6359456900065804e-08,
|
|
"loss": 0.29225584864616394,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 2.9135135135135135,
|
|
"grad_norm": 16.309786733297162,
|
|
"learning_rate": 2.5824714630680592e-08,
|
|
"loss": 0.532295823097229,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 2.9144144144144146,
|
|
"grad_norm": 8.917212056104644,
|
|
"learning_rate": 2.529543808035606e-08,
|
|
"loss": 0.5746088624000549,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 2.9153153153153153,
|
|
"grad_norm": 11.758951296263918,
|
|
"learning_rate": 2.4771627830670887e-08,
|
|
"loss": 0.7198326587677002,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 2.916216216216216,
|
|
"grad_norm": 11.722109065623714,
|
|
"learning_rate": 2.42532844571991e-08,
|
|
"loss": 0.38108983635902405,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 2.917117117117117,
|
|
"grad_norm": 8.778991805597002,
|
|
"learning_rate": 2.3740408529504545e-08,
|
|
"loss": 0.29683244228363037,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 2.918018018018018,
|
|
"grad_norm": 11.340263744942852,
|
|
"learning_rate": 2.3233000611146418e-08,
|
|
"loss": 0.25810110569000244,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 2.918918918918919,
|
|
"grad_norm": 20.343205243742325,
|
|
"learning_rate": 2.2731061259673726e-08,
|
|
"loss": 0.9457066059112549,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 2.91981981981982,
|
|
"grad_norm": 9.440779790937915,
|
|
"learning_rate": 2.223459102662695e-08,
|
|
"loss": 0.4441452622413635,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 2.9207207207207206,
|
|
"grad_norm": 8.627164592105776,
|
|
"learning_rate": 2.174359045753749e-08,
|
|
"loss": 0.3240242004394531,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 2.9216216216216218,
|
|
"grad_norm": 13.32946293311854,
|
|
"learning_rate": 2.1258060091925435e-08,
|
|
"loss": 0.3010973632335663,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 2.9225225225225224,
|
|
"grad_norm": 8.65777187208865,
|
|
"learning_rate": 2.0778000463301806e-08,
|
|
"loss": 0.29864582419395447,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 2.9234234234234235,
|
|
"grad_norm": 11.767462048271236,
|
|
"learning_rate": 2.0303412099164644e-08,
|
|
"loss": 0.6927505731582642,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 2.924324324324324,
|
|
"grad_norm": 12.306775084343114,
|
|
"learning_rate": 1.9834295521001246e-08,
|
|
"loss": 0.5736360549926758,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 2.9252252252252253,
|
|
"grad_norm": 11.26548459845472,
|
|
"learning_rate": 1.9370651244285387e-08,
|
|
"loss": 0.2858881652355194,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 2.926126126126126,
|
|
"grad_norm": 16.04319591603478,
|
|
"learning_rate": 1.8912479778478433e-08,
|
|
"loss": 0.9276719689369202,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 2.927027027027027,
|
|
"grad_norm": 7.64346475088744,
|
|
"learning_rate": 1.8459781627028217e-08,
|
|
"loss": 0.12898045778274536,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 2.9279279279279278,
|
|
"grad_norm": 11.383312259923118,
|
|
"learning_rate": 1.8012557287367394e-08,
|
|
"loss": 0.5170237421989441,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 2.928828828828829,
|
|
"grad_norm": 6.685223208905962,
|
|
"learning_rate": 1.7570807250915644e-08,
|
|
"loss": 0.19171756505966187,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 2.92972972972973,
|
|
"grad_norm": 6.508299625365426,
|
|
"learning_rate": 1.71345320030758e-08,
|
|
"loss": 0.33970847725868225,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 2.9306306306306307,
|
|
"grad_norm": 10.187987487393286,
|
|
"learning_rate": 1.6703732023235496e-08,
|
|
"loss": 0.22915905714035034,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 2.9315315315315313,
|
|
"grad_norm": 11.619803682327976,
|
|
"learning_rate": 1.6278407784766082e-08,
|
|
"loss": 0.9763340353965759,
|
|
"step": 3254
|
|
},
|
|
{
|
|
"epoch": 2.9324324324324325,
|
|
"grad_norm": 12.667673207105299,
|
|
"learning_rate": 1.585855975502204e-08,
|
|
"loss": 0.3878988027572632,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 2.9333333333333336,
|
|
"grad_norm": 9.631707717548007,
|
|
"learning_rate": 1.54441883953399e-08,
|
|
"loss": 0.4277806282043457,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 2.9342342342342342,
|
|
"grad_norm": 17.748096278629962,
|
|
"learning_rate": 1.5035294161039882e-08,
|
|
"loss": 0.5856724381446838,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 2.935135135135135,
|
|
"grad_norm": 12.035952523628472,
|
|
"learning_rate": 1.4631877501422032e-08,
|
|
"loss": 0.4578258991241455,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 2.936036036036036,
|
|
"grad_norm": 8.457963427946522,
|
|
"learning_rate": 1.4233938859767871e-08,
|
|
"loss": 0.6646236777305603,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 2.936936936936937,
|
|
"grad_norm": 10.507764583532897,
|
|
"learning_rate": 1.3841478673341512e-08,
|
|
"loss": 0.3159530758857727,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 2.937837837837838,
|
|
"grad_norm": 11.62267773905207,
|
|
"learning_rate": 1.3454497373384113e-08,
|
|
"loss": 0.3774804472923279,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 2.9387387387387385,
|
|
"grad_norm": 13.679348849536279,
|
|
"learning_rate": 1.3072995385119414e-08,
|
|
"loss": 0.6660423874855042,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 2.9396396396396396,
|
|
"grad_norm": 11.23337226179496,
|
|
"learning_rate": 1.2696973127747647e-08,
|
|
"loss": 0.28214752674102783,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 2.9405405405405407,
|
|
"grad_norm": 9.58744730143738,
|
|
"learning_rate": 1.232643101445108e-08,
|
|
"loss": 0.4660099744796753,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 2.9414414414414414,
|
|
"grad_norm": 14.04262320731674,
|
|
"learning_rate": 1.1961369452386795e-08,
|
|
"loss": 0.21129421889781952,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 2.9423423423423425,
|
|
"grad_norm": 19.16431899563648,
|
|
"learning_rate": 1.1601788842692807e-08,
|
|
"loss": 0.33819282054901123,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 2.943243243243243,
|
|
"grad_norm": 9.506793879772301,
|
|
"learning_rate": 1.1247689580481947e-08,
|
|
"loss": 0.5624006986618042,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 2.9441441441441443,
|
|
"grad_norm": 10.162640460978432,
|
|
"learning_rate": 1.0899072054846305e-08,
|
|
"loss": 0.8094921708106995,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 2.945045045045045,
|
|
"grad_norm": 14.152942052843835,
|
|
"learning_rate": 1.0555936648853348e-08,
|
|
"loss": 0.3222825527191162,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 2.945945945945946,
|
|
"grad_norm": 15.300664485221406,
|
|
"learning_rate": 1.021828373954592e-08,
|
|
"loss": 0.6321902275085449,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 2.9468468468468467,
|
|
"grad_norm": 12.215805986837733,
|
|
"learning_rate": 9.886113697944454e-09,
|
|
"loss": 0.2645990252494812,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 2.947747747747748,
|
|
"grad_norm": 16.822664069918225,
|
|
"learning_rate": 9.5594268890431e-09,
|
|
"loss": 0.49241942167282104,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 2.9486486486486485,
|
|
"grad_norm": 13.855780315823067,
|
|
"learning_rate": 9.238223671812485e-09,
|
|
"loss": 0.7209813594818115,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 2.9495495495495496,
|
|
"grad_norm": 23.393334692473292,
|
|
"learning_rate": 8.922504399195842e-09,
|
|
"loss": 0.6558884382247925,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 2.9504504504504503,
|
|
"grad_norm": 15.308223380421072,
|
|
"learning_rate": 8.612269418111774e-09,
|
|
"loss": 0.4492531418800354,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 2.9513513513513514,
|
|
"grad_norm": 16.769733893507475,
|
|
"learning_rate": 8.307519069453151e-09,
|
|
"loss": 0.410381942987442,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 2.9522522522522525,
|
|
"grad_norm": 11.516254837208239,
|
|
"learning_rate": 8.008253688084888e-09,
|
|
"loss": 0.2751805782318115,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 2.953153153153153,
|
|
"grad_norm": 7.851398966518017,
|
|
"learning_rate": 7.714473602845052e-09,
|
|
"loss": 0.3193674683570862,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 2.954054054054054,
|
|
"grad_norm": 10.429980279549909,
|
|
"learning_rate": 7.426179136545974e-09,
|
|
"loss": 0.38420000672340393,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 2.954954954954955,
|
|
"grad_norm": 12.064185011961586,
|
|
"learning_rate": 7.143370605970368e-09,
|
|
"loss": 1.0094915628433228,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 2.955855855855856,
|
|
"grad_norm": 13.338177731424542,
|
|
"learning_rate": 6.866048321873542e-09,
|
|
"loss": 0.2732114791870117,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 2.9567567567567568,
|
|
"grad_norm": 20.053017422890512,
|
|
"learning_rate": 6.594212588983406e-09,
|
|
"loss": 0.7617955803871155,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 2.9576576576576574,
|
|
"grad_norm": 12.413155284763997,
|
|
"learning_rate": 6.327863705997139e-09,
|
|
"loss": 0.27530044317245483,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 2.9585585585585585,
|
|
"grad_norm": 12.599218730023678,
|
|
"learning_rate": 6.067001965584518e-09,
|
|
"loss": 0.7731039524078369,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 2.9594594594594597,
|
|
"grad_norm": 13.009718545098739,
|
|
"learning_rate": 5.811627654386254e-09,
|
|
"loss": 0.6429943442344666,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 2.9603603603603603,
|
|
"grad_norm": 9.607637414599234,
|
|
"learning_rate": 5.561741053010661e-09,
|
|
"loss": 0.5738593935966492,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 2.961261261261261,
|
|
"grad_norm": 32.66930142798836,
|
|
"learning_rate": 5.317342436039763e-09,
|
|
"loss": 0.9971611499786377,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 2.962162162162162,
|
|
"grad_norm": 12.894644868161095,
|
|
"learning_rate": 5.078432072022077e-09,
|
|
"loss": 0.5130572319030762,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 2.963063063063063,
|
|
"grad_norm": 8.937462133502917,
|
|
"learning_rate": 4.845010223478164e-09,
|
|
"loss": 0.275386244058609,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 2.963963963963964,
|
|
"grad_norm": 11.189473389354694,
|
|
"learning_rate": 4.617077146895077e-09,
|
|
"loss": 0.30980658531188965,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 2.964864864864865,
|
|
"grad_norm": 15.211661756409315,
|
|
"learning_rate": 4.394633092730805e-09,
|
|
"loss": 0.43425253033638,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 2.9657657657657657,
|
|
"grad_norm": 10.47742440838181,
|
|
"learning_rate": 4.177678305411492e-09,
|
|
"loss": 0.31434789299964905,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 2.966666666666667,
|
|
"grad_norm": 9.206209319652455,
|
|
"learning_rate": 3.9662130233303345e-09,
|
|
"loss": 0.7355430722236633,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 2.9675675675675675,
|
|
"grad_norm": 15.258599512761888,
|
|
"learning_rate": 3.760237478849793e-09,
|
|
"loss": 0.35044312477111816,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 2.9684684684684686,
|
|
"grad_norm": 13.741751480249235,
|
|
"learning_rate": 3.5597518982999346e-09,
|
|
"loss": 0.2986290752887726,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 2.9693693693693692,
|
|
"grad_norm": 9.978224016439794,
|
|
"learning_rate": 3.364756501977873e-09,
|
|
"loss": 0.3490036427974701,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 2.9702702702702704,
|
|
"grad_norm": 7.5368131890603465,
|
|
"learning_rate": 3.1752515041483247e-09,
|
|
"loss": 0.6433002948760986,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 2.971171171171171,
|
|
"grad_norm": 10.181178482993774,
|
|
"learning_rate": 2.99123711304361e-09,
|
|
"loss": 0.23826146125793457,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 2.972072072072072,
|
|
"grad_norm": 10.131560555517467,
|
|
"learning_rate": 2.812713530861433e-09,
|
|
"loss": 0.525177538394928,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 2.972972972972973,
|
|
"grad_norm": 10.39489764496429,
|
|
"learning_rate": 2.639680953767099e-09,
|
|
"loss": 0.195095032453537,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 2.973873873873874,
|
|
"grad_norm": 13.987087200682417,
|
|
"learning_rate": 2.472139571892407e-09,
|
|
"loss": 0.6874642968177795,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 2.974774774774775,
|
|
"grad_norm": 8.661015237190858,
|
|
"learning_rate": 2.3100895693350946e-09,
|
|
"loss": 0.22578030824661255,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 2.9756756756756757,
|
|
"grad_norm": 15.381673495975587,
|
|
"learning_rate": 2.1535311241582813e-09,
|
|
"loss": 0.743354856967926,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 2.9765765765765764,
|
|
"grad_norm": 10.7092406635183,
|
|
"learning_rate": 2.002464408392135e-09,
|
|
"loss": 0.2619395852088928,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 2.9774774774774775,
|
|
"grad_norm": 10.59307492735683,
|
|
"learning_rate": 1.8568895880305415e-09,
|
|
"loss": 0.5660840272903442,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 2.9783783783783786,
|
|
"grad_norm": 10.067598366755512,
|
|
"learning_rate": 1.7168068230349888e-09,
|
|
"loss": 0.33253148198127747,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 2.9792792792792793,
|
|
"grad_norm": 12.974608439563891,
|
|
"learning_rate": 1.5822162673312381e-09,
|
|
"loss": 0.2697274386882782,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 2.98018018018018,
|
|
"grad_norm": 27.404269266331838,
|
|
"learning_rate": 1.4531180688087676e-09,
|
|
"loss": 0.7453837394714355,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 2.981081081081081,
|
|
"grad_norm": 14.465097789448581,
|
|
"learning_rate": 1.329512369324104e-09,
|
|
"loss": 0.4750048816204071,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 2.981981981981982,
|
|
"grad_norm": 12.462823206244318,
|
|
"learning_rate": 1.2113993046969364e-09,
|
|
"loss": 0.40287506580352783,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 2.982882882882883,
|
|
"grad_norm": 9.858468113014286,
|
|
"learning_rate": 1.098779004712891e-09,
|
|
"loss": 0.7160459756851196,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 2.983783783783784,
|
|
"grad_norm": 16.969565265560316,
|
|
"learning_rate": 9.91651593120757e-10,
|
|
"loss": 0.35625120997428894,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 2.9846846846846846,
|
|
"grad_norm": 5.912720869543489,
|
|
"learning_rate": 8.900171876341512e-10,
|
|
"loss": 0.18205636739730835,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 2.9855855855855857,
|
|
"grad_norm": 8.846669541838752,
|
|
"learning_rate": 7.938758999315177e-10,
|
|
"loss": 0.27084779739379883,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 2.9864864864864864,
|
|
"grad_norm": 13.619433761053685,
|
|
"learning_rate": 7.032278356544631e-10,
|
|
"loss": 0.3235873579978943,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 2.9873873873873875,
|
|
"grad_norm": 7.542452056253367,
|
|
"learning_rate": 6.180730944083113e-10,
|
|
"loss": 0.5045291781425476,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 2.988288288288288,
|
|
"grad_norm": 11.396204561080522,
|
|
"learning_rate": 5.384117697632141e-10,
|
|
"loss": 0.838004469871521,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 2.9891891891891893,
|
|
"grad_norm": 10.738421241795267,
|
|
"learning_rate": 4.642439492519302e-10,
|
|
"loss": 0.3510596752166748,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 2.99009009009009,
|
|
"grad_norm": 10.018582234235625,
|
|
"learning_rate": 3.9556971437260117e-10,
|
|
"loss": 0.31141674518585205,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 2.990990990990991,
|
|
"grad_norm": 12.65715526365031,
|
|
"learning_rate": 3.323891405848656e-10,
|
|
"loss": 0.54250568151474,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 2.9918918918918918,
|
|
"grad_norm": 11.954924640049924,
|
|
"learning_rate": 2.747022973131896e-10,
|
|
"loss": 0.4889550805091858,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 2.992792792792793,
|
|
"grad_norm": 9.086500695806155,
|
|
"learning_rate": 2.2250924794520179e-10,
|
|
"loss": 0.6249538660049438,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 2.9936936936936935,
|
|
"grad_norm": 11.255665880191785,
|
|
"learning_rate": 1.758100498311377e-10,
|
|
"loss": 0.4605242908000946,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 2.9945945945945946,
|
|
"grad_norm": 8.32903205574773,
|
|
"learning_rate": 1.3460475428495046e-10,
|
|
"loss": 0.2069612741470337,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 2.9954954954954953,
|
|
"grad_norm": 9.889481233057783,
|
|
"learning_rate": 9.889340658375545e-11,
|
|
"loss": 0.729750394821167,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 2.9963963963963964,
|
|
"grad_norm": 17.60998683169484,
|
|
"learning_rate": 6.867604596838551e-11,
|
|
"loss": 0.2950170636177063,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 2.9972972972972975,
|
|
"grad_norm": 10.062760639643265,
|
|
"learning_rate": 4.395270564172549e-11,
|
|
"loss": 0.38005125522613525,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 2.998198198198198,
|
|
"grad_norm": 10.526283818405414,
|
|
"learning_rate": 2.4723412770377708e-11,
|
|
"loss": 0.3282592296600342,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 2.999099099099099,
|
|
"grad_norm": 10.437410474566098,
|
|
"learning_rate": 1.0988188484661878e-11,
|
|
"loss": 0.7834963202476501,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 8.285804422799801,
|
|
"learning_rate": 2.7470478758395702e-12,
|
|
"loss": 0.17721490561962128,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 3330,
|
|
"total_flos": 8949330124800.0,
|
|
"train_loss": 1.6578803809801559,
|
|
"train_runtime": 3829.0681,
|
|
"train_samples_per_second": 3.476,
|
|
"train_steps_per_second": 0.87
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 3330,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 8949330124800.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|