15434 lines
373 KiB
JSON
15434 lines
373 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 2.6490066225165565,
|
|
"eval_steps": 500,
|
|
"global_step": 22000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0012040939193257074,
|
|
"grad_norm": 9.316359519958496,
|
|
"learning_rate": 3.0102347983142685e-09,
|
|
"loss": 0.9861,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.002408187838651415,
|
|
"grad_norm": 9.306455612182617,
|
|
"learning_rate": 6.020469596628537e-09,
|
|
"loss": 0.9917,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.003612281757977122,
|
|
"grad_norm": 10.237154006958008,
|
|
"learning_rate": 9.030704394942806e-09,
|
|
"loss": 0.9875,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.00481637567730283,
|
|
"grad_norm": 10.9087553024292,
|
|
"learning_rate": 1.2040939193257074e-08,
|
|
"loss": 1.0501,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.006020469596628537,
|
|
"grad_norm": 8.430444717407227,
|
|
"learning_rate": 1.5051173991571343e-08,
|
|
"loss": 0.9665,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.007224563515954244,
|
|
"grad_norm": 8.640472412109375,
|
|
"learning_rate": 1.8061408789885613e-08,
|
|
"loss": 1.0249,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.008428657435279952,
|
|
"grad_norm": 8.656485557556152,
|
|
"learning_rate": 2.107164358819988e-08,
|
|
"loss": 0.9942,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.00963275135460566,
|
|
"grad_norm": 11.827771186828613,
|
|
"learning_rate": 2.4081878386514148e-08,
|
|
"loss": 1.0359,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.010836845273931367,
|
|
"grad_norm": 7.204784870147705,
|
|
"learning_rate": 2.7092113184828417e-08,
|
|
"loss": 0.9532,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.012040939193257074,
|
|
"grad_norm": 7.546351432800293,
|
|
"learning_rate": 3.010234798314269e-08,
|
|
"loss": 1.0236,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.013245033112582781,
|
|
"grad_norm": 10.156384468078613,
|
|
"learning_rate": 3.311258278145695e-08,
|
|
"loss": 1.0031,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.014449127031908489,
|
|
"grad_norm": 9.753625869750977,
|
|
"learning_rate": 3.6122817579771225e-08,
|
|
"loss": 1.016,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.015653220951234198,
|
|
"grad_norm": 8.019911766052246,
|
|
"learning_rate": 3.9133052378085485e-08,
|
|
"loss": 0.9972,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.016857314870559904,
|
|
"grad_norm": 8.1049165725708,
|
|
"learning_rate": 4.214328717639976e-08,
|
|
"loss": 0.984,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.018061408789885613,
|
|
"grad_norm": 8.9203462600708,
|
|
"learning_rate": 4.5153521974714023e-08,
|
|
"loss": 0.9811,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.01926550270921132,
|
|
"grad_norm": 8.825779914855957,
|
|
"learning_rate": 4.8163756773028296e-08,
|
|
"loss": 1.0207,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.020469596628537028,
|
|
"grad_norm": 8.64220142364502,
|
|
"learning_rate": 5.117399157134256e-08,
|
|
"loss": 0.9971,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.021673690547862733,
|
|
"grad_norm": 8.120660781860352,
|
|
"learning_rate": 5.4184226369656835e-08,
|
|
"loss": 0.9606,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.022877784467188442,
|
|
"grad_norm": 8.181641578674316,
|
|
"learning_rate": 5.71944611679711e-08,
|
|
"loss": 0.9516,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.024081878386514148,
|
|
"grad_norm": 7.607439994812012,
|
|
"learning_rate": 6.020469596628537e-08,
|
|
"loss": 0.9919,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.025285972305839857,
|
|
"grad_norm": 7.434635162353516,
|
|
"learning_rate": 6.321493076459963e-08,
|
|
"loss": 0.9991,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.026490066225165563,
|
|
"grad_norm": 7.569486141204834,
|
|
"learning_rate": 6.62251655629139e-08,
|
|
"loss": 0.984,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.027694160144491272,
|
|
"grad_norm": 7.499971389770508,
|
|
"learning_rate": 6.923540036122818e-08,
|
|
"loss": 0.9598,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.028898254063816978,
|
|
"grad_norm": 6.992701053619385,
|
|
"learning_rate": 7.224563515954245e-08,
|
|
"loss": 0.905,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.030102347983142687,
|
|
"grad_norm": 6.3157877922058105,
|
|
"learning_rate": 7.525586995785671e-08,
|
|
"loss": 0.9493,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.031306441902468396,
|
|
"grad_norm": 6.263482570648193,
|
|
"learning_rate": 7.826610475617097e-08,
|
|
"loss": 0.9501,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.0325105358217941,
|
|
"grad_norm": 6.178393840789795,
|
|
"learning_rate": 8.127633955448524e-08,
|
|
"loss": 0.9056,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.03371462974111981,
|
|
"grad_norm": 4.896974086761475,
|
|
"learning_rate": 8.428657435279951e-08,
|
|
"loss": 0.8679,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.034918723660445516,
|
|
"grad_norm": 5.896145820617676,
|
|
"learning_rate": 8.729680915111379e-08,
|
|
"loss": 0.8658,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.036122817579771226,
|
|
"grad_norm": 5.6855573654174805,
|
|
"learning_rate": 9.030704394942805e-08,
|
|
"loss": 0.9227,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.03732691149909693,
|
|
"grad_norm": 4.907613277435303,
|
|
"learning_rate": 9.331727874774232e-08,
|
|
"loss": 0.8581,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.03853100541842264,
|
|
"grad_norm": 6.029637336730957,
|
|
"learning_rate": 9.632751354605659e-08,
|
|
"loss": 0.8184,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.039735099337748346,
|
|
"grad_norm": 5.0958333015441895,
|
|
"learning_rate": 9.933774834437085e-08,
|
|
"loss": 0.8524,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.040939193257074055,
|
|
"grad_norm": 6.18320369720459,
|
|
"learning_rate": 1.0234798314268512e-07,
|
|
"loss": 0.8371,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.04214328717639976,
|
|
"grad_norm": 4.874738693237305,
|
|
"learning_rate": 1.0535821794099938e-07,
|
|
"loss": 0.8348,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.04334738109572547,
|
|
"grad_norm": 5.273070812225342,
|
|
"learning_rate": 1.0836845273931367e-07,
|
|
"loss": 0.8286,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.044551475015051176,
|
|
"grad_norm": 5.052524089813232,
|
|
"learning_rate": 1.1137868753762793e-07,
|
|
"loss": 0.7585,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.045755568934376885,
|
|
"grad_norm": 4.216408729553223,
|
|
"learning_rate": 1.143889223359422e-07,
|
|
"loss": 0.7869,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.04695966285370259,
|
|
"grad_norm": 5.456339359283447,
|
|
"learning_rate": 1.1739915713425646e-07,
|
|
"loss": 0.7699,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.048163756773028296,
|
|
"grad_norm": 4.809760093688965,
|
|
"learning_rate": 1.2040939193257075e-07,
|
|
"loss": 0.763,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.049367850692354005,
|
|
"grad_norm": 4.933152675628662,
|
|
"learning_rate": 1.23419626730885e-07,
|
|
"loss": 0.7192,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.050571944611679714,
|
|
"grad_norm": 5.025005340576172,
|
|
"learning_rate": 1.2642986152919927e-07,
|
|
"loss": 0.7092,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.05177603853100542,
|
|
"grad_norm": 4.338512420654297,
|
|
"learning_rate": 1.2944009632751355e-07,
|
|
"loss": 0.7346,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.052980132450331126,
|
|
"grad_norm": 4.557036399841309,
|
|
"learning_rate": 1.324503311258278e-07,
|
|
"loss": 0.723,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.054184226369656835,
|
|
"grad_norm": 4.911799907684326,
|
|
"learning_rate": 1.3546056592414207e-07,
|
|
"loss": 0.7673,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.055388320288982544,
|
|
"grad_norm": 4.063588619232178,
|
|
"learning_rate": 1.3847080072245636e-07,
|
|
"loss": 0.708,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.056592414208308246,
|
|
"grad_norm": 4.037914752960205,
|
|
"learning_rate": 1.4148103552077062e-07,
|
|
"loss": 0.7641,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.057796508127633955,
|
|
"grad_norm": 4.673463344573975,
|
|
"learning_rate": 1.444912703190849e-07,
|
|
"loss": 0.7196,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.059000602046959665,
|
|
"grad_norm": 5.096141815185547,
|
|
"learning_rate": 1.4750150511739913e-07,
|
|
"loss": 0.689,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.060204695966285374,
|
|
"grad_norm": 4.904088973999023,
|
|
"learning_rate": 1.5051173991571342e-07,
|
|
"loss": 0.7211,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.061408789885611076,
|
|
"grad_norm": 5.234721660614014,
|
|
"learning_rate": 1.535219747140277e-07,
|
|
"loss": 0.7091,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.06261288380493679,
|
|
"grad_norm": 4.105505466461182,
|
|
"learning_rate": 1.5653220951234194e-07,
|
|
"loss": 0.7607,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.0638169777242625,
|
|
"grad_norm": 4.666725158691406,
|
|
"learning_rate": 1.5954244431065622e-07,
|
|
"loss": 0.7158,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.0650210716435882,
|
|
"grad_norm": 4.976656436920166,
|
|
"learning_rate": 1.6255267910897048e-07,
|
|
"loss": 0.7401,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.06622516556291391,
|
|
"grad_norm": 5.044974327087402,
|
|
"learning_rate": 1.6556291390728477e-07,
|
|
"loss": 0.6685,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.06742925948223961,
|
|
"grad_norm": 4.7259368896484375,
|
|
"learning_rate": 1.6857314870559903e-07,
|
|
"loss": 0.6866,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.06863335340156532,
|
|
"grad_norm": 5.358945369720459,
|
|
"learning_rate": 1.715833835039133e-07,
|
|
"loss": 0.7059,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.06983744732089103,
|
|
"grad_norm": 5.156592845916748,
|
|
"learning_rate": 1.7459361830222757e-07,
|
|
"loss": 0.7111,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.07104154124021674,
|
|
"grad_norm": 4.320924282073975,
|
|
"learning_rate": 1.7760385310054183e-07,
|
|
"loss": 0.6862,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.07224563515954245,
|
|
"grad_norm": 4.59999418258667,
|
|
"learning_rate": 1.806140878988561e-07,
|
|
"loss": 0.6893,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.07344972907886815,
|
|
"grad_norm": 4.4846391677856445,
|
|
"learning_rate": 1.8362432269717038e-07,
|
|
"loss": 0.7128,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.07465382299819386,
|
|
"grad_norm": 5.029007911682129,
|
|
"learning_rate": 1.8663455749548464e-07,
|
|
"loss": 0.7114,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.07585791691751957,
|
|
"grad_norm": 4.288726806640625,
|
|
"learning_rate": 1.896447922937989e-07,
|
|
"loss": 0.6848,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.07706201083684527,
|
|
"grad_norm": 4.063099384307861,
|
|
"learning_rate": 1.9265502709211318e-07,
|
|
"loss": 0.659,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.07826610475617098,
|
|
"grad_norm": 4.031120300292969,
|
|
"learning_rate": 1.9566526189042744e-07,
|
|
"loss": 0.6802,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.07947019867549669,
|
|
"grad_norm": 5.234511852264404,
|
|
"learning_rate": 1.986754966887417e-07,
|
|
"loss": 0.6685,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.0806742925948224,
|
|
"grad_norm": 5.434250831604004,
|
|
"learning_rate": 2.01685731487056e-07,
|
|
"loss": 0.6762,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.08187838651414811,
|
|
"grad_norm": 5.326634407043457,
|
|
"learning_rate": 2.0469596628537025e-07,
|
|
"loss": 0.6951,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.08308248043347381,
|
|
"grad_norm": 3.630930185317993,
|
|
"learning_rate": 2.0770620108368453e-07,
|
|
"loss": 0.6445,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.08428657435279951,
|
|
"grad_norm": 5.273288726806641,
|
|
"learning_rate": 2.1071643588199877e-07,
|
|
"loss": 0.65,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.08549066827212523,
|
|
"grad_norm": 4.212562084197998,
|
|
"learning_rate": 2.1372667068031305e-07,
|
|
"loss": 0.6485,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.08669476219145093,
|
|
"grad_norm": 4.293779373168945,
|
|
"learning_rate": 2.1673690547862734e-07,
|
|
"loss": 0.6734,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.08789885611077664,
|
|
"grad_norm": 4.917520999908447,
|
|
"learning_rate": 2.1974714027694157e-07,
|
|
"loss": 0.6593,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.08910295003010235,
|
|
"grad_norm": 4.624716281890869,
|
|
"learning_rate": 2.2275737507525586e-07,
|
|
"loss": 0.6712,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.09030704394942805,
|
|
"grad_norm": 5.3648552894592285,
|
|
"learning_rate": 2.2576760987357014e-07,
|
|
"loss": 0.6481,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.09151113786875377,
|
|
"grad_norm": 4.328650951385498,
|
|
"learning_rate": 2.287778446718844e-07,
|
|
"loss": 0.6418,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.09271523178807947,
|
|
"grad_norm": 4.933085918426514,
|
|
"learning_rate": 2.3178807947019866e-07,
|
|
"loss": 0.6622,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.09391932570740517,
|
|
"grad_norm": 4.703038215637207,
|
|
"learning_rate": 2.3479831426851292e-07,
|
|
"loss": 0.6317,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.09512341962673089,
|
|
"grad_norm": 4.468968391418457,
|
|
"learning_rate": 2.378085490668272e-07,
|
|
"loss": 0.6431,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.09632751354605659,
|
|
"grad_norm": 4.49053430557251,
|
|
"learning_rate": 2.408187838651415e-07,
|
|
"loss": 0.6212,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.0975316074653823,
|
|
"grad_norm": 4.350528240203857,
|
|
"learning_rate": 2.438290186634557e-07,
|
|
"loss": 0.623,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.09873570138470801,
|
|
"grad_norm": 4.772309303283691,
|
|
"learning_rate": 2.4683925346177e-07,
|
|
"loss": 0.6268,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.09993979530403371,
|
|
"grad_norm": 5.437624931335449,
|
|
"learning_rate": 2.498494882600843e-07,
|
|
"loss": 0.6497,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.10114388922335943,
|
|
"grad_norm": 4.474155902862549,
|
|
"learning_rate": 2.5285972305839853e-07,
|
|
"loss": 0.6092,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.10234798314268513,
|
|
"grad_norm": 4.417370796203613,
|
|
"learning_rate": 2.558699578567128e-07,
|
|
"loss": 0.6587,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.10355207706201083,
|
|
"grad_norm": 5.5498456954956055,
|
|
"learning_rate": 2.588801926550271e-07,
|
|
"loss": 0.6564,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.10475617098133655,
|
|
"grad_norm": 5.326292514801025,
|
|
"learning_rate": 2.6189042745334134e-07,
|
|
"loss": 0.6333,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.10596026490066225,
|
|
"grad_norm": 4.284069538116455,
|
|
"learning_rate": 2.649006622516556e-07,
|
|
"loss": 0.6325,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.10716435881998795,
|
|
"grad_norm": 4.672844886779785,
|
|
"learning_rate": 2.679108970499699e-07,
|
|
"loss": 0.6046,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.10836845273931367,
|
|
"grad_norm": 4.223860740661621,
|
|
"learning_rate": 2.7092113184828414e-07,
|
|
"loss": 0.6094,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.10957254665863937,
|
|
"grad_norm": 4.813838005065918,
|
|
"learning_rate": 2.739313666465984e-07,
|
|
"loss": 0.6332,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.11077664057796509,
|
|
"grad_norm": 3.5245296955108643,
|
|
"learning_rate": 2.769416014449127e-07,
|
|
"loss": 0.6161,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.11198073449729079,
|
|
"grad_norm": 4.577372074127197,
|
|
"learning_rate": 2.7995183624322695e-07,
|
|
"loss": 0.6254,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.11318482841661649,
|
|
"grad_norm": 4.295224666595459,
|
|
"learning_rate": 2.8296207104154123e-07,
|
|
"loss": 0.6089,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.11438892233594221,
|
|
"grad_norm": 4.899755477905273,
|
|
"learning_rate": 2.8597230583985546e-07,
|
|
"loss": 0.6255,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.11559301625526791,
|
|
"grad_norm": 5.047530651092529,
|
|
"learning_rate": 2.889825406381698e-07,
|
|
"loss": 0.6273,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.11679711017459361,
|
|
"grad_norm": 4.75305700302124,
|
|
"learning_rate": 2.9199277543648404e-07,
|
|
"loss": 0.6277,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.11800120409391933,
|
|
"grad_norm": 5.476251602172852,
|
|
"learning_rate": 2.9500301023479827e-07,
|
|
"loss": 0.6121,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.11920529801324503,
|
|
"grad_norm": 6.20451021194458,
|
|
"learning_rate": 2.980132450331126e-07,
|
|
"loss": 0.6121,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.12040939193257075,
|
|
"grad_norm": 4.61058235168457,
|
|
"learning_rate": 3.0102347983142684e-07,
|
|
"loss": 0.5862,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.12161348585189645,
|
|
"grad_norm": 4.537725925445557,
|
|
"learning_rate": 3.0403371462974107e-07,
|
|
"loss": 0.6186,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.12281757977122215,
|
|
"grad_norm": 4.347688674926758,
|
|
"learning_rate": 3.070439494280554e-07,
|
|
"loss": 0.6127,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.12402167369054787,
|
|
"grad_norm": 4.965167045593262,
|
|
"learning_rate": 3.1005418422636965e-07,
|
|
"loss": 0.6026,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.12522576760987358,
|
|
"grad_norm": 4.610491752624512,
|
|
"learning_rate": 3.130644190246839e-07,
|
|
"loss": 0.6327,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.12642986152919927,
|
|
"grad_norm": 5.292304039001465,
|
|
"learning_rate": 3.160746538229982e-07,
|
|
"loss": 0.5972,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.127633955448525,
|
|
"grad_norm": 6.372762680053711,
|
|
"learning_rate": 3.1908488862131245e-07,
|
|
"loss": 0.6393,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.1288380493678507,
|
|
"grad_norm": 5.56066370010376,
|
|
"learning_rate": 3.220951234196267e-07,
|
|
"loss": 0.6196,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.1300421432871764,
|
|
"grad_norm": 4.777896881103516,
|
|
"learning_rate": 3.2510535821794097e-07,
|
|
"loss": 0.6217,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.1312462372065021,
|
|
"grad_norm": 4.9745683670043945,
|
|
"learning_rate": 3.2811559301625525e-07,
|
|
"loss": 0.6189,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.13245033112582782,
|
|
"grad_norm": 3.71576189994812,
|
|
"learning_rate": 3.3112582781456954e-07,
|
|
"loss": 0.5893,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.1336544250451535,
|
|
"grad_norm": 4.458312034606934,
|
|
"learning_rate": 3.3413606261288377e-07,
|
|
"loss": 0.5951,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.13485851896447923,
|
|
"grad_norm": 4.835500240325928,
|
|
"learning_rate": 3.3714629741119806e-07,
|
|
"loss": 0.5791,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.13606261288380495,
|
|
"grad_norm": 4.516515254974365,
|
|
"learning_rate": 3.4015653220951235e-07,
|
|
"loss": 0.5759,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.13726670680313063,
|
|
"grad_norm": 5.564052104949951,
|
|
"learning_rate": 3.431667670078266e-07,
|
|
"loss": 0.5791,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.13847080072245635,
|
|
"grad_norm": 5.586264610290527,
|
|
"learning_rate": 3.4617700180614086e-07,
|
|
"loss": 0.6142,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.13967489464178207,
|
|
"grad_norm": 4.408708572387695,
|
|
"learning_rate": 3.4918723660445515e-07,
|
|
"loss": 0.617,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.14087898856110775,
|
|
"grad_norm": 4.4068403244018555,
|
|
"learning_rate": 3.521974714027694e-07,
|
|
"loss": 0.6099,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.14208308248043347,
|
|
"grad_norm": 3.947399854660034,
|
|
"learning_rate": 3.5520770620108367e-07,
|
|
"loss": 0.5555,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.1432871763997592,
|
|
"grad_norm": 5.264540195465088,
|
|
"learning_rate": 3.5821794099939795e-07,
|
|
"loss": 0.5556,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.1444912703190849,
|
|
"grad_norm": 4.486605644226074,
|
|
"learning_rate": 3.612281757977122e-07,
|
|
"loss": 0.5997,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.1456953642384106,
|
|
"grad_norm": 6.195891857147217,
|
|
"learning_rate": 3.642384105960264e-07,
|
|
"loss": 0.6104,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.1468994581577363,
|
|
"grad_norm": 4.5443572998046875,
|
|
"learning_rate": 3.6724864539434076e-07,
|
|
"loss": 0.5806,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.14810355207706202,
|
|
"grad_norm": 4.380715370178223,
|
|
"learning_rate": 3.70258880192655e-07,
|
|
"loss": 0.5759,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.1493076459963877,
|
|
"grad_norm": 5.033191680908203,
|
|
"learning_rate": 3.732691149909693e-07,
|
|
"loss": 0.5782,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.15051173991571343,
|
|
"grad_norm": 4.244385719299316,
|
|
"learning_rate": 3.7627934978928356e-07,
|
|
"loss": 0.5658,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.15171583383503914,
|
|
"grad_norm": 4.332985877990723,
|
|
"learning_rate": 3.792895845875978e-07,
|
|
"loss": 0.5702,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.15291992775436483,
|
|
"grad_norm": 4.5175628662109375,
|
|
"learning_rate": 3.822998193859121e-07,
|
|
"loss": 0.5588,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.15412402167369055,
|
|
"grad_norm": 4.519990921020508,
|
|
"learning_rate": 3.8531005418422637e-07,
|
|
"loss": 0.5871,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.15532811559301626,
|
|
"grad_norm": 4.500414848327637,
|
|
"learning_rate": 3.883202889825406e-07,
|
|
"loss": 0.5977,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.15653220951234195,
|
|
"grad_norm": 4.714526653289795,
|
|
"learning_rate": 3.913305237808549e-07,
|
|
"loss": 0.5647,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.15773630343166767,
|
|
"grad_norm": 4.869201183319092,
|
|
"learning_rate": 3.9434075857916917e-07,
|
|
"loss": 0.5816,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.15894039735099338,
|
|
"grad_norm": 5.167849540710449,
|
|
"learning_rate": 3.973509933774834e-07,
|
|
"loss": 0.5633,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.16014449127031907,
|
|
"grad_norm": 4.805886745452881,
|
|
"learning_rate": 4.003612281757977e-07,
|
|
"loss": 0.5858,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.1613485851896448,
|
|
"grad_norm": 4.569708824157715,
|
|
"learning_rate": 4.03371462974112e-07,
|
|
"loss": 0.5729,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.1625526791089705,
|
|
"grad_norm": 4.649074554443359,
|
|
"learning_rate": 4.0638169777242626e-07,
|
|
"loss": 0.5904,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.16375677302829622,
|
|
"grad_norm": 4.956695556640625,
|
|
"learning_rate": 4.093919325707405e-07,
|
|
"loss": 0.5743,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.1649608669476219,
|
|
"grad_norm": 5.056834697723389,
|
|
"learning_rate": 4.1240216736905473e-07,
|
|
"loss": 0.5903,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.16616496086694763,
|
|
"grad_norm": 4.751232624053955,
|
|
"learning_rate": 4.1541240216736907e-07,
|
|
"loss": 0.5697,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.16736905478627334,
|
|
"grad_norm": 4.0161027908325195,
|
|
"learning_rate": 4.184226369656833e-07,
|
|
"loss": 0.5588,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.16857314870559903,
|
|
"grad_norm": 4.591194152832031,
|
|
"learning_rate": 4.2143287176399753e-07,
|
|
"loss": 0.5792,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.16977724262492475,
|
|
"grad_norm": 5.218972206115723,
|
|
"learning_rate": 4.2444310656231187e-07,
|
|
"loss": 0.5793,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.17098133654425046,
|
|
"grad_norm": 4.32102108001709,
|
|
"learning_rate": 4.274533413606261e-07,
|
|
"loss": 0.57,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.17218543046357615,
|
|
"grad_norm": 4.359175205230713,
|
|
"learning_rate": 4.3046357615894034e-07,
|
|
"loss": 0.5675,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.17338952438290187,
|
|
"grad_norm": 5.192026615142822,
|
|
"learning_rate": 4.334738109572547e-07,
|
|
"loss": 0.5668,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.17459361830222758,
|
|
"grad_norm": 4.002780914306641,
|
|
"learning_rate": 4.364840457555689e-07,
|
|
"loss": 0.5787,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.17579771222155327,
|
|
"grad_norm": 5.319111347198486,
|
|
"learning_rate": 4.3949428055388314e-07,
|
|
"loss": 0.5734,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.177001806140879,
|
|
"grad_norm": 4.700523376464844,
|
|
"learning_rate": 4.425045153521975e-07,
|
|
"loss": 0.5754,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.1782059000602047,
|
|
"grad_norm": 4.4386372566223145,
|
|
"learning_rate": 4.455147501505117e-07,
|
|
"loss": 0.5459,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.1794099939795304,
|
|
"grad_norm": 4.084826946258545,
|
|
"learning_rate": 4.48524984948826e-07,
|
|
"loss": 0.5399,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.1806140878988561,
|
|
"grad_norm": 4.401342391967773,
|
|
"learning_rate": 4.515352197471403e-07,
|
|
"loss": 0.573,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.18181818181818182,
|
|
"grad_norm": 4.5059685707092285,
|
|
"learning_rate": 4.545454545454545e-07,
|
|
"loss": 0.5724,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.18302227573750754,
|
|
"grad_norm": 5.070437431335449,
|
|
"learning_rate": 4.575556893437688e-07,
|
|
"loss": 0.5711,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.18422636965683323,
|
|
"grad_norm": 4.188956260681152,
|
|
"learning_rate": 4.6056592414208304e-07,
|
|
"loss": 0.5498,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.18543046357615894,
|
|
"grad_norm": 4.391158580780029,
|
|
"learning_rate": 4.635761589403973e-07,
|
|
"loss": 0.5602,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.18663455749548466,
|
|
"grad_norm": 5.272259712219238,
|
|
"learning_rate": 4.665863937387116e-07,
|
|
"loss": 0.5748,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.18783865141481035,
|
|
"grad_norm": 4.982473373413086,
|
|
"learning_rate": 4.6959662853702584e-07,
|
|
"loss": 0.5584,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.18904274533413606,
|
|
"grad_norm": 5.263506889343262,
|
|
"learning_rate": 4.7260686333534013e-07,
|
|
"loss": 0.5828,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.19024683925346178,
|
|
"grad_norm": 4.1373724937438965,
|
|
"learning_rate": 4.756170981336544e-07,
|
|
"loss": 0.5494,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.19145093317278747,
|
|
"grad_norm": 4.439697265625,
|
|
"learning_rate": 4.786273329319686e-07,
|
|
"loss": 0.5522,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.19265502709211318,
|
|
"grad_norm": 4.79713249206543,
|
|
"learning_rate": 4.81637567730283e-07,
|
|
"loss": 0.5058,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.1938591210114389,
|
|
"grad_norm": 3.973453998565674,
|
|
"learning_rate": 4.846478025285972e-07,
|
|
"loss": 0.5471,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.1950632149307646,
|
|
"grad_norm": 4.748741149902344,
|
|
"learning_rate": 4.876580373269115e-07,
|
|
"loss": 0.5768,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.1962673088500903,
|
|
"grad_norm": 5.98441743850708,
|
|
"learning_rate": 4.906682721252258e-07,
|
|
"loss": 0.5497,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.19747140276941602,
|
|
"grad_norm": 5.55325174331665,
|
|
"learning_rate": 4.9367850692354e-07,
|
|
"loss": 0.5595,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.1986754966887417,
|
|
"grad_norm": 5.114386081695557,
|
|
"learning_rate": 4.966887417218543e-07,
|
|
"loss": 0.5635,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.19987959060806743,
|
|
"grad_norm": 4.869389533996582,
|
|
"learning_rate": 4.996989765201686e-07,
|
|
"loss": 0.5409,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.20108368452739314,
|
|
"grad_norm": 4.4507222175598145,
|
|
"learning_rate": 5.027092113184828e-07,
|
|
"loss": 0.5598,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.20228777844671886,
|
|
"grad_norm": 4.574100494384766,
|
|
"learning_rate": 5.057194461167971e-07,
|
|
"loss": 0.5432,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.20349187236604455,
|
|
"grad_norm": 4.581476211547852,
|
|
"learning_rate": 5.087296809151114e-07,
|
|
"loss": 0.5509,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.20469596628537026,
|
|
"grad_norm": 4.631548881530762,
|
|
"learning_rate": 5.117399157134256e-07,
|
|
"loss": 0.5712,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.20590006020469598,
|
|
"grad_norm": 5.006454944610596,
|
|
"learning_rate": 5.147501505117399e-07,
|
|
"loss": 0.5586,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.20710415412402167,
|
|
"grad_norm": 4.4788408279418945,
|
|
"learning_rate": 5.177603853100542e-07,
|
|
"loss": 0.5543,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.20830824804334738,
|
|
"grad_norm": 4.614450931549072,
|
|
"learning_rate": 5.207706201083684e-07,
|
|
"loss": 0.5677,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.2095123419626731,
|
|
"grad_norm": 4.377712249755859,
|
|
"learning_rate": 5.237808549066827e-07,
|
|
"loss": 0.5399,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.2107164358819988,
|
|
"grad_norm": 6.157577991485596,
|
|
"learning_rate": 5.26791089704997e-07,
|
|
"loss": 0.5288,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.2119205298013245,
|
|
"grad_norm": 4.206299781799316,
|
|
"learning_rate": 5.298013245033112e-07,
|
|
"loss": 0.5308,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.21312462372065022,
|
|
"grad_norm": 4.296496868133545,
|
|
"learning_rate": 5.328115593016255e-07,
|
|
"loss": 0.552,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.2143287176399759,
|
|
"grad_norm": 4.474640846252441,
|
|
"learning_rate": 5.358217940999398e-07,
|
|
"loss": 0.5505,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.21553281155930162,
|
|
"grad_norm": 4.762406349182129,
|
|
"learning_rate": 5.38832028898254e-07,
|
|
"loss": 0.5669,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.21673690547862734,
|
|
"grad_norm": 4.40052604675293,
|
|
"learning_rate": 5.418422636965683e-07,
|
|
"loss": 0.5386,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.21794099939795303,
|
|
"grad_norm": 4.364424228668213,
|
|
"learning_rate": 5.448524984948826e-07,
|
|
"loss": 0.5446,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.21914509331727874,
|
|
"grad_norm": 5.686670780181885,
|
|
"learning_rate": 5.478627332931969e-07,
|
|
"loss": 0.5708,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.22034918723660446,
|
|
"grad_norm": 6.244655132293701,
|
|
"learning_rate": 5.508729680915111e-07,
|
|
"loss": 0.5353,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.22155328115593018,
|
|
"grad_norm": 5.4936323165893555,
|
|
"learning_rate": 5.538832028898254e-07,
|
|
"loss": 0.5486,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.22275737507525586,
|
|
"grad_norm": 4.955344200134277,
|
|
"learning_rate": 5.568934376881397e-07,
|
|
"loss": 0.5142,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.22396146899458158,
|
|
"grad_norm": 4.333896636962891,
|
|
"learning_rate": 5.599036724864539e-07,
|
|
"loss": 0.5432,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.2251655629139073,
|
|
"grad_norm": 4.568367958068848,
|
|
"learning_rate": 5.629139072847681e-07,
|
|
"loss": 0.5351,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.22636965683323299,
|
|
"grad_norm": 5.548391342163086,
|
|
"learning_rate": 5.659241420830825e-07,
|
|
"loss": 0.5053,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.2275737507525587,
|
|
"grad_norm": 4.526470184326172,
|
|
"learning_rate": 5.689343768813967e-07,
|
|
"loss": 0.5494,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.22877784467188442,
|
|
"grad_norm": 4.453249454498291,
|
|
"learning_rate": 5.719446116797109e-07,
|
|
"loss": 0.5397,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.2299819385912101,
|
|
"grad_norm": 7.503538131713867,
|
|
"learning_rate": 5.749548464780253e-07,
|
|
"loss": 0.5232,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.23118603251053582,
|
|
"grad_norm": 5.740428924560547,
|
|
"learning_rate": 5.779650812763396e-07,
|
|
"loss": 0.5426,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.23239012642986154,
|
|
"grad_norm": 5.185967445373535,
|
|
"learning_rate": 5.809753160746537e-07,
|
|
"loss": 0.5277,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.23359422034918723,
|
|
"grad_norm": 5.1867547035217285,
|
|
"learning_rate": 5.839855508729681e-07,
|
|
"loss": 0.5233,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.23479831426851294,
|
|
"grad_norm": 4.812213897705078,
|
|
"learning_rate": 5.869957856712824e-07,
|
|
"loss": 0.535,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.23600240818783866,
|
|
"grad_norm": 5.038625240325928,
|
|
"learning_rate": 5.900060204695965e-07,
|
|
"loss": 0.5365,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.23720650210716435,
|
|
"grad_norm": 4.050044536590576,
|
|
"learning_rate": 5.930162552679109e-07,
|
|
"loss": 0.5145,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.23841059602649006,
|
|
"grad_norm": 4.956125736236572,
|
|
"learning_rate": 5.960264900662252e-07,
|
|
"loss": 0.5141,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.23961468994581578,
|
|
"grad_norm": 4.40023136138916,
|
|
"learning_rate": 5.990367248645393e-07,
|
|
"loss": 0.544,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.2408187838651415,
|
|
"grad_norm": 5.268930912017822,
|
|
"learning_rate": 6.020469596628537e-07,
|
|
"loss": 0.5514,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.24202287778446718,
|
|
"grad_norm": 3.9441418647766113,
|
|
"learning_rate": 6.05057194461168e-07,
|
|
"loss": 0.5368,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.2432269717037929,
|
|
"grad_norm": 4.060418605804443,
|
|
"learning_rate": 6.080674292594821e-07,
|
|
"loss": 0.5228,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.24443106562311862,
|
|
"grad_norm": 4.1477861404418945,
|
|
"learning_rate": 6.110776640577965e-07,
|
|
"loss": 0.5221,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.2456351595424443,
|
|
"grad_norm": 5.319125175476074,
|
|
"learning_rate": 6.140878988561108e-07,
|
|
"loss": 0.5441,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.24683925346177002,
|
|
"grad_norm": 4.920033931732178,
|
|
"learning_rate": 6.17098133654425e-07,
|
|
"loss": 0.5307,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.24804334738109574,
|
|
"grad_norm": 5.167773246765137,
|
|
"learning_rate": 6.201083684527393e-07,
|
|
"loss": 0.5304,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.24924744130042142,
|
|
"grad_norm": 5.3018879890441895,
|
|
"learning_rate": 6.231186032510536e-07,
|
|
"loss": 0.5356,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.25045153521974717,
|
|
"grad_norm": 4.822166919708252,
|
|
"learning_rate": 6.261288380493678e-07,
|
|
"loss": 0.513,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.25165562913907286,
|
|
"grad_norm": 4.957582473754883,
|
|
"learning_rate": 6.291390728476821e-07,
|
|
"loss": 0.5069,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.25285972305839854,
|
|
"grad_norm": 6.180065155029297,
|
|
"learning_rate": 6.321493076459964e-07,
|
|
"loss": 0.5329,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.2540638169777243,
|
|
"grad_norm": 5.123517990112305,
|
|
"learning_rate": 6.351595424443106e-07,
|
|
"loss": 0.5169,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.25526791089705,
|
|
"grad_norm": 5.372180938720703,
|
|
"learning_rate": 6.381697772426249e-07,
|
|
"loss": 0.508,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.25647200481637566,
|
|
"grad_norm": 3.907548189163208,
|
|
"learning_rate": 6.411800120409392e-07,
|
|
"loss": 0.5082,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.2576760987357014,
|
|
"grad_norm": 4.107047080993652,
|
|
"learning_rate": 6.441902468392534e-07,
|
|
"loss": 0.5257,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.2588801926550271,
|
|
"grad_norm": 5.055625915527344,
|
|
"learning_rate": 6.472004816375677e-07,
|
|
"loss": 0.5458,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.2600842865743528,
|
|
"grad_norm": 5.573007106781006,
|
|
"learning_rate": 6.502107164358819e-07,
|
|
"loss": 0.5178,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.26128838049367853,
|
|
"grad_norm": 4.955606460571289,
|
|
"learning_rate": 6.532209512341962e-07,
|
|
"loss": 0.5355,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.2624924744130042,
|
|
"grad_norm": 4.537413120269775,
|
|
"learning_rate": 6.562311860325105e-07,
|
|
"loss": 0.5393,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.2636965683323299,
|
|
"grad_norm": 5.761811256408691,
|
|
"learning_rate": 6.592414208308247e-07,
|
|
"loss": 0.5497,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.26490066225165565,
|
|
"grad_norm": 3.865335464477539,
|
|
"learning_rate": 6.622516556291391e-07,
|
|
"loss": 0.4914,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.26610475617098134,
|
|
"grad_norm": 4.600432872772217,
|
|
"learning_rate": 6.652618904274533e-07,
|
|
"loss": 0.5099,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.267308850090307,
|
|
"grad_norm": 4.737097263336182,
|
|
"learning_rate": 6.682721252257675e-07,
|
|
"loss": 0.5236,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.26851294400963277,
|
|
"grad_norm": 4.7886247634887695,
|
|
"learning_rate": 6.712823600240819e-07,
|
|
"loss": 0.5152,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.26971703792895846,
|
|
"grad_norm": 6.00905179977417,
|
|
"learning_rate": 6.742925948223961e-07,
|
|
"loss": 0.5369,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.27092113184828415,
|
|
"grad_norm": 5.080295085906982,
|
|
"learning_rate": 6.773028296207104e-07,
|
|
"loss": 0.5135,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.2721252257676099,
|
|
"grad_norm": 5.130943775177002,
|
|
"learning_rate": 6.803130644190247e-07,
|
|
"loss": 0.4921,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.2733293196869356,
|
|
"grad_norm": 4.8161187171936035,
|
|
"learning_rate": 6.833232992173389e-07,
|
|
"loss": 0.5243,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.27453341360626127,
|
|
"grad_norm": 5.960630416870117,
|
|
"learning_rate": 6.863335340156532e-07,
|
|
"loss": 0.525,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.275737507525587,
|
|
"grad_norm": 6.012716770172119,
|
|
"learning_rate": 6.893437688139675e-07,
|
|
"loss": 0.5126,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.2769416014449127,
|
|
"grad_norm": 4.913167476654053,
|
|
"learning_rate": 6.923540036122817e-07,
|
|
"loss": 0.531,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.2781456953642384,
|
|
"grad_norm": 5.190576076507568,
|
|
"learning_rate": 6.95364238410596e-07,
|
|
"loss": 0.5147,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.27934978928356413,
|
|
"grad_norm": 4.0760602951049805,
|
|
"learning_rate": 6.983744732089103e-07,
|
|
"loss": 0.5135,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.2805538832028898,
|
|
"grad_norm": 4.385684490203857,
|
|
"learning_rate": 7.013847080072245e-07,
|
|
"loss": 0.5196,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.2817579771222155,
|
|
"grad_norm": 4.470118045806885,
|
|
"learning_rate": 7.043949428055388e-07,
|
|
"loss": 0.502,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.28296207104154125,
|
|
"grad_norm": 4.798367023468018,
|
|
"learning_rate": 7.074051776038531e-07,
|
|
"loss": 0.5078,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.28416616496086694,
|
|
"grad_norm": 4.64969539642334,
|
|
"learning_rate": 7.104154124021673e-07,
|
|
"loss": 0.5126,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.28537025888019263,
|
|
"grad_norm": 5.035313606262207,
|
|
"learning_rate": 7.134256472004816e-07,
|
|
"loss": 0.5068,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.2865743527995184,
|
|
"grad_norm": 3.7338409423828125,
|
|
"learning_rate": 7.164358819987959e-07,
|
|
"loss": 0.4956,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.28777844671884406,
|
|
"grad_norm": 5.102356910705566,
|
|
"learning_rate": 7.194461167971101e-07,
|
|
"loss": 0.5128,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.2889825406381698,
|
|
"grad_norm": 5.0710320472717285,
|
|
"learning_rate": 7.224563515954244e-07,
|
|
"loss": 0.5064,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.2901866345574955,
|
|
"grad_norm": 5.2054667472839355,
|
|
"learning_rate": 7.254665863937387e-07,
|
|
"loss": 0.5318,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.2913907284768212,
|
|
"grad_norm": 4.590500831604004,
|
|
"learning_rate": 7.284768211920528e-07,
|
|
"loss": 0.5352,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.2925948223961469,
|
|
"grad_norm": 5.737983226776123,
|
|
"learning_rate": 7.314870559903672e-07,
|
|
"loss": 0.5047,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.2937989163154726,
|
|
"grad_norm": 5.184499263763428,
|
|
"learning_rate": 7.344972907886815e-07,
|
|
"loss": 0.4998,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.2950030102347983,
|
|
"grad_norm": 5.553317070007324,
|
|
"learning_rate": 7.375075255869959e-07,
|
|
"loss": 0.5099,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.29620710415412405,
|
|
"grad_norm": 4.864592552185059,
|
|
"learning_rate": 7.4051776038531e-07,
|
|
"loss": 0.5071,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.29741119807344973,
|
|
"grad_norm": 4.1055803298950195,
|
|
"learning_rate": 7.435279951836243e-07,
|
|
"loss": 0.4985,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.2986152919927754,
|
|
"grad_norm": 5.875371932983398,
|
|
"learning_rate": 7.465382299819386e-07,
|
|
"loss": 0.4982,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.29981938591210117,
|
|
"grad_norm": 4.417768955230713,
|
|
"learning_rate": 7.495484647802528e-07,
|
|
"loss": 0.4999,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.30102347983142685,
|
|
"grad_norm": 4.034854888916016,
|
|
"learning_rate": 7.525586995785671e-07,
|
|
"loss": 0.5063,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.30222757375075254,
|
|
"grad_norm": 4.711478233337402,
|
|
"learning_rate": 7.555689343768814e-07,
|
|
"loss": 0.5194,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.3034316676700783,
|
|
"grad_norm": 4.778373718261719,
|
|
"learning_rate": 7.585791691751956e-07,
|
|
"loss": 0.5178,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.304635761589404,
|
|
"grad_norm": 3.896817922592163,
|
|
"learning_rate": 7.615894039735099e-07,
|
|
"loss": 0.5073,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.30583985550872966,
|
|
"grad_norm": 4.729064464569092,
|
|
"learning_rate": 7.645996387718242e-07,
|
|
"loss": 0.5083,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.3070439494280554,
|
|
"grad_norm": 4.760159015655518,
|
|
"learning_rate": 7.676098735701384e-07,
|
|
"loss": 0.5108,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.3082480433473811,
|
|
"grad_norm": 4.362825870513916,
|
|
"learning_rate": 7.706201083684527e-07,
|
|
"loss": 0.5027,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.3094521372667068,
|
|
"grad_norm": 4.749810695648193,
|
|
"learning_rate": 7.73630343166767e-07,
|
|
"loss": 0.5051,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.3106562311860325,
|
|
"grad_norm": 4.157332897186279,
|
|
"learning_rate": 7.766405779650812e-07,
|
|
"loss": 0.5,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.3118603251053582,
|
|
"grad_norm": 4.272891044616699,
|
|
"learning_rate": 7.796508127633955e-07,
|
|
"loss": 0.4946,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.3130644190246839,
|
|
"grad_norm": 4.159026145935059,
|
|
"learning_rate": 7.826610475617098e-07,
|
|
"loss": 0.4992,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.31426851294400965,
|
|
"grad_norm": 5.095447063446045,
|
|
"learning_rate": 7.85671282360024e-07,
|
|
"loss": 0.4968,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.31547260686333534,
|
|
"grad_norm": 4.606817722320557,
|
|
"learning_rate": 7.886815171583383e-07,
|
|
"loss": 0.5018,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.316676700782661,
|
|
"grad_norm": 4.154166221618652,
|
|
"learning_rate": 7.916917519566526e-07,
|
|
"loss": 0.4848,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.31788079470198677,
|
|
"grad_norm": 4.749946117401123,
|
|
"learning_rate": 7.947019867549668e-07,
|
|
"loss": 0.4955,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.31908488862131246,
|
|
"grad_norm": 6.158957481384277,
|
|
"learning_rate": 7.977122215532812e-07,
|
|
"loss": 0.5088,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.32028898254063815,
|
|
"grad_norm": 4.356431484222412,
|
|
"learning_rate": 8.007224563515954e-07,
|
|
"loss": 0.5071,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.3214930764599639,
|
|
"grad_norm": 5.454282760620117,
|
|
"learning_rate": 8.037326911499096e-07,
|
|
"loss": 0.518,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.3226971703792896,
|
|
"grad_norm": 4.323178291320801,
|
|
"learning_rate": 8.06742925948224e-07,
|
|
"loss": 0.5077,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.32390126429861527,
|
|
"grad_norm": 5.352051258087158,
|
|
"learning_rate": 8.097531607465382e-07,
|
|
"loss": 0.5042,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.325105358217941,
|
|
"grad_norm": 4.680684566497803,
|
|
"learning_rate": 8.127633955448525e-07,
|
|
"loss": 0.5006,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.3263094521372667,
|
|
"grad_norm": 5.054072380065918,
|
|
"learning_rate": 8.157736303431668e-07,
|
|
"loss": 0.5005,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.32751354605659244,
|
|
"grad_norm": 4.090258598327637,
|
|
"learning_rate": 8.18783865141481e-07,
|
|
"loss": 0.4694,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.32871763997591813,
|
|
"grad_norm": 4.663838863372803,
|
|
"learning_rate": 8.217940999397953e-07,
|
|
"loss": 0.502,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.3299217338952438,
|
|
"grad_norm": 4.440493106842041,
|
|
"learning_rate": 8.248043347381095e-07,
|
|
"loss": 0.4933,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.33112582781456956,
|
|
"grad_norm": 5.184099197387695,
|
|
"learning_rate": 8.278145695364238e-07,
|
|
"loss": 0.5088,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.33232992173389525,
|
|
"grad_norm": 4.647283554077148,
|
|
"learning_rate": 8.308248043347381e-07,
|
|
"loss": 0.4909,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.33353401565322094,
|
|
"grad_norm": 4.6232500076293945,
|
|
"learning_rate": 8.338350391330523e-07,
|
|
"loss": 0.4929,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.3347381095725467,
|
|
"grad_norm": 5.234133720397949,
|
|
"learning_rate": 8.368452739313666e-07,
|
|
"loss": 0.5287,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.33594220349187237,
|
|
"grad_norm": 4.967161178588867,
|
|
"learning_rate": 8.398555087296809e-07,
|
|
"loss": 0.5041,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.33714629741119806,
|
|
"grad_norm": 4.8062591552734375,
|
|
"learning_rate": 8.428657435279951e-07,
|
|
"loss": 0.4878,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.3383503913305238,
|
|
"grad_norm": 5.188631534576416,
|
|
"learning_rate": 8.458759783263094e-07,
|
|
"loss": 0.4907,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.3395544852498495,
|
|
"grad_norm": 4.293895244598389,
|
|
"learning_rate": 8.488862131246237e-07,
|
|
"loss": 0.4952,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.3407585791691752,
|
|
"grad_norm": 5.219202041625977,
|
|
"learning_rate": 8.518964479229379e-07,
|
|
"loss": 0.5046,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.3419626730885009,
|
|
"grad_norm": 4.529453754425049,
|
|
"learning_rate": 8.549066827212522e-07,
|
|
"loss": 0.4951,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.3431667670078266,
|
|
"grad_norm": 4.706615924835205,
|
|
"learning_rate": 8.579169175195666e-07,
|
|
"loss": 0.5083,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.3443708609271523,
|
|
"grad_norm": 5.135066986083984,
|
|
"learning_rate": 8.609271523178807e-07,
|
|
"loss": 0.4823,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.34557495484647804,
|
|
"grad_norm": 4.977953910827637,
|
|
"learning_rate": 8.63937387116195e-07,
|
|
"loss": 0.4845,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.34677904876580373,
|
|
"grad_norm": 4.964434623718262,
|
|
"learning_rate": 8.669476219145094e-07,
|
|
"loss": 0.5008,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.3479831426851294,
|
|
"grad_norm": 4.28712797164917,
|
|
"learning_rate": 8.699578567128235e-07,
|
|
"loss": 0.4819,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.34918723660445516,
|
|
"grad_norm": 4.125621318817139,
|
|
"learning_rate": 8.729680915111378e-07,
|
|
"loss": 0.505,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.35039133052378085,
|
|
"grad_norm": 4.779543399810791,
|
|
"learning_rate": 8.759783263094522e-07,
|
|
"loss": 0.5002,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.35159542444310654,
|
|
"grad_norm": 4.9358320236206055,
|
|
"learning_rate": 8.789885611077663e-07,
|
|
"loss": 0.4854,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.3527995183624323,
|
|
"grad_norm": 5.439524173736572,
|
|
"learning_rate": 8.819987959060806e-07,
|
|
"loss": 0.4893,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.354003612281758,
|
|
"grad_norm": 5.939353942871094,
|
|
"learning_rate": 8.85009030704395e-07,
|
|
"loss": 0.4876,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.35520770620108366,
|
|
"grad_norm": 5.600659370422363,
|
|
"learning_rate": 8.880192655027092e-07,
|
|
"loss": 0.4916,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.3564118001204094,
|
|
"grad_norm": 6.2792134284973145,
|
|
"learning_rate": 8.910295003010234e-07,
|
|
"loss": 0.5139,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.3576158940397351,
|
|
"grad_norm": 5.060665130615234,
|
|
"learning_rate": 8.940397350993378e-07,
|
|
"loss": 0.5138,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.3588199879590608,
|
|
"grad_norm": 5.271560192108154,
|
|
"learning_rate": 8.97049969897652e-07,
|
|
"loss": 0.4971,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.3600240818783865,
|
|
"grad_norm": 4.9547014236450195,
|
|
"learning_rate": 9.000602046959662e-07,
|
|
"loss": 0.4767,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.3612281757977122,
|
|
"grad_norm": 5.039198398590088,
|
|
"learning_rate": 9.030704394942806e-07,
|
|
"loss": 0.5038,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.3624322697170379,
|
|
"grad_norm": 3.5281832218170166,
|
|
"learning_rate": 9.060806742925948e-07,
|
|
"loss": 0.4837,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.36363636363636365,
|
|
"grad_norm": 4.734562873840332,
|
|
"learning_rate": 9.09090909090909e-07,
|
|
"loss": 0.4925,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.36484045755568933,
|
|
"grad_norm": 4.400488376617432,
|
|
"learning_rate": 9.121011438892233e-07,
|
|
"loss": 0.4819,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.3660445514750151,
|
|
"grad_norm": 4.797727584838867,
|
|
"learning_rate": 9.151113786875376e-07,
|
|
"loss": 0.4779,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.36724864539434077,
|
|
"grad_norm": 4.852715492248535,
|
|
"learning_rate": 9.181216134858518e-07,
|
|
"loss": 0.4581,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.36845273931366646,
|
|
"grad_norm": 4.8324971199035645,
|
|
"learning_rate": 9.211318482841661e-07,
|
|
"loss": 0.5075,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.3696568332329922,
|
|
"grad_norm": 4.099527835845947,
|
|
"learning_rate": 9.241420830824804e-07,
|
|
"loss": 0.4926,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.3708609271523179,
|
|
"grad_norm": 4.540558338165283,
|
|
"learning_rate": 9.271523178807946e-07,
|
|
"loss": 0.4901,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.3720650210716436,
|
|
"grad_norm": 4.567551612854004,
|
|
"learning_rate": 9.301625526791089e-07,
|
|
"loss": 0.4781,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.3732691149909693,
|
|
"grad_norm": 5.362119674682617,
|
|
"learning_rate": 9.331727874774232e-07,
|
|
"loss": 0.4784,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.374473208910295,
|
|
"grad_norm": 4.974254131317139,
|
|
"learning_rate": 9.361830222757375e-07,
|
|
"loss": 0.4985,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.3756773028296207,
|
|
"grad_norm": 4.490511417388916,
|
|
"learning_rate": 9.391932570740517e-07,
|
|
"loss": 0.4619,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.37688139674894644,
|
|
"grad_norm": 4.691735744476318,
|
|
"learning_rate": 9.42203491872366e-07,
|
|
"loss": 0.4892,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.37808549066827213,
|
|
"grad_norm": 5.031266689300537,
|
|
"learning_rate": 9.452137266706803e-07,
|
|
"loss": 0.4653,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.3792895845875978,
|
|
"grad_norm": 6.112424850463867,
|
|
"learning_rate": 9.482239614689945e-07,
|
|
"loss": 0.4887,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.38049367850692356,
|
|
"grad_norm": 4.281744480133057,
|
|
"learning_rate": 9.512341962673088e-07,
|
|
"loss": 0.4828,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.38169777242624925,
|
|
"grad_norm": 4.672320365905762,
|
|
"learning_rate": 9.54244431065623e-07,
|
|
"loss": 0.4807,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.38290186634557494,
|
|
"grad_norm": 4.8247528076171875,
|
|
"learning_rate": 9.572546658639373e-07,
|
|
"loss": 0.4652,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.3841059602649007,
|
|
"grad_norm": 4.806872844696045,
|
|
"learning_rate": 9.602649006622515e-07,
|
|
"loss": 0.4687,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.38531005418422637,
|
|
"grad_norm": 4.877020835876465,
|
|
"learning_rate": 9.63275135460566e-07,
|
|
"loss": 0.4954,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.38651414810355206,
|
|
"grad_norm": 5.005871295928955,
|
|
"learning_rate": 9.662853702588802e-07,
|
|
"loss": 0.5117,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.3877182420228778,
|
|
"grad_norm": 4.2746100425720215,
|
|
"learning_rate": 9.692956050571944e-07,
|
|
"loss": 0.472,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.3889223359422035,
|
|
"grad_norm": 4.155144691467285,
|
|
"learning_rate": 9.723058398555087e-07,
|
|
"loss": 0.4882,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.3901264298615292,
|
|
"grad_norm": 4.557404041290283,
|
|
"learning_rate": 9.75316074653823e-07,
|
|
"loss": 0.4845,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.3913305237808549,
|
|
"grad_norm": 4.442798614501953,
|
|
"learning_rate": 9.783263094521371e-07,
|
|
"loss": 0.4822,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.3925346177001806,
|
|
"grad_norm": 5.363224029541016,
|
|
"learning_rate": 9.813365442504516e-07,
|
|
"loss": 0.4808,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.3937387116195063,
|
|
"grad_norm": 4.809715747833252,
|
|
"learning_rate": 9.843467790487658e-07,
|
|
"loss": 0.4834,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.39494280553883204,
|
|
"grad_norm": 4.954145431518555,
|
|
"learning_rate": 9.8735701384708e-07,
|
|
"loss": 0.4796,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.39614689945815773,
|
|
"grad_norm": 4.381477355957031,
|
|
"learning_rate": 9.903672486453943e-07,
|
|
"loss": 0.465,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.3973509933774834,
|
|
"grad_norm": 5.086960315704346,
|
|
"learning_rate": 9.933774834437085e-07,
|
|
"loss": 0.4996,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.39855508729680916,
|
|
"grad_norm": 5.4834303855896,
|
|
"learning_rate": 9.963877182420227e-07,
|
|
"loss": 0.4854,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.39975918121613485,
|
|
"grad_norm": 4.411494255065918,
|
|
"learning_rate": 9.993979530403372e-07,
|
|
"loss": 0.4882,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.40096327513546054,
|
|
"grad_norm": 3.9291751384735107,
|
|
"learning_rate": 9.999998233411383e-07,
|
|
"loss": 0.4975,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.4021673690547863,
|
|
"grad_norm": 4.288562774658203,
|
|
"learning_rate": 9.999991056647273e-07,
|
|
"loss": 0.4712,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.40337146297411197,
|
|
"grad_norm": 4.603250026702881,
|
|
"learning_rate": 9.999978359303795e-07,
|
|
"loss": 0.4933,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.4045755568934377,
|
|
"grad_norm": 4.753664970397949,
|
|
"learning_rate": 9.999960141394973e-07,
|
|
"loss": 0.4748,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.4057796508127634,
|
|
"grad_norm": 4.143571376800537,
|
|
"learning_rate": 9.99993640294092e-07,
|
|
"loss": 0.46,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.4069837447320891,
|
|
"grad_norm": 5.25679874420166,
|
|
"learning_rate": 9.99990714396784e-07,
|
|
"loss": 0.4859,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.40818783865141484,
|
|
"grad_norm": 5.903568744659424,
|
|
"learning_rate": 9.999872364508047e-07,
|
|
"loss": 0.4942,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.4093919325707405,
|
|
"grad_norm": 4.5355939865112305,
|
|
"learning_rate": 9.999832064599938e-07,
|
|
"loss": 0.4713,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.4105960264900662,
|
|
"grad_norm": 4.297218322753906,
|
|
"learning_rate": 9.999786244288008e-07,
|
|
"loss": 0.4701,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.41180012040939196,
|
|
"grad_norm": 4.364749908447266,
|
|
"learning_rate": 9.99973490362285e-07,
|
|
"loss": 0.4805,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.41300421432871764,
|
|
"grad_norm": 5.253974914550781,
|
|
"learning_rate": 9.999678042661147e-07,
|
|
"loss": 0.4728,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.41420830824804333,
|
|
"grad_norm": 3.7505037784576416,
|
|
"learning_rate": 9.999615661465685e-07,
|
|
"loss": 0.4666,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.4154124021673691,
|
|
"grad_norm": 4.56821346282959,
|
|
"learning_rate": 9.999547760105335e-07,
|
|
"loss": 0.4654,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.41661649608669477,
|
|
"grad_norm": 5.777834415435791,
|
|
"learning_rate": 9.999474338655073e-07,
|
|
"loss": 0.4708,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.41782059000602045,
|
|
"grad_norm": 4.463301181793213,
|
|
"learning_rate": 9.999395397195961e-07,
|
|
"loss": 0.4736,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.4190246839253462,
|
|
"grad_norm": 4.7559494972229,
|
|
"learning_rate": 9.999310935815165e-07,
|
|
"loss": 0.4858,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.4202287778446719,
|
|
"grad_norm": 5.451569557189941,
|
|
"learning_rate": 9.999220954605932e-07,
|
|
"loss": 0.4945,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.4214328717639976,
|
|
"grad_norm": 4.072139739990234,
|
|
"learning_rate": 9.99912545366762e-07,
|
|
"loss": 0.4685,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.4226369656833233,
|
|
"grad_norm": 5.299817085266113,
|
|
"learning_rate": 9.999024433105666e-07,
|
|
"loss": 0.4782,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.423841059602649,
|
|
"grad_norm": 4.960267543792725,
|
|
"learning_rate": 9.998917893031615e-07,
|
|
"loss": 0.4766,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.4250451535219747,
|
|
"grad_norm": 5.582713603973389,
|
|
"learning_rate": 9.998805833563096e-07,
|
|
"loss": 0.4737,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.42624924744130044,
|
|
"grad_norm": 4.434458255767822,
|
|
"learning_rate": 9.998688254823836e-07,
|
|
"loss": 0.4679,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.4274533413606261,
|
|
"grad_norm": 4.943469524383545,
|
|
"learning_rate": 9.99856515694366e-07,
|
|
"loss": 0.4754,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.4286574352799518,
|
|
"grad_norm": 5.145878314971924,
|
|
"learning_rate": 9.998436540058476e-07,
|
|
"loss": 0.4855,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.42986152919927756,
|
|
"grad_norm": 4.884524822235107,
|
|
"learning_rate": 9.998302404310296e-07,
|
|
"loss": 0.4801,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.43106562311860325,
|
|
"grad_norm": 4.950911045074463,
|
|
"learning_rate": 9.998162749847223e-07,
|
|
"loss": 0.51,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.43226971703792894,
|
|
"grad_norm": 4.5520148277282715,
|
|
"learning_rate": 9.99801757682345e-07,
|
|
"loss": 0.4887,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.4334738109572547,
|
|
"grad_norm": 5.745821952819824,
|
|
"learning_rate": 9.997866885399265e-07,
|
|
"loss": 0.4934,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.43467790487658037,
|
|
"grad_norm": 4.750070095062256,
|
|
"learning_rate": 9.997710675741049e-07,
|
|
"loss": 0.4611,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.43588199879590606,
|
|
"grad_norm": 4.3570966720581055,
|
|
"learning_rate": 9.997548948021275e-07,
|
|
"loss": 0.4819,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.4370860927152318,
|
|
"grad_norm": 3.810598373413086,
|
|
"learning_rate": 9.997381702418513e-07,
|
|
"loss": 0.4514,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.4382901866345575,
|
|
"grad_norm": 4.763775825500488,
|
|
"learning_rate": 9.997208939117418e-07,
|
|
"loss": 0.4686,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.4394942805538832,
|
|
"grad_norm": 4.3974385261535645,
|
|
"learning_rate": 9.997030658308745e-07,
|
|
"loss": 0.4763,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.4406983744732089,
|
|
"grad_norm": 4.901960372924805,
|
|
"learning_rate": 9.996846860189332e-07,
|
|
"loss": 0.4649,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.4419024683925346,
|
|
"grad_norm": 3.764139175415039,
|
|
"learning_rate": 9.996657544962118e-07,
|
|
"loss": 0.4752,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.44310656231186035,
|
|
"grad_norm": 4.972975730895996,
|
|
"learning_rate": 9.996462712836126e-07,
|
|
"loss": 0.4736,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.44431065623118604,
|
|
"grad_norm": 3.928086757659912,
|
|
"learning_rate": 9.996262364026477e-07,
|
|
"loss": 0.4939,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.44551475015051173,
|
|
"grad_norm": 4.017699718475342,
|
|
"learning_rate": 9.99605649875438e-07,
|
|
"loss": 0.4693,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.4467188440698375,
|
|
"grad_norm": 6.103999137878418,
|
|
"learning_rate": 9.995845117247129e-07,
|
|
"loss": 0.4774,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.44792293798916316,
|
|
"grad_norm": 6.031617641448975,
|
|
"learning_rate": 9.99562821973812e-07,
|
|
"loss": 0.4528,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.44912703190848885,
|
|
"grad_norm": 4.691218852996826,
|
|
"learning_rate": 9.99540580646683e-07,
|
|
"loss": 0.4646,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.4503311258278146,
|
|
"grad_norm": 4.680331230163574,
|
|
"learning_rate": 9.995177877678832e-07,
|
|
"loss": 0.469,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.4515352197471403,
|
|
"grad_norm": 4.436509132385254,
|
|
"learning_rate": 9.994944433625784e-07,
|
|
"loss": 0.4619,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.45273931366646597,
|
|
"grad_norm": 4.72512149810791,
|
|
"learning_rate": 9.994705474565435e-07,
|
|
"loss": 0.4404,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.4539434075857917,
|
|
"grad_norm": 4.427882194519043,
|
|
"learning_rate": 9.994461000761627e-07,
|
|
"loss": 0.4826,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.4551475015051174,
|
|
"grad_norm": 4.025267124176025,
|
|
"learning_rate": 9.994211012484285e-07,
|
|
"loss": 0.4671,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.4563515954244431,
|
|
"grad_norm": 5.315865516662598,
|
|
"learning_rate": 9.99395551000943e-07,
|
|
"loss": 0.4922,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.45755568934376883,
|
|
"grad_norm": 5.362889289855957,
|
|
"learning_rate": 9.993694493619162e-07,
|
|
"loss": 0.4554,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.4587597832630945,
|
|
"grad_norm": 3.8804094791412354,
|
|
"learning_rate": 9.993427963601674e-07,
|
|
"loss": 0.4558,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.4599638771824202,
|
|
"grad_norm": 3.8259241580963135,
|
|
"learning_rate": 9.99315592025125e-07,
|
|
"loss": 0.4756,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.46116797110174595,
|
|
"grad_norm": 3.806236505508423,
|
|
"learning_rate": 9.992878363868256e-07,
|
|
"loss": 0.4801,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.46237206502107164,
|
|
"grad_norm": 4.628232002258301,
|
|
"learning_rate": 9.992595294759147e-07,
|
|
"loss": 0.4953,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.46357615894039733,
|
|
"grad_norm": 4.719220161437988,
|
|
"learning_rate": 9.992306713236465e-07,
|
|
"loss": 0.4658,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.4647802528597231,
|
|
"grad_norm": 4.918371200561523,
|
|
"learning_rate": 9.992012619618838e-07,
|
|
"loss": 0.4691,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.46598434677904876,
|
|
"grad_norm": 4.425540447235107,
|
|
"learning_rate": 9.991713014230981e-07,
|
|
"loss": 0.4648,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.46718844069837445,
|
|
"grad_norm": 3.687819480895996,
|
|
"learning_rate": 9.99140789740369e-07,
|
|
"loss": 0.4714,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.4683925346177002,
|
|
"grad_norm": 4.835513591766357,
|
|
"learning_rate": 9.991097269473852e-07,
|
|
"loss": 0.4866,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.4695966285370259,
|
|
"grad_norm": 4.215537071228027,
|
|
"learning_rate": 9.990781130784437e-07,
|
|
"loss": 0.4697,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.4708007224563516,
|
|
"grad_norm": 4.371738433837891,
|
|
"learning_rate": 9.990459481684504e-07,
|
|
"loss": 0.4655,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.4720048163756773,
|
|
"grad_norm": 4.469852924346924,
|
|
"learning_rate": 9.990132322529181e-07,
|
|
"loss": 0.4416,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.473208910295003,
|
|
"grad_norm": 4.61678409576416,
|
|
"learning_rate": 9.989799653679701e-07,
|
|
"loss": 0.4625,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.4744130042143287,
|
|
"grad_norm": 5.12364387512207,
|
|
"learning_rate": 9.989461475503362e-07,
|
|
"loss": 0.4515,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.47561709813365444,
|
|
"grad_norm": 5.4315924644470215,
|
|
"learning_rate": 9.989117788373558e-07,
|
|
"loss": 0.4773,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.4768211920529801,
|
|
"grad_norm": 4.474724769592285,
|
|
"learning_rate": 9.988768592669756e-07,
|
|
"loss": 0.445,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.4780252859723058,
|
|
"grad_norm": 4.433851718902588,
|
|
"learning_rate": 9.98841388877751e-07,
|
|
"loss": 0.4667,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.47922937989163156,
|
|
"grad_norm": 4.388487815856934,
|
|
"learning_rate": 9.988053677088456e-07,
|
|
"loss": 0.443,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.48043347381095725,
|
|
"grad_norm": 5.400040149688721,
|
|
"learning_rate": 9.987687958000314e-07,
|
|
"loss": 0.4702,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.481637567730283,
|
|
"grad_norm": 4.436804294586182,
|
|
"learning_rate": 9.987316731916872e-07,
|
|
"loss": 0.4568,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.4828416616496087,
|
|
"grad_norm": 5.063580513000488,
|
|
"learning_rate": 9.986939999248014e-07,
|
|
"loss": 0.4877,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.48404575556893437,
|
|
"grad_norm": 4.696618556976318,
|
|
"learning_rate": 9.986557760409694e-07,
|
|
"loss": 0.464,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.4852498494882601,
|
|
"grad_norm": 5.019808292388916,
|
|
"learning_rate": 9.98617001582395e-07,
|
|
"loss": 0.4533,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.4864539434075858,
|
|
"grad_norm": 4.419073104858398,
|
|
"learning_rate": 9.9857767659189e-07,
|
|
"loss": 0.4416,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.4876580373269115,
|
|
"grad_norm": 4.31454610824585,
|
|
"learning_rate": 9.985378011128736e-07,
|
|
"loss": 0.458,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.48886213124623723,
|
|
"grad_norm": 5.41327428817749,
|
|
"learning_rate": 9.98497375189373e-07,
|
|
"loss": 0.4669,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.4900662251655629,
|
|
"grad_norm": 4.439949035644531,
|
|
"learning_rate": 9.98456398866023e-07,
|
|
"loss": 0.4532,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.4912703190848886,
|
|
"grad_norm": 4.076527118682861,
|
|
"learning_rate": 9.98414872188067e-07,
|
|
"loss": 0.4565,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.49247441300421435,
|
|
"grad_norm": 4.239142894744873,
|
|
"learning_rate": 9.983727952013545e-07,
|
|
"loss": 0.4686,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.49367850692354004,
|
|
"grad_norm": 4.340599060058594,
|
|
"learning_rate": 9.98330167952344e-07,
|
|
"loss": 0.4654,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.4948826008428657,
|
|
"grad_norm": 4.37545108795166,
|
|
"learning_rate": 9.982869904881007e-07,
|
|
"loss": 0.4634,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.49608669476219147,
|
|
"grad_norm": 4.235968112945557,
|
|
"learning_rate": 9.982432628562976e-07,
|
|
"loss": 0.4537,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.49729078868151716,
|
|
"grad_norm": 5.080899715423584,
|
|
"learning_rate": 9.981989851052153e-07,
|
|
"loss": 0.4675,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.49849488260084285,
|
|
"grad_norm": 4.327193260192871,
|
|
"learning_rate": 9.98154157283742e-07,
|
|
"loss": 0.4336,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.4996989765201686,
|
|
"grad_norm": 4.647739887237549,
|
|
"learning_rate": 9.981087794413721e-07,
|
|
"loss": 0.4547,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.5009030704394943,
|
|
"grad_norm": 4.411125659942627,
|
|
"learning_rate": 9.980628516282088e-07,
|
|
"loss": 0.4453,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.50210716435882,
|
|
"grad_norm": 4.8657026290893555,
|
|
"learning_rate": 9.980163738949615e-07,
|
|
"loss": 0.4714,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.5033112582781457,
|
|
"grad_norm": 4.7668776512146,
|
|
"learning_rate": 9.97969346292947e-07,
|
|
"loss": 0.4472,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.5045153521974715,
|
|
"grad_norm": 5.490717887878418,
|
|
"learning_rate": 9.979217688740895e-07,
|
|
"loss": 0.4767,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.5057194461167971,
|
|
"grad_norm": 4.896997928619385,
|
|
"learning_rate": 9.978736416909201e-07,
|
|
"loss": 0.4714,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.5069235400361228,
|
|
"grad_norm": 4.777568340301514,
|
|
"learning_rate": 9.978249647965768e-07,
|
|
"loss": 0.4608,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.5081276339554486,
|
|
"grad_norm": 4.839885711669922,
|
|
"learning_rate": 9.977757382448047e-07,
|
|
"loss": 0.4798,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.5093317278747742,
|
|
"grad_norm": 4.311272144317627,
|
|
"learning_rate": 9.977259620899557e-07,
|
|
"loss": 0.4347,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.5105358217941,
|
|
"grad_norm": 4.5723772048950195,
|
|
"learning_rate": 9.976756363869883e-07,
|
|
"loss": 0.4485,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.5117399157134257,
|
|
"grad_norm": 4.344234943389893,
|
|
"learning_rate": 9.976247611914681e-07,
|
|
"loss": 0.4623,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.5129440096327513,
|
|
"grad_norm": 4.216832160949707,
|
|
"learning_rate": 9.975733365595678e-07,
|
|
"loss": 0.4587,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.5141481035520771,
|
|
"grad_norm": 4.828461647033691,
|
|
"learning_rate": 9.975213625480656e-07,
|
|
"loss": 0.4616,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.5153521974714028,
|
|
"grad_norm": 4.608251571655273,
|
|
"learning_rate": 9.974688392143473e-07,
|
|
"loss": 0.4537,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.5165562913907285,
|
|
"grad_norm": 5.024391174316406,
|
|
"learning_rate": 9.974157666164047e-07,
|
|
"loss": 0.4596,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.5177603853100542,
|
|
"grad_norm": 4.869425296783447,
|
|
"learning_rate": 9.973621448128362e-07,
|
|
"loss": 0.468,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.5189644792293799,
|
|
"grad_norm": 4.599194526672363,
|
|
"learning_rate": 9.973079738628466e-07,
|
|
"loss": 0.4475,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.5201685731487056,
|
|
"grad_norm": 4.410305500030518,
|
|
"learning_rate": 9.972532538262473e-07,
|
|
"loss": 0.4684,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.5213726670680313,
|
|
"grad_norm": 3.9566409587860107,
|
|
"learning_rate": 9.971979847634552e-07,
|
|
"loss": 0.4472,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.5225767609873571,
|
|
"grad_norm": 4.608943462371826,
|
|
"learning_rate": 9.971421667354944e-07,
|
|
"loss": 0.4591,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.5237808549066827,
|
|
"grad_norm": 4.722293853759766,
|
|
"learning_rate": 9.97085799803994e-07,
|
|
"loss": 0.4529,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.5249849488260084,
|
|
"grad_norm": 4.868890762329102,
|
|
"learning_rate": 9.9702888403119e-07,
|
|
"loss": 0.4742,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.5261890427453342,
|
|
"grad_norm": 4.125800132751465,
|
|
"learning_rate": 9.969714194799243e-07,
|
|
"loss": 0.4501,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.5273931366646598,
|
|
"grad_norm": 4.570892810821533,
|
|
"learning_rate": 9.969134062136442e-07,
|
|
"loss": 0.4392,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.5285972305839856,
|
|
"grad_norm": 3.8944973945617676,
|
|
"learning_rate": 9.968548442964033e-07,
|
|
"loss": 0.4525,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.5298013245033113,
|
|
"grad_norm": 4.27981424331665,
|
|
"learning_rate": 9.96795733792861e-07,
|
|
"loss": 0.4607,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.5310054184226369,
|
|
"grad_norm": 4.3153300285339355,
|
|
"learning_rate": 9.96736074768282e-07,
|
|
"loss": 0.4709,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.5322095123419627,
|
|
"grad_norm": 5.543158531188965,
|
|
"learning_rate": 9.966758672885373e-07,
|
|
"loss": 0.4234,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.5334136062612884,
|
|
"grad_norm": 3.463160991668701,
|
|
"learning_rate": 9.966151114201027e-07,
|
|
"loss": 0.4684,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.534617700180614,
|
|
"grad_norm": 3.8580965995788574,
|
|
"learning_rate": 9.965538072300598e-07,
|
|
"loss": 0.4662,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.5358217940999398,
|
|
"grad_norm": 4.317717552185059,
|
|
"learning_rate": 9.96491954786096e-07,
|
|
"loss": 0.441,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.5370258880192655,
|
|
"grad_norm": 4.992043495178223,
|
|
"learning_rate": 9.964295541565035e-07,
|
|
"loss": 0.4575,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.5382299819385912,
|
|
"grad_norm": 4.042685031890869,
|
|
"learning_rate": 9.963666054101797e-07,
|
|
"loss": 0.421,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.5394340758579169,
|
|
"grad_norm": 4.4409260749816895,
|
|
"learning_rate": 9.96303108616628e-07,
|
|
"loss": 0.4684,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.5406381697772427,
|
|
"grad_norm": 4.652424335479736,
|
|
"learning_rate": 9.96239063845956e-07,
|
|
"loss": 0.4562,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.5418422636965683,
|
|
"grad_norm": 3.927960157394409,
|
|
"learning_rate": 9.961744711688765e-07,
|
|
"loss": 0.4636,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.543046357615894,
|
|
"grad_norm": 4.20367956161499,
|
|
"learning_rate": 9.961093306567074e-07,
|
|
"loss": 0.4629,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.5442504515352198,
|
|
"grad_norm": 5.0242791175842285,
|
|
"learning_rate": 9.960436423813721e-07,
|
|
"loss": 0.4699,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.5454545454545454,
|
|
"grad_norm": 4.339791297912598,
|
|
"learning_rate": 9.959774064153975e-07,
|
|
"loss": 0.4393,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.5466586393738712,
|
|
"grad_norm": 3.955888509750366,
|
|
"learning_rate": 9.959106228319164e-07,
|
|
"loss": 0.4419,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.5478627332931969,
|
|
"grad_norm": 4.508617401123047,
|
|
"learning_rate": 9.958432917046656e-07,
|
|
"loss": 0.4534,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.5490668272125225,
|
|
"grad_norm": 4.84667444229126,
|
|
"learning_rate": 9.957754131079865e-07,
|
|
"loss": 0.4621,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.5502709211318483,
|
|
"grad_norm": 4.65517520904541,
|
|
"learning_rate": 9.957069871168252e-07,
|
|
"loss": 0.4644,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.551475015051174,
|
|
"grad_norm": 4.428783416748047,
|
|
"learning_rate": 9.95638013806732e-07,
|
|
"loss": 0.4285,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.5526791089704997,
|
|
"grad_norm": 5.219538688659668,
|
|
"learning_rate": 9.955684932538615e-07,
|
|
"loss": 0.4342,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.5538832028898254,
|
|
"grad_norm": 4.356168270111084,
|
|
"learning_rate": 9.954984255349726e-07,
|
|
"loss": 0.4502,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.5550872968091511,
|
|
"grad_norm": 4.607705116271973,
|
|
"learning_rate": 9.954278107274286e-07,
|
|
"loss": 0.4397,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.5562913907284768,
|
|
"grad_norm": 4.667281150817871,
|
|
"learning_rate": 9.95356648909196e-07,
|
|
"loss": 0.4749,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.5574954846478025,
|
|
"grad_norm": 5.4144673347473145,
|
|
"learning_rate": 9.952849401588464e-07,
|
|
"loss": 0.4516,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.5586995785671283,
|
|
"grad_norm": 4.449268817901611,
|
|
"learning_rate": 9.952126845555544e-07,
|
|
"loss": 0.467,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.5599036724864539,
|
|
"grad_norm": 4.58141565322876,
|
|
"learning_rate": 9.951398821790988e-07,
|
|
"loss": 0.4674,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.5611077664057796,
|
|
"grad_norm": 4.779237747192383,
|
|
"learning_rate": 9.95066533109862e-07,
|
|
"loss": 0.4486,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.5623118603251054,
|
|
"grad_norm": 4.009070873260498,
|
|
"learning_rate": 9.949926374288298e-07,
|
|
"loss": 0.4466,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.563515954244431,
|
|
"grad_norm": 4.913680553436279,
|
|
"learning_rate": 9.949181952175922e-07,
|
|
"loss": 0.4574,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.5647200481637568,
|
|
"grad_norm": 4.114124774932861,
|
|
"learning_rate": 9.94843206558342e-07,
|
|
"loss": 0.4556,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.5659241420830825,
|
|
"grad_norm": 4.208637237548828,
|
|
"learning_rate": 9.94767671533875e-07,
|
|
"loss": 0.4446,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.5671282360024081,
|
|
"grad_norm": 4.362401962280273,
|
|
"learning_rate": 9.946915902275914e-07,
|
|
"loss": 0.4591,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.5683323299217339,
|
|
"grad_norm": 4.419969081878662,
|
|
"learning_rate": 9.946149627234939e-07,
|
|
"loss": 0.4352,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.5695364238410596,
|
|
"grad_norm": 5.162231922149658,
|
|
"learning_rate": 9.94537789106188e-07,
|
|
"loss": 0.4613,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.5707405177603853,
|
|
"grad_norm": 4.270598411560059,
|
|
"learning_rate": 9.944600694608825e-07,
|
|
"loss": 0.4628,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.571944611679711,
|
|
"grad_norm": 4.181495666503906,
|
|
"learning_rate": 9.943818038733891e-07,
|
|
"loss": 0.4391,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.5731487055990367,
|
|
"grad_norm": 4.3339033126831055,
|
|
"learning_rate": 9.943029924301225e-07,
|
|
"loss": 0.4406,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.5743527995183624,
|
|
"grad_norm": 4.909811496734619,
|
|
"learning_rate": 9.942236352180996e-07,
|
|
"loss": 0.4575,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.5755568934376881,
|
|
"grad_norm": 4.58059549331665,
|
|
"learning_rate": 9.941437323249398e-07,
|
|
"loss": 0.4613,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.5767609873570139,
|
|
"grad_norm": 3.9194531440734863,
|
|
"learning_rate": 9.94063283838866e-07,
|
|
"loss": 0.4449,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.5779650812763396,
|
|
"grad_norm": 4.602609634399414,
|
|
"learning_rate": 9.93982289848702e-07,
|
|
"loss": 0.4622,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.5791691751956652,
|
|
"grad_norm": 4.630181789398193,
|
|
"learning_rate": 9.939007504438754e-07,
|
|
"loss": 0.442,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.580373269114991,
|
|
"grad_norm": 3.903799057006836,
|
|
"learning_rate": 9.938186657144149e-07,
|
|
"loss": 0.4624,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.5815773630343167,
|
|
"grad_norm": 5.423624515533447,
|
|
"learning_rate": 9.937360357509522e-07,
|
|
"loss": 0.4372,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.5827814569536424,
|
|
"grad_norm": 4.571367263793945,
|
|
"learning_rate": 9.936528606447198e-07,
|
|
"loss": 0.4521,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.5839855508729681,
|
|
"grad_norm": 3.8848462104797363,
|
|
"learning_rate": 9.935691404875534e-07,
|
|
"loss": 0.4399,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.5851896447922939,
|
|
"grad_norm": 4.659217357635498,
|
|
"learning_rate": 9.934848753718896e-07,
|
|
"loss": 0.4345,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.5863937387116195,
|
|
"grad_norm": 5.5009026527404785,
|
|
"learning_rate": 9.934000653907672e-07,
|
|
"loss": 0.4173,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.5875978326309452,
|
|
"grad_norm": 3.984834671020508,
|
|
"learning_rate": 9.933147106378263e-07,
|
|
"loss": 0.4354,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.588801926550271,
|
|
"grad_norm": 4.0750346183776855,
|
|
"learning_rate": 9.932288112073086e-07,
|
|
"loss": 0.4447,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.5900060204695966,
|
|
"grad_norm": 4.871407985687256,
|
|
"learning_rate": 9.931423671940575e-07,
|
|
"loss": 0.4501,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.5912101143889223,
|
|
"grad_norm": 4.388524055480957,
|
|
"learning_rate": 9.93055378693517e-07,
|
|
"loss": 0.4421,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.5924142083082481,
|
|
"grad_norm": 4.511969566345215,
|
|
"learning_rate": 9.929678458017328e-07,
|
|
"loss": 0.4431,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.5936183022275737,
|
|
"grad_norm": 4.788571834564209,
|
|
"learning_rate": 9.928797686153514e-07,
|
|
"loss": 0.4621,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.5948223961468995,
|
|
"grad_norm": 5.144417762756348,
|
|
"learning_rate": 9.927911472316205e-07,
|
|
"loss": 0.4418,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.5960264900662252,
|
|
"grad_norm": 4.649743556976318,
|
|
"learning_rate": 9.927019817483887e-07,
|
|
"loss": 0.4639,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.5972305839855508,
|
|
"grad_norm": 4.76192045211792,
|
|
"learning_rate": 9.92612272264105e-07,
|
|
"loss": 0.4646,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.5984346779048766,
|
|
"grad_norm": 4.137574195861816,
|
|
"learning_rate": 9.925220188778193e-07,
|
|
"loss": 0.4537,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.5996387718242023,
|
|
"grad_norm": 4.616219997406006,
|
|
"learning_rate": 9.924312216891819e-07,
|
|
"loss": 0.4451,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.600842865743528,
|
|
"grad_norm": 4.623941421508789,
|
|
"learning_rate": 9.923398807984438e-07,
|
|
"loss": 0.4441,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.6020469596628537,
|
|
"grad_norm": 4.540246486663818,
|
|
"learning_rate": 9.92247996306456e-07,
|
|
"loss": 0.4477,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.6032510535821795,
|
|
"grad_norm": 4.742766380310059,
|
|
"learning_rate": 9.921555683146695e-07,
|
|
"loss": 0.4672,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.6044551475015051,
|
|
"grad_norm": 5.316002368927002,
|
|
"learning_rate": 9.920625969251364e-07,
|
|
"loss": 0.4593,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.6056592414208308,
|
|
"grad_norm": 4.386168003082275,
|
|
"learning_rate": 9.919690822405074e-07,
|
|
"loss": 0.4438,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.6068633353401566,
|
|
"grad_norm": 3.9734067916870117,
|
|
"learning_rate": 9.91875024364034e-07,
|
|
"loss": 0.4428,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.6080674292594822,
|
|
"grad_norm": 4.917031764984131,
|
|
"learning_rate": 9.917804233995673e-07,
|
|
"loss": 0.4622,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.609271523178808,
|
|
"grad_norm": 4.690892696380615,
|
|
"learning_rate": 9.916852794515575e-07,
|
|
"loss": 0.4513,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.6104756170981337,
|
|
"grad_norm": 4.1330952644348145,
|
|
"learning_rate": 9.915895926250552e-07,
|
|
"loss": 0.4523,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.6116797110174593,
|
|
"grad_norm": 4.932434558868408,
|
|
"learning_rate": 9.9149336302571e-07,
|
|
"loss": 0.4407,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.6128838049367851,
|
|
"grad_norm": 4.421885967254639,
|
|
"learning_rate": 9.913965907597702e-07,
|
|
"loss": 0.4332,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.6140878988561108,
|
|
"grad_norm": 5.199044704437256,
|
|
"learning_rate": 9.91299275934084e-07,
|
|
"loss": 0.426,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.6152919927754364,
|
|
"grad_norm": 4.189499855041504,
|
|
"learning_rate": 9.912014186560984e-07,
|
|
"loss": 0.4326,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.6164960866947622,
|
|
"grad_norm": 4.297112464904785,
|
|
"learning_rate": 9.911030190338597e-07,
|
|
"loss": 0.4622,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.6177001806140879,
|
|
"grad_norm": 3.9968087673187256,
|
|
"learning_rate": 9.910040771760122e-07,
|
|
"loss": 0.447,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.6189042745334136,
|
|
"grad_norm": 4.857995510101318,
|
|
"learning_rate": 9.909045931917998e-07,
|
|
"loss": 0.4343,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.6201083684527393,
|
|
"grad_norm": 3.741711378097534,
|
|
"learning_rate": 9.908045671910642e-07,
|
|
"loss": 0.4366,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.621312462372065,
|
|
"grad_norm": 4.424086093902588,
|
|
"learning_rate": 9.907039992842461e-07,
|
|
"loss": 0.448,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.6225165562913907,
|
|
"grad_norm": 5.499582767486572,
|
|
"learning_rate": 9.906028895823842e-07,
|
|
"loss": 0.4546,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.6237206502107164,
|
|
"grad_norm": 4.836984634399414,
|
|
"learning_rate": 9.905012381971157e-07,
|
|
"loss": 0.4605,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.6249247441300422,
|
|
"grad_norm": 4.31553316116333,
|
|
"learning_rate": 9.903990452406756e-07,
|
|
"loss": 0.4302,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.6261288380493678,
|
|
"grad_norm": 4.909146785736084,
|
|
"learning_rate": 9.902963108258968e-07,
|
|
"loss": 0.4445,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.6273329319686936,
|
|
"grad_norm": 4.295082092285156,
|
|
"learning_rate": 9.901930350662103e-07,
|
|
"loss": 0.4364,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.6285370258880193,
|
|
"grad_norm": 4.154002666473389,
|
|
"learning_rate": 9.90089218075645e-07,
|
|
"loss": 0.4526,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.6297411198073449,
|
|
"grad_norm": 4.30592679977417,
|
|
"learning_rate": 9.89984859968827e-07,
|
|
"loss": 0.4442,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.6309452137266707,
|
|
"grad_norm": 5.334674835205078,
|
|
"learning_rate": 9.898799608609795e-07,
|
|
"loss": 0.4415,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.6321493076459964,
|
|
"grad_norm": 4.136261940002441,
|
|
"learning_rate": 9.897745208679239e-07,
|
|
"loss": 0.4442,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.633353401565322,
|
|
"grad_norm": 4.585081577301025,
|
|
"learning_rate": 9.896685401060782e-07,
|
|
"loss": 0.4565,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.6345574954846478,
|
|
"grad_norm": 4.742111682891846,
|
|
"learning_rate": 9.895620186924578e-07,
|
|
"loss": 0.4393,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.6357615894039735,
|
|
"grad_norm": 3.9798941612243652,
|
|
"learning_rate": 9.894549567446748e-07,
|
|
"loss": 0.4255,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.6369656833232992,
|
|
"grad_norm": 4.722369194030762,
|
|
"learning_rate": 9.893473543809383e-07,
|
|
"loss": 0.4377,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.6381697772426249,
|
|
"grad_norm": 4.399467945098877,
|
|
"learning_rate": 9.892392117200536e-07,
|
|
"loss": 0.4215,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.6393738711619507,
|
|
"grad_norm": 4.718751430511475,
|
|
"learning_rate": 9.891305288814235e-07,
|
|
"loss": 0.4372,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.6405779650812763,
|
|
"grad_norm": 4.376132488250732,
|
|
"learning_rate": 9.890213059850465e-07,
|
|
"loss": 0.4567,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.641782059000602,
|
|
"grad_norm": 5.186975955963135,
|
|
"learning_rate": 9.889115431515173e-07,
|
|
"loss": 0.4414,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.6429861529199278,
|
|
"grad_norm": 4.560245037078857,
|
|
"learning_rate": 9.888012405020271e-07,
|
|
"loss": 0.4329,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.6441902468392534,
|
|
"grad_norm": 5.553184986114502,
|
|
"learning_rate": 9.886903981583632e-07,
|
|
"loss": 0.4472,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.6453943407585792,
|
|
"grad_norm": 5.126540660858154,
|
|
"learning_rate": 9.885790162429086e-07,
|
|
"loss": 0.4577,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.6465984346779049,
|
|
"grad_norm": 5.031693935394287,
|
|
"learning_rate": 9.884670948786417e-07,
|
|
"loss": 0.4608,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.6478025285972305,
|
|
"grad_norm": 4.265883445739746,
|
|
"learning_rate": 9.883546341891373e-07,
|
|
"loss": 0.4335,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.6490066225165563,
|
|
"grad_norm": 3.7793495655059814,
|
|
"learning_rate": 9.88241634298565e-07,
|
|
"loss": 0.4481,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.650210716435882,
|
|
"grad_norm": 4.184829235076904,
|
|
"learning_rate": 9.881280953316903e-07,
|
|
"loss": 0.4351,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.6514148103552077,
|
|
"grad_norm": 5.431835174560547,
|
|
"learning_rate": 9.880140174138735e-07,
|
|
"loss": 0.4739,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.6526189042745334,
|
|
"grad_norm": 5.218166828155518,
|
|
"learning_rate": 9.878994006710695e-07,
|
|
"loss": 0.4547,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.6538229981938591,
|
|
"grad_norm": 5.319456100463867,
|
|
"learning_rate": 9.877842452298293e-07,
|
|
"loss": 0.453,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.6550270921131849,
|
|
"grad_norm": 4.373801231384277,
|
|
"learning_rate": 9.876685512172979e-07,
|
|
"loss": 0.4245,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.6562311860325105,
|
|
"grad_norm": 4.274784088134766,
|
|
"learning_rate": 9.875523187612153e-07,
|
|
"loss": 0.4327,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.6574352799518363,
|
|
"grad_norm": 5.235876560211182,
|
|
"learning_rate": 9.874355479899157e-07,
|
|
"loss": 0.4365,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.658639373871162,
|
|
"grad_norm": 4.505414962768555,
|
|
"learning_rate": 9.873182390323275e-07,
|
|
"loss": 0.4236,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.6598434677904876,
|
|
"grad_norm": 5.843977451324463,
|
|
"learning_rate": 9.87200392017974e-07,
|
|
"loss": 0.4482,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.6610475617098134,
|
|
"grad_norm": 4.754218578338623,
|
|
"learning_rate": 9.870820070769723e-07,
|
|
"loss": 0.4526,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.6622516556291391,
|
|
"grad_norm": 4.734755992889404,
|
|
"learning_rate": 9.869630843400329e-07,
|
|
"loss": 0.4286,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.6634557495484648,
|
|
"grad_norm": 4.781942367553711,
|
|
"learning_rate": 9.868436239384608e-07,
|
|
"loss": 0.4395,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.6646598434677905,
|
|
"grad_norm": 4.710615634918213,
|
|
"learning_rate": 9.86723626004154e-07,
|
|
"loss": 0.4437,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.6658639373871162,
|
|
"grad_norm": 3.9797275066375732,
|
|
"learning_rate": 9.86603090669605e-07,
|
|
"loss": 0.4285,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.6670680313064419,
|
|
"grad_norm": 5.289978981018066,
|
|
"learning_rate": 9.864820180678984e-07,
|
|
"loss": 0.4482,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.6682721252257676,
|
|
"grad_norm": 3.6335768699645996,
|
|
"learning_rate": 9.86360408332713e-07,
|
|
"loss": 0.4578,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.6694762191450934,
|
|
"grad_norm": 3.998011589050293,
|
|
"learning_rate": 9.862382615983201e-07,
|
|
"loss": 0.439,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.670680313064419,
|
|
"grad_norm": 4.6308369636535645,
|
|
"learning_rate": 9.861155779995843e-07,
|
|
"loss": 0.4416,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.6718844069837447,
|
|
"grad_norm": 4.869227409362793,
|
|
"learning_rate": 9.859923576719623e-07,
|
|
"loss": 0.4271,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.6730885009030705,
|
|
"grad_norm": 4.426019668579102,
|
|
"learning_rate": 9.858686007515043e-07,
|
|
"loss": 0.424,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.6742925948223961,
|
|
"grad_norm": 4.659002304077148,
|
|
"learning_rate": 9.857443073748526e-07,
|
|
"loss": 0.4419,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.6754966887417219,
|
|
"grad_norm": 3.8600122928619385,
|
|
"learning_rate": 9.856194776792412e-07,
|
|
"loss": 0.4397,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.6767007826610476,
|
|
"grad_norm": 4.6182756423950195,
|
|
"learning_rate": 9.854941118024973e-07,
|
|
"loss": 0.454,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.6779048765803732,
|
|
"grad_norm": 4.149092674255371,
|
|
"learning_rate": 9.853682098830392e-07,
|
|
"loss": 0.426,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.679108970499699,
|
|
"grad_norm": 4.583498954772949,
|
|
"learning_rate": 9.852417720598778e-07,
|
|
"loss": 0.4226,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.6803130644190247,
|
|
"grad_norm": 4.789090633392334,
|
|
"learning_rate": 9.851147984726152e-07,
|
|
"loss": 0.4506,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.6815171583383504,
|
|
"grad_norm": 3.850926160812378,
|
|
"learning_rate": 9.849872892614452e-07,
|
|
"loss": 0.4149,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.6827212522576761,
|
|
"grad_norm": 4.576216697692871,
|
|
"learning_rate": 9.848592445671532e-07,
|
|
"loss": 0.4364,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.6839253461770018,
|
|
"grad_norm": 5.302231311798096,
|
|
"learning_rate": 9.847306645311152e-07,
|
|
"loss": 0.4529,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.6851294400963275,
|
|
"grad_norm": 4.6318864822387695,
|
|
"learning_rate": 9.846015492952993e-07,
|
|
"loss": 0.4299,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.6863335340156532,
|
|
"grad_norm": 4.18743896484375,
|
|
"learning_rate": 9.844718990022634e-07,
|
|
"loss": 0.4567,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.687537627934979,
|
|
"grad_norm": 4.45042610168457,
|
|
"learning_rate": 9.84341713795157e-07,
|
|
"loss": 0.4461,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.6887417218543046,
|
|
"grad_norm": 4.0155415534973145,
|
|
"learning_rate": 9.842109938177197e-07,
|
|
"loss": 0.4422,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.6899458157736303,
|
|
"grad_norm": 4.72194242477417,
|
|
"learning_rate": 9.840797392142819e-07,
|
|
"loss": 0.4499,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.6911499096929561,
|
|
"grad_norm": 4.1018595695495605,
|
|
"learning_rate": 9.83947950129764e-07,
|
|
"loss": 0.4305,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.6923540036122817,
|
|
"grad_norm": 4.466518402099609,
|
|
"learning_rate": 9.838156267096772e-07,
|
|
"loss": 0.437,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.6935580975316075,
|
|
"grad_norm": 4.084195137023926,
|
|
"learning_rate": 9.836827691001215e-07,
|
|
"loss": 0.4571,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.6947621914509332,
|
|
"grad_norm": 4.3810319900512695,
|
|
"learning_rate": 9.835493774477876e-07,
|
|
"loss": 0.4358,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.6959662853702588,
|
|
"grad_norm": 4.7473464012146,
|
|
"learning_rate": 9.834154518999558e-07,
|
|
"loss": 0.4307,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.6971703792895846,
|
|
"grad_norm": 4.240455627441406,
|
|
"learning_rate": 9.832809926044953e-07,
|
|
"loss": 0.4456,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.6983744732089103,
|
|
"grad_norm": 4.3158087730407715,
|
|
"learning_rate": 9.831459997098653e-07,
|
|
"loss": 0.4268,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.699578567128236,
|
|
"grad_norm": 4.3610005378723145,
|
|
"learning_rate": 9.83010473365114e-07,
|
|
"loss": 0.4334,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.7007826610475617,
|
|
"grad_norm": 4.417696952819824,
|
|
"learning_rate": 9.828744137198778e-07,
|
|
"loss": 0.4451,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.7019867549668874,
|
|
"grad_norm": 4.091536998748779,
|
|
"learning_rate": 9.827378209243833e-07,
|
|
"loss": 0.4277,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.7031908488862131,
|
|
"grad_norm": 5.2131028175354,
|
|
"learning_rate": 9.826006951294448e-07,
|
|
"loss": 0.4353,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.7043949428055388,
|
|
"grad_norm": 4.724157810211182,
|
|
"learning_rate": 9.824630364864653e-07,
|
|
"loss": 0.4379,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.7055990367248646,
|
|
"grad_norm": 3.924499034881592,
|
|
"learning_rate": 9.82324845147436e-07,
|
|
"loss": 0.4341,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.7068031306441902,
|
|
"grad_norm": 3.9886951446533203,
|
|
"learning_rate": 9.821861212649367e-07,
|
|
"loss": 0.4458,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.708007224563516,
|
|
"grad_norm": 5.176059246063232,
|
|
"learning_rate": 9.820468649921348e-07,
|
|
"loss": 0.4277,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.7092113184828417,
|
|
"grad_norm": 5.795221328735352,
|
|
"learning_rate": 9.819070764827856e-07,
|
|
"loss": 0.4608,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.7104154124021673,
|
|
"grad_norm": 4.0651702880859375,
|
|
"learning_rate": 9.81766755891232e-07,
|
|
"loss": 0.4349,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.7116195063214931,
|
|
"grad_norm": 4.822697162628174,
|
|
"learning_rate": 9.816259033724051e-07,
|
|
"loss": 0.4368,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.7128236002408188,
|
|
"grad_norm": 3.429680585861206,
|
|
"learning_rate": 9.814845190818218e-07,
|
|
"loss": 0.4119,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.7140276941601444,
|
|
"grad_norm": 4.649044513702393,
|
|
"learning_rate": 9.813426031755873e-07,
|
|
"loss": 0.431,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.7152317880794702,
|
|
"grad_norm": 4.576180458068848,
|
|
"learning_rate": 9.812001558103937e-07,
|
|
"loss": 0.4478,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.7164358819987959,
|
|
"grad_norm": 4.996614933013916,
|
|
"learning_rate": 9.810571771435196e-07,
|
|
"loss": 0.4013,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.7176399759181216,
|
|
"grad_norm": 5.006197929382324,
|
|
"learning_rate": 9.809136673328305e-07,
|
|
"loss": 0.4275,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.7188440698374473,
|
|
"grad_norm": 3.766942024230957,
|
|
"learning_rate": 9.807696265367776e-07,
|
|
"loss": 0.4377,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.720048163756773,
|
|
"grad_norm": 4.086816787719727,
|
|
"learning_rate": 9.806250549143992e-07,
|
|
"loss": 0.4384,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.7212522576760987,
|
|
"grad_norm": 5.5871734619140625,
|
|
"learning_rate": 9.804799526253196e-07,
|
|
"loss": 0.4511,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.7224563515954244,
|
|
"grad_norm": 4.023412704467773,
|
|
"learning_rate": 9.803343198297484e-07,
|
|
"loss": 0.4446,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.7236604455147502,
|
|
"grad_norm": 4.708857536315918,
|
|
"learning_rate": 9.80188156688482e-07,
|
|
"loss": 0.4395,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.7248645394340758,
|
|
"grad_norm": 3.879977226257324,
|
|
"learning_rate": 9.80041463362901e-07,
|
|
"loss": 0.4434,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.7260686333534015,
|
|
"grad_norm": 4.743607997894287,
|
|
"learning_rate": 9.798942400149726e-07,
|
|
"loss": 0.4365,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.7272727272727273,
|
|
"grad_norm": 3.6438701152801514,
|
|
"learning_rate": 9.797464868072486e-07,
|
|
"loss": 0.447,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.7284768211920529,
|
|
"grad_norm": 4.472813129425049,
|
|
"learning_rate": 9.79598203902866e-07,
|
|
"loss": 0.443,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.7296809151113787,
|
|
"grad_norm": 5.6175312995910645,
|
|
"learning_rate": 9.794493914655467e-07,
|
|
"loss": 0.4207,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.7308850090307044,
|
|
"grad_norm": 4.9606404304504395,
|
|
"learning_rate": 9.793000496595966e-07,
|
|
"loss": 0.4279,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.7320891029500302,
|
|
"grad_norm": 4.130514144897461,
|
|
"learning_rate": 9.791501786499074e-07,
|
|
"loss": 0.4183,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.7332931968693558,
|
|
"grad_norm": 2.9547371864318848,
|
|
"learning_rate": 9.78999778601954e-07,
|
|
"loss": 0.4038,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.7344972907886815,
|
|
"grad_norm": 4.06984281539917,
|
|
"learning_rate": 9.788488496817958e-07,
|
|
"loss": 0.4333,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.7357013847080073,
|
|
"grad_norm": 3.900606870651245,
|
|
"learning_rate": 9.78697392056076e-07,
|
|
"loss": 0.418,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.7369054786273329,
|
|
"grad_norm": 4.396324157714844,
|
|
"learning_rate": 9.78545405892022e-07,
|
|
"loss": 0.435,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.7381095725466587,
|
|
"grad_norm": 4.068949222564697,
|
|
"learning_rate": 9.78392891357444e-07,
|
|
"loss": 0.4138,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.7393136664659844,
|
|
"grad_norm": 4.090792655944824,
|
|
"learning_rate": 9.782398486207364e-07,
|
|
"loss": 0.4106,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.74051776038531,
|
|
"grad_norm": 5.222830295562744,
|
|
"learning_rate": 9.780862778508762e-07,
|
|
"loss": 0.4534,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.7417218543046358,
|
|
"grad_norm": 3.9300661087036133,
|
|
"learning_rate": 9.779321792174238e-07,
|
|
"loss": 0.4436,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.7429259482239615,
|
|
"grad_norm": 4.139192581176758,
|
|
"learning_rate": 9.77777552890522e-07,
|
|
"loss": 0.4384,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.7441300421432872,
|
|
"grad_norm": 4.677849292755127,
|
|
"learning_rate": 9.776223990408969e-07,
|
|
"loss": 0.4338,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.7453341360626129,
|
|
"grad_norm": 4.7174391746521,
|
|
"learning_rate": 9.77466717839856e-07,
|
|
"loss": 0.4265,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.7465382299819386,
|
|
"grad_norm": 4.314562797546387,
|
|
"learning_rate": 9.773105094592903e-07,
|
|
"loss": 0.4389,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.7477423239012643,
|
|
"grad_norm": 4.679368495941162,
|
|
"learning_rate": 9.77153774071672e-07,
|
|
"loss": 0.4177,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.74894641782059,
|
|
"grad_norm": 4.037609577178955,
|
|
"learning_rate": 9.769965118500554e-07,
|
|
"loss": 0.4376,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.7501505117399158,
|
|
"grad_norm": 4.8901448249816895,
|
|
"learning_rate": 9.768387229680765e-07,
|
|
"loss": 0.4597,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.7513546056592414,
|
|
"grad_norm": 4.4093122482299805,
|
|
"learning_rate": 9.76680407599953e-07,
|
|
"loss": 0.4332,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.7525586995785671,
|
|
"grad_norm": 4.720508575439453,
|
|
"learning_rate": 9.765215659204837e-07,
|
|
"loss": 0.4579,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.7537627934978929,
|
|
"grad_norm": 4.316104412078857,
|
|
"learning_rate": 9.763621981050486e-07,
|
|
"loss": 0.4499,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.7549668874172185,
|
|
"grad_norm": 4.805814743041992,
|
|
"learning_rate": 9.762023043296082e-07,
|
|
"loss": 0.4229,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.7561709813365443,
|
|
"grad_norm": 4.259012699127197,
|
|
"learning_rate": 9.760418847707042e-07,
|
|
"loss": 0.4307,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.75737507525587,
|
|
"grad_norm": 4.74151086807251,
|
|
"learning_rate": 9.75880939605459e-07,
|
|
"loss": 0.4039,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.7585791691751956,
|
|
"grad_norm": 4.7510294914245605,
|
|
"learning_rate": 9.757194690115747e-07,
|
|
"loss": 0.4302,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.7597832630945214,
|
|
"grad_norm": 5.057920455932617,
|
|
"learning_rate": 9.75557473167334e-07,
|
|
"loss": 0.4196,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.7609873570138471,
|
|
"grad_norm": 4.428061485290527,
|
|
"learning_rate": 9.753949522515992e-07,
|
|
"loss": 0.4271,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.7621914509331728,
|
|
"grad_norm": 4.023929595947266,
|
|
"learning_rate": 9.75231906443813e-07,
|
|
"loss": 0.4125,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.7633955448524985,
|
|
"grad_norm": 4.456701755523682,
|
|
"learning_rate": 9.75068335923997e-07,
|
|
"loss": 0.4177,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.7645996387718242,
|
|
"grad_norm": 4.046926975250244,
|
|
"learning_rate": 9.749042408727517e-07,
|
|
"loss": 0.4172,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.7658037326911499,
|
|
"grad_norm": 4.5811944007873535,
|
|
"learning_rate": 9.747396214712584e-07,
|
|
"loss": 0.4165,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.7670078266104756,
|
|
"grad_norm": 3.6832375526428223,
|
|
"learning_rate": 9.745744779012757e-07,
|
|
"loss": 0.4183,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.7682119205298014,
|
|
"grad_norm": 4.535373210906982,
|
|
"learning_rate": 9.744088103451417e-07,
|
|
"loss": 0.4205,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.769416014449127,
|
|
"grad_norm": 4.3140363693237305,
|
|
"learning_rate": 9.742426189857729e-07,
|
|
"loss": 0.4414,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.7706201083684527,
|
|
"grad_norm": 4.968809604644775,
|
|
"learning_rate": 9.74075904006664e-07,
|
|
"loss": 0.4421,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.7718242022877785,
|
|
"grad_norm": 4.488393783569336,
|
|
"learning_rate": 9.739086655918883e-07,
|
|
"loss": 0.441,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.7730282962071041,
|
|
"grad_norm": 4.255595684051514,
|
|
"learning_rate": 9.737409039260966e-07,
|
|
"loss": 0.4211,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.7742323901264299,
|
|
"grad_norm": 4.285024642944336,
|
|
"learning_rate": 9.735726191945175e-07,
|
|
"loss": 0.42,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.7754364840457556,
|
|
"grad_norm": 4.8813347816467285,
|
|
"learning_rate": 9.734038115829571e-07,
|
|
"loss": 0.433,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.7766405779650812,
|
|
"grad_norm": 3.9893128871917725,
|
|
"learning_rate": 9.732344812777987e-07,
|
|
"loss": 0.3902,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.777844671884407,
|
|
"grad_norm": 4.2948784828186035,
|
|
"learning_rate": 9.730646284660035e-07,
|
|
"loss": 0.4094,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.7790487658037327,
|
|
"grad_norm": 4.328617572784424,
|
|
"learning_rate": 9.728942533351087e-07,
|
|
"loss": 0.4412,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.7802528597230584,
|
|
"grad_norm": 4.67041015625,
|
|
"learning_rate": 9.727233560732286e-07,
|
|
"loss": 0.4157,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.7814569536423841,
|
|
"grad_norm": 4.249061584472656,
|
|
"learning_rate": 9.725519368690538e-07,
|
|
"loss": 0.4398,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.7826610475617098,
|
|
"grad_norm": 5.444673538208008,
|
|
"learning_rate": 9.723799959118513e-07,
|
|
"loss": 0.4299,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.7838651414810355,
|
|
"grad_norm": 4.813880920410156,
|
|
"learning_rate": 9.722075333914642e-07,
|
|
"loss": 0.4483,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 0.7850692354003612,
|
|
"grad_norm": 3.9406328201293945,
|
|
"learning_rate": 9.720345494983116e-07,
|
|
"loss": 0.4101,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.786273329319687,
|
|
"grad_norm": 5.169934272766113,
|
|
"learning_rate": 9.718610444233878e-07,
|
|
"loss": 0.4284,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 0.7874774232390126,
|
|
"grad_norm": 4.304941177368164,
|
|
"learning_rate": 9.71687018358263e-07,
|
|
"loss": 0.4232,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.7886815171583383,
|
|
"grad_norm": 4.452000141143799,
|
|
"learning_rate": 9.715124714950827e-07,
|
|
"loss": 0.4506,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.7898856110776641,
|
|
"grad_norm": 3.7503676414489746,
|
|
"learning_rate": 9.713374040265668e-07,
|
|
"loss": 0.4246,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.7910897049969897,
|
|
"grad_norm": 4.534003257751465,
|
|
"learning_rate": 9.71161816146011e-07,
|
|
"loss": 0.4247,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 0.7922937989163155,
|
|
"grad_norm": 5.637129306793213,
|
|
"learning_rate": 9.709857080472845e-07,
|
|
"loss": 0.4419,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.7934978928356412,
|
|
"grad_norm": 3.844273805618286,
|
|
"learning_rate": 9.708090799248313e-07,
|
|
"loss": 0.4042,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 0.7947019867549668,
|
|
"grad_norm": 4.556625843048096,
|
|
"learning_rate": 9.706319319736703e-07,
|
|
"loss": 0.4384,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.7959060806742926,
|
|
"grad_norm": 4.6486053466796875,
|
|
"learning_rate": 9.70454264389393e-07,
|
|
"loss": 0.4091,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 0.7971101745936183,
|
|
"grad_norm": 4.751596927642822,
|
|
"learning_rate": 9.702760773681658e-07,
|
|
"loss": 0.428,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.798314268512944,
|
|
"grad_norm": 4.64603328704834,
|
|
"learning_rate": 9.700973711067282e-07,
|
|
"loss": 0.4376,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 0.7995183624322697,
|
|
"grad_norm": 4.823798656463623,
|
|
"learning_rate": 9.699181458023927e-07,
|
|
"loss": 0.4057,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 0.8007224563515954,
|
|
"grad_norm": 5.07472562789917,
|
|
"learning_rate": 9.697384016530451e-07,
|
|
"loss": 0.4103,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 0.8019265502709211,
|
|
"grad_norm": 5.586597442626953,
|
|
"learning_rate": 9.695581388571444e-07,
|
|
"loss": 0.4401,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 0.8031306441902468,
|
|
"grad_norm": 5.10539436340332,
|
|
"learning_rate": 9.693773576137219e-07,
|
|
"loss": 0.4298,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 0.8043347381095726,
|
|
"grad_norm": 5.036708354949951,
|
|
"learning_rate": 9.691960581223815e-07,
|
|
"loss": 0.4299,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 0.8055388320288982,
|
|
"grad_norm": 4.794188499450684,
|
|
"learning_rate": 9.690142405832988e-07,
|
|
"loss": 0.4296,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 0.8067429259482239,
|
|
"grad_norm": 4.483447074890137,
|
|
"learning_rate": 9.688319051972223e-07,
|
|
"loss": 0.4063,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.8079470198675497,
|
|
"grad_norm": 4.88456916809082,
|
|
"learning_rate": 9.686490521654713e-07,
|
|
"loss": 0.4548,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 0.8091511137868754,
|
|
"grad_norm": 4.166242599487305,
|
|
"learning_rate": 9.684656816899374e-07,
|
|
"loss": 0.4344,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 0.8103552077062011,
|
|
"grad_norm": 4.282528877258301,
|
|
"learning_rate": 9.682817939730831e-07,
|
|
"loss": 0.4143,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 0.8115593016255268,
|
|
"grad_norm": 4.342618942260742,
|
|
"learning_rate": 9.680973892179423e-07,
|
|
"loss": 0.4224,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 0.8127633955448526,
|
|
"grad_norm": 4.768647193908691,
|
|
"learning_rate": 9.679124676281195e-07,
|
|
"loss": 0.4251,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 0.8139674894641782,
|
|
"grad_norm": 4.024239540100098,
|
|
"learning_rate": 9.677270294077896e-07,
|
|
"loss": 0.4415,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 0.8151715833835039,
|
|
"grad_norm": 3.9242262840270996,
|
|
"learning_rate": 9.675410747616984e-07,
|
|
"loss": 0.4475,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 0.8163756773028297,
|
|
"grad_norm": 4.580953121185303,
|
|
"learning_rate": 9.67354603895162e-07,
|
|
"loss": 0.4067,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 0.8175797712221553,
|
|
"grad_norm": 4.859120845794678,
|
|
"learning_rate": 9.67167617014066e-07,
|
|
"loss": 0.4311,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 0.818783865141481,
|
|
"grad_norm": 4.1437835693359375,
|
|
"learning_rate": 9.66980114324866e-07,
|
|
"loss": 0.4135,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.8199879590608068,
|
|
"grad_norm": 4.027251243591309,
|
|
"learning_rate": 9.667920960345872e-07,
|
|
"loss": 0.4021,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 0.8211920529801324,
|
|
"grad_norm": 4.283502101898193,
|
|
"learning_rate": 9.666035623508237e-07,
|
|
"loss": 0.4207,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 0.8223961468994582,
|
|
"grad_norm": 4.910589694976807,
|
|
"learning_rate": 9.66414513481739e-07,
|
|
"loss": 0.4474,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 0.8236002408187839,
|
|
"grad_norm": 5.238614559173584,
|
|
"learning_rate": 9.662249496360653e-07,
|
|
"loss": 0.4294,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 0.8248043347381095,
|
|
"grad_norm": 4.113722801208496,
|
|
"learning_rate": 9.660348710231036e-07,
|
|
"loss": 0.4145,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 0.8260084286574353,
|
|
"grad_norm": 4.979987144470215,
|
|
"learning_rate": 9.65844277852723e-07,
|
|
"loss": 0.421,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 0.827212522576761,
|
|
"grad_norm": 5.396749973297119,
|
|
"learning_rate": 9.656531703353608e-07,
|
|
"loss": 0.4444,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 0.8284166164960867,
|
|
"grad_norm": 4.567556858062744,
|
|
"learning_rate": 9.654615486820222e-07,
|
|
"loss": 0.4198,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 0.8296207104154124,
|
|
"grad_norm": 5.2882304191589355,
|
|
"learning_rate": 9.6526941310428e-07,
|
|
"loss": 0.4274,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 0.8308248043347382,
|
|
"grad_norm": 4.51816987991333,
|
|
"learning_rate": 9.650767638142746e-07,
|
|
"loss": 0.4465,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.8320288982540638,
|
|
"grad_norm": 3.9410834312438965,
|
|
"learning_rate": 9.648836010247137e-07,
|
|
"loss": 0.4182,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 0.8332329921733895,
|
|
"grad_norm": 4.620553493499756,
|
|
"learning_rate": 9.646899249488714e-07,
|
|
"loss": 0.4206,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 0.8344370860927153,
|
|
"grad_norm": 4.430214881896973,
|
|
"learning_rate": 9.644957358005892e-07,
|
|
"loss": 0.4313,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 0.8356411800120409,
|
|
"grad_norm": 4.277939796447754,
|
|
"learning_rate": 9.643010337942747e-07,
|
|
"loss": 0.4313,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 0.8368452739313667,
|
|
"grad_norm": 5.185015678405762,
|
|
"learning_rate": 9.64105819144902e-07,
|
|
"loss": 0.4225,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 0.8380493678506924,
|
|
"grad_norm": 4.402646541595459,
|
|
"learning_rate": 9.63910092068011e-07,
|
|
"loss": 0.417,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 0.839253461770018,
|
|
"grad_norm": 3.664020538330078,
|
|
"learning_rate": 9.637138527797074e-07,
|
|
"loss": 0.4337,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 0.8404575556893438,
|
|
"grad_norm": 4.9388041496276855,
|
|
"learning_rate": 9.635171014966625e-07,
|
|
"loss": 0.412,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 0.8416616496086695,
|
|
"grad_norm": 4.200076103210449,
|
|
"learning_rate": 9.63319838436113e-07,
|
|
"loss": 0.4212,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 0.8428657435279951,
|
|
"grad_norm": 4.56259822845459,
|
|
"learning_rate": 9.631220638158605e-07,
|
|
"loss": 0.4316,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.8440698374473209,
|
|
"grad_norm": 3.910545587539673,
|
|
"learning_rate": 9.629237778542714e-07,
|
|
"loss": 0.4,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 0.8452739313666466,
|
|
"grad_norm": 4.639405250549316,
|
|
"learning_rate": 9.62724980770277e-07,
|
|
"loss": 0.4084,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 0.8464780252859723,
|
|
"grad_norm": 4.84975528717041,
|
|
"learning_rate": 9.625256727833725e-07,
|
|
"loss": 0.4331,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 0.847682119205298,
|
|
"grad_norm": 3.9190306663513184,
|
|
"learning_rate": 9.623258541136175e-07,
|
|
"loss": 0.4171,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 0.8488862131246238,
|
|
"grad_norm": 4.248600482940674,
|
|
"learning_rate": 9.621255249816353e-07,
|
|
"loss": 0.4255,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 0.8500903070439494,
|
|
"grad_norm": 4.055094242095947,
|
|
"learning_rate": 9.61924685608613e-07,
|
|
"loss": 0.4257,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 0.8512944009632751,
|
|
"grad_norm": 4.14054536819458,
|
|
"learning_rate": 9.617233362163007e-07,
|
|
"loss": 0.4046,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 0.8524984948826009,
|
|
"grad_norm": 5.480048179626465,
|
|
"learning_rate": 9.61521477027012e-07,
|
|
"loss": 0.4007,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 0.8537025888019265,
|
|
"grad_norm": 4.100722312927246,
|
|
"learning_rate": 9.613191082636232e-07,
|
|
"loss": 0.4148,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 0.8549066827212523,
|
|
"grad_norm": 3.739861011505127,
|
|
"learning_rate": 9.611162301495735e-07,
|
|
"loss": 0.4156,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.856110776640578,
|
|
"grad_norm": 4.769533634185791,
|
|
"learning_rate": 9.60912842908864e-07,
|
|
"loss": 0.4356,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 0.8573148705599036,
|
|
"grad_norm": 4.347903728485107,
|
|
"learning_rate": 9.60708946766058e-07,
|
|
"loss": 0.4509,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 0.8585189644792294,
|
|
"grad_norm": 4.265124797821045,
|
|
"learning_rate": 9.605045419462813e-07,
|
|
"loss": 0.4231,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 0.8597230583985551,
|
|
"grad_norm": 5.108783721923828,
|
|
"learning_rate": 9.602996286752206e-07,
|
|
"loss": 0.4363,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 0.8609271523178808,
|
|
"grad_norm": 5.001750946044922,
|
|
"learning_rate": 9.600942071791248e-07,
|
|
"loss": 0.4223,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 0.8621312462372065,
|
|
"grad_norm": 4.6718668937683105,
|
|
"learning_rate": 9.598882776848025e-07,
|
|
"loss": 0.4206,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 0.8633353401565322,
|
|
"grad_norm": 4.35657262802124,
|
|
"learning_rate": 9.596818404196249e-07,
|
|
"loss": 0.4136,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 0.8645394340758579,
|
|
"grad_norm": 4.119489669799805,
|
|
"learning_rate": 9.59474895611523e-07,
|
|
"loss": 0.4254,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 0.8657435279951836,
|
|
"grad_norm": 4.4842047691345215,
|
|
"learning_rate": 9.59267443488988e-07,
|
|
"loss": 0.4279,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 0.8669476219145094,
|
|
"grad_norm": 4.105453014373779,
|
|
"learning_rate": 9.590594842810714e-07,
|
|
"loss": 0.4031,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.868151715833835,
|
|
"grad_norm": 4.400485992431641,
|
|
"learning_rate": 9.58851018217385e-07,
|
|
"loss": 0.4098,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 0.8693558097531607,
|
|
"grad_norm": 4.673033714294434,
|
|
"learning_rate": 9.586420455280998e-07,
|
|
"loss": 0.4299,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 0.8705599036724865,
|
|
"grad_norm": 4.483117580413818,
|
|
"learning_rate": 9.584325664439463e-07,
|
|
"loss": 0.438,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 0.8717639975918121,
|
|
"grad_norm": 5.068016052246094,
|
|
"learning_rate": 9.58222581196214e-07,
|
|
"loss": 0.4162,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 0.8729680915111379,
|
|
"grad_norm": 4.488113880157471,
|
|
"learning_rate": 9.580120900167513e-07,
|
|
"loss": 0.4196,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 0.8741721854304636,
|
|
"grad_norm": 4.887204647064209,
|
|
"learning_rate": 9.578010931379654e-07,
|
|
"loss": 0.439,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 0.8753762793497892,
|
|
"grad_norm": 4.7396159172058105,
|
|
"learning_rate": 9.575895907928217e-07,
|
|
"loss": 0.4202,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 0.876580373269115,
|
|
"grad_norm": 4.224496364593506,
|
|
"learning_rate": 9.573775832148438e-07,
|
|
"loss": 0.4027,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 0.8777844671884407,
|
|
"grad_norm": 5.062420845031738,
|
|
"learning_rate": 9.57165070638113e-07,
|
|
"loss": 0.4123,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 0.8789885611077664,
|
|
"grad_norm": 3.75753116607666,
|
|
"learning_rate": 9.569520532972678e-07,
|
|
"loss": 0.4066,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.8801926550270921,
|
|
"grad_norm": 4.535136699676514,
|
|
"learning_rate": 9.567385314275054e-07,
|
|
"loss": 0.4067,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 0.8813967489464178,
|
|
"grad_norm": 4.068704128265381,
|
|
"learning_rate": 9.56524505264578e-07,
|
|
"loss": 0.4238,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 0.8826008428657435,
|
|
"grad_norm": 5.032285690307617,
|
|
"learning_rate": 9.563099750447965e-07,
|
|
"loss": 0.4392,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 0.8838049367850692,
|
|
"grad_norm": 4.432474136352539,
|
|
"learning_rate": 9.560949410050274e-07,
|
|
"loss": 0.4394,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 0.885009030704395,
|
|
"grad_norm": 3.7745227813720703,
|
|
"learning_rate": 9.558794033826933e-07,
|
|
"loss": 0.4228,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 0.8862131246237207,
|
|
"grad_norm": 4.947648525238037,
|
|
"learning_rate": 9.556633624157734e-07,
|
|
"loss": 0.4324,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 0.8874172185430463,
|
|
"grad_norm": 3.695946216583252,
|
|
"learning_rate": 9.554468183428025e-07,
|
|
"loss": 0.407,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 0.8886213124623721,
|
|
"grad_norm": 4.399337291717529,
|
|
"learning_rate": 9.552297714028703e-07,
|
|
"loss": 0.4313,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 0.8898254063816978,
|
|
"grad_norm": 4.042302131652832,
|
|
"learning_rate": 9.550122218356227e-07,
|
|
"loss": 0.4183,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 0.8910295003010235,
|
|
"grad_norm": 4.341307163238525,
|
|
"learning_rate": 9.5479416988126e-07,
|
|
"loss": 0.4335,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.8922335942203492,
|
|
"grad_norm": 3.7946054935455322,
|
|
"learning_rate": 9.545756157805367e-07,
|
|
"loss": 0.4123,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 0.893437688139675,
|
|
"grad_norm": 5.04152250289917,
|
|
"learning_rate": 9.543565597747632e-07,
|
|
"loss": 0.4139,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 0.8946417820590006,
|
|
"grad_norm": 3.8958561420440674,
|
|
"learning_rate": 9.541370021058023e-07,
|
|
"loss": 0.4084,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 0.8958458759783263,
|
|
"grad_norm": 3.7490954399108887,
|
|
"learning_rate": 9.53916943016072e-07,
|
|
"loss": 0.4048,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 0.8970499698976521,
|
|
"grad_norm": 4.4821858406066895,
|
|
"learning_rate": 9.536963827485434e-07,
|
|
"loss": 0.3984,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 0.8982540638169777,
|
|
"grad_norm": 4.666491985321045,
|
|
"learning_rate": 9.53475321546741e-07,
|
|
"loss": 0.4098,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 0.8994581577363034,
|
|
"grad_norm": 4.890908718109131,
|
|
"learning_rate": 9.532537596547423e-07,
|
|
"loss": 0.3982,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 0.9006622516556292,
|
|
"grad_norm": 4.651495933532715,
|
|
"learning_rate": 9.53031697317178e-07,
|
|
"loss": 0.418,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 0.9018663455749548,
|
|
"grad_norm": 4.55120849609375,
|
|
"learning_rate": 9.528091347792308e-07,
|
|
"loss": 0.4187,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 0.9030704394942806,
|
|
"grad_norm": 5.57934045791626,
|
|
"learning_rate": 9.525860722866362e-07,
|
|
"loss": 0.4156,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.9042745334136063,
|
|
"grad_norm": 3.860431432723999,
|
|
"learning_rate": 9.523625100856813e-07,
|
|
"loss": 0.4078,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 0.9054786273329319,
|
|
"grad_norm": 4.670098781585693,
|
|
"learning_rate": 9.521384484232054e-07,
|
|
"loss": 0.4088,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 0.9066827212522577,
|
|
"grad_norm": 4.332681655883789,
|
|
"learning_rate": 9.519138875465986e-07,
|
|
"loss": 0.422,
|
|
"step": 7530
|
|
},
|
|
{
|
|
"epoch": 0.9078868151715834,
|
|
"grad_norm": 4.745145797729492,
|
|
"learning_rate": 9.516888277038029e-07,
|
|
"loss": 0.409,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 4.109555721282959,
|
|
"learning_rate": 9.514632691433106e-07,
|
|
"loss": 0.4177,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 0.9102950030102348,
|
|
"grad_norm": 5.039947032928467,
|
|
"learning_rate": 9.512372121141652e-07,
|
|
"loss": 0.4132,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 0.9114990969295605,
|
|
"grad_norm": 4.389688968658447,
|
|
"learning_rate": 9.510106568659599e-07,
|
|
"loss": 0.4176,
|
|
"step": 7570
|
|
},
|
|
{
|
|
"epoch": 0.9127031908488862,
|
|
"grad_norm": 4.67106819152832,
|
|
"learning_rate": 9.50783603648839e-07,
|
|
"loss": 0.4441,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 0.9139072847682119,
|
|
"grad_norm": 3.6345438957214355,
|
|
"learning_rate": 9.505560527134956e-07,
|
|
"loss": 0.395,
|
|
"step": 7590
|
|
},
|
|
{
|
|
"epoch": 0.9151113786875377,
|
|
"grad_norm": 4.544852256774902,
|
|
"learning_rate": 9.503280043111728e-07,
|
|
"loss": 0.4291,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.9163154726068633,
|
|
"grad_norm": 5.17853307723999,
|
|
"learning_rate": 9.50099458693663e-07,
|
|
"loss": 0.42,
|
|
"step": 7610
|
|
},
|
|
{
|
|
"epoch": 0.917519566526189,
|
|
"grad_norm": 4.111993789672852,
|
|
"learning_rate": 9.498704161133073e-07,
|
|
"loss": 0.4086,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 0.9187236604455148,
|
|
"grad_norm": 3.93930721282959,
|
|
"learning_rate": 9.49640876822996e-07,
|
|
"loss": 0.4128,
|
|
"step": 7630
|
|
},
|
|
{
|
|
"epoch": 0.9199277543648404,
|
|
"grad_norm": 4.442197322845459,
|
|
"learning_rate": 9.494108410761672e-07,
|
|
"loss": 0.4107,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 0.9211318482841662,
|
|
"grad_norm": 4.266764163970947,
|
|
"learning_rate": 9.491803091268077e-07,
|
|
"loss": 0.4093,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 0.9223359422034919,
|
|
"grad_norm": 4.633232593536377,
|
|
"learning_rate": 9.48949281229452e-07,
|
|
"loss": 0.4152,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 0.9235400361228175,
|
|
"grad_norm": 4.4745073318481445,
|
|
"learning_rate": 9.487177576391818e-07,
|
|
"loss": 0.4423,
|
|
"step": 7670
|
|
},
|
|
{
|
|
"epoch": 0.9247441300421433,
|
|
"grad_norm": 3.795365333557129,
|
|
"learning_rate": 9.484857386116268e-07,
|
|
"loss": 0.4013,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 0.925948223961469,
|
|
"grad_norm": 4.76974630355835,
|
|
"learning_rate": 9.48253224402963e-07,
|
|
"loss": 0.4084,
|
|
"step": 7690
|
|
},
|
|
{
|
|
"epoch": 0.9271523178807947,
|
|
"grad_norm": 4.584947109222412,
|
|
"learning_rate": 9.48020215269914e-07,
|
|
"loss": 0.4237,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.9283564118001204,
|
|
"grad_norm": 4.877064228057861,
|
|
"learning_rate": 9.477867114697486e-07,
|
|
"loss": 0.409,
|
|
"step": 7710
|
|
},
|
|
{
|
|
"epoch": 0.9295605057194462,
|
|
"grad_norm": 4.372793674468994,
|
|
"learning_rate": 9.475527132602832e-07,
|
|
"loss": 0.4142,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 0.9307645996387718,
|
|
"grad_norm": 4.198723316192627,
|
|
"learning_rate": 9.473182208998792e-07,
|
|
"loss": 0.4057,
|
|
"step": 7730
|
|
},
|
|
{
|
|
"epoch": 0.9319686935580975,
|
|
"grad_norm": 4.460008144378662,
|
|
"learning_rate": 9.470832346474435e-07,
|
|
"loss": 0.4235,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 0.9331727874774233,
|
|
"grad_norm": 4.3058905601501465,
|
|
"learning_rate": 9.468477547624289e-07,
|
|
"loss": 0.4307,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 0.9343768813967489,
|
|
"grad_norm": 4.6467132568359375,
|
|
"learning_rate": 9.466117815048329e-07,
|
|
"loss": 0.4127,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 0.9355809753160746,
|
|
"grad_norm": 5.1491217613220215,
|
|
"learning_rate": 9.463753151351978e-07,
|
|
"loss": 0.4181,
|
|
"step": 7770
|
|
},
|
|
{
|
|
"epoch": 0.9367850692354004,
|
|
"grad_norm": 5.166205883026123,
|
|
"learning_rate": 9.461383559146102e-07,
|
|
"loss": 0.4102,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 0.937989163154726,
|
|
"grad_norm": 4.453047275543213,
|
|
"learning_rate": 9.459009041047012e-07,
|
|
"loss": 0.4135,
|
|
"step": 7790
|
|
},
|
|
{
|
|
"epoch": 0.9391932570740518,
|
|
"grad_norm": 5.151276111602783,
|
|
"learning_rate": 9.456629599676456e-07,
|
|
"loss": 0.4072,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.9403973509933775,
|
|
"grad_norm": 3.8332607746124268,
|
|
"learning_rate": 9.454245237661615e-07,
|
|
"loss": 0.4363,
|
|
"step": 7810
|
|
},
|
|
{
|
|
"epoch": 0.9416014449127031,
|
|
"grad_norm": 4.51285982131958,
|
|
"learning_rate": 9.451855957635108e-07,
|
|
"loss": 0.4265,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 0.9428055388320289,
|
|
"grad_norm": 4.756032466888428,
|
|
"learning_rate": 9.449461762234981e-07,
|
|
"loss": 0.4322,
|
|
"step": 7830
|
|
},
|
|
{
|
|
"epoch": 0.9440096327513546,
|
|
"grad_norm": 3.7539730072021484,
|
|
"learning_rate": 9.447062654104707e-07,
|
|
"loss": 0.4052,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 0.9452137266706803,
|
|
"grad_norm": 4.208081245422363,
|
|
"learning_rate": 9.444658635893186e-07,
|
|
"loss": 0.4101,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 0.946417820590006,
|
|
"grad_norm": 3.338568925857544,
|
|
"learning_rate": 9.442249710254737e-07,
|
|
"loss": 0.4195,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 0.9476219145093318,
|
|
"grad_norm": 4.421904563903809,
|
|
"learning_rate": 9.439835879849096e-07,
|
|
"loss": 0.4232,
|
|
"step": 7870
|
|
},
|
|
{
|
|
"epoch": 0.9488260084286574,
|
|
"grad_norm": 4.675938129425049,
|
|
"learning_rate": 9.437417147341417e-07,
|
|
"loss": 0.4171,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 0.9500301023479831,
|
|
"grad_norm": 5.047989845275879,
|
|
"learning_rate": 9.434993515402267e-07,
|
|
"loss": 0.4083,
|
|
"step": 7890
|
|
},
|
|
{
|
|
"epoch": 0.9512341962673089,
|
|
"grad_norm": 4.1763916015625,
|
|
"learning_rate": 9.432564986707621e-07,
|
|
"loss": 0.3946,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.9524382901866345,
|
|
"grad_norm": 4.706401348114014,
|
|
"learning_rate": 9.43013156393886e-07,
|
|
"loss": 0.4147,
|
|
"step": 7910
|
|
},
|
|
{
|
|
"epoch": 0.9536423841059603,
|
|
"grad_norm": 4.3355255126953125,
|
|
"learning_rate": 9.427693249782769e-07,
|
|
"loss": 0.4244,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 0.954846478025286,
|
|
"grad_norm": 5.126685619354248,
|
|
"learning_rate": 9.425250046931537e-07,
|
|
"loss": 0.4148,
|
|
"step": 7930
|
|
},
|
|
{
|
|
"epoch": 0.9560505719446116,
|
|
"grad_norm": 3.4599716663360596,
|
|
"learning_rate": 9.422801958082744e-07,
|
|
"loss": 0.4237,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 0.9572546658639374,
|
|
"grad_norm": 4.331906795501709,
|
|
"learning_rate": 9.420348985939371e-07,
|
|
"loss": 0.4097,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 0.9584587597832631,
|
|
"grad_norm": 4.4911370277404785,
|
|
"learning_rate": 9.417891133209787e-07,
|
|
"loss": 0.4029,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 0.9596628537025887,
|
|
"grad_norm": 4.601186275482178,
|
|
"learning_rate": 9.415428402607754e-07,
|
|
"loss": 0.4194,
|
|
"step": 7970
|
|
},
|
|
{
|
|
"epoch": 0.9608669476219145,
|
|
"grad_norm": 4.048129558563232,
|
|
"learning_rate": 9.412960796852412e-07,
|
|
"loss": 0.4205,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 0.9620710415412402,
|
|
"grad_norm": 4.8655571937561035,
|
|
"learning_rate": 9.410488318668292e-07,
|
|
"loss": 0.4229,
|
|
"step": 7990
|
|
},
|
|
{
|
|
"epoch": 0.963275135460566,
|
|
"grad_norm": 3.7495744228363037,
|
|
"learning_rate": 9.408010970785302e-07,
|
|
"loss": 0.3761,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.9644792293798916,
|
|
"grad_norm": 5.3356499671936035,
|
|
"learning_rate": 9.405528755938725e-07,
|
|
"loss": 0.4093,
|
|
"step": 8010
|
|
},
|
|
{
|
|
"epoch": 0.9656833232992174,
|
|
"grad_norm": 5.407442569732666,
|
|
"learning_rate": 9.403041676869217e-07,
|
|
"loss": 0.4066,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 0.9668874172185431,
|
|
"grad_norm": 3.860828161239624,
|
|
"learning_rate": 9.400549736322807e-07,
|
|
"loss": 0.3982,
|
|
"step": 8030
|
|
},
|
|
{
|
|
"epoch": 0.9680915111378687,
|
|
"grad_norm": 4.087296962738037,
|
|
"learning_rate": 9.398052937050892e-07,
|
|
"loss": 0.3951,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 0.9692956050571945,
|
|
"grad_norm": 4.309443473815918,
|
|
"learning_rate": 9.395551281810233e-07,
|
|
"loss": 0.4025,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 0.9704996989765202,
|
|
"grad_norm": 4.655600547790527,
|
|
"learning_rate": 9.39304477336295e-07,
|
|
"loss": 0.4187,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 0.9717037928958459,
|
|
"grad_norm": 4.34591007232666,
|
|
"learning_rate": 9.390533414476527e-07,
|
|
"loss": 0.4164,
|
|
"step": 8070
|
|
},
|
|
{
|
|
"epoch": 0.9729078868151716,
|
|
"grad_norm": 4.547005653381348,
|
|
"learning_rate": 9.388017207923798e-07,
|
|
"loss": 0.4124,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 0.9741119807344973,
|
|
"grad_norm": 5.021882057189941,
|
|
"learning_rate": 9.385496156482953e-07,
|
|
"loss": 0.4289,
|
|
"step": 8090
|
|
},
|
|
{
|
|
"epoch": 0.975316074653823,
|
|
"grad_norm": 4.165801525115967,
|
|
"learning_rate": 9.382970262937526e-07,
|
|
"loss": 0.4058,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.9765201685731487,
|
|
"grad_norm": 4.876884460449219,
|
|
"learning_rate": 9.380439530076407e-07,
|
|
"loss": 0.43,
|
|
"step": 8110
|
|
},
|
|
{
|
|
"epoch": 0.9777242624924745,
|
|
"grad_norm": 4.928086757659912,
|
|
"learning_rate": 9.377903960693818e-07,
|
|
"loss": 0.423,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 0.9789283564118001,
|
|
"grad_norm": 5.006045341491699,
|
|
"learning_rate": 9.375363557589331e-07,
|
|
"loss": 0.4354,
|
|
"step": 8130
|
|
},
|
|
{
|
|
"epoch": 0.9801324503311258,
|
|
"grad_norm": 3.8796417713165283,
|
|
"learning_rate": 9.372818323567846e-07,
|
|
"loss": 0.4132,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 0.9813365442504516,
|
|
"grad_norm": 4.275393962860107,
|
|
"learning_rate": 9.370268261439604e-07,
|
|
"loss": 0.4071,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 0.9825406381697772,
|
|
"grad_norm": 5.467378616333008,
|
|
"learning_rate": 9.367713374020174e-07,
|
|
"loss": 0.4049,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 0.983744732089103,
|
|
"grad_norm": 3.720611095428467,
|
|
"learning_rate": 9.365153664130453e-07,
|
|
"loss": 0.4008,
|
|
"step": 8170
|
|
},
|
|
{
|
|
"epoch": 0.9849488260084287,
|
|
"grad_norm": 4.539004802703857,
|
|
"learning_rate": 9.362589134596661e-07,
|
|
"loss": 0.4118,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 0.9861529199277543,
|
|
"grad_norm": 3.776636838912964,
|
|
"learning_rate": 9.360019788250342e-07,
|
|
"loss": 0.4334,
|
|
"step": 8190
|
|
},
|
|
{
|
|
"epoch": 0.9873570138470801,
|
|
"grad_norm": 3.8309648036956787,
|
|
"learning_rate": 9.357445627928355e-07,
|
|
"loss": 0.4179,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.9885611077664058,
|
|
"grad_norm": 4.798840045928955,
|
|
"learning_rate": 9.354866656472881e-07,
|
|
"loss": 0.4154,
|
|
"step": 8210
|
|
},
|
|
{
|
|
"epoch": 0.9897652016857315,
|
|
"grad_norm": 4.182796955108643,
|
|
"learning_rate": 9.352282876731403e-07,
|
|
"loss": 0.4196,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 0.9909692956050572,
|
|
"grad_norm": 4.6675801277160645,
|
|
"learning_rate": 9.349694291556723e-07,
|
|
"loss": 0.4182,
|
|
"step": 8230
|
|
},
|
|
{
|
|
"epoch": 0.9921733895243829,
|
|
"grad_norm": 4.432309627532959,
|
|
"learning_rate": 9.347100903806941e-07,
|
|
"loss": 0.4206,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 0.9933774834437086,
|
|
"grad_norm": 4.616915702819824,
|
|
"learning_rate": 9.344502716345463e-07,
|
|
"loss": 0.4153,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 0.9945815773630343,
|
|
"grad_norm": 4.290421485900879,
|
|
"learning_rate": 9.341899732040994e-07,
|
|
"loss": 0.4162,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 0.9957856712823601,
|
|
"grad_norm": 4.533810138702393,
|
|
"learning_rate": 9.339291953767539e-07,
|
|
"loss": 0.4113,
|
|
"step": 8270
|
|
},
|
|
{
|
|
"epoch": 0.9969897652016857,
|
|
"grad_norm": 4.271683692932129,
|
|
"learning_rate": 9.336679384404387e-07,
|
|
"loss": 0.4166,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 0.9981938591210114,
|
|
"grad_norm": 5.167937755584717,
|
|
"learning_rate": 9.334062026836127e-07,
|
|
"loss": 0.385,
|
|
"step": 8290
|
|
},
|
|
{
|
|
"epoch": 0.9993979530403372,
|
|
"grad_norm": 4.525483131408691,
|
|
"learning_rate": 9.331439883952628e-07,
|
|
"loss": 0.3977,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 1.0006020469596628,
|
|
"grad_norm": 4.6253533363342285,
|
|
"learning_rate": 9.328812958649044e-07,
|
|
"loss": 0.4123,
|
|
"step": 8310
|
|
},
|
|
{
|
|
"epoch": 1.0018061408789887,
|
|
"grad_norm": 5.165332317352295,
|
|
"learning_rate": 9.326181253825812e-07,
|
|
"loss": 0.3842,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 1.0030102347983143,
|
|
"grad_norm": 3.894192934036255,
|
|
"learning_rate": 9.323544772388645e-07,
|
|
"loss": 0.3528,
|
|
"step": 8330
|
|
},
|
|
{
|
|
"epoch": 1.00421432871764,
|
|
"grad_norm": 3.8034422397613525,
|
|
"learning_rate": 9.320903517248527e-07,
|
|
"loss": 0.3817,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 1.0054184226369658,
|
|
"grad_norm": 4.677804946899414,
|
|
"learning_rate": 9.318257491321714e-07,
|
|
"loss": 0.3772,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 1.0066225165562914,
|
|
"grad_norm": 4.256035327911377,
|
|
"learning_rate": 9.315606697529733e-07,
|
|
"loss": 0.3858,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 1.007826610475617,
|
|
"grad_norm": 4.362122058868408,
|
|
"learning_rate": 9.312951138799371e-07,
|
|
"loss": 0.3702,
|
|
"step": 8370
|
|
},
|
|
{
|
|
"epoch": 1.009030704394943,
|
|
"grad_norm": 4.146007537841797,
|
|
"learning_rate": 9.310290818062681e-07,
|
|
"loss": 0.3869,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 1.0102347983142685,
|
|
"grad_norm": 4.480301856994629,
|
|
"learning_rate": 9.307625738256967e-07,
|
|
"loss": 0.4082,
|
|
"step": 8390
|
|
},
|
|
{
|
|
"epoch": 1.0114388922335942,
|
|
"grad_norm": 4.406433582305908,
|
|
"learning_rate": 9.304955902324793e-07,
|
|
"loss": 0.3846,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 1.01264298615292,
|
|
"grad_norm": 4.386068820953369,
|
|
"learning_rate": 9.302281313213972e-07,
|
|
"loss": 0.3806,
|
|
"step": 8410
|
|
},
|
|
{
|
|
"epoch": 1.0138470800722457,
|
|
"grad_norm": 4.706192970275879,
|
|
"learning_rate": 9.299601973877566e-07,
|
|
"loss": 0.385,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 1.0150511739915713,
|
|
"grad_norm": 5.003023624420166,
|
|
"learning_rate": 9.29691788727388e-07,
|
|
"loss": 0.3785,
|
|
"step": 8430
|
|
},
|
|
{
|
|
"epoch": 1.0162552679108972,
|
|
"grad_norm": 4.118617534637451,
|
|
"learning_rate": 9.294229056366463e-07,
|
|
"loss": 0.3649,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 1.0174593618302228,
|
|
"grad_norm": 4.070971488952637,
|
|
"learning_rate": 9.291535484124101e-07,
|
|
"loss": 0.3897,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 1.0186634557495484,
|
|
"grad_norm": 4.141367435455322,
|
|
"learning_rate": 9.288837173520814e-07,
|
|
"loss": 0.3712,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 1.0198675496688743,
|
|
"grad_norm": 4.00056791305542,
|
|
"learning_rate": 9.286134127535859e-07,
|
|
"loss": 0.372,
|
|
"step": 8470
|
|
},
|
|
{
|
|
"epoch": 1.0210716435882,
|
|
"grad_norm": 4.618954658508301,
|
|
"learning_rate": 9.283426349153711e-07,
|
|
"loss": 0.3708,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 1.0222757375075255,
|
|
"grad_norm": 4.50954008102417,
|
|
"learning_rate": 9.280713841364083e-07,
|
|
"loss": 0.3831,
|
|
"step": 8490
|
|
},
|
|
{
|
|
"epoch": 1.0234798314268514,
|
|
"grad_norm": 4.025129795074463,
|
|
"learning_rate": 9.277996607161898e-07,
|
|
"loss": 0.3807,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 1.024683925346177,
|
|
"grad_norm": 4.727366924285889,
|
|
"learning_rate": 9.275274649547307e-07,
|
|
"loss": 0.3707,
|
|
"step": 8510
|
|
},
|
|
{
|
|
"epoch": 1.0258880192655027,
|
|
"grad_norm": 4.731372833251953,
|
|
"learning_rate": 9.272547971525669e-07,
|
|
"loss": 0.3655,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 1.0270921131848285,
|
|
"grad_norm": 4.237710475921631,
|
|
"learning_rate": 9.269816576107559e-07,
|
|
"loss": 0.365,
|
|
"step": 8530
|
|
},
|
|
{
|
|
"epoch": 1.0282962071041541,
|
|
"grad_norm": 4.294924736022949,
|
|
"learning_rate": 9.267080466308758e-07,
|
|
"loss": 0.3774,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 1.0295003010234798,
|
|
"grad_norm": 4.249452590942383,
|
|
"learning_rate": 9.264339645150256e-07,
|
|
"loss": 0.372,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 1.0307043949428056,
|
|
"grad_norm": 4.078114986419678,
|
|
"learning_rate": 9.26159411565824e-07,
|
|
"loss": 0.3736,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 1.0319084888621313,
|
|
"grad_norm": 5.815018177032471,
|
|
"learning_rate": 9.258843880864101e-07,
|
|
"loss": 0.3708,
|
|
"step": 8570
|
|
},
|
|
{
|
|
"epoch": 1.033112582781457,
|
|
"grad_norm": 4.562671184539795,
|
|
"learning_rate": 9.256088943804421e-07,
|
|
"loss": 0.3926,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 1.0343166767007828,
|
|
"grad_norm": 5.159687042236328,
|
|
"learning_rate": 9.253329307520974e-07,
|
|
"loss": 0.3754,
|
|
"step": 8590
|
|
},
|
|
{
|
|
"epoch": 1.0355207706201084,
|
|
"grad_norm": 4.418034076690674,
|
|
"learning_rate": 9.250564975060725e-07,
|
|
"loss": 0.3756,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 1.036724864539434,
|
|
"grad_norm": 4.661262035369873,
|
|
"learning_rate": 9.247795949475823e-07,
|
|
"loss": 0.3854,
|
|
"step": 8610
|
|
},
|
|
{
|
|
"epoch": 1.0379289584587599,
|
|
"grad_norm": 4.768362522125244,
|
|
"learning_rate": 9.245022233823598e-07,
|
|
"loss": 0.3798,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 1.0391330523780855,
|
|
"grad_norm": 4.090877056121826,
|
|
"learning_rate": 9.242243831166558e-07,
|
|
"loss": 0.3883,
|
|
"step": 8630
|
|
},
|
|
{
|
|
"epoch": 1.0403371462974111,
|
|
"grad_norm": 4.2338032722473145,
|
|
"learning_rate": 9.23946074457239e-07,
|
|
"loss": 0.3784,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 1.041541240216737,
|
|
"grad_norm": 4.812314510345459,
|
|
"learning_rate": 9.236672977113947e-07,
|
|
"loss": 0.3938,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 1.0427453341360626,
|
|
"grad_norm": 5.055131435394287,
|
|
"learning_rate": 9.233880531869253e-07,
|
|
"loss": 0.3784,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 1.0439494280553883,
|
|
"grad_norm": 4.143119812011719,
|
|
"learning_rate": 9.231083411921497e-07,
|
|
"loss": 0.368,
|
|
"step": 8670
|
|
},
|
|
{
|
|
"epoch": 1.0451535219747141,
|
|
"grad_norm": 4.840368270874023,
|
|
"learning_rate": 9.228281620359029e-07,
|
|
"loss": 0.3771,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 1.0463576158940397,
|
|
"grad_norm": 4.708595275878906,
|
|
"learning_rate": 9.225475160275358e-07,
|
|
"loss": 0.3572,
|
|
"step": 8690
|
|
},
|
|
{
|
|
"epoch": 1.0475617098133654,
|
|
"grad_norm": 4.715826511383057,
|
|
"learning_rate": 9.222664034769145e-07,
|
|
"loss": 0.3929,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 1.0487658037326912,
|
|
"grad_norm": 4.969057559967041,
|
|
"learning_rate": 9.219848246944205e-07,
|
|
"loss": 0.3895,
|
|
"step": 8710
|
|
},
|
|
{
|
|
"epoch": 1.0499698976520169,
|
|
"grad_norm": 3.3560914993286133,
|
|
"learning_rate": 9.217027799909499e-07,
|
|
"loss": 0.379,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 1.0511739915713425,
|
|
"grad_norm": 4.196804046630859,
|
|
"learning_rate": 9.214202696779134e-07,
|
|
"loss": 0.3692,
|
|
"step": 8730
|
|
},
|
|
{
|
|
"epoch": 1.0523780854906684,
|
|
"grad_norm": 4.214865684509277,
|
|
"learning_rate": 9.211372940672355e-07,
|
|
"loss": 0.3673,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 1.053582179409994,
|
|
"grad_norm": 4.642685890197754,
|
|
"learning_rate": 9.208538534713548e-07,
|
|
"loss": 0.3961,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 1.0547862733293196,
|
|
"grad_norm": 4.921828269958496,
|
|
"learning_rate": 9.20569948203223e-07,
|
|
"loss": 0.3616,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 1.0559903672486455,
|
|
"grad_norm": 3.9251582622528076,
|
|
"learning_rate": 9.202855785763051e-07,
|
|
"loss": 0.3958,
|
|
"step": 8770
|
|
},
|
|
{
|
|
"epoch": 1.0571944611679711,
|
|
"grad_norm": 4.475203990936279,
|
|
"learning_rate": 9.200007449045785e-07,
|
|
"loss": 0.3782,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 1.0583985550872967,
|
|
"grad_norm": 4.735462665557861,
|
|
"learning_rate": 9.197154475025333e-07,
|
|
"loss": 0.3571,
|
|
"step": 8790
|
|
},
|
|
{
|
|
"epoch": 1.0596026490066226,
|
|
"grad_norm": 4.720487117767334,
|
|
"learning_rate": 9.194296866851712e-07,
|
|
"loss": 0.3632,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 1.0608067429259482,
|
|
"grad_norm": 4.291871547698975,
|
|
"learning_rate": 9.191434627680063e-07,
|
|
"loss": 0.3722,
|
|
"step": 8810
|
|
},
|
|
{
|
|
"epoch": 1.0620108368452739,
|
|
"grad_norm": 4.449291229248047,
|
|
"learning_rate": 9.188567760670631e-07,
|
|
"loss": 0.3857,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 1.0632149307645997,
|
|
"grad_norm": 4.42001485824585,
|
|
"learning_rate": 9.185696268988776e-07,
|
|
"loss": 0.3798,
|
|
"step": 8830
|
|
},
|
|
{
|
|
"epoch": 1.0644190246839254,
|
|
"grad_norm": 4.68118953704834,
|
|
"learning_rate": 9.182820155804965e-07,
|
|
"loss": 0.364,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 1.065623118603251,
|
|
"grad_norm": 4.831759929656982,
|
|
"learning_rate": 9.179939424294763e-07,
|
|
"loss": 0.3656,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 1.0668272125225768,
|
|
"grad_norm": 4.51068115234375,
|
|
"learning_rate": 9.177054077638839e-07,
|
|
"loss": 0.3779,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 1.0680313064419025,
|
|
"grad_norm": 4.588883399963379,
|
|
"learning_rate": 9.174164119022956e-07,
|
|
"loss": 0.3766,
|
|
"step": 8870
|
|
},
|
|
{
|
|
"epoch": 1.069235400361228,
|
|
"grad_norm": 4.487590789794922,
|
|
"learning_rate": 9.171269551637968e-07,
|
|
"loss": 0.3676,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 1.070439494280554,
|
|
"grad_norm": 5.2501702308654785,
|
|
"learning_rate": 9.168370378679819e-07,
|
|
"loss": 0.3764,
|
|
"step": 8890
|
|
},
|
|
{
|
|
"epoch": 1.0716435881998796,
|
|
"grad_norm": 4.199159145355225,
|
|
"learning_rate": 9.165466603349539e-07,
|
|
"loss": 0.3736,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 1.0728476821192052,
|
|
"grad_norm": 4.138830184936523,
|
|
"learning_rate": 9.162558228853235e-07,
|
|
"loss": 0.3745,
|
|
"step": 8910
|
|
},
|
|
{
|
|
"epoch": 1.074051776038531,
|
|
"grad_norm": 4.139305114746094,
|
|
"learning_rate": 9.159645258402095e-07,
|
|
"loss": 0.3693,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 1.0752558699578567,
|
|
"grad_norm": 5.9480438232421875,
|
|
"learning_rate": 9.156727695212386e-07,
|
|
"loss": 0.3644,
|
|
"step": 8930
|
|
},
|
|
{
|
|
"epoch": 1.0764599638771823,
|
|
"grad_norm": 4.251008987426758,
|
|
"learning_rate": 9.153805542505438e-07,
|
|
"loss": 0.3844,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 1.0776640577965082,
|
|
"grad_norm": 4.630239486694336,
|
|
"learning_rate": 9.150878803507654e-07,
|
|
"loss": 0.3699,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 1.0788681517158338,
|
|
"grad_norm": 5.171538829803467,
|
|
"learning_rate": 9.147947481450498e-07,
|
|
"loss": 0.4026,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 1.0800722456351595,
|
|
"grad_norm": 4.777914524078369,
|
|
"learning_rate": 9.145011579570491e-07,
|
|
"loss": 0.3642,
|
|
"step": 8970
|
|
},
|
|
{
|
|
"epoch": 1.0812763395544853,
|
|
"grad_norm": 5.336880207061768,
|
|
"learning_rate": 9.142071101109224e-07,
|
|
"loss": 0.3926,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 1.082480433473811,
|
|
"grad_norm": 3.8747503757476807,
|
|
"learning_rate": 9.139126049313321e-07,
|
|
"loss": 0.3792,
|
|
"step": 8990
|
|
},
|
|
{
|
|
"epoch": 1.0836845273931366,
|
|
"grad_norm": 4.528430461883545,
|
|
"learning_rate": 9.136176427434475e-07,
|
|
"loss": 0.3735,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 1.0848886213124624,
|
|
"grad_norm": 5.05435848236084,
|
|
"learning_rate": 9.133222238729412e-07,
|
|
"loss": 0.3604,
|
|
"step": 9010
|
|
},
|
|
{
|
|
"epoch": 1.086092715231788,
|
|
"grad_norm": 4.354115962982178,
|
|
"learning_rate": 9.130263486459904e-07,
|
|
"loss": 0.3995,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 1.0872968091511137,
|
|
"grad_norm": 5.124173164367676,
|
|
"learning_rate": 9.127300173892763e-07,
|
|
"loss": 0.3622,
|
|
"step": 9030
|
|
},
|
|
{
|
|
"epoch": 1.0885009030704396,
|
|
"grad_norm": 4.644625186920166,
|
|
"learning_rate": 9.124332304299838e-07,
|
|
"loss": 0.3704,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 1.0897049969897652,
|
|
"grad_norm": 4.276961803436279,
|
|
"learning_rate": 9.121359880958002e-07,
|
|
"loss": 0.3771,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 1.0909090909090908,
|
|
"grad_norm": 4.1808648109436035,
|
|
"learning_rate": 9.118382907149163e-07,
|
|
"loss": 0.3638,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 1.0921131848284167,
|
|
"grad_norm": 4.921030521392822,
|
|
"learning_rate": 9.115401386160251e-07,
|
|
"loss": 0.3633,
|
|
"step": 9070
|
|
},
|
|
{
|
|
"epoch": 1.0933172787477423,
|
|
"grad_norm": 4.0871663093566895,
|
|
"learning_rate": 9.112415321283217e-07,
|
|
"loss": 0.358,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 1.094521372667068,
|
|
"grad_norm": 3.419311046600342,
|
|
"learning_rate": 9.10942471581503e-07,
|
|
"loss": 0.3601,
|
|
"step": 9090
|
|
},
|
|
{
|
|
"epoch": 1.0957254665863938,
|
|
"grad_norm": 4.138514518737793,
|
|
"learning_rate": 9.106429573057666e-07,
|
|
"loss": 0.3764,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 1.0969295605057194,
|
|
"grad_norm": 5.0829691886901855,
|
|
"learning_rate": 9.10342989631812e-07,
|
|
"loss": 0.3756,
|
|
"step": 9110
|
|
},
|
|
{
|
|
"epoch": 1.098133654425045,
|
|
"grad_norm": 4.330390930175781,
|
|
"learning_rate": 9.100425688908386e-07,
|
|
"loss": 0.3587,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 1.099337748344371,
|
|
"grad_norm": 5.1065592765808105,
|
|
"learning_rate": 9.097416954145465e-07,
|
|
"loss": 0.38,
|
|
"step": 9130
|
|
},
|
|
{
|
|
"epoch": 1.1005418422636966,
|
|
"grad_norm": 4.509856224060059,
|
|
"learning_rate": 9.094403695351352e-07,
|
|
"loss": 0.38,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 1.1017459361830222,
|
|
"grad_norm": 5.324617862701416,
|
|
"learning_rate": 9.091385915853042e-07,
|
|
"loss": 0.3658,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 1.102950030102348,
|
|
"grad_norm": 5.061591148376465,
|
|
"learning_rate": 9.088363618982521e-07,
|
|
"loss": 0.3723,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 1.1041541240216737,
|
|
"grad_norm": 5.028870582580566,
|
|
"learning_rate": 9.085336808076758e-07,
|
|
"loss": 0.3837,
|
|
"step": 9170
|
|
},
|
|
{
|
|
"epoch": 1.1053582179409993,
|
|
"grad_norm": 4.214852809906006,
|
|
"learning_rate": 9.082305486477708e-07,
|
|
"loss": 0.3681,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 1.1065623118603252,
|
|
"grad_norm": 4.787420272827148,
|
|
"learning_rate": 9.079269657532311e-07,
|
|
"loss": 0.3843,
|
|
"step": 9190
|
|
},
|
|
{
|
|
"epoch": 1.1077664057796508,
|
|
"grad_norm": 3.78640079498291,
|
|
"learning_rate": 9.076229324592477e-07,
|
|
"loss": 0.3747,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 1.1089704996989764,
|
|
"grad_norm": 4.786212921142578,
|
|
"learning_rate": 9.073184491015094e-07,
|
|
"loss": 0.3684,
|
|
"step": 9210
|
|
},
|
|
{
|
|
"epoch": 1.1101745936183023,
|
|
"grad_norm": 3.932164430618286,
|
|
"learning_rate": 9.070135160162015e-07,
|
|
"loss": 0.3662,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 1.111378687537628,
|
|
"grad_norm": 4.249774932861328,
|
|
"learning_rate": 9.067081335400061e-07,
|
|
"loss": 0.3722,
|
|
"step": 9230
|
|
},
|
|
{
|
|
"epoch": 1.1125827814569536,
|
|
"grad_norm": 4.269323348999023,
|
|
"learning_rate": 9.064023020101015e-07,
|
|
"loss": 0.3765,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 1.1137868753762794,
|
|
"grad_norm": 4.183831214904785,
|
|
"learning_rate": 9.060960217641617e-07,
|
|
"loss": 0.3657,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 1.114990969295605,
|
|
"grad_norm": 4.336716175079346,
|
|
"learning_rate": 9.057892931403563e-07,
|
|
"loss": 0.3869,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 1.1161950632149307,
|
|
"grad_norm": 4.948883533477783,
|
|
"learning_rate": 9.054821164773498e-07,
|
|
"loss": 0.3823,
|
|
"step": 9270
|
|
},
|
|
{
|
|
"epoch": 1.1173991571342565,
|
|
"grad_norm": 4.687775611877441,
|
|
"learning_rate": 9.051744921143014e-07,
|
|
"loss": 0.3853,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 1.1186032510535822,
|
|
"grad_norm": 4.803307056427002,
|
|
"learning_rate": 9.048664203908647e-07,
|
|
"loss": 0.3609,
|
|
"step": 9290
|
|
},
|
|
{
|
|
"epoch": 1.1198073449729078,
|
|
"grad_norm": 4.377987861633301,
|
|
"learning_rate": 9.045579016471871e-07,
|
|
"loss": 0.3873,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 1.1210114388922336,
|
|
"grad_norm": 4.264991760253906,
|
|
"learning_rate": 9.042489362239096e-07,
|
|
"loss": 0.3663,
|
|
"step": 9310
|
|
},
|
|
{
|
|
"epoch": 1.1222155328115593,
|
|
"grad_norm": 4.69897985458374,
|
|
"learning_rate": 9.039395244621667e-07,
|
|
"loss": 0.3797,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 1.123419626730885,
|
|
"grad_norm": 4.6573357582092285,
|
|
"learning_rate": 9.036296667035853e-07,
|
|
"loss": 0.3774,
|
|
"step": 9330
|
|
},
|
|
{
|
|
"epoch": 1.1246237206502108,
|
|
"grad_norm": 4.6396307945251465,
|
|
"learning_rate": 9.033193632902848e-07,
|
|
"loss": 0.3708,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 1.1258278145695364,
|
|
"grad_norm": 4.781702518463135,
|
|
"learning_rate": 9.030086145648767e-07,
|
|
"loss": 0.366,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 1.127031908488862,
|
|
"grad_norm": 3.859081745147705,
|
|
"learning_rate": 9.026974208704645e-07,
|
|
"loss": 0.3592,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 1.1282360024081879,
|
|
"grad_norm": 3.917964220046997,
|
|
"learning_rate": 9.023857825506425e-07,
|
|
"loss": 0.3828,
|
|
"step": 9370
|
|
},
|
|
{
|
|
"epoch": 1.1294400963275135,
|
|
"grad_norm": 4.249654293060303,
|
|
"learning_rate": 9.020736999494962e-07,
|
|
"loss": 0.3816,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 1.1306441902468394,
|
|
"grad_norm": 4.181410789489746,
|
|
"learning_rate": 9.017611734116015e-07,
|
|
"loss": 0.3881,
|
|
"step": 9390
|
|
},
|
|
{
|
|
"epoch": 1.131848284166165,
|
|
"grad_norm": 4.529959678649902,
|
|
"learning_rate": 9.014482032820245e-07,
|
|
"loss": 0.3866,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 1.1330523780854906,
|
|
"grad_norm": 4.115703105926514,
|
|
"learning_rate": 9.011347899063212e-07,
|
|
"loss": 0.4017,
|
|
"step": 9410
|
|
},
|
|
{
|
|
"epoch": 1.1342564720048163,
|
|
"grad_norm": 5.330405235290527,
|
|
"learning_rate": 9.008209336305369e-07,
|
|
"loss": 0.382,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 1.1354605659241421,
|
|
"grad_norm": 4.53489351272583,
|
|
"learning_rate": 9.005066348012058e-07,
|
|
"loss": 0.4002,
|
|
"step": 9430
|
|
},
|
|
{
|
|
"epoch": 1.1366646598434678,
|
|
"grad_norm": 4.984791278839111,
|
|
"learning_rate": 9.00191893765351e-07,
|
|
"loss": 0.3699,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 1.1378687537627936,
|
|
"grad_norm": 4.83209753036499,
|
|
"learning_rate": 8.998767108704836e-07,
|
|
"loss": 0.3612,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 1.1390728476821192,
|
|
"grad_norm": 4.549959659576416,
|
|
"learning_rate": 8.995610864646029e-07,
|
|
"loss": 0.3552,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 1.1402769416014449,
|
|
"grad_norm": 4.30760383605957,
|
|
"learning_rate": 8.992450208961949e-07,
|
|
"loss": 0.3796,
|
|
"step": 9470
|
|
},
|
|
{
|
|
"epoch": 1.1414810355207705,
|
|
"grad_norm": 4.3470234870910645,
|
|
"learning_rate": 8.989285145142338e-07,
|
|
"loss": 0.3868,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 1.1426851294400964,
|
|
"grad_norm": 4.755895614624023,
|
|
"learning_rate": 8.986115676681796e-07,
|
|
"loss": 0.3867,
|
|
"step": 9490
|
|
},
|
|
{
|
|
"epoch": 1.143889223359422,
|
|
"grad_norm": 4.874184608459473,
|
|
"learning_rate": 8.982941807079791e-07,
|
|
"loss": 0.3866,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 1.1450933172787479,
|
|
"grad_norm": 4.068636894226074,
|
|
"learning_rate": 8.979763539840649e-07,
|
|
"loss": 0.3558,
|
|
"step": 9510
|
|
},
|
|
{
|
|
"epoch": 1.1462974111980735,
|
|
"grad_norm": 4.380646705627441,
|
|
"learning_rate": 8.976580878473552e-07,
|
|
"loss": 0.3704,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 1.1475015051173991,
|
|
"grad_norm": 4.3028950691223145,
|
|
"learning_rate": 8.973393826492531e-07,
|
|
"loss": 0.3995,
|
|
"step": 9530
|
|
},
|
|
{
|
|
"epoch": 1.1487055990367248,
|
|
"grad_norm": 4.423670768737793,
|
|
"learning_rate": 8.97020238741647e-07,
|
|
"loss": 0.38,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 1.1499096929560506,
|
|
"grad_norm": 4.808249473571777,
|
|
"learning_rate": 8.967006564769093e-07,
|
|
"loss": 0.3779,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 1.1511137868753762,
|
|
"grad_norm": 5.734920501708984,
|
|
"learning_rate": 8.963806362078963e-07,
|
|
"loss": 0.3713,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 1.152317880794702,
|
|
"grad_norm": 4.730371952056885,
|
|
"learning_rate": 8.960601782879483e-07,
|
|
"loss": 0.3583,
|
|
"step": 9570
|
|
},
|
|
{
|
|
"epoch": 1.1535219747140277,
|
|
"grad_norm": 5.035944938659668,
|
|
"learning_rate": 8.957392830708886e-07,
|
|
"loss": 0.39,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 1.1547260686333534,
|
|
"grad_norm": 4.2402119636535645,
|
|
"learning_rate": 8.95417950911023e-07,
|
|
"loss": 0.3655,
|
|
"step": 9590
|
|
},
|
|
{
|
|
"epoch": 1.155930162552679,
|
|
"grad_norm": 3.995563507080078,
|
|
"learning_rate": 8.950961821631406e-07,
|
|
"loss": 0.3657,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 1.1571342564720049,
|
|
"grad_norm": 5.285823822021484,
|
|
"learning_rate": 8.947739771825117e-07,
|
|
"loss": 0.3825,
|
|
"step": 9610
|
|
},
|
|
{
|
|
"epoch": 1.1583383503913305,
|
|
"grad_norm": 4.332102298736572,
|
|
"learning_rate": 8.944513363248885e-07,
|
|
"loss": 0.3808,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 1.1595424443106563,
|
|
"grad_norm": 4.714332103729248,
|
|
"learning_rate": 8.941282599465047e-07,
|
|
"loss": 0.3904,
|
|
"step": 9630
|
|
},
|
|
{
|
|
"epoch": 1.160746538229982,
|
|
"grad_norm": 3.8975484371185303,
|
|
"learning_rate": 8.938047484040748e-07,
|
|
"loss": 0.3559,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 1.1619506321493076,
|
|
"grad_norm": 4.700948238372803,
|
|
"learning_rate": 8.934808020547935e-07,
|
|
"loss": 0.3676,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 1.1631547260686332,
|
|
"grad_norm": 4.926019191741943,
|
|
"learning_rate": 8.931564212563356e-07,
|
|
"loss": 0.3913,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 1.164358819987959,
|
|
"grad_norm": 4.402989864349365,
|
|
"learning_rate": 8.92831606366856e-07,
|
|
"loss": 0.3672,
|
|
"step": 9670
|
|
},
|
|
{
|
|
"epoch": 1.1655629139072847,
|
|
"grad_norm": 4.371270656585693,
|
|
"learning_rate": 8.925063577449886e-07,
|
|
"loss": 0.3529,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 1.1667670078266106,
|
|
"grad_norm": 5.072457790374756,
|
|
"learning_rate": 8.92180675749846e-07,
|
|
"loss": 0.3703,
|
|
"step": 9690
|
|
},
|
|
{
|
|
"epoch": 1.1679711017459362,
|
|
"grad_norm": 5.789607524871826,
|
|
"learning_rate": 8.918545607410197e-07,
|
|
"loss": 0.3618,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 1.1691751956652618,
|
|
"grad_norm": 4.929603576660156,
|
|
"learning_rate": 8.91528013078579e-07,
|
|
"loss": 0.3632,
|
|
"step": 9710
|
|
},
|
|
{
|
|
"epoch": 1.1703792895845875,
|
|
"grad_norm": 4.385134220123291,
|
|
"learning_rate": 8.91201033123071e-07,
|
|
"loss": 0.3726,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 1.1715833835039133,
|
|
"grad_norm": 4.493896961212158,
|
|
"learning_rate": 8.908736212355201e-07,
|
|
"loss": 0.396,
|
|
"step": 9730
|
|
},
|
|
{
|
|
"epoch": 1.172787477423239,
|
|
"grad_norm": 5.4288859367370605,
|
|
"learning_rate": 8.905457777774278e-07,
|
|
"loss": 0.3693,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 1.1739915713425648,
|
|
"grad_norm": 4.925263404846191,
|
|
"learning_rate": 8.902175031107717e-07,
|
|
"loss": 0.3809,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 1.1751956652618905,
|
|
"grad_norm": 4.450766086578369,
|
|
"learning_rate": 8.898887975980058e-07,
|
|
"loss": 0.3747,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 1.176399759181216,
|
|
"grad_norm": 5.003162860870361,
|
|
"learning_rate": 8.895596616020595e-07,
|
|
"loss": 0.3763,
|
|
"step": 9770
|
|
},
|
|
{
|
|
"epoch": 1.1776038531005417,
|
|
"grad_norm": 5.204108238220215,
|
|
"learning_rate": 8.89230095486338e-07,
|
|
"loss": 0.3983,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 1.1788079470198676,
|
|
"grad_norm": 5.1089372634887695,
|
|
"learning_rate": 8.889000996147213e-07,
|
|
"loss": 0.3757,
|
|
"step": 9790
|
|
},
|
|
{
|
|
"epoch": 1.1800120409391932,
|
|
"grad_norm": 5.394412994384766,
|
|
"learning_rate": 8.885696743515632e-07,
|
|
"loss": 0.3764,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 1.181216134858519,
|
|
"grad_norm": 4.811611175537109,
|
|
"learning_rate": 8.882388200616926e-07,
|
|
"loss": 0.3686,
|
|
"step": 9810
|
|
},
|
|
{
|
|
"epoch": 1.1824202287778447,
|
|
"grad_norm": 4.908543109893799,
|
|
"learning_rate": 8.879075371104113e-07,
|
|
"loss": 0.368,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 1.1836243226971703,
|
|
"grad_norm": 4.540360450744629,
|
|
"learning_rate": 8.875758258634949e-07,
|
|
"loss": 0.3698,
|
|
"step": 9830
|
|
},
|
|
{
|
|
"epoch": 1.1848284166164962,
|
|
"grad_norm": 4.033935546875,
|
|
"learning_rate": 8.872436866871917e-07,
|
|
"loss": 0.3522,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 1.1860325105358218,
|
|
"grad_norm": 5.225256443023682,
|
|
"learning_rate": 8.869111199482225e-07,
|
|
"loss": 0.3837,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 1.1872366044551474,
|
|
"grad_norm": 4.02462100982666,
|
|
"learning_rate": 8.865781260137801e-07,
|
|
"loss": 0.381,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 1.1884406983744733,
|
|
"grad_norm": 4.905768871307373,
|
|
"learning_rate": 8.862447052515291e-07,
|
|
"loss": 0.384,
|
|
"step": 9870
|
|
},
|
|
{
|
|
"epoch": 1.189644792293799,
|
|
"grad_norm": 4.620838642120361,
|
|
"learning_rate": 8.859108580296053e-07,
|
|
"loss": 0.3533,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 1.1908488862131246,
|
|
"grad_norm": 4.312672138214111,
|
|
"learning_rate": 8.855765847166154e-07,
|
|
"loss": 0.3591,
|
|
"step": 9890
|
|
},
|
|
{
|
|
"epoch": 1.1920529801324504,
|
|
"grad_norm": 4.337918758392334,
|
|
"learning_rate": 8.852418856816365e-07,
|
|
"loss": 0.374,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 1.193257074051776,
|
|
"grad_norm": 4.154960632324219,
|
|
"learning_rate": 8.849067612942158e-07,
|
|
"loss": 0.3551,
|
|
"step": 9910
|
|
},
|
|
{
|
|
"epoch": 1.1944611679711017,
|
|
"grad_norm": 4.451188564300537,
|
|
"learning_rate": 8.845712119243701e-07,
|
|
"loss": 0.3699,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 1.1956652618904275,
|
|
"grad_norm": 5.723966598510742,
|
|
"learning_rate": 8.842352379425853e-07,
|
|
"loss": 0.3875,
|
|
"step": 9930
|
|
},
|
|
{
|
|
"epoch": 1.1968693558097532,
|
|
"grad_norm": 4.982749938964844,
|
|
"learning_rate": 8.838988397198166e-07,
|
|
"loss": 0.375,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 1.1980734497290788,
|
|
"grad_norm": 4.661801338195801,
|
|
"learning_rate": 8.835620176274869e-07,
|
|
"loss": 0.3721,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 1.1992775436484047,
|
|
"grad_norm": 5.228112697601318,
|
|
"learning_rate": 8.832247720374879e-07,
|
|
"loss": 0.366,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 1.2004816375677303,
|
|
"grad_norm": 4.082928657531738,
|
|
"learning_rate": 8.828871033221782e-07,
|
|
"loss": 0.3621,
|
|
"step": 9970
|
|
},
|
|
{
|
|
"epoch": 1.201685731487056,
|
|
"grad_norm": 3.532892942428589,
|
|
"learning_rate": 8.82549011854384e-07,
|
|
"loss": 0.365,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 1.2028898254063818,
|
|
"grad_norm": 4.03758430480957,
|
|
"learning_rate": 8.822104980073978e-07,
|
|
"loss": 0.3786,
|
|
"step": 9990
|
|
},
|
|
{
|
|
"epoch": 1.2040939193257074,
|
|
"grad_norm": 4.233405590057373,
|
|
"learning_rate": 8.818715621549792e-07,
|
|
"loss": 0.3664,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 1.205298013245033,
|
|
"grad_norm": 4.029031753540039,
|
|
"learning_rate": 8.815322046713531e-07,
|
|
"loss": 0.3655,
|
|
"step": 10010
|
|
},
|
|
{
|
|
"epoch": 1.206502107164359,
|
|
"grad_norm": 4.398824691772461,
|
|
"learning_rate": 8.811924259312102e-07,
|
|
"loss": 0.3818,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 1.2077062010836845,
|
|
"grad_norm": 4.394994258880615,
|
|
"learning_rate": 8.808522263097063e-07,
|
|
"loss": 0.3875,
|
|
"step": 10030
|
|
},
|
|
{
|
|
"epoch": 1.2089102950030102,
|
|
"grad_norm": 4.941735744476318,
|
|
"learning_rate": 8.805116061824617e-07,
|
|
"loss": 0.3635,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 1.210114388922336,
|
|
"grad_norm": 4.183002471923828,
|
|
"learning_rate": 8.801705659255616e-07,
|
|
"loss": 0.3718,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 1.2113184828416617,
|
|
"grad_norm": 3.9239907264709473,
|
|
"learning_rate": 8.798291059155541e-07,
|
|
"loss": 0.3562,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 1.2125225767609873,
|
|
"grad_norm": 4.399021625518799,
|
|
"learning_rate": 8.794872265294516e-07,
|
|
"loss": 0.3577,
|
|
"step": 10070
|
|
},
|
|
{
|
|
"epoch": 1.2137266706803131,
|
|
"grad_norm": 3.739692211151123,
|
|
"learning_rate": 8.791449281447291e-07,
|
|
"loss": 0.3715,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 1.2149307645996388,
|
|
"grad_norm": 6.101430416107178,
|
|
"learning_rate": 8.788022111393245e-07,
|
|
"loss": 0.3791,
|
|
"step": 10090
|
|
},
|
|
{
|
|
"epoch": 1.2161348585189644,
|
|
"grad_norm": 4.473653793334961,
|
|
"learning_rate": 8.784590758916377e-07,
|
|
"loss": 0.3733,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 1.2173389524382903,
|
|
"grad_norm": 5.723465919494629,
|
|
"learning_rate": 8.781155227805304e-07,
|
|
"loss": 0.376,
|
|
"step": 10110
|
|
},
|
|
{
|
|
"epoch": 1.218543046357616,
|
|
"grad_norm": 6.045252323150635,
|
|
"learning_rate": 8.777715521853257e-07,
|
|
"loss": 0.383,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 1.2197471402769415,
|
|
"grad_norm": 4.978476524353027,
|
|
"learning_rate": 8.774271644858078e-07,
|
|
"loss": 0.3902,
|
|
"step": 10130
|
|
},
|
|
{
|
|
"epoch": 1.2209512341962674,
|
|
"grad_norm": 4.655144691467285,
|
|
"learning_rate": 8.770823600622212e-07,
|
|
"loss": 0.3832,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 1.222155328115593,
|
|
"grad_norm": 4.3407883644104,
|
|
"learning_rate": 8.767371392952708e-07,
|
|
"loss": 0.3582,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 1.2233594220349187,
|
|
"grad_norm": 4.6942596435546875,
|
|
"learning_rate": 8.763915025661206e-07,
|
|
"loss": 0.3755,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 1.2245635159542445,
|
|
"grad_norm": 4.285218715667725,
|
|
"learning_rate": 8.760454502563947e-07,
|
|
"loss": 0.3776,
|
|
"step": 10170
|
|
},
|
|
{
|
|
"epoch": 1.2257676098735701,
|
|
"grad_norm": 4.890243053436279,
|
|
"learning_rate": 8.756989827481755e-07,
|
|
"loss": 0.37,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 1.2269717037928958,
|
|
"grad_norm": 4.752533912658691,
|
|
"learning_rate": 8.753521004240038e-07,
|
|
"loss": 0.3717,
|
|
"step": 10190
|
|
},
|
|
{
|
|
"epoch": 1.2281757977122216,
|
|
"grad_norm": 4.077126502990723,
|
|
"learning_rate": 8.750048036668789e-07,
|
|
"loss": 0.3811,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 1.2293798916315473,
|
|
"grad_norm": 3.9369449615478516,
|
|
"learning_rate": 8.74657092860257e-07,
|
|
"loss": 0.3737,
|
|
"step": 10210
|
|
},
|
|
{
|
|
"epoch": 1.230583985550873,
|
|
"grad_norm": 4.381350040435791,
|
|
"learning_rate": 8.74308968388052e-07,
|
|
"loss": 0.3528,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 1.2317880794701987,
|
|
"grad_norm": 4.581336975097656,
|
|
"learning_rate": 8.739604306346342e-07,
|
|
"loss": 0.3728,
|
|
"step": 10230
|
|
},
|
|
{
|
|
"epoch": 1.2329921733895244,
|
|
"grad_norm": 5.837801933288574,
|
|
"learning_rate": 8.736114799848306e-07,
|
|
"loss": 0.3812,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 1.23419626730885,
|
|
"grad_norm": 4.347848892211914,
|
|
"learning_rate": 8.732621168239236e-07,
|
|
"loss": 0.3818,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 1.2354003612281759,
|
|
"grad_norm": 4.717700004577637,
|
|
"learning_rate": 8.729123415376514e-07,
|
|
"loss": 0.3516,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 1.2366044551475015,
|
|
"grad_norm": 4.809170722961426,
|
|
"learning_rate": 8.725621545122072e-07,
|
|
"loss": 0.3642,
|
|
"step": 10270
|
|
},
|
|
{
|
|
"epoch": 1.2378085490668271,
|
|
"grad_norm": 4.547823905944824,
|
|
"learning_rate": 8.722115561342387e-07,
|
|
"loss": 0.3791,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 1.239012642986153,
|
|
"grad_norm": 4.235891819000244,
|
|
"learning_rate": 8.718605467908478e-07,
|
|
"loss": 0.3663,
|
|
"step": 10290
|
|
},
|
|
{
|
|
"epoch": 1.2402167369054786,
|
|
"grad_norm": 4.648200035095215,
|
|
"learning_rate": 8.715091268695901e-07,
|
|
"loss": 0.3623,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 1.2414208308248043,
|
|
"grad_norm": 4.6003737449646,
|
|
"learning_rate": 8.711572967584747e-07,
|
|
"loss": 0.378,
|
|
"step": 10310
|
|
},
|
|
{
|
|
"epoch": 1.24262492474413,
|
|
"grad_norm": 4.921525001525879,
|
|
"learning_rate": 8.708050568459635e-07,
|
|
"loss": 0.3602,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 1.2438290186634557,
|
|
"grad_norm": 4.075355052947998,
|
|
"learning_rate": 8.704524075209709e-07,
|
|
"loss": 0.3698,
|
|
"step": 10330
|
|
},
|
|
{
|
|
"epoch": 1.2450331125827814,
|
|
"grad_norm": 5.707545280456543,
|
|
"learning_rate": 8.700993491728634e-07,
|
|
"loss": 0.3538,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 1.2462372065021072,
|
|
"grad_norm": 4.669870853424072,
|
|
"learning_rate": 8.697458821914587e-07,
|
|
"loss": 0.3685,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 1.2474413004214329,
|
|
"grad_norm": 4.101998329162598,
|
|
"learning_rate": 8.693920069670264e-07,
|
|
"loss": 0.3823,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 1.2486453943407585,
|
|
"grad_norm": 4.307315349578857,
|
|
"learning_rate": 8.690377238902862e-07,
|
|
"loss": 0.3718,
|
|
"step": 10370
|
|
},
|
|
{
|
|
"epoch": 1.2498494882600844,
|
|
"grad_norm": 4.498570442199707,
|
|
"learning_rate": 8.686830333524084e-07,
|
|
"loss": 0.3894,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 1.25105358217941,
|
|
"grad_norm": 4.348161697387695,
|
|
"learning_rate": 8.68327935745013e-07,
|
|
"loss": 0.3661,
|
|
"step": 10390
|
|
},
|
|
{
|
|
"epoch": 1.2522576760987358,
|
|
"grad_norm": 4.509785175323486,
|
|
"learning_rate": 8.679724314601701e-07,
|
|
"loss": 0.3691,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 1.2534617700180615,
|
|
"grad_norm": 4.251500606536865,
|
|
"learning_rate": 8.676165208903978e-07,
|
|
"loss": 0.3489,
|
|
"step": 10410
|
|
},
|
|
{
|
|
"epoch": 1.254665863937387,
|
|
"grad_norm": 3.91599702835083,
|
|
"learning_rate": 8.672602044286637e-07,
|
|
"loss": 0.3835,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 1.2558699578567127,
|
|
"grad_norm": 4.641791820526123,
|
|
"learning_rate": 8.66903482468383e-07,
|
|
"loss": 0.3676,
|
|
"step": 10430
|
|
},
|
|
{
|
|
"epoch": 1.2570740517760386,
|
|
"grad_norm": 6.0034499168396,
|
|
"learning_rate": 8.665463554034187e-07,
|
|
"loss": 0.3728,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 1.2582781456953642,
|
|
"grad_norm": 5.09488582611084,
|
|
"learning_rate": 8.661888236280813e-07,
|
|
"loss": 0.3718,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 1.25948223961469,
|
|
"grad_norm": 5.368484020233154,
|
|
"learning_rate": 8.658308875371279e-07,
|
|
"loss": 0.3908,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 1.2606863335340157,
|
|
"grad_norm": 5.200775623321533,
|
|
"learning_rate": 8.654725475257621e-07,
|
|
"loss": 0.3655,
|
|
"step": 10470
|
|
},
|
|
{
|
|
"epoch": 1.2618904274533413,
|
|
"grad_norm": 4.358388900756836,
|
|
"learning_rate": 8.651138039896338e-07,
|
|
"loss": 0.3748,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 1.263094521372667,
|
|
"grad_norm": 4.452842712402344,
|
|
"learning_rate": 8.647546573248377e-07,
|
|
"loss": 0.3731,
|
|
"step": 10490
|
|
},
|
|
{
|
|
"epoch": 1.2642986152919928,
|
|
"grad_norm": 4.0504584312438965,
|
|
"learning_rate": 8.643951079279144e-07,
|
|
"loss": 0.3767,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 1.2655027092113185,
|
|
"grad_norm": 5.186153411865234,
|
|
"learning_rate": 8.640351561958486e-07,
|
|
"loss": 0.362,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 1.2667068031306443,
|
|
"grad_norm": 4.57370662689209,
|
|
"learning_rate": 8.636748025260696e-07,
|
|
"loss": 0.3766,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 1.26791089704997,
|
|
"grad_norm": 5.416035175323486,
|
|
"learning_rate": 8.633140473164502e-07,
|
|
"loss": 0.3653,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 1.2691149909692956,
|
|
"grad_norm": 4.351581573486328,
|
|
"learning_rate": 8.629528909653065e-07,
|
|
"loss": 0.3556,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 1.2703190848886212,
|
|
"grad_norm": 5.305721759796143,
|
|
"learning_rate": 8.625913338713982e-07,
|
|
"loss": 0.3873,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 1.271523178807947,
|
|
"grad_norm": 3.8972630500793457,
|
|
"learning_rate": 8.622293764339264e-07,
|
|
"loss": 0.3812,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 1.2727272727272727,
|
|
"grad_norm": 5.005763530731201,
|
|
"learning_rate": 8.61867019052535e-07,
|
|
"loss": 0.3761,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 1.2739313666465986,
|
|
"grad_norm": 4.1513848304748535,
|
|
"learning_rate": 8.615042621273093e-07,
|
|
"loss": 0.3525,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 1.2751354605659242,
|
|
"grad_norm": 5.166493892669678,
|
|
"learning_rate": 8.611411060587757e-07,
|
|
"loss": 0.3866,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 1.2763395544852498,
|
|
"grad_norm": 4.168553352355957,
|
|
"learning_rate": 8.60777551247901e-07,
|
|
"loss": 0.3735,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 1.2775436484045755,
|
|
"grad_norm": 4.891838550567627,
|
|
"learning_rate": 8.60413598096093e-07,
|
|
"loss": 0.3603,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 1.2787477423239013,
|
|
"grad_norm": 4.317160606384277,
|
|
"learning_rate": 8.600492470051983e-07,
|
|
"loss": 0.3765,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 1.279951836243227,
|
|
"grad_norm": 4.056015968322754,
|
|
"learning_rate": 8.59684498377504e-07,
|
|
"loss": 0.3704,
|
|
"step": 10630
|
|
},
|
|
{
|
|
"epoch": 1.2811559301625528,
|
|
"grad_norm": 4.8416242599487305,
|
|
"learning_rate": 8.593193526157354e-07,
|
|
"loss": 0.3475,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 1.2823600240818784,
|
|
"grad_norm": 5.178276062011719,
|
|
"learning_rate": 8.589538101230564e-07,
|
|
"loss": 0.3823,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 1.283564118001204,
|
|
"grad_norm": 4.507132053375244,
|
|
"learning_rate": 8.58587871303069e-07,
|
|
"loss": 0.3597,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 1.2847682119205297,
|
|
"grad_norm": 4.44130277633667,
|
|
"learning_rate": 8.582215365598127e-07,
|
|
"loss": 0.3748,
|
|
"step": 10670
|
|
},
|
|
{
|
|
"epoch": 1.2859723058398556,
|
|
"grad_norm": 4.559373378753662,
|
|
"learning_rate": 8.578548062977644e-07,
|
|
"loss": 0.3684,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 1.2871763997591812,
|
|
"grad_norm": 4.59391450881958,
|
|
"learning_rate": 8.574876809218374e-07,
|
|
"loss": 0.3729,
|
|
"step": 10690
|
|
},
|
|
{
|
|
"epoch": 1.288380493678507,
|
|
"grad_norm": 4.64610481262207,
|
|
"learning_rate": 8.571201608373815e-07,
|
|
"loss": 0.367,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 1.2895845875978327,
|
|
"grad_norm": 5.637624740600586,
|
|
"learning_rate": 8.56752246450182e-07,
|
|
"loss": 0.3799,
|
|
"step": 10710
|
|
},
|
|
{
|
|
"epoch": 1.2907886815171583,
|
|
"grad_norm": 4.1183271408081055,
|
|
"learning_rate": 8.563839381664599e-07,
|
|
"loss": 0.3744,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 1.291992775436484,
|
|
"grad_norm": 5.679279327392578,
|
|
"learning_rate": 8.560152363928709e-07,
|
|
"loss": 0.3636,
|
|
"step": 10730
|
|
},
|
|
{
|
|
"epoch": 1.2931968693558098,
|
|
"grad_norm": 4.73154878616333,
|
|
"learning_rate": 8.556461415365052e-07,
|
|
"loss": 0.3772,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 1.2944009632751354,
|
|
"grad_norm": 4.206639289855957,
|
|
"learning_rate": 8.552766540048871e-07,
|
|
"loss": 0.3652,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 1.2956050571944613,
|
|
"grad_norm": 4.551361083984375,
|
|
"learning_rate": 8.549067742059741e-07,
|
|
"loss": 0.36,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 1.296809151113787,
|
|
"grad_norm": 4.472609043121338,
|
|
"learning_rate": 8.545365025481574e-07,
|
|
"loss": 0.3949,
|
|
"step": 10770
|
|
},
|
|
{
|
|
"epoch": 1.2980132450331126,
|
|
"grad_norm": 3.9386298656463623,
|
|
"learning_rate": 8.541658394402605e-07,
|
|
"loss": 0.3736,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 1.2992173389524382,
|
|
"grad_norm": 5.128427505493164,
|
|
"learning_rate": 8.537947852915388e-07,
|
|
"loss": 0.3708,
|
|
"step": 10790
|
|
},
|
|
{
|
|
"epoch": 1.300421432871764,
|
|
"grad_norm": 4.362430095672607,
|
|
"learning_rate": 8.534233405116804e-07,
|
|
"loss": 0.3707,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 1.3016255267910897,
|
|
"grad_norm": 5.032322883605957,
|
|
"learning_rate": 8.530515055108036e-07,
|
|
"loss": 0.3694,
|
|
"step": 10810
|
|
},
|
|
{
|
|
"epoch": 1.3028296207104155,
|
|
"grad_norm": 3.745659828186035,
|
|
"learning_rate": 8.526792806994585e-07,
|
|
"loss": 0.3531,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 1.3040337146297412,
|
|
"grad_norm": 3.8410699367523193,
|
|
"learning_rate": 8.523066664886248e-07,
|
|
"loss": 0.3591,
|
|
"step": 10830
|
|
},
|
|
{
|
|
"epoch": 1.3052378085490668,
|
|
"grad_norm": 6.065695285797119,
|
|
"learning_rate": 8.519336632897128e-07,
|
|
"loss": 0.3748,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 1.3064419024683924,
|
|
"grad_norm": 4.5033464431762695,
|
|
"learning_rate": 8.515602715145615e-07,
|
|
"loss": 0.3661,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 1.3076459963877183,
|
|
"grad_norm": 4.6679558753967285,
|
|
"learning_rate": 8.511864915754399e-07,
|
|
"loss": 0.3835,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 1.308850090307044,
|
|
"grad_norm": 4.266571998596191,
|
|
"learning_rate": 8.50812323885045e-07,
|
|
"loss": 0.3799,
|
|
"step": 10870
|
|
},
|
|
{
|
|
"epoch": 1.3100541842263698,
|
|
"grad_norm": 4.90196418762207,
|
|
"learning_rate": 8.504377688565019e-07,
|
|
"loss": 0.3551,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 1.3112582781456954,
|
|
"grad_norm": 4.301276683807373,
|
|
"learning_rate": 8.500628269033635e-07,
|
|
"loss": 0.3825,
|
|
"step": 10890
|
|
},
|
|
{
|
|
"epoch": 1.312462372065021,
|
|
"grad_norm": 4.9276580810546875,
|
|
"learning_rate": 8.4968749843961e-07,
|
|
"loss": 0.37,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 1.3136664659843467,
|
|
"grad_norm": 4.929906845092773,
|
|
"learning_rate": 8.493117838796482e-07,
|
|
"loss": 0.3751,
|
|
"step": 10910
|
|
},
|
|
{
|
|
"epoch": 1.3148705599036725,
|
|
"grad_norm": 4.179794788360596,
|
|
"learning_rate": 8.489356836383112e-07,
|
|
"loss": 0.3714,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 1.3160746538229982,
|
|
"grad_norm": 4.671365261077881,
|
|
"learning_rate": 8.485591981308583e-07,
|
|
"loss": 0.3665,
|
|
"step": 10930
|
|
},
|
|
{
|
|
"epoch": 1.317278747742324,
|
|
"grad_norm": 4.073710918426514,
|
|
"learning_rate": 8.481823277729734e-07,
|
|
"loss": 0.3602,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 1.3184828416616496,
|
|
"grad_norm": 4.633068084716797,
|
|
"learning_rate": 8.478050729807663e-07,
|
|
"loss": 0.3682,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 1.3196869355809753,
|
|
"grad_norm": 5.233600616455078,
|
|
"learning_rate": 8.474274341707701e-07,
|
|
"loss": 0.3781,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 1.320891029500301,
|
|
"grad_norm": 4.329504013061523,
|
|
"learning_rate": 8.470494117599431e-07,
|
|
"loss": 0.3763,
|
|
"step": 10970
|
|
},
|
|
{
|
|
"epoch": 1.3220951234196268,
|
|
"grad_norm": 4.211668968200684,
|
|
"learning_rate": 8.466710061656664e-07,
|
|
"loss": 0.3325,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 1.3232992173389524,
|
|
"grad_norm": 4.388267993927002,
|
|
"learning_rate": 8.462922178057443e-07,
|
|
"loss": 0.3709,
|
|
"step": 10990
|
|
},
|
|
{
|
|
"epoch": 1.3245033112582782,
|
|
"grad_norm": 5.167718887329102,
|
|
"learning_rate": 8.45913047098404e-07,
|
|
"loss": 0.362,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 1.3257074051776039,
|
|
"grad_norm": 4.614595890045166,
|
|
"learning_rate": 8.455334944622945e-07,
|
|
"loss": 0.3549,
|
|
"step": 11010
|
|
},
|
|
{
|
|
"epoch": 1.3269114990969295,
|
|
"grad_norm": 4.618056774139404,
|
|
"learning_rate": 8.451535603164864e-07,
|
|
"loss": 0.3773,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 1.3281155930162551,
|
|
"grad_norm": 4.563729763031006,
|
|
"learning_rate": 8.447732450804723e-07,
|
|
"loss": 0.3688,
|
|
"step": 11030
|
|
},
|
|
{
|
|
"epoch": 1.329319686935581,
|
|
"grad_norm": 4.429327011108398,
|
|
"learning_rate": 8.443925491741646e-07,
|
|
"loss": 0.3429,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 1.3305237808549066,
|
|
"grad_norm": 4.474249362945557,
|
|
"learning_rate": 8.440114730178966e-07,
|
|
"loss": 0.3879,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 1.3317278747742325,
|
|
"grad_norm": 4.212963581085205,
|
|
"learning_rate": 8.436300170324215e-07,
|
|
"loss": 0.349,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 1.3329319686935581,
|
|
"grad_norm": 4.393470287322998,
|
|
"learning_rate": 8.432481816389112e-07,
|
|
"loss": 0.3609,
|
|
"step": 11070
|
|
},
|
|
{
|
|
"epoch": 1.3341360626128838,
|
|
"grad_norm": 4.512639045715332,
|
|
"learning_rate": 8.428659672589574e-07,
|
|
"loss": 0.3446,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 1.3353401565322094,
|
|
"grad_norm": 5.399291515350342,
|
|
"learning_rate": 8.424833743145696e-07,
|
|
"loss": 0.3643,
|
|
"step": 11090
|
|
},
|
|
{
|
|
"epoch": 1.3365442504515352,
|
|
"grad_norm": 4.692162990570068,
|
|
"learning_rate": 8.421004032281756e-07,
|
|
"loss": 0.3782,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 1.3377483443708609,
|
|
"grad_norm": 4.4849677085876465,
|
|
"learning_rate": 8.417170544226203e-07,
|
|
"loss": 0.36,
|
|
"step": 11110
|
|
},
|
|
{
|
|
"epoch": 1.3389524382901867,
|
|
"grad_norm": 4.692328453063965,
|
|
"learning_rate": 8.413333283211664e-07,
|
|
"loss": 0.3626,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 1.3401565322095124,
|
|
"grad_norm": 4.903812408447266,
|
|
"learning_rate": 8.409492253474925e-07,
|
|
"loss": 0.3576,
|
|
"step": 11130
|
|
},
|
|
{
|
|
"epoch": 1.341360626128838,
|
|
"grad_norm": 4.484142780303955,
|
|
"learning_rate": 8.405647459256937e-07,
|
|
"loss": 0.3611,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 1.3425647200481636,
|
|
"grad_norm": 4.777652263641357,
|
|
"learning_rate": 8.401798904802804e-07,
|
|
"loss": 0.3654,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 1.3437688139674895,
|
|
"grad_norm": 4.49363374710083,
|
|
"learning_rate": 8.397946594361785e-07,
|
|
"loss": 0.3684,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 1.3449729078868151,
|
|
"grad_norm": 5.207254886627197,
|
|
"learning_rate": 8.394090532187284e-07,
|
|
"loss": 0.3706,
|
|
"step": 11170
|
|
},
|
|
{
|
|
"epoch": 1.346177001806141,
|
|
"grad_norm": 5.246047496795654,
|
|
"learning_rate": 8.390230722536849e-07,
|
|
"loss": 0.365,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 1.3473810957254666,
|
|
"grad_norm": 4.5202317237854,
|
|
"learning_rate": 8.386367169672164e-07,
|
|
"loss": 0.3549,
|
|
"step": 11190
|
|
},
|
|
{
|
|
"epoch": 1.3485851896447922,
|
|
"grad_norm": 5.0257368087768555,
|
|
"learning_rate": 8.382499877859046e-07,
|
|
"loss": 0.3765,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 1.3497892835641179,
|
|
"grad_norm": 3.513502597808838,
|
|
"learning_rate": 8.378628851367441e-07,
|
|
"loss": 0.3435,
|
|
"step": 11210
|
|
},
|
|
{
|
|
"epoch": 1.3509933774834437,
|
|
"grad_norm": 4.943020820617676,
|
|
"learning_rate": 8.374754094471421e-07,
|
|
"loss": 0.3754,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 1.3521974714027694,
|
|
"grad_norm": 4.6621012687683105,
|
|
"learning_rate": 8.37087561144917e-07,
|
|
"loss": 0.3823,
|
|
"step": 11230
|
|
},
|
|
{
|
|
"epoch": 1.3534015653220952,
|
|
"grad_norm": 3.8831217288970947,
|
|
"learning_rate": 8.366993406582996e-07,
|
|
"loss": 0.3606,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 1.3546056592414208,
|
|
"grad_norm": 4.315981388092041,
|
|
"learning_rate": 8.363107484159305e-07,
|
|
"loss": 0.3647,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 1.3558097531607465,
|
|
"grad_norm": 4.6641011238098145,
|
|
"learning_rate": 8.359217848468616e-07,
|
|
"loss": 0.377,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 1.357013847080072,
|
|
"grad_norm": 4.609387397766113,
|
|
"learning_rate": 8.355324503805545e-07,
|
|
"loss": 0.369,
|
|
"step": 11270
|
|
},
|
|
{
|
|
"epoch": 1.358217940999398,
|
|
"grad_norm": 4.37289571762085,
|
|
"learning_rate": 8.351427454468805e-07,
|
|
"loss": 0.3594,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 1.3594220349187236,
|
|
"grad_norm": 5.407008171081543,
|
|
"learning_rate": 8.347526704761192e-07,
|
|
"loss": 0.3732,
|
|
"step": 11290
|
|
},
|
|
{
|
|
"epoch": 1.3606261288380495,
|
|
"grad_norm": 4.5802083015441895,
|
|
"learning_rate": 8.3436222589896e-07,
|
|
"loss": 0.3506,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 1.361830222757375,
|
|
"grad_norm": 4.10429048538208,
|
|
"learning_rate": 8.339714121464994e-07,
|
|
"loss": 0.3917,
|
|
"step": 11310
|
|
},
|
|
{
|
|
"epoch": 1.3630343166767007,
|
|
"grad_norm": 4.250566005706787,
|
|
"learning_rate": 8.335802296502419e-07,
|
|
"loss": 0.3515,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 1.3642384105960264,
|
|
"grad_norm": 5.012816429138184,
|
|
"learning_rate": 8.33188678842099e-07,
|
|
"loss": 0.354,
|
|
"step": 11330
|
|
},
|
|
{
|
|
"epoch": 1.3654425045153522,
|
|
"grad_norm": 4.53849983215332,
|
|
"learning_rate": 8.327967601543891e-07,
|
|
"loss": 0.3612,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 1.3666465984346778,
|
|
"grad_norm": 4.784470081329346,
|
|
"learning_rate": 8.324044740198364e-07,
|
|
"loss": 0.356,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 1.3678506923540037,
|
|
"grad_norm": 4.100750923156738,
|
|
"learning_rate": 8.320118208715714e-07,
|
|
"loss": 0.3769,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 1.3690547862733293,
|
|
"grad_norm": 5.738262176513672,
|
|
"learning_rate": 8.316188011431291e-07,
|
|
"loss": 0.3797,
|
|
"step": 11370
|
|
},
|
|
{
|
|
"epoch": 1.370258880192655,
|
|
"grad_norm": 4.102308750152588,
|
|
"learning_rate": 8.312254152684495e-07,
|
|
"loss": 0.3723,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 1.3714629741119808,
|
|
"grad_norm": 3.786195993423462,
|
|
"learning_rate": 8.308316636818773e-07,
|
|
"loss": 0.3638,
|
|
"step": 11390
|
|
},
|
|
{
|
|
"epoch": 1.3726670680313064,
|
|
"grad_norm": 4.1659159660339355,
|
|
"learning_rate": 8.304375468181606e-07,
|
|
"loss": 0.3487,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 1.373871161950632,
|
|
"grad_norm": 4.081630229949951,
|
|
"learning_rate": 8.300430651124505e-07,
|
|
"loss": 0.3602,
|
|
"step": 11410
|
|
},
|
|
{
|
|
"epoch": 1.375075255869958,
|
|
"grad_norm": 4.725644111633301,
|
|
"learning_rate": 8.296482190003019e-07,
|
|
"loss": 0.3746,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 1.3762793497892836,
|
|
"grad_norm": 4.421098709106445,
|
|
"learning_rate": 8.292530089176709e-07,
|
|
"loss": 0.3632,
|
|
"step": 11430
|
|
},
|
|
{
|
|
"epoch": 1.3774834437086092,
|
|
"grad_norm": 4.213558197021484,
|
|
"learning_rate": 8.288574353009164e-07,
|
|
"loss": 0.3748,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 1.378687537627935,
|
|
"grad_norm": 5.2602458000183105,
|
|
"learning_rate": 8.284614985867979e-07,
|
|
"loss": 0.355,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 1.3798916315472607,
|
|
"grad_norm": 4.735654354095459,
|
|
"learning_rate": 8.280651992124766e-07,
|
|
"loss": 0.3619,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 1.3810957254665863,
|
|
"grad_norm": 5.071203708648682,
|
|
"learning_rate": 8.276685376155133e-07,
|
|
"loss": 0.3693,
|
|
"step": 11470
|
|
},
|
|
{
|
|
"epoch": 1.3822998193859122,
|
|
"grad_norm": 4.431037902832031,
|
|
"learning_rate": 8.272715142338694e-07,
|
|
"loss": 0.3652,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 1.3835039133052378,
|
|
"grad_norm": 4.460841178894043,
|
|
"learning_rate": 8.268741295059056e-07,
|
|
"loss": 0.3732,
|
|
"step": 11490
|
|
},
|
|
{
|
|
"epoch": 1.3847080072245634,
|
|
"grad_norm": 5.048714637756348,
|
|
"learning_rate": 8.264763838703812e-07,
|
|
"loss": 0.364,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 1.3859121011438893,
|
|
"grad_norm": 4.322780132293701,
|
|
"learning_rate": 8.260782777664544e-07,
|
|
"loss": 0.3606,
|
|
"step": 11510
|
|
},
|
|
{
|
|
"epoch": 1.387116195063215,
|
|
"grad_norm": 4.763073921203613,
|
|
"learning_rate": 8.256798116336813e-07,
|
|
"loss": 0.3885,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 1.3883202889825406,
|
|
"grad_norm": 4.54296350479126,
|
|
"learning_rate": 8.252809859120153e-07,
|
|
"loss": 0.3629,
|
|
"step": 11530
|
|
},
|
|
{
|
|
"epoch": 1.3895243829018664,
|
|
"grad_norm": 4.481988430023193,
|
|
"learning_rate": 8.248818010418073e-07,
|
|
"loss": 0.3641,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 1.390728476821192,
|
|
"grad_norm": 4.431914806365967,
|
|
"learning_rate": 8.244822574638041e-07,
|
|
"loss": 0.3591,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 1.3919325707405177,
|
|
"grad_norm": 4.374257564544678,
|
|
"learning_rate": 8.240823556191489e-07,
|
|
"loss": 0.3634,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 1.3931366646598435,
|
|
"grad_norm": 3.9488606452941895,
|
|
"learning_rate": 8.23682095949381e-07,
|
|
"loss": 0.3466,
|
|
"step": 11570
|
|
},
|
|
{
|
|
"epoch": 1.3943407585791692,
|
|
"grad_norm": 4.069718837738037,
|
|
"learning_rate": 8.232814788964336e-07,
|
|
"loss": 0.3286,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 1.3955448524984948,
|
|
"grad_norm": 4.749855995178223,
|
|
"learning_rate": 8.228805049026355e-07,
|
|
"loss": 0.3546,
|
|
"step": 11590
|
|
},
|
|
{
|
|
"epoch": 1.3967489464178207,
|
|
"grad_norm": 3.9409117698669434,
|
|
"learning_rate": 8.224791744107089e-07,
|
|
"loss": 0.3663,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 1.3979530403371463,
|
|
"grad_norm": 4.028295993804932,
|
|
"learning_rate": 8.220774878637704e-07,
|
|
"loss": 0.3705,
|
|
"step": 11610
|
|
},
|
|
{
|
|
"epoch": 1.399157134256472,
|
|
"grad_norm": 4.911005973815918,
|
|
"learning_rate": 8.21675445705329e-07,
|
|
"loss": 0.3691,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 1.4003612281757978,
|
|
"grad_norm": 4.403053283691406,
|
|
"learning_rate": 8.212730483792868e-07,
|
|
"loss": 0.3736,
|
|
"step": 11630
|
|
},
|
|
{
|
|
"epoch": 1.4015653220951234,
|
|
"grad_norm": 4.316033840179443,
|
|
"learning_rate": 8.208702963299376e-07,
|
|
"loss": 0.373,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 1.402769416014449,
|
|
"grad_norm": 5.129039764404297,
|
|
"learning_rate": 8.204671900019676e-07,
|
|
"loss": 0.37,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 1.403973509933775,
|
|
"grad_norm": 4.374388694763184,
|
|
"learning_rate": 8.200637298404531e-07,
|
|
"loss": 0.3621,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 1.4051776038531005,
|
|
"grad_norm": 4.385969161987305,
|
|
"learning_rate": 8.19659916290862e-07,
|
|
"loss": 0.3744,
|
|
"step": 11670
|
|
},
|
|
{
|
|
"epoch": 1.4063816977724262,
|
|
"grad_norm": 4.8797149658203125,
|
|
"learning_rate": 8.192557497990521e-07,
|
|
"loss": 0.3554,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 1.407585791691752,
|
|
"grad_norm": 3.9471144676208496,
|
|
"learning_rate": 8.188512308112707e-07,
|
|
"loss": 0.3702,
|
|
"step": 11690
|
|
},
|
|
{
|
|
"epoch": 1.4087898856110777,
|
|
"grad_norm": 4.70519495010376,
|
|
"learning_rate": 8.184463597741544e-07,
|
|
"loss": 0.3422,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 1.4099939795304035,
|
|
"grad_norm": 5.044809818267822,
|
|
"learning_rate": 8.180411371347287e-07,
|
|
"loss": 0.3702,
|
|
"step": 11710
|
|
},
|
|
{
|
|
"epoch": 1.4111980734497291,
|
|
"grad_norm": 4.174710273742676,
|
|
"learning_rate": 8.17635563340407e-07,
|
|
"loss": 0.3513,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 1.4124021673690548,
|
|
"grad_norm": 4.635099411010742,
|
|
"learning_rate": 8.172296388389907e-07,
|
|
"loss": 0.3779,
|
|
"step": 11730
|
|
},
|
|
{
|
|
"epoch": 1.4136062612883804,
|
|
"grad_norm": 5.230491638183594,
|
|
"learning_rate": 8.168233640786682e-07,
|
|
"loss": 0.3601,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 1.4148103552077063,
|
|
"grad_norm": 4.704545497894287,
|
|
"learning_rate": 8.164167395080149e-07,
|
|
"loss": 0.3569,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 1.416014449127032,
|
|
"grad_norm": 4.232817649841309,
|
|
"learning_rate": 8.160097655759917e-07,
|
|
"loss": 0.374,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 1.4172185430463577,
|
|
"grad_norm": 5.304251670837402,
|
|
"learning_rate": 8.156024427319463e-07,
|
|
"loss": 0.3668,
|
|
"step": 11770
|
|
},
|
|
{
|
|
"epoch": 1.4184226369656834,
|
|
"grad_norm": 4.5971245765686035,
|
|
"learning_rate": 8.151947714256111e-07,
|
|
"loss": 0.3778,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 1.419626730885009,
|
|
"grad_norm": 4.492901802062988,
|
|
"learning_rate": 8.14786752107103e-07,
|
|
"loss": 0.3418,
|
|
"step": 11790
|
|
},
|
|
{
|
|
"epoch": 1.4208308248043346,
|
|
"grad_norm": 4.80876350402832,
|
|
"learning_rate": 8.143783852269237e-07,
|
|
"loss": 0.3633,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 1.4220349187236605,
|
|
"grad_norm": 3.9827497005462646,
|
|
"learning_rate": 8.13969671235958e-07,
|
|
"loss": 0.3649,
|
|
"step": 11810
|
|
},
|
|
{
|
|
"epoch": 1.4232390126429861,
|
|
"grad_norm": 4.20520544052124,
|
|
"learning_rate": 8.135606105854747e-07,
|
|
"loss": 0.3495,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 1.424443106562312,
|
|
"grad_norm": 4.29602575302124,
|
|
"learning_rate": 8.131512037271247e-07,
|
|
"loss": 0.3678,
|
|
"step": 11830
|
|
},
|
|
{
|
|
"epoch": 1.4256472004816376,
|
|
"grad_norm": 4.648280143737793,
|
|
"learning_rate": 8.127414511129416e-07,
|
|
"loss": 0.3789,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 1.4268512944009633,
|
|
"grad_norm": 4.162654399871826,
|
|
"learning_rate": 8.123313531953404e-07,
|
|
"loss": 0.372,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 1.4280553883202889,
|
|
"grad_norm": 4.688777446746826,
|
|
"learning_rate": 8.119209104271176e-07,
|
|
"loss": 0.3576,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 1.4292594822396147,
|
|
"grad_norm": 4.464323997497559,
|
|
"learning_rate": 8.115101232614506e-07,
|
|
"loss": 0.3817,
|
|
"step": 11870
|
|
},
|
|
{
|
|
"epoch": 1.4304635761589404,
|
|
"grad_norm": 4.280879974365234,
|
|
"learning_rate": 8.110989921518965e-07,
|
|
"loss": 0.3604,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 1.4316676700782662,
|
|
"grad_norm": 3.778425693511963,
|
|
"learning_rate": 8.106875175523926e-07,
|
|
"loss": 0.3553,
|
|
"step": 11890
|
|
},
|
|
{
|
|
"epoch": 1.4328717639975919,
|
|
"grad_norm": 4.960265159606934,
|
|
"learning_rate": 8.102756999172554e-07,
|
|
"loss": 0.3723,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 1.4340758579169175,
|
|
"grad_norm": 4.935343265533447,
|
|
"learning_rate": 8.098635397011802e-07,
|
|
"loss": 0.3714,
|
|
"step": 11910
|
|
},
|
|
{
|
|
"epoch": 1.4352799518362431,
|
|
"grad_norm": 4.417319297790527,
|
|
"learning_rate": 8.094510373592402e-07,
|
|
"loss": 0.3612,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 1.436484045755569,
|
|
"grad_norm": 4.819094181060791,
|
|
"learning_rate": 8.090381933468868e-07,
|
|
"loss": 0.3602,
|
|
"step": 11930
|
|
},
|
|
{
|
|
"epoch": 1.4376881396748946,
|
|
"grad_norm": 4.769229888916016,
|
|
"learning_rate": 8.086250081199484e-07,
|
|
"loss": 0.3597,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 1.4388922335942205,
|
|
"grad_norm": 4.872611999511719,
|
|
"learning_rate": 8.082114821346302e-07,
|
|
"loss": 0.3698,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 1.440096327513546,
|
|
"grad_norm": 4.3483686447143555,
|
|
"learning_rate": 8.077976158475135e-07,
|
|
"loss": 0.366,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 1.4413004214328717,
|
|
"grad_norm": 4.28345251083374,
|
|
"learning_rate": 8.073834097155555e-07,
|
|
"loss": 0.3564,
|
|
"step": 11970
|
|
},
|
|
{
|
|
"epoch": 1.4425045153521974,
|
|
"grad_norm": 4.1988606452941895,
|
|
"learning_rate": 8.069688641960888e-07,
|
|
"loss": 0.3557,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 1.4437086092715232,
|
|
"grad_norm": 4.156854152679443,
|
|
"learning_rate": 8.065539797468201e-07,
|
|
"loss": 0.3631,
|
|
"step": 11990
|
|
},
|
|
{
|
|
"epoch": 1.4449127031908489,
|
|
"grad_norm": 5.002780914306641,
|
|
"learning_rate": 8.061387568258312e-07,
|
|
"loss": 0.362,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 1.4461167971101747,
|
|
"grad_norm": 4.551509380340576,
|
|
"learning_rate": 8.057231958915767e-07,
|
|
"loss": 0.3545,
|
|
"step": 12010
|
|
},
|
|
{
|
|
"epoch": 1.4473208910295003,
|
|
"grad_norm": 3.529510498046875,
|
|
"learning_rate": 8.053072974028851e-07,
|
|
"loss": 0.3698,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 1.448524984948826,
|
|
"grad_norm": 5.073483467102051,
|
|
"learning_rate": 8.048910618189573e-07,
|
|
"loss": 0.3762,
|
|
"step": 12030
|
|
},
|
|
{
|
|
"epoch": 1.4497290788681516,
|
|
"grad_norm": 4.148519992828369,
|
|
"learning_rate": 8.044744895993665e-07,
|
|
"loss": 0.3714,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 1.4509331727874775,
|
|
"grad_norm": 5.03234338760376,
|
|
"learning_rate": 8.040575812040574e-07,
|
|
"loss": 0.3651,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 1.452137266706803,
|
|
"grad_norm": 4.286599159240723,
|
|
"learning_rate": 8.03640337093346e-07,
|
|
"loss": 0.3646,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 1.453341360626129,
|
|
"grad_norm": 5.805792808532715,
|
|
"learning_rate": 8.03222757727919e-07,
|
|
"loss": 0.3662,
|
|
"step": 12070
|
|
},
|
|
{
|
|
"epoch": 1.4545454545454546,
|
|
"grad_norm": 5.614697456359863,
|
|
"learning_rate": 8.028048435688333e-07,
|
|
"loss": 0.3661,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 1.4557495484647802,
|
|
"grad_norm": 4.117318630218506,
|
|
"learning_rate": 8.023865950775153e-07,
|
|
"loss": 0.3611,
|
|
"step": 12090
|
|
},
|
|
{
|
|
"epoch": 1.4569536423841059,
|
|
"grad_norm": 4.437227249145508,
|
|
"learning_rate": 8.019680127157606e-07,
|
|
"loss": 0.3551,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 1.4581577363034317,
|
|
"grad_norm": 4.852316856384277,
|
|
"learning_rate": 8.015490969457337e-07,
|
|
"loss": 0.3738,
|
|
"step": 12110
|
|
},
|
|
{
|
|
"epoch": 1.4593618302227573,
|
|
"grad_norm": 4.06812047958374,
|
|
"learning_rate": 8.011298482299666e-07,
|
|
"loss": 0.3535,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 1.4605659241420832,
|
|
"grad_norm": 4.921239376068115,
|
|
"learning_rate": 8.007102670313595e-07,
|
|
"loss": 0.3586,
|
|
"step": 12130
|
|
},
|
|
{
|
|
"epoch": 1.4617700180614088,
|
|
"grad_norm": 3.9317848682403564,
|
|
"learning_rate": 8.002903538131794e-07,
|
|
"loss": 0.3527,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 1.4629741119807345,
|
|
"grad_norm": 5.692650318145752,
|
|
"learning_rate": 7.998701090390601e-07,
|
|
"loss": 0.364,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 1.46417820590006,
|
|
"grad_norm": 4.238543510437012,
|
|
"learning_rate": 7.994495331730013e-07,
|
|
"loss": 0.3516,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 1.465382299819386,
|
|
"grad_norm": 4.356393814086914,
|
|
"learning_rate": 7.990286266793685e-07,
|
|
"loss": 0.3464,
|
|
"step": 12170
|
|
},
|
|
{
|
|
"epoch": 1.4665863937387116,
|
|
"grad_norm": 4.616797924041748,
|
|
"learning_rate": 7.986073900228916e-07,
|
|
"loss": 0.3465,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 1.4677904876580374,
|
|
"grad_norm": 3.8541862964630127,
|
|
"learning_rate": 7.981858236686661e-07,
|
|
"loss": 0.3546,
|
|
"step": 12190
|
|
},
|
|
{
|
|
"epoch": 1.468994581577363,
|
|
"grad_norm": 5.685515880584717,
|
|
"learning_rate": 7.977639280821505e-07,
|
|
"loss": 0.3563,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 1.4701986754966887,
|
|
"grad_norm": 4.1002702713012695,
|
|
"learning_rate": 7.973417037291672e-07,
|
|
"loss": 0.3771,
|
|
"step": 12210
|
|
},
|
|
{
|
|
"epoch": 1.4714027694160143,
|
|
"grad_norm": 4.752336025238037,
|
|
"learning_rate": 7.969191510759019e-07,
|
|
"loss": 0.366,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 1.4726068633353402,
|
|
"grad_norm": 4.7561774253845215,
|
|
"learning_rate": 7.964962705889027e-07,
|
|
"loss": 0.3621,
|
|
"step": 12230
|
|
},
|
|
{
|
|
"epoch": 1.4738109572546658,
|
|
"grad_norm": 4.569270133972168,
|
|
"learning_rate": 7.96073062735079e-07,
|
|
"loss": 0.3662,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 1.4750150511739917,
|
|
"grad_norm": 3.9785332679748535,
|
|
"learning_rate": 7.956495279817025e-07,
|
|
"loss": 0.3711,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 1.4762191450933173,
|
|
"grad_norm": 4.953578948974609,
|
|
"learning_rate": 7.952256667964053e-07,
|
|
"loss": 0.3671,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 1.477423239012643,
|
|
"grad_norm": 4.805257320404053,
|
|
"learning_rate": 7.948014796471802e-07,
|
|
"loss": 0.3707,
|
|
"step": 12270
|
|
},
|
|
{
|
|
"epoch": 1.4786273329319686,
|
|
"grad_norm": 4.094834804534912,
|
|
"learning_rate": 7.943769670023799e-07,
|
|
"loss": 0.3699,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 1.4798314268512944,
|
|
"grad_norm": 5.696323394775391,
|
|
"learning_rate": 7.939521293307161e-07,
|
|
"loss": 0.3753,
|
|
"step": 12290
|
|
},
|
|
{
|
|
"epoch": 1.48103552077062,
|
|
"grad_norm": 4.848500728607178,
|
|
"learning_rate": 7.935269671012599e-07,
|
|
"loss": 0.3643,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 1.482239614689946,
|
|
"grad_norm": 4.916533946990967,
|
|
"learning_rate": 7.931014807834404e-07,
|
|
"loss": 0.3621,
|
|
"step": 12310
|
|
},
|
|
{
|
|
"epoch": 1.4834437086092715,
|
|
"grad_norm": 4.234400272369385,
|
|
"learning_rate": 7.926756708470447e-07,
|
|
"loss": 0.3464,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 1.4846478025285972,
|
|
"grad_norm": 4.844507217407227,
|
|
"learning_rate": 7.922495377622171e-07,
|
|
"loss": 0.3535,
|
|
"step": 12330
|
|
},
|
|
{
|
|
"epoch": 1.4858518964479228,
|
|
"grad_norm": 5.471369743347168,
|
|
"learning_rate": 7.918230819994588e-07,
|
|
"loss": 0.3592,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 1.4870559903672487,
|
|
"grad_norm": 5.131628036499023,
|
|
"learning_rate": 7.913963040296272e-07,
|
|
"loss": 0.376,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 1.4882600842865743,
|
|
"grad_norm": 4.308112144470215,
|
|
"learning_rate": 7.909692043239353e-07,
|
|
"loss": 0.3526,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 1.4894641782059002,
|
|
"grad_norm": 4.5161356925964355,
|
|
"learning_rate": 7.905417833539518e-07,
|
|
"loss": 0.3548,
|
|
"step": 12370
|
|
},
|
|
{
|
|
"epoch": 1.4906682721252258,
|
|
"grad_norm": 4.657468795776367,
|
|
"learning_rate": 7.901140415915995e-07,
|
|
"loss": 0.3727,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 1.4918723660445514,
|
|
"grad_norm": 4.615851879119873,
|
|
"learning_rate": 7.896859795091562e-07,
|
|
"loss": 0.3728,
|
|
"step": 12390
|
|
},
|
|
{
|
|
"epoch": 1.493076459963877,
|
|
"grad_norm": 3.6912169456481934,
|
|
"learning_rate": 7.892575975792523e-07,
|
|
"loss": 0.3646,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 1.494280553883203,
|
|
"grad_norm": 4.871870517730713,
|
|
"learning_rate": 7.888288962748723e-07,
|
|
"loss": 0.3416,
|
|
"step": 12410
|
|
},
|
|
{
|
|
"epoch": 1.4954846478025285,
|
|
"grad_norm": 4.7089385986328125,
|
|
"learning_rate": 7.883998760693529e-07,
|
|
"loss": 0.3883,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 1.4966887417218544,
|
|
"grad_norm": 4.376954078674316,
|
|
"learning_rate": 7.87970537436383e-07,
|
|
"loss": 0.3427,
|
|
"step": 12430
|
|
},
|
|
{
|
|
"epoch": 1.49789283564118,
|
|
"grad_norm": 4.280700206756592,
|
|
"learning_rate": 7.875408808500028e-07,
|
|
"loss": 0.3651,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 1.4990969295605057,
|
|
"grad_norm": 4.794469356536865,
|
|
"learning_rate": 7.871109067846041e-07,
|
|
"loss": 0.3731,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 1.5003010234798313,
|
|
"grad_norm": 4.945312023162842,
|
|
"learning_rate": 7.86680615714929e-07,
|
|
"loss": 0.3586,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 1.5015051173991572,
|
|
"grad_norm": 3.5225555896759033,
|
|
"learning_rate": 7.862500081160692e-07,
|
|
"loss": 0.3595,
|
|
"step": 12470
|
|
},
|
|
{
|
|
"epoch": 1.502709211318483,
|
|
"grad_norm": 4.152462005615234,
|
|
"learning_rate": 7.858190844634664e-07,
|
|
"loss": 0.3777,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 1.5039133052378086,
|
|
"grad_norm": 5.704073905944824,
|
|
"learning_rate": 7.853878452329113e-07,
|
|
"loss": 0.375,
|
|
"step": 12490
|
|
},
|
|
{
|
|
"epoch": 1.5051173991571343,
|
|
"grad_norm": 5.431835174560547,
|
|
"learning_rate": 7.849562909005425e-07,
|
|
"loss": 0.3596,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 1.50632149307646,
|
|
"grad_norm": 4.785493850708008,
|
|
"learning_rate": 7.845244219428469e-07,
|
|
"loss": 0.3888,
|
|
"step": 12510
|
|
},
|
|
{
|
|
"epoch": 1.5075255869957855,
|
|
"grad_norm": 4.297571182250977,
|
|
"learning_rate": 7.84092238836659e-07,
|
|
"loss": 0.3576,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 1.5087296809151114,
|
|
"grad_norm": 4.948373317718506,
|
|
"learning_rate": 7.836597420591595e-07,
|
|
"loss": 0.3766,
|
|
"step": 12530
|
|
},
|
|
{
|
|
"epoch": 1.5099337748344372,
|
|
"grad_norm": 4.502270698547363,
|
|
"learning_rate": 7.832269320878762e-07,
|
|
"loss": 0.3624,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 1.5111378687537629,
|
|
"grad_norm": 3.8894035816192627,
|
|
"learning_rate": 7.827938094006821e-07,
|
|
"loss": 0.3743,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 1.5123419626730885,
|
|
"grad_norm": 4.615447044372559,
|
|
"learning_rate": 7.823603744757956e-07,
|
|
"loss": 0.3586,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 1.5135460565924141,
|
|
"grad_norm": 4.9232401847839355,
|
|
"learning_rate": 7.8192662779178e-07,
|
|
"loss": 0.3488,
|
|
"step": 12570
|
|
},
|
|
{
|
|
"epoch": 1.5147501505117398,
|
|
"grad_norm": 4.241856575012207,
|
|
"learning_rate": 7.81492569827543e-07,
|
|
"loss": 0.355,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 1.5159542444310656,
|
|
"grad_norm": 5.041738986968994,
|
|
"learning_rate": 7.810582010623354e-07,
|
|
"loss": 0.3755,
|
|
"step": 12590
|
|
},
|
|
{
|
|
"epoch": 1.5171583383503915,
|
|
"grad_norm": 4.944552421569824,
|
|
"learning_rate": 7.806235219757518e-07,
|
|
"loss": 0.3643,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 1.5183624322697171,
|
|
"grad_norm": 5.554732799530029,
|
|
"learning_rate": 7.801885330477289e-07,
|
|
"loss": 0.3687,
|
|
"step": 12610
|
|
},
|
|
{
|
|
"epoch": 1.5195665261890428,
|
|
"grad_norm": 6.034419059753418,
|
|
"learning_rate": 7.797532347585459e-07,
|
|
"loss": 0.3595,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 1.5207706201083684,
|
|
"grad_norm": 4.2550048828125,
|
|
"learning_rate": 7.793176275888231e-07,
|
|
"loss": 0.3727,
|
|
"step": 12630
|
|
},
|
|
{
|
|
"epoch": 1.521974714027694,
|
|
"grad_norm": 4.084836006164551,
|
|
"learning_rate": 7.788817120195226e-07,
|
|
"loss": 0.3646,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 1.5231788079470199,
|
|
"grad_norm": 4.183859825134277,
|
|
"learning_rate": 7.784454885319464e-07,
|
|
"loss": 0.3846,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 1.5243829018663457,
|
|
"grad_norm": 4.216065406799316,
|
|
"learning_rate": 7.780089576077364e-07,
|
|
"loss": 0.3794,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 1.5255869957856714,
|
|
"grad_norm": 4.975666522979736,
|
|
"learning_rate": 7.775721197288744e-07,
|
|
"loss": 0.3903,
|
|
"step": 12670
|
|
},
|
|
{
|
|
"epoch": 1.526791089704997,
|
|
"grad_norm": 4.360125541687012,
|
|
"learning_rate": 7.77134975377681e-07,
|
|
"loss": 0.3481,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 1.5279951836243226,
|
|
"grad_norm": 5.113675594329834,
|
|
"learning_rate": 7.766975250368149e-07,
|
|
"loss": 0.3624,
|
|
"step": 12690
|
|
},
|
|
{
|
|
"epoch": 1.5291992775436483,
|
|
"grad_norm": 4.466128349304199,
|
|
"learning_rate": 7.76259769189273e-07,
|
|
"loss": 0.3619,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 1.5304033714629741,
|
|
"grad_norm": 4.945206165313721,
|
|
"learning_rate": 7.758217083183891e-07,
|
|
"loss": 0.358,
|
|
"step": 12710
|
|
},
|
|
{
|
|
"epoch": 1.5316074653823,
|
|
"grad_norm": 4.3737287521362305,
|
|
"learning_rate": 7.753833429078342e-07,
|
|
"loss": 0.3566,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 1.5328115593016256,
|
|
"grad_norm": 4.813685894012451,
|
|
"learning_rate": 7.749446734416152e-07,
|
|
"loss": 0.344,
|
|
"step": 12730
|
|
},
|
|
{
|
|
"epoch": 1.5340156532209512,
|
|
"grad_norm": 3.858191728591919,
|
|
"learning_rate": 7.745057004040751e-07,
|
|
"loss": 0.3461,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 1.5352197471402769,
|
|
"grad_norm": 4.396629333496094,
|
|
"learning_rate": 7.740664242798919e-07,
|
|
"loss": 0.3496,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 1.5364238410596025,
|
|
"grad_norm": 4.17794132232666,
|
|
"learning_rate": 7.73626845554078e-07,
|
|
"loss": 0.3584,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 1.5376279349789284,
|
|
"grad_norm": 6.110503673553467,
|
|
"learning_rate": 7.731869647119801e-07,
|
|
"loss": 0.3741,
|
|
"step": 12770
|
|
},
|
|
{
|
|
"epoch": 1.5388320288982542,
|
|
"grad_norm": 4.858775615692139,
|
|
"learning_rate": 7.727467822392787e-07,
|
|
"loss": 0.3489,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 1.5400361228175798,
|
|
"grad_norm": 4.899129390716553,
|
|
"learning_rate": 7.723062986219871e-07,
|
|
"loss": 0.3574,
|
|
"step": 12790
|
|
},
|
|
{
|
|
"epoch": 1.5412402167369055,
|
|
"grad_norm": 4.589954853057861,
|
|
"learning_rate": 7.718655143464508e-07,
|
|
"loss": 0.3697,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 1.542444310656231,
|
|
"grad_norm": 4.615177154541016,
|
|
"learning_rate": 7.71424429899348e-07,
|
|
"loss": 0.3574,
|
|
"step": 12810
|
|
},
|
|
{
|
|
"epoch": 1.5436484045755567,
|
|
"grad_norm": 5.081363201141357,
|
|
"learning_rate": 7.709830457676876e-07,
|
|
"loss": 0.3793,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 1.5448524984948826,
|
|
"grad_norm": 5.210774898529053,
|
|
"learning_rate": 7.7054136243881e-07,
|
|
"loss": 0.3562,
|
|
"step": 12830
|
|
},
|
|
{
|
|
"epoch": 1.5460565924142085,
|
|
"grad_norm": 4.458885192871094,
|
|
"learning_rate": 7.700993804003855e-07,
|
|
"loss": 0.3619,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 1.547260686333534,
|
|
"grad_norm": 4.320379734039307,
|
|
"learning_rate": 7.696571001404142e-07,
|
|
"loss": 0.3629,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 1.5484647802528597,
|
|
"grad_norm": 4.779387474060059,
|
|
"learning_rate": 7.692145221472258e-07,
|
|
"loss": 0.3633,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 1.5496688741721854,
|
|
"grad_norm": 4.924083709716797,
|
|
"learning_rate": 7.687716469094786e-07,
|
|
"loss": 0.3624,
|
|
"step": 12870
|
|
},
|
|
{
|
|
"epoch": 1.550872968091511,
|
|
"grad_norm": 5.194228649139404,
|
|
"learning_rate": 7.68328474916159e-07,
|
|
"loss": 0.3592,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 1.5520770620108368,
|
|
"grad_norm": 4.606070041656494,
|
|
"learning_rate": 7.67885006656581e-07,
|
|
"loss": 0.3686,
|
|
"step": 12890
|
|
},
|
|
{
|
|
"epoch": 1.5532811559301627,
|
|
"grad_norm": 4.206083297729492,
|
|
"learning_rate": 7.674412426203859e-07,
|
|
"loss": 0.3551,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 1.5544852498494883,
|
|
"grad_norm": 4.67086124420166,
|
|
"learning_rate": 7.669971832975416e-07,
|
|
"loss": 0.3569,
|
|
"step": 12910
|
|
},
|
|
{
|
|
"epoch": 1.555689343768814,
|
|
"grad_norm": 5.904470443725586,
|
|
"learning_rate": 7.665528291783417e-07,
|
|
"loss": 0.3407,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 1.5568934376881396,
|
|
"grad_norm": 4.242117404937744,
|
|
"learning_rate": 7.661081807534058e-07,
|
|
"loss": 0.3422,
|
|
"step": 12930
|
|
},
|
|
{
|
|
"epoch": 1.5580975316074652,
|
|
"grad_norm": 4.790373802185059,
|
|
"learning_rate": 7.656632385136778e-07,
|
|
"loss": 0.3573,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 1.559301625526791,
|
|
"grad_norm": 4.904318809509277,
|
|
"learning_rate": 7.652180029504268e-07,
|
|
"loss": 0.3606,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 1.560505719446117,
|
|
"grad_norm": 4.863579750061035,
|
|
"learning_rate": 7.64772474555245e-07,
|
|
"loss": 0.361,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 1.5617098133654426,
|
|
"grad_norm": 5.459078311920166,
|
|
"learning_rate": 7.643266538200483e-07,
|
|
"loss": 0.3577,
|
|
"step": 12970
|
|
},
|
|
{
|
|
"epoch": 1.5629139072847682,
|
|
"grad_norm": 5.426388740539551,
|
|
"learning_rate": 7.638805412370755e-07,
|
|
"loss": 0.3725,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 1.5641180012040938,
|
|
"grad_norm": 4.903288841247559,
|
|
"learning_rate": 7.634341372988872e-07,
|
|
"loss": 0.3562,
|
|
"step": 12990
|
|
},
|
|
{
|
|
"epoch": 1.5653220951234195,
|
|
"grad_norm": 4.128101825714111,
|
|
"learning_rate": 7.629874424983664e-07,
|
|
"loss": 0.3405,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 1.5665261890427453,
|
|
"grad_norm": 4.6488213539123535,
|
|
"learning_rate": 7.625404573287163e-07,
|
|
"loss": 0.3731,
|
|
"step": 13010
|
|
},
|
|
{
|
|
"epoch": 1.5677302829620712,
|
|
"grad_norm": 4.610156059265137,
|
|
"learning_rate": 7.620931822834614e-07,
|
|
"loss": 0.3575,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 1.5689343768813968,
|
|
"grad_norm": 5.422335147857666,
|
|
"learning_rate": 7.616456178564462e-07,
|
|
"loss": 0.3833,
|
|
"step": 13030
|
|
},
|
|
{
|
|
"epoch": 1.5701384708007224,
|
|
"grad_norm": 4.844593048095703,
|
|
"learning_rate": 7.611977645418343e-07,
|
|
"loss": 0.3647,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 1.571342564720048,
|
|
"grad_norm": 4.274131774902344,
|
|
"learning_rate": 7.607496228341088e-07,
|
|
"loss": 0.3542,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 1.5725466586393737,
|
|
"grad_norm": 4.641569137573242,
|
|
"learning_rate": 7.60301193228071e-07,
|
|
"loss": 0.3704,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 1.5737507525586996,
|
|
"grad_norm": 4.771531105041504,
|
|
"learning_rate": 7.598524762188395e-07,
|
|
"loss": 0.3529,
|
|
"step": 13070
|
|
},
|
|
{
|
|
"epoch": 1.5749548464780254,
|
|
"grad_norm": 5.63432502746582,
|
|
"learning_rate": 7.594034723018514e-07,
|
|
"loss": 0.3554,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 1.576158940397351,
|
|
"grad_norm": 3.5664002895355225,
|
|
"learning_rate": 7.589541819728596e-07,
|
|
"loss": 0.3617,
|
|
"step": 13090
|
|
},
|
|
{
|
|
"epoch": 1.5773630343166767,
|
|
"grad_norm": 4.43233060836792,
|
|
"learning_rate": 7.585046057279337e-07,
|
|
"loss": 0.3795,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 1.5785671282360023,
|
|
"grad_norm": 4.293588638305664,
|
|
"learning_rate": 7.580547440634587e-07,
|
|
"loss": 0.3361,
|
|
"step": 13110
|
|
},
|
|
{
|
|
"epoch": 1.5797712221553282,
|
|
"grad_norm": 4.606287479400635,
|
|
"learning_rate": 7.576045974761351e-07,
|
|
"loss": 0.3573,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 1.5809753160746538,
|
|
"grad_norm": 4.9702558517456055,
|
|
"learning_rate": 7.571541664629775e-07,
|
|
"loss": 0.3718,
|
|
"step": 13130
|
|
},
|
|
{
|
|
"epoch": 1.5821794099939797,
|
|
"grad_norm": 4.685069561004639,
|
|
"learning_rate": 7.567034515213151e-07,
|
|
"loss": 0.3704,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 1.5833835039133053,
|
|
"grad_norm": 4.804528713226318,
|
|
"learning_rate": 7.562524531487902e-07,
|
|
"loss": 0.3511,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 1.584587597832631,
|
|
"grad_norm": 5.332268714904785,
|
|
"learning_rate": 7.558011718433582e-07,
|
|
"loss": 0.3573,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 1.5857916917519566,
|
|
"grad_norm": 4.4862284660339355,
|
|
"learning_rate": 7.553496081032867e-07,
|
|
"loss": 0.3423,
|
|
"step": 13170
|
|
},
|
|
{
|
|
"epoch": 1.5869957856712824,
|
|
"grad_norm": 4.632198810577393,
|
|
"learning_rate": 7.548977624271555e-07,
|
|
"loss": 0.3719,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 1.588199879590608,
|
|
"grad_norm": 4.4371137619018555,
|
|
"learning_rate": 7.544456353138553e-07,
|
|
"loss": 0.3515,
|
|
"step": 13190
|
|
},
|
|
{
|
|
"epoch": 1.589403973509934,
|
|
"grad_norm": 4.162461757659912,
|
|
"learning_rate": 7.539932272625879e-07,
|
|
"loss": 0.363,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 1.5906080674292595,
|
|
"grad_norm": 4.980907917022705,
|
|
"learning_rate": 7.535405387728648e-07,
|
|
"loss": 0.362,
|
|
"step": 13210
|
|
},
|
|
{
|
|
"epoch": 1.5918121613485852,
|
|
"grad_norm": 4.321689128875732,
|
|
"learning_rate": 7.530875703445077e-07,
|
|
"loss": 0.3441,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 1.5930162552679108,
|
|
"grad_norm": 4.930966854095459,
|
|
"learning_rate": 7.526343224776471e-07,
|
|
"loss": 0.3505,
|
|
"step": 13230
|
|
},
|
|
{
|
|
"epoch": 1.5942203491872367,
|
|
"grad_norm": 4.267889499664307,
|
|
"learning_rate": 7.52180795672722e-07,
|
|
"loss": 0.3678,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 1.5954244431065623,
|
|
"grad_norm": 3.8834383487701416,
|
|
"learning_rate": 7.517269904304794e-07,
|
|
"loss": 0.3648,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 1.5966285370258881,
|
|
"grad_norm": 4.397730827331543,
|
|
"learning_rate": 7.512729072519739e-07,
|
|
"loss": 0.3601,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 1.5978326309452138,
|
|
"grad_norm": 4.559187889099121,
|
|
"learning_rate": 7.508185466385666e-07,
|
|
"loss": 0.3508,
|
|
"step": 13270
|
|
},
|
|
{
|
|
"epoch": 1.5990367248645394,
|
|
"grad_norm": 4.514613628387451,
|
|
"learning_rate": 7.503639090919255e-07,
|
|
"loss": 0.3578,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 1.600240818783865,
|
|
"grad_norm": 4.5233073234558105,
|
|
"learning_rate": 7.499089951140237e-07,
|
|
"loss": 0.3516,
|
|
"step": 13290
|
|
},
|
|
{
|
|
"epoch": 1.601444912703191,
|
|
"grad_norm": 4.616694450378418,
|
|
"learning_rate": 7.494538052071402e-07,
|
|
"loss": 0.3616,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 1.6026490066225165,
|
|
"grad_norm": 4.6488518714904785,
|
|
"learning_rate": 7.489983398738579e-07,
|
|
"loss": 0.3582,
|
|
"step": 13310
|
|
},
|
|
{
|
|
"epoch": 1.6038531005418424,
|
|
"grad_norm": 4.645969390869141,
|
|
"learning_rate": 7.485425996170644e-07,
|
|
"loss": 0.3548,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 1.605057194461168,
|
|
"grad_norm": 5.864965438842773,
|
|
"learning_rate": 7.480865849399507e-07,
|
|
"loss": 0.3587,
|
|
"step": 13330
|
|
},
|
|
{
|
|
"epoch": 1.6062612883804936,
|
|
"grad_norm": 4.283803939819336,
|
|
"learning_rate": 7.476302963460108e-07,
|
|
"loss": 0.3626,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 1.6074653822998193,
|
|
"grad_norm": 4.545533657073975,
|
|
"learning_rate": 7.47173734339041e-07,
|
|
"loss": 0.3526,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 1.6086694762191451,
|
|
"grad_norm": 4.885293483734131,
|
|
"learning_rate": 7.467168994231393e-07,
|
|
"loss": 0.3685,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 1.6098735701384708,
|
|
"grad_norm": 4.112198829650879,
|
|
"learning_rate": 7.462597921027056e-07,
|
|
"loss": 0.3727,
|
|
"step": 13370
|
|
},
|
|
{
|
|
"epoch": 1.6110776640577966,
|
|
"grad_norm": 4.272058963775635,
|
|
"learning_rate": 7.458024128824403e-07,
|
|
"loss": 0.3567,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 1.6122817579771223,
|
|
"grad_norm": 4.891336441040039,
|
|
"learning_rate": 7.453447622673438e-07,
|
|
"loss": 0.3566,
|
|
"step": 13390
|
|
},
|
|
{
|
|
"epoch": 1.6134858518964479,
|
|
"grad_norm": 5.003636837005615,
|
|
"learning_rate": 7.448868407627163e-07,
|
|
"loss": 0.3717,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 1.6146899458157735,
|
|
"grad_norm": 3.9844002723693848,
|
|
"learning_rate": 7.444286488741571e-07,
|
|
"loss": 0.3537,
|
|
"step": 13410
|
|
},
|
|
{
|
|
"epoch": 1.6158940397350994,
|
|
"grad_norm": 4.326488018035889,
|
|
"learning_rate": 7.439701871075641e-07,
|
|
"loss": 0.3353,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 1.617098133654425,
|
|
"grad_norm": 4.168161392211914,
|
|
"learning_rate": 7.435114559691333e-07,
|
|
"loss": 0.3506,
|
|
"step": 13430
|
|
},
|
|
{
|
|
"epoch": 1.6183022275737509,
|
|
"grad_norm": 5.062152862548828,
|
|
"learning_rate": 7.430524559653575e-07,
|
|
"loss": 0.3536,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 1.6195063214930765,
|
|
"grad_norm": 5.29563570022583,
|
|
"learning_rate": 7.425931876030272e-07,
|
|
"loss": 0.359,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 1.6207104154124021,
|
|
"grad_norm": 4.655216693878174,
|
|
"learning_rate": 7.421336513892284e-07,
|
|
"loss": 0.3459,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 1.6219145093317278,
|
|
"grad_norm": 4.558264255523682,
|
|
"learning_rate": 7.416738478313438e-07,
|
|
"loss": 0.3603,
|
|
"step": 13470
|
|
},
|
|
{
|
|
"epoch": 1.6231186032510536,
|
|
"grad_norm": 4.36596155166626,
|
|
"learning_rate": 7.412137774370501e-07,
|
|
"loss": 0.3632,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 1.6243226971703792,
|
|
"grad_norm": 4.248297214508057,
|
|
"learning_rate": 7.407534407143198e-07,
|
|
"loss": 0.3575,
|
|
"step": 13490
|
|
},
|
|
{
|
|
"epoch": 1.625526791089705,
|
|
"grad_norm": 4.935293197631836,
|
|
"learning_rate": 7.402928381714184e-07,
|
|
"loss": 0.3583,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 1.6267308850090307,
|
|
"grad_norm": 4.29832649230957,
|
|
"learning_rate": 7.398319703169057e-07,
|
|
"loss": 0.3593,
|
|
"step": 13510
|
|
},
|
|
{
|
|
"epoch": 1.6279349789283564,
|
|
"grad_norm": 4.707507610321045,
|
|
"learning_rate": 7.39370837659634e-07,
|
|
"loss": 0.3486,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 1.629139072847682,
|
|
"grad_norm": 4.7867326736450195,
|
|
"learning_rate": 7.389094407087481e-07,
|
|
"loss": 0.3708,
|
|
"step": 13530
|
|
},
|
|
{
|
|
"epoch": 1.6303431667670079,
|
|
"grad_norm": 5.004173755645752,
|
|
"learning_rate": 7.384477799736847e-07,
|
|
"loss": 0.3693,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 1.6315472606863335,
|
|
"grad_norm": 4.378966331481934,
|
|
"learning_rate": 7.379858559641716e-07,
|
|
"loss": 0.3792,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 1.6327513546056593,
|
|
"grad_norm": 4.35708475112915,
|
|
"learning_rate": 7.375236691902272e-07,
|
|
"loss": 0.357,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 1.633955448524985,
|
|
"grad_norm": 4.158879280090332,
|
|
"learning_rate": 7.370612201621606e-07,
|
|
"loss": 0.3705,
|
|
"step": 13570
|
|
},
|
|
{
|
|
"epoch": 1.6351595424443106,
|
|
"grad_norm": 4.620648384094238,
|
|
"learning_rate": 7.365985093905693e-07,
|
|
"loss": 0.3288,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 1.6363636363636362,
|
|
"grad_norm": 4.588129997253418,
|
|
"learning_rate": 7.361355373863413e-07,
|
|
"loss": 0.3545,
|
|
"step": 13590
|
|
},
|
|
{
|
|
"epoch": 1.637567730282962,
|
|
"grad_norm": 4.273639678955078,
|
|
"learning_rate": 7.356723046606517e-07,
|
|
"loss": 0.3597,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 1.6387718242022877,
|
|
"grad_norm": 4.793459415435791,
|
|
"learning_rate": 7.352088117249644e-07,
|
|
"loss": 0.3532,
|
|
"step": 13610
|
|
},
|
|
{
|
|
"epoch": 1.6399759181216136,
|
|
"grad_norm": 4.27385950088501,
|
|
"learning_rate": 7.347450590910299e-07,
|
|
"loss": 0.3787,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 1.6411800120409392,
|
|
"grad_norm": 4.229093551635742,
|
|
"learning_rate": 7.34281047270886e-07,
|
|
"loss": 0.3592,
|
|
"step": 13630
|
|
},
|
|
{
|
|
"epoch": 1.6423841059602649,
|
|
"grad_norm": 4.402678489685059,
|
|
"learning_rate": 7.338167767768564e-07,
|
|
"loss": 0.3612,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 1.6435881998795905,
|
|
"grad_norm": 4.09978723526001,
|
|
"learning_rate": 7.333522481215503e-07,
|
|
"loss": 0.3571,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 1.6447922937989163,
|
|
"grad_norm": 4.659477710723877,
|
|
"learning_rate": 7.32887461817862e-07,
|
|
"loss": 0.3725,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 1.645996387718242,
|
|
"grad_norm": 4.500072002410889,
|
|
"learning_rate": 7.324224183789707e-07,
|
|
"loss": 0.3458,
|
|
"step": 13670
|
|
},
|
|
{
|
|
"epoch": 1.6472004816375678,
|
|
"grad_norm": 5.1016526222229,
|
|
"learning_rate": 7.319571183183388e-07,
|
|
"loss": 0.3734,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 1.6484045755568935,
|
|
"grad_norm": 4.819193363189697,
|
|
"learning_rate": 7.314915621497129e-07,
|
|
"loss": 0.3601,
|
|
"step": 13690
|
|
},
|
|
{
|
|
"epoch": 1.649608669476219,
|
|
"grad_norm": 4.4075026512146,
|
|
"learning_rate": 7.310257503871214e-07,
|
|
"loss": 0.3556,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 1.6508127633955447,
|
|
"grad_norm": 4.471024036407471,
|
|
"learning_rate": 7.305596835448753e-07,
|
|
"loss": 0.3625,
|
|
"step": 13710
|
|
},
|
|
{
|
|
"epoch": 1.6520168573148706,
|
|
"grad_norm": 4.29016637802124,
|
|
"learning_rate": 7.300933621375676e-07,
|
|
"loss": 0.3619,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 1.6532209512341962,
|
|
"grad_norm": 4.514208793640137,
|
|
"learning_rate": 7.296267866800722e-07,
|
|
"loss": 0.3622,
|
|
"step": 13730
|
|
},
|
|
{
|
|
"epoch": 1.654425045153522,
|
|
"grad_norm": 4.275468826293945,
|
|
"learning_rate": 7.291599576875432e-07,
|
|
"loss": 0.3667,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 1.6556291390728477,
|
|
"grad_norm": 4.0805559158325195,
|
|
"learning_rate": 7.286928756754148e-07,
|
|
"loss": 0.371,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 1.6568332329921733,
|
|
"grad_norm": 4.84345006942749,
|
|
"learning_rate": 7.282255411594006e-07,
|
|
"loss": 0.3696,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 1.658037326911499,
|
|
"grad_norm": 4.703734874725342,
|
|
"learning_rate": 7.277579546554931e-07,
|
|
"loss": 0.3673,
|
|
"step": 13770
|
|
},
|
|
{
|
|
"epoch": 1.6592414208308248,
|
|
"grad_norm": 4.18894624710083,
|
|
"learning_rate": 7.272901166799627e-07,
|
|
"loss": 0.3365,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 1.6604455147501507,
|
|
"grad_norm": 4.9901204109191895,
|
|
"learning_rate": 7.268220277493578e-07,
|
|
"loss": 0.3588,
|
|
"step": 13790
|
|
},
|
|
{
|
|
"epoch": 1.6616496086694763,
|
|
"grad_norm": 4.896132946014404,
|
|
"learning_rate": 7.263536883805039e-07,
|
|
"loss": 0.3659,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 1.662853702588802,
|
|
"grad_norm": 4.311833381652832,
|
|
"learning_rate": 7.258850990905025e-07,
|
|
"loss": 0.3707,
|
|
"step": 13810
|
|
},
|
|
{
|
|
"epoch": 1.6640577965081276,
|
|
"grad_norm": 4.157628059387207,
|
|
"learning_rate": 7.254162603967317e-07,
|
|
"loss": 0.3498,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 1.6652618904274532,
|
|
"grad_norm": 5.240469932556152,
|
|
"learning_rate": 7.249471728168443e-07,
|
|
"loss": 0.3559,
|
|
"step": 13830
|
|
},
|
|
{
|
|
"epoch": 1.666465984346779,
|
|
"grad_norm": 4.077708721160889,
|
|
"learning_rate": 7.244778368687687e-07,
|
|
"loss": 0.3745,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 1.667670078266105,
|
|
"grad_norm": 4.9550395011901855,
|
|
"learning_rate": 7.240082530707069e-07,
|
|
"loss": 0.3563,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 1.6688741721854305,
|
|
"grad_norm": 5.530270576477051,
|
|
"learning_rate": 7.235384219411348e-07,
|
|
"loss": 0.3764,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 1.6700782661047562,
|
|
"grad_norm": 4.50790548324585,
|
|
"learning_rate": 7.230683439988012e-07,
|
|
"loss": 0.3471,
|
|
"step": 13870
|
|
},
|
|
{
|
|
"epoch": 1.6712823600240818,
|
|
"grad_norm": 4.373943328857422,
|
|
"learning_rate": 7.225980197627277e-07,
|
|
"loss": 0.3601,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 1.6724864539434074,
|
|
"grad_norm": 3.9449055194854736,
|
|
"learning_rate": 7.221274497522076e-07,
|
|
"loss": 0.3533,
|
|
"step": 13890
|
|
},
|
|
{
|
|
"epoch": 1.6736905478627333,
|
|
"grad_norm": 4.625890254974365,
|
|
"learning_rate": 7.216566344868058e-07,
|
|
"loss": 0.3771,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 1.6748946417820592,
|
|
"grad_norm": 4.7843475341796875,
|
|
"learning_rate": 7.211855744863577e-07,
|
|
"loss": 0.3477,
|
|
"step": 13910
|
|
},
|
|
{
|
|
"epoch": 1.6760987357013848,
|
|
"grad_norm": 4.275618076324463,
|
|
"learning_rate": 7.207142702709688e-07,
|
|
"loss": 0.3452,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 1.6773028296207104,
|
|
"grad_norm": 5.26132869720459,
|
|
"learning_rate": 7.202427223610152e-07,
|
|
"loss": 0.3568,
|
|
"step": 13930
|
|
},
|
|
{
|
|
"epoch": 1.678506923540036,
|
|
"grad_norm": 4.528031826019287,
|
|
"learning_rate": 7.197709312771406e-07,
|
|
"loss": 0.347,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 1.6797110174593617,
|
|
"grad_norm": 4.68961763381958,
|
|
"learning_rate": 7.192988975402583e-07,
|
|
"loss": 0.3687,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 1.6809151113786875,
|
|
"grad_norm": 4.3820719718933105,
|
|
"learning_rate": 7.188266216715493e-07,
|
|
"loss": 0.3572,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 1.6821192052980134,
|
|
"grad_norm": 3.974177598953247,
|
|
"learning_rate": 7.183541041924616e-07,
|
|
"loss": 0.34,
|
|
"step": 13970
|
|
},
|
|
{
|
|
"epoch": 1.683323299217339,
|
|
"grad_norm": 4.8562331199646,
|
|
"learning_rate": 7.178813456247102e-07,
|
|
"loss": 0.3532,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 1.6845273931366647,
|
|
"grad_norm": 3.9439549446105957,
|
|
"learning_rate": 7.174083464902763e-07,
|
|
"loss": 0.3459,
|
|
"step": 13990
|
|
},
|
|
{
|
|
"epoch": 1.6857314870559903,
|
|
"grad_norm": 4.226308345794678,
|
|
"learning_rate": 7.16935107311407e-07,
|
|
"loss": 0.3352,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 1.686935580975316,
|
|
"grad_norm": 4.850135326385498,
|
|
"learning_rate": 7.164616286106135e-07,
|
|
"loss": 0.3661,
|
|
"step": 14010
|
|
},
|
|
{
|
|
"epoch": 1.6881396748946418,
|
|
"grad_norm": 4.845891952514648,
|
|
"learning_rate": 7.159879109106725e-07,
|
|
"loss": 0.3868,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 1.6893437688139676,
|
|
"grad_norm": 5.063507556915283,
|
|
"learning_rate": 7.155139547346242e-07,
|
|
"loss": 0.3628,
|
|
"step": 14030
|
|
},
|
|
{
|
|
"epoch": 1.6905478627332933,
|
|
"grad_norm": 4.6817216873168945,
|
|
"learning_rate": 7.15039760605772e-07,
|
|
"loss": 0.3744,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 1.691751956652619,
|
|
"grad_norm": 4.315075874328613,
|
|
"learning_rate": 7.145653290476819e-07,
|
|
"loss": 0.3613,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 1.6929560505719445,
|
|
"grad_norm": 4.234760284423828,
|
|
"learning_rate": 7.140906605841825e-07,
|
|
"loss": 0.3733,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 1.6941601444912702,
|
|
"grad_norm": 5.843511581420898,
|
|
"learning_rate": 7.136157557393637e-07,
|
|
"loss": 0.3443,
|
|
"step": 14070
|
|
},
|
|
{
|
|
"epoch": 1.695364238410596,
|
|
"grad_norm": 4.704221248626709,
|
|
"learning_rate": 7.131406150375762e-07,
|
|
"loss": 0.3384,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 1.6965683323299219,
|
|
"grad_norm": 4.1078200340271,
|
|
"learning_rate": 7.126652390034316e-07,
|
|
"loss": 0.3554,
|
|
"step": 14090
|
|
},
|
|
{
|
|
"epoch": 1.6977724262492475,
|
|
"grad_norm": 4.6124773025512695,
|
|
"learning_rate": 7.12189628161801e-07,
|
|
"loss": 0.3323,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 1.6989765201685731,
|
|
"grad_norm": 3.9569902420043945,
|
|
"learning_rate": 7.117137830378146e-07,
|
|
"loss": 0.3581,
|
|
"step": 14110
|
|
},
|
|
{
|
|
"epoch": 1.7001806140878988,
|
|
"grad_norm": 4.327024459838867,
|
|
"learning_rate": 7.112377041568617e-07,
|
|
"loss": 0.3605,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 1.7013847080072244,
|
|
"grad_norm": 4.041974067687988,
|
|
"learning_rate": 7.107613920445895e-07,
|
|
"loss": 0.3514,
|
|
"step": 14130
|
|
},
|
|
{
|
|
"epoch": 1.7025888019265503,
|
|
"grad_norm": 4.295658588409424,
|
|
"learning_rate": 7.102848472269026e-07,
|
|
"loss": 0.3489,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 1.7037928958458761,
|
|
"grad_norm": 4.117722988128662,
|
|
"learning_rate": 7.098080702299628e-07,
|
|
"loss": 0.3382,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 1.7049969897652018,
|
|
"grad_norm": 5.249290943145752,
|
|
"learning_rate": 7.093310615801879e-07,
|
|
"loss": 0.3696,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 1.7062010836845274,
|
|
"grad_norm": 3.8647286891937256,
|
|
"learning_rate": 7.088538218042518e-07,
|
|
"loss": 0.3403,
|
|
"step": 14170
|
|
},
|
|
{
|
|
"epoch": 1.707405177603853,
|
|
"grad_norm": 4.454891204833984,
|
|
"learning_rate": 7.083763514290834e-07,
|
|
"loss": 0.3743,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 1.7086092715231787,
|
|
"grad_norm": 4.183931827545166,
|
|
"learning_rate": 7.078986509818662e-07,
|
|
"loss": 0.3493,
|
|
"step": 14190
|
|
},
|
|
{
|
|
"epoch": 1.7098133654425045,
|
|
"grad_norm": 3.9510889053344727,
|
|
"learning_rate": 7.074207209900379e-07,
|
|
"loss": 0.3469,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 1.7110174593618304,
|
|
"grad_norm": 4.839264869689941,
|
|
"learning_rate": 7.069425619812896e-07,
|
|
"loss": 0.3444,
|
|
"step": 14210
|
|
},
|
|
{
|
|
"epoch": 1.712221553281156,
|
|
"grad_norm": 4.237350940704346,
|
|
"learning_rate": 7.064641744835649e-07,
|
|
"loss": 0.3474,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 1.7134256472004816,
|
|
"grad_norm": 4.17114782333374,
|
|
"learning_rate": 7.059855590250603e-07,
|
|
"loss": 0.3465,
|
|
"step": 14230
|
|
},
|
|
{
|
|
"epoch": 1.7146297411198073,
|
|
"grad_norm": 4.114003658294678,
|
|
"learning_rate": 7.055067161342233e-07,
|
|
"loss": 0.3674,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 1.715833835039133,
|
|
"grad_norm": 4.886813640594482,
|
|
"learning_rate": 7.050276463397533e-07,
|
|
"loss": 0.3848,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 1.7170379289584587,
|
|
"grad_norm": 4.069955348968506,
|
|
"learning_rate": 7.045483501705996e-07,
|
|
"loss": 0.3493,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 1.7182420228777846,
|
|
"grad_norm": 4.502857685089111,
|
|
"learning_rate": 7.040688281559617e-07,
|
|
"loss": 0.3548,
|
|
"step": 14270
|
|
},
|
|
{
|
|
"epoch": 1.7194461167971102,
|
|
"grad_norm": 4.283501148223877,
|
|
"learning_rate": 7.035890808252884e-07,
|
|
"loss": 0.3571,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 1.7206502107164359,
|
|
"grad_norm": 4.563022136688232,
|
|
"learning_rate": 7.031091087082772e-07,
|
|
"loss": 0.3485,
|
|
"step": 14290
|
|
},
|
|
{
|
|
"epoch": 1.7218543046357615,
|
|
"grad_norm": 4.165189266204834,
|
|
"learning_rate": 7.02628912334874e-07,
|
|
"loss": 0.3417,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 1.7230583985550871,
|
|
"grad_norm": 4.657063961029053,
|
|
"learning_rate": 7.021484922352721e-07,
|
|
"loss": 0.3611,
|
|
"step": 14310
|
|
},
|
|
{
|
|
"epoch": 1.724262492474413,
|
|
"grad_norm": 6.094346046447754,
|
|
"learning_rate": 7.016678489399121e-07,
|
|
"loss": 0.3371,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 1.7254665863937388,
|
|
"grad_norm": 4.576262474060059,
|
|
"learning_rate": 7.011869829794806e-07,
|
|
"loss": 0.3624,
|
|
"step": 14330
|
|
},
|
|
{
|
|
"epoch": 1.7266706803130645,
|
|
"grad_norm": 5.231967449188232,
|
|
"learning_rate": 7.007058948849105e-07,
|
|
"loss": 0.3745,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 1.72787477423239,
|
|
"grad_norm": 4.39863395690918,
|
|
"learning_rate": 7.002245851873794e-07,
|
|
"loss": 0.3545,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 1.7290788681517157,
|
|
"grad_norm": 4.428983211517334,
|
|
"learning_rate": 6.997430544183103e-07,
|
|
"loss": 0.3534,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 1.7302829620710414,
|
|
"grad_norm": 5.451033115386963,
|
|
"learning_rate": 6.992613031093698e-07,
|
|
"loss": 0.3584,
|
|
"step": 14370
|
|
},
|
|
{
|
|
"epoch": 1.7314870559903672,
|
|
"grad_norm": 4.715031147003174,
|
|
"learning_rate": 6.987793317924682e-07,
|
|
"loss": 0.3643,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 1.732691149909693,
|
|
"grad_norm": 4.199245452880859,
|
|
"learning_rate": 6.982971409997583e-07,
|
|
"loss": 0.3539,
|
|
"step": 14390
|
|
},
|
|
{
|
|
"epoch": 1.7338952438290187,
|
|
"grad_norm": 5.606119632720947,
|
|
"learning_rate": 6.97814731263636e-07,
|
|
"loss": 0.3613,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 1.7350993377483444,
|
|
"grad_norm": 5.036284923553467,
|
|
"learning_rate": 6.973321031167382e-07,
|
|
"loss": 0.3679,
|
|
"step": 14410
|
|
},
|
|
{
|
|
"epoch": 1.73630343166767,
|
|
"grad_norm": 4.951879978179932,
|
|
"learning_rate": 6.968492570919434e-07,
|
|
"loss": 0.3572,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 1.7375075255869958,
|
|
"grad_norm": 4.428969860076904,
|
|
"learning_rate": 6.963661937223703e-07,
|
|
"loss": 0.3538,
|
|
"step": 14430
|
|
},
|
|
{
|
|
"epoch": 1.7387116195063215,
|
|
"grad_norm": 3.7024569511413574,
|
|
"learning_rate": 6.958829135413782e-07,
|
|
"loss": 0.3644,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 1.7399157134256473,
|
|
"grad_norm": 4.4168381690979,
|
|
"learning_rate": 6.95399417082565e-07,
|
|
"loss": 0.3498,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 1.741119807344973,
|
|
"grad_norm": 4.818751335144043,
|
|
"learning_rate": 6.949157048797678e-07,
|
|
"loss": 0.3726,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 1.7423239012642986,
|
|
"grad_norm": 5.769382953643799,
|
|
"learning_rate": 6.944317774670622e-07,
|
|
"loss": 0.3517,
|
|
"step": 14470
|
|
},
|
|
{
|
|
"epoch": 1.7435279951836242,
|
|
"grad_norm": 4.914524078369141,
|
|
"learning_rate": 6.939476353787607e-07,
|
|
"loss": 0.349,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 1.74473208910295,
|
|
"grad_norm": 4.6800456047058105,
|
|
"learning_rate": 6.934632791494134e-07,
|
|
"loss": 0.3725,
|
|
"step": 14490
|
|
},
|
|
{
|
|
"epoch": 1.7459361830222757,
|
|
"grad_norm": 4.627834796905518,
|
|
"learning_rate": 6.929787093138067e-07,
|
|
"loss": 0.359,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 1.7471402769416016,
|
|
"grad_norm": 5.098109245300293,
|
|
"learning_rate": 6.924939264069626e-07,
|
|
"loss": 0.3502,
|
|
"step": 14510
|
|
},
|
|
{
|
|
"epoch": 1.7483443708609272,
|
|
"grad_norm": 4.18192720413208,
|
|
"learning_rate": 6.920089309641388e-07,
|
|
"loss": 0.3448,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 1.7495484647802528,
|
|
"grad_norm": 4.4052815437316895,
|
|
"learning_rate": 6.915237235208274e-07,
|
|
"loss": 0.3459,
|
|
"step": 14530
|
|
},
|
|
{
|
|
"epoch": 1.7507525586995785,
|
|
"grad_norm": 5.557136058807373,
|
|
"learning_rate": 6.910383046127544e-07,
|
|
"loss": 0.355,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 1.7519566526189043,
|
|
"grad_norm": 5.7654128074646,
|
|
"learning_rate": 6.905526747758796e-07,
|
|
"loss": 0.3624,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 1.75316074653823,
|
|
"grad_norm": 5.040695667266846,
|
|
"learning_rate": 6.900668345463957e-07,
|
|
"loss": 0.3513,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 1.7543648404575558,
|
|
"grad_norm": 4.529175758361816,
|
|
"learning_rate": 6.895807844607274e-07,
|
|
"loss": 0.348,
|
|
"step": 14570
|
|
},
|
|
{
|
|
"epoch": 1.7555689343768814,
|
|
"grad_norm": 4.473850727081299,
|
|
"learning_rate": 6.890945250555312e-07,
|
|
"loss": 0.3708,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 1.756773028296207,
|
|
"grad_norm": 4.2242865562438965,
|
|
"learning_rate": 6.88608056867695e-07,
|
|
"loss": 0.3536,
|
|
"step": 14590
|
|
},
|
|
{
|
|
"epoch": 1.7579771222155327,
|
|
"grad_norm": 4.953219413757324,
|
|
"learning_rate": 6.881213804343369e-07,
|
|
"loss": 0.3564,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 1.7591812161348586,
|
|
"grad_norm": 4.626575469970703,
|
|
"learning_rate": 6.876344962928051e-07,
|
|
"loss": 0.3624,
|
|
"step": 14610
|
|
},
|
|
{
|
|
"epoch": 1.7603853100541842,
|
|
"grad_norm": 5.615645408630371,
|
|
"learning_rate": 6.87147404980677e-07,
|
|
"loss": 0.3711,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 1.76158940397351,
|
|
"grad_norm": 4.350038051605225,
|
|
"learning_rate": 6.866601070357587e-07,
|
|
"loss": 0.3517,
|
|
"step": 14630
|
|
},
|
|
{
|
|
"epoch": 1.7627934978928357,
|
|
"grad_norm": 4.5289387702941895,
|
|
"learning_rate": 6.861726029960849e-07,
|
|
"loss": 0.3602,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 1.7639975918121613,
|
|
"grad_norm": 5.127388954162598,
|
|
"learning_rate": 6.856848933999173e-07,
|
|
"loss": 0.345,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 1.765201685731487,
|
|
"grad_norm": 4.675601482391357,
|
|
"learning_rate": 6.851969787857447e-07,
|
|
"loss": 0.3484,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 1.7664057796508128,
|
|
"grad_norm": 3.9305527210235596,
|
|
"learning_rate": 6.847088596922824e-07,
|
|
"loss": 0.3478,
|
|
"step": 14670
|
|
},
|
|
{
|
|
"epoch": 1.7676098735701384,
|
|
"grad_norm": 4.547889709472656,
|
|
"learning_rate": 6.842205366584715e-07,
|
|
"loss": 0.3627,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 1.7688139674894643,
|
|
"grad_norm": 5.042651653289795,
|
|
"learning_rate": 6.837320102234781e-07,
|
|
"loss": 0.3595,
|
|
"step": 14690
|
|
},
|
|
{
|
|
"epoch": 1.77001806140879,
|
|
"grad_norm": 4.645577907562256,
|
|
"learning_rate": 6.832432809266928e-07,
|
|
"loss": 0.3417,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 1.7712221553281156,
|
|
"grad_norm": 5.52669095993042,
|
|
"learning_rate": 6.827543493077306e-07,
|
|
"loss": 0.352,
|
|
"step": 14710
|
|
},
|
|
{
|
|
"epoch": 1.7724262492474412,
|
|
"grad_norm": 4.48500394821167,
|
|
"learning_rate": 6.822652159064293e-07,
|
|
"loss": 0.3427,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 1.773630343166767,
|
|
"grad_norm": 4.676848411560059,
|
|
"learning_rate": 6.817758812628503e-07,
|
|
"loss": 0.3568,
|
|
"step": 14730
|
|
},
|
|
{
|
|
"epoch": 1.7748344370860927,
|
|
"grad_norm": 4.112384796142578,
|
|
"learning_rate": 6.812863459172764e-07,
|
|
"loss": 0.3626,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 1.7760385310054185,
|
|
"grad_norm": 4.3355326652526855,
|
|
"learning_rate": 6.807966104102122e-07,
|
|
"loss": 0.3408,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 1.7772426249247442,
|
|
"grad_norm": 4.12075138092041,
|
|
"learning_rate": 6.803066752823837e-07,
|
|
"loss": 0.3516,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 1.7784467188440698,
|
|
"grad_norm": 4.14115571975708,
|
|
"learning_rate": 6.79816541074737e-07,
|
|
"loss": 0.3442,
|
|
"step": 14770
|
|
},
|
|
{
|
|
"epoch": 1.7796508127633954,
|
|
"grad_norm": 4.440965175628662,
|
|
"learning_rate": 6.793262083284377e-07,
|
|
"loss": 0.348,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 1.7808549066827213,
|
|
"grad_norm": 4.727054595947266,
|
|
"learning_rate": 6.788356775848712e-07,
|
|
"loss": 0.3545,
|
|
"step": 14790
|
|
},
|
|
{
|
|
"epoch": 1.782059000602047,
|
|
"grad_norm": 4.421995639801025,
|
|
"learning_rate": 6.783449493856411e-07,
|
|
"loss": 0.3584,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 1.7832630945213728,
|
|
"grad_norm": 4.619497776031494,
|
|
"learning_rate": 6.778540242725695e-07,
|
|
"loss": 0.3621,
|
|
"step": 14810
|
|
},
|
|
{
|
|
"epoch": 1.7844671884406984,
|
|
"grad_norm": 4.975179672241211,
|
|
"learning_rate": 6.773629027876952e-07,
|
|
"loss": 0.3433,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 1.785671282360024,
|
|
"grad_norm": 4.3249030113220215,
|
|
"learning_rate": 6.768715854732743e-07,
|
|
"loss": 0.362,
|
|
"step": 14830
|
|
},
|
|
{
|
|
"epoch": 1.7868753762793497,
|
|
"grad_norm": 4.467803001403809,
|
|
"learning_rate": 6.763800728717792e-07,
|
|
"loss": 0.3589,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 1.7880794701986755,
|
|
"grad_norm": 5.496029376983643,
|
|
"learning_rate": 6.758883655258976e-07,
|
|
"loss": 0.3395,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 1.7892835641180012,
|
|
"grad_norm": 4.524773120880127,
|
|
"learning_rate": 6.753964639785321e-07,
|
|
"loss": 0.3544,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 1.790487658037327,
|
|
"grad_norm": 4.625549793243408,
|
|
"learning_rate": 6.749043687728005e-07,
|
|
"loss": 0.3721,
|
|
"step": 14870
|
|
},
|
|
{
|
|
"epoch": 1.7916917519566526,
|
|
"grad_norm": 5.1430230140686035,
|
|
"learning_rate": 6.744120804520335e-07,
|
|
"loss": 0.3516,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 1.7928958458759783,
|
|
"grad_norm": 5.0784173011779785,
|
|
"learning_rate": 6.739195995597757e-07,
|
|
"loss": 0.3579,
|
|
"step": 14890
|
|
},
|
|
{
|
|
"epoch": 1.794099939795304,
|
|
"grad_norm": 4.529468536376953,
|
|
"learning_rate": 6.734269266397836e-07,
|
|
"loss": 0.3573,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 1.7953040337146298,
|
|
"grad_norm": 4.950248718261719,
|
|
"learning_rate": 6.729340622360267e-07,
|
|
"loss": 0.3615,
|
|
"step": 14910
|
|
},
|
|
{
|
|
"epoch": 1.7965081276339554,
|
|
"grad_norm": 3.968449831008911,
|
|
"learning_rate": 6.724410068926852e-07,
|
|
"loss": 0.3361,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 1.7977122215532813,
|
|
"grad_norm": 4.806743144989014,
|
|
"learning_rate": 6.7194776115415e-07,
|
|
"loss": 0.3497,
|
|
"step": 14930
|
|
},
|
|
{
|
|
"epoch": 1.7989163154726069,
|
|
"grad_norm": 4.263092517852783,
|
|
"learning_rate": 6.714543255650229e-07,
|
|
"loss": 0.3659,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 1.8001204093919325,
|
|
"grad_norm": 4.752941131591797,
|
|
"learning_rate": 6.709607006701148e-07,
|
|
"loss": 0.3363,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 1.8013245033112582,
|
|
"grad_norm": 5.102241516113281,
|
|
"learning_rate": 6.704668870144458e-07,
|
|
"loss": 0.3487,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 1.802528597230584,
|
|
"grad_norm": 3.8051202297210693,
|
|
"learning_rate": 6.699728851432442e-07,
|
|
"loss": 0.3373,
|
|
"step": 14970
|
|
},
|
|
{
|
|
"epoch": 1.8037326911499096,
|
|
"grad_norm": 4.386908054351807,
|
|
"learning_rate": 6.694786956019467e-07,
|
|
"loss": 0.3646,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 1.8049367850692355,
|
|
"grad_norm": 4.566622257232666,
|
|
"learning_rate": 6.689843189361962e-07,
|
|
"loss": 0.3698,
|
|
"step": 14990
|
|
},
|
|
{
|
|
"epoch": 1.8061408789885611,
|
|
"grad_norm": 4.474935054779053,
|
|
"learning_rate": 6.684897556918434e-07,
|
|
"loss": 0.3567,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 1.8073449729078868,
|
|
"grad_norm": 4.712069034576416,
|
|
"learning_rate": 6.67995006414944e-07,
|
|
"loss": 0.3573,
|
|
"step": 15010
|
|
},
|
|
{
|
|
"epoch": 1.8085490668272124,
|
|
"grad_norm": 4.497696876525879,
|
|
"learning_rate": 6.675000716517595e-07,
|
|
"loss": 0.3373,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 1.8097531607465382,
|
|
"grad_norm": 4.327920436859131,
|
|
"learning_rate": 6.670049519487565e-07,
|
|
"loss": 0.3689,
|
|
"step": 15030
|
|
},
|
|
{
|
|
"epoch": 1.810957254665864,
|
|
"grad_norm": 6.609139919281006,
|
|
"learning_rate": 6.665096478526053e-07,
|
|
"loss": 0.3465,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 1.8121613485851897,
|
|
"grad_norm": 4.8396196365356445,
|
|
"learning_rate": 6.6601415991018e-07,
|
|
"loss": 0.3628,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 1.8133654425045154,
|
|
"grad_norm": 5.569112777709961,
|
|
"learning_rate": 6.655184886685577e-07,
|
|
"loss": 0.3484,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 1.814569536423841,
|
|
"grad_norm": 4.458260536193848,
|
|
"learning_rate": 6.650226346750178e-07,
|
|
"loss": 0.3523,
|
|
"step": 15070
|
|
},
|
|
{
|
|
"epoch": 1.8157736303431666,
|
|
"grad_norm": 4.671230316162109,
|
|
"learning_rate": 6.645265984770417e-07,
|
|
"loss": 0.3501,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 1.8169777242624925,
|
|
"grad_norm": 4.7510504722595215,
|
|
"learning_rate": 6.640303806223116e-07,
|
|
"loss": 0.3565,
|
|
"step": 15090
|
|
},
|
|
{
|
|
"epoch": 1.8181818181818183,
|
|
"grad_norm": 4.930042266845703,
|
|
"learning_rate": 6.635339816587108e-07,
|
|
"loss": 0.3519,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 1.819385912101144,
|
|
"grad_norm": 4.401383876800537,
|
|
"learning_rate": 6.63037402134322e-07,
|
|
"loss": 0.3444,
|
|
"step": 15110
|
|
},
|
|
{
|
|
"epoch": 1.8205900060204696,
|
|
"grad_norm": 4.55552864074707,
|
|
"learning_rate": 6.625406425974277e-07,
|
|
"loss": 0.3593,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 1.8217940999397952,
|
|
"grad_norm": 4.647222995758057,
|
|
"learning_rate": 6.620437035965088e-07,
|
|
"loss": 0.3513,
|
|
"step": 15130
|
|
},
|
|
{
|
|
"epoch": 1.8229981938591209,
|
|
"grad_norm": 4.750911235809326,
|
|
"learning_rate": 6.615465856802446e-07,
|
|
"loss": 0.3754,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 1.8242022877784467,
|
|
"grad_norm": 3.9289968013763428,
|
|
"learning_rate": 6.610492893975117e-07,
|
|
"loss": 0.3511,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 1.8254063816977726,
|
|
"grad_norm": 3.834213972091675,
|
|
"learning_rate": 6.605518152973842e-07,
|
|
"loss": 0.3446,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 1.8266104756170982,
|
|
"grad_norm": 5.1060075759887695,
|
|
"learning_rate": 6.600541639291316e-07,
|
|
"loss": 0.3548,
|
|
"step": 15170
|
|
},
|
|
{
|
|
"epoch": 1.8278145695364238,
|
|
"grad_norm": 4.696617603302002,
|
|
"learning_rate": 6.595563358422202e-07,
|
|
"loss": 0.3576,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 1.8290186634557495,
|
|
"grad_norm": 4.141697883605957,
|
|
"learning_rate": 6.590583315863105e-07,
|
|
"loss": 0.3513,
|
|
"step": 15190
|
|
},
|
|
{
|
|
"epoch": 1.8302227573750751,
|
|
"grad_norm": 5.357382774353027,
|
|
"learning_rate": 6.58560151711258e-07,
|
|
"loss": 0.3508,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 1.831426851294401,
|
|
"grad_norm": 4.808011054992676,
|
|
"learning_rate": 6.58061796767112e-07,
|
|
"loss": 0.3568,
|
|
"step": 15210
|
|
},
|
|
{
|
|
"epoch": 1.8326309452137268,
|
|
"grad_norm": 4.633763790130615,
|
|
"learning_rate": 6.575632673041151e-07,
|
|
"loss": 0.355,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 1.8338350391330525,
|
|
"grad_norm": 4.953246116638184,
|
|
"learning_rate": 6.570645638727026e-07,
|
|
"loss": 0.3604,
|
|
"step": 15230
|
|
},
|
|
{
|
|
"epoch": 1.835039133052378,
|
|
"grad_norm": 4.354135513305664,
|
|
"learning_rate": 6.565656870235019e-07,
|
|
"loss": 0.337,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 1.8362432269717037,
|
|
"grad_norm": 5.245918273925781,
|
|
"learning_rate": 6.560666373073316e-07,
|
|
"loss": 0.3711,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 1.8374473208910294,
|
|
"grad_norm": 5.532114028930664,
|
|
"learning_rate": 6.555674152752016e-07,
|
|
"loss": 0.3618,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 1.8386514148103552,
|
|
"grad_norm": 5.3348212242126465,
|
|
"learning_rate": 6.55068021478312e-07,
|
|
"loss": 0.3646,
|
|
"step": 15270
|
|
},
|
|
{
|
|
"epoch": 1.839855508729681,
|
|
"grad_norm": 4.423579216003418,
|
|
"learning_rate": 6.54568456468052e-07,
|
|
"loss": 0.3522,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 1.8410596026490067,
|
|
"grad_norm": 4.966454982757568,
|
|
"learning_rate": 6.540687207960005e-07,
|
|
"loss": 0.3592,
|
|
"step": 15290
|
|
},
|
|
{
|
|
"epoch": 1.8422636965683323,
|
|
"grad_norm": 4.406902313232422,
|
|
"learning_rate": 6.535688150139246e-07,
|
|
"loss": 0.3637,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 1.843467790487658,
|
|
"grad_norm": 4.565004348754883,
|
|
"learning_rate": 6.530687396737791e-07,
|
|
"loss": 0.343,
|
|
"step": 15310
|
|
},
|
|
{
|
|
"epoch": 1.8446718844069836,
|
|
"grad_norm": 4.898248672485352,
|
|
"learning_rate": 6.525684953277061e-07,
|
|
"loss": 0.3589,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 1.8458759783263095,
|
|
"grad_norm": 4.416904449462891,
|
|
"learning_rate": 6.520680825280344e-07,
|
|
"loss": 0.3297,
|
|
"step": 15330
|
|
},
|
|
{
|
|
"epoch": 1.8470800722456353,
|
|
"grad_norm": 4.844006538391113,
|
|
"learning_rate": 6.515675018272786e-07,
|
|
"loss": 0.3692,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 1.848284166164961,
|
|
"grad_norm": 4.351726531982422,
|
|
"learning_rate": 6.510667537781389e-07,
|
|
"loss": 0.3627,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 1.8494882600842866,
|
|
"grad_norm": 4.276306629180908,
|
|
"learning_rate": 6.505658389335e-07,
|
|
"loss": 0.3581,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 1.8506923540036122,
|
|
"grad_norm": 4.866278648376465,
|
|
"learning_rate": 6.500647578464311e-07,
|
|
"loss": 0.3756,
|
|
"step": 15370
|
|
},
|
|
{
|
|
"epoch": 1.8518964479229378,
|
|
"grad_norm": 4.005789279937744,
|
|
"learning_rate": 6.495635110701847e-07,
|
|
"loss": 0.3551,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 1.8531005418422637,
|
|
"grad_norm": 4.069939136505127,
|
|
"learning_rate": 6.490620991581963e-07,
|
|
"loss": 0.3426,
|
|
"step": 15390
|
|
},
|
|
{
|
|
"epoch": 1.8543046357615895,
|
|
"grad_norm": 5.377545356750488,
|
|
"learning_rate": 6.485605226640836e-07,
|
|
"loss": 0.363,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 1.8555087296809152,
|
|
"grad_norm": 4.171127796173096,
|
|
"learning_rate": 6.480587821416465e-07,
|
|
"loss": 0.3601,
|
|
"step": 15410
|
|
},
|
|
{
|
|
"epoch": 1.8567128236002408,
|
|
"grad_norm": 4.944298267364502,
|
|
"learning_rate": 6.475568781448654e-07,
|
|
"loss": 0.3445,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 1.8579169175195664,
|
|
"grad_norm": 4.719433784484863,
|
|
"learning_rate": 6.470548112279015e-07,
|
|
"loss": 0.349,
|
|
"step": 15430
|
|
},
|
|
{
|
|
"epoch": 1.859121011438892,
|
|
"grad_norm": 4.289638042449951,
|
|
"learning_rate": 6.465525819450959e-07,
|
|
"loss": 0.3675,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 1.860325105358218,
|
|
"grad_norm": 4.580896377563477,
|
|
"learning_rate": 6.46050190850969e-07,
|
|
"loss": 0.362,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 1.8615291992775438,
|
|
"grad_norm": 4.68642520904541,
|
|
"learning_rate": 6.455476385002195e-07,
|
|
"loss": 0.3544,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 1.8627332931968694,
|
|
"grad_norm": 4.221519470214844,
|
|
"learning_rate": 6.450449254477246e-07,
|
|
"loss": 0.3557,
|
|
"step": 15470
|
|
},
|
|
{
|
|
"epoch": 1.863937387116195,
|
|
"grad_norm": 5.103092670440674,
|
|
"learning_rate": 6.445420522485387e-07,
|
|
"loss": 0.3575,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 1.8651414810355207,
|
|
"grad_norm": 5.300514221191406,
|
|
"learning_rate": 6.440390194578933e-07,
|
|
"loss": 0.3655,
|
|
"step": 15490
|
|
},
|
|
{
|
|
"epoch": 1.8663455749548463,
|
|
"grad_norm": 5.2280049324035645,
|
|
"learning_rate": 6.435358276311955e-07,
|
|
"loss": 0.3615,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 1.8675496688741722,
|
|
"grad_norm": 4.393173694610596,
|
|
"learning_rate": 6.430324773240287e-07,
|
|
"loss": 0.3617,
|
|
"step": 15510
|
|
},
|
|
{
|
|
"epoch": 1.868753762793498,
|
|
"grad_norm": 3.9914498329162598,
|
|
"learning_rate": 6.425289690921508e-07,
|
|
"loss": 0.3482,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 1.8699578567128237,
|
|
"grad_norm": 4.967134475708008,
|
|
"learning_rate": 6.420253034914943e-07,
|
|
"loss": 0.3635,
|
|
"step": 15530
|
|
},
|
|
{
|
|
"epoch": 1.8711619506321493,
|
|
"grad_norm": 4.27791166305542,
|
|
"learning_rate": 6.415214810781653e-07,
|
|
"loss": 0.3508,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 1.872366044551475,
|
|
"grad_norm": 4.6500163078308105,
|
|
"learning_rate": 6.410175024084431e-07,
|
|
"loss": 0.3589,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 1.8735701384708006,
|
|
"grad_norm": 4.22102689743042,
|
|
"learning_rate": 6.405133680387797e-07,
|
|
"loss": 0.3558,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 1.8747742323901264,
|
|
"grad_norm": 4.9325947761535645,
|
|
"learning_rate": 6.400090785257987e-07,
|
|
"loss": 0.3696,
|
|
"step": 15570
|
|
},
|
|
{
|
|
"epoch": 1.8759783263094523,
|
|
"grad_norm": 3.8292155265808105,
|
|
"learning_rate": 6.395046344262951e-07,
|
|
"loss": 0.356,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 1.877182420228778,
|
|
"grad_norm": 4.739902973175049,
|
|
"learning_rate": 6.390000362972348e-07,
|
|
"loss": 0.3407,
|
|
"step": 15590
|
|
},
|
|
{
|
|
"epoch": 1.8783865141481035,
|
|
"grad_norm": 3.770754814147949,
|
|
"learning_rate": 6.384952846957535e-07,
|
|
"loss": 0.3502,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 1.8795906080674292,
|
|
"grad_norm": 4.367559432983398,
|
|
"learning_rate": 6.379903801791566e-07,
|
|
"loss": 0.3566,
|
|
"step": 15610
|
|
},
|
|
{
|
|
"epoch": 1.8807947019867548,
|
|
"grad_norm": 5.16295862197876,
|
|
"learning_rate": 6.374853233049182e-07,
|
|
"loss": 0.3668,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 1.8819987959060807,
|
|
"grad_norm": 4.346946716308594,
|
|
"learning_rate": 6.369801146306802e-07,
|
|
"loss": 0.3483,
|
|
"step": 15630
|
|
},
|
|
{
|
|
"epoch": 1.8832028898254065,
|
|
"grad_norm": 4.716429710388184,
|
|
"learning_rate": 6.36474754714253e-07,
|
|
"loss": 0.3452,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 1.8844069837447321,
|
|
"grad_norm": 4.5193891525268555,
|
|
"learning_rate": 6.359692441136131e-07,
|
|
"loss": 0.361,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 1.8856110776640578,
|
|
"grad_norm": 3.9874355792999268,
|
|
"learning_rate": 6.354635833869042e-07,
|
|
"loss": 0.358,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 1.8868151715833834,
|
|
"grad_norm": 4.598703861236572,
|
|
"learning_rate": 6.349577730924349e-07,
|
|
"loss": 0.35,
|
|
"step": 15670
|
|
},
|
|
{
|
|
"epoch": 1.8880192655027093,
|
|
"grad_norm": 5.374682426452637,
|
|
"learning_rate": 6.344518137886798e-07,
|
|
"loss": 0.3639,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 1.889223359422035,
|
|
"grad_norm": 6.002275466918945,
|
|
"learning_rate": 6.339457060342772e-07,
|
|
"loss": 0.3546,
|
|
"step": 15690
|
|
},
|
|
{
|
|
"epoch": 1.8904274533413608,
|
|
"grad_norm": 4.864243984222412,
|
|
"learning_rate": 6.3343945038803e-07,
|
|
"loss": 0.3543,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 1.8916315472606864,
|
|
"grad_norm": 3.9879305362701416,
|
|
"learning_rate": 6.329330474089039e-07,
|
|
"loss": 0.3549,
|
|
"step": 15710
|
|
},
|
|
{
|
|
"epoch": 1.892835641180012,
|
|
"grad_norm": 4.457694053649902,
|
|
"learning_rate": 6.324264976560277e-07,
|
|
"loss": 0.3584,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 1.8940397350993377,
|
|
"grad_norm": 3.741135835647583,
|
|
"learning_rate": 6.319198016886918e-07,
|
|
"loss": 0.3618,
|
|
"step": 15730
|
|
},
|
|
{
|
|
"epoch": 1.8952438290186635,
|
|
"grad_norm": 4.002588272094727,
|
|
"learning_rate": 6.314129600663484e-07,
|
|
"loss": 0.3492,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 1.8964479229379891,
|
|
"grad_norm": 4.551817893981934,
|
|
"learning_rate": 6.309059733486102e-07,
|
|
"loss": 0.3567,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 1.897652016857315,
|
|
"grad_norm": 4.268725872039795,
|
|
"learning_rate": 6.303988420952505e-07,
|
|
"loss": 0.3591,
|
|
"step": 15760
|
|
},
|
|
{
|
|
"epoch": 1.8988561107766406,
|
|
"grad_norm": 4.963777542114258,
|
|
"learning_rate": 6.298915668662017e-07,
|
|
"loss": 0.3551,
|
|
"step": 15770
|
|
},
|
|
{
|
|
"epoch": 1.9000602046959663,
|
|
"grad_norm": 4.293519973754883,
|
|
"learning_rate": 6.293841482215558e-07,
|
|
"loss": 0.3586,
|
|
"step": 15780
|
|
},
|
|
{
|
|
"epoch": 1.901264298615292,
|
|
"grad_norm": 4.556762218475342,
|
|
"learning_rate": 6.288765867215625e-07,
|
|
"loss": 0.3538,
|
|
"step": 15790
|
|
},
|
|
{
|
|
"epoch": 1.9024683925346177,
|
|
"grad_norm": 3.792178153991699,
|
|
"learning_rate": 6.283688829266297e-07,
|
|
"loss": 0.3331,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 1.9036724864539434,
|
|
"grad_norm": 5.197310447692871,
|
|
"learning_rate": 6.278610373973219e-07,
|
|
"loss": 0.3515,
|
|
"step": 15810
|
|
},
|
|
{
|
|
"epoch": 1.9048765803732692,
|
|
"grad_norm": 5.082350730895996,
|
|
"learning_rate": 6.273530506943609e-07,
|
|
"loss": 0.3389,
|
|
"step": 15820
|
|
},
|
|
{
|
|
"epoch": 1.9060806742925949,
|
|
"grad_norm": 4.892045021057129,
|
|
"learning_rate": 6.268449233786236e-07,
|
|
"loss": 0.3531,
|
|
"step": 15830
|
|
},
|
|
{
|
|
"epoch": 1.9072847682119205,
|
|
"grad_norm": 4.555123805999756,
|
|
"learning_rate": 6.263366560111423e-07,
|
|
"loss": 0.3414,
|
|
"step": 15840
|
|
},
|
|
{
|
|
"epoch": 1.9084888621312461,
|
|
"grad_norm": 4.728994846343994,
|
|
"learning_rate": 6.258282491531043e-07,
|
|
"loss": 0.3556,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 1.909692956050572,
|
|
"grad_norm": 4.745967388153076,
|
|
"learning_rate": 6.253197033658507e-07,
|
|
"loss": 0.343,
|
|
"step": 15860
|
|
},
|
|
{
|
|
"epoch": 1.9108970499698976,
|
|
"grad_norm": 4.600861072540283,
|
|
"learning_rate": 6.248110192108757e-07,
|
|
"loss": 0.3475,
|
|
"step": 15870
|
|
},
|
|
{
|
|
"epoch": 1.9121011438892235,
|
|
"grad_norm": 4.099234580993652,
|
|
"learning_rate": 6.243021972498269e-07,
|
|
"loss": 0.3624,
|
|
"step": 15880
|
|
},
|
|
{
|
|
"epoch": 1.913305237808549,
|
|
"grad_norm": 4.272284030914307,
|
|
"learning_rate": 6.237932380445034e-07,
|
|
"loss": 0.3565,
|
|
"step": 15890
|
|
},
|
|
{
|
|
"epoch": 1.9145093317278747,
|
|
"grad_norm": 3.7602131366729736,
|
|
"learning_rate": 6.232841421568565e-07,
|
|
"loss": 0.3499,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 1.9157134256472004,
|
|
"grad_norm": 4.971080303192139,
|
|
"learning_rate": 6.227749101489877e-07,
|
|
"loss": 0.3701,
|
|
"step": 15910
|
|
},
|
|
{
|
|
"epoch": 1.9169175195665262,
|
|
"grad_norm": 5.319652080535889,
|
|
"learning_rate": 6.222655425831495e-07,
|
|
"loss": 0.3451,
|
|
"step": 15920
|
|
},
|
|
{
|
|
"epoch": 1.9181216134858519,
|
|
"grad_norm": 4.283812522888184,
|
|
"learning_rate": 6.217560400217433e-07,
|
|
"loss": 0.3559,
|
|
"step": 15930
|
|
},
|
|
{
|
|
"epoch": 1.9193257074051777,
|
|
"grad_norm": 5.055164813995361,
|
|
"learning_rate": 6.212464030273204e-07,
|
|
"loss": 0.3562,
|
|
"step": 15940
|
|
},
|
|
{
|
|
"epoch": 1.9205298013245033,
|
|
"grad_norm": 4.813416004180908,
|
|
"learning_rate": 6.207366321625798e-07,
|
|
"loss": 0.3606,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 1.921733895243829,
|
|
"grad_norm": 4.402296543121338,
|
|
"learning_rate": 6.202267279903686e-07,
|
|
"loss": 0.353,
|
|
"step": 15960
|
|
},
|
|
{
|
|
"epoch": 1.9229379891631546,
|
|
"grad_norm": 4.458485126495361,
|
|
"learning_rate": 6.197166910736814e-07,
|
|
"loss": 0.3523,
|
|
"step": 15970
|
|
},
|
|
{
|
|
"epoch": 1.9241420830824805,
|
|
"grad_norm": 3.5323286056518555,
|
|
"learning_rate": 6.192065219756587e-07,
|
|
"loss": 0.357,
|
|
"step": 15980
|
|
},
|
|
{
|
|
"epoch": 1.925346177001806,
|
|
"grad_norm": 4.047741413116455,
|
|
"learning_rate": 6.186962212595876e-07,
|
|
"loss": 0.3513,
|
|
"step": 15990
|
|
},
|
|
{
|
|
"epoch": 1.926550270921132,
|
|
"grad_norm": 4.608432769775391,
|
|
"learning_rate": 6.181857894889e-07,
|
|
"loss": 0.3556,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 1.9277543648404576,
|
|
"grad_norm": 4.246164321899414,
|
|
"learning_rate": 6.17675227227173e-07,
|
|
"loss": 0.3274,
|
|
"step": 16010
|
|
},
|
|
{
|
|
"epoch": 1.9289584587597832,
|
|
"grad_norm": 4.55797004699707,
|
|
"learning_rate": 6.171645350381272e-07,
|
|
"loss": 0.3537,
|
|
"step": 16020
|
|
},
|
|
{
|
|
"epoch": 1.9301625526791089,
|
|
"grad_norm": 4.349902629852295,
|
|
"learning_rate": 6.166537134856272e-07,
|
|
"loss": 0.3454,
|
|
"step": 16030
|
|
},
|
|
{
|
|
"epoch": 1.9313666465984347,
|
|
"grad_norm": 4.9922614097595215,
|
|
"learning_rate": 6.161427631336799e-07,
|
|
"loss": 0.3377,
|
|
"step": 16040
|
|
},
|
|
{
|
|
"epoch": 1.9325707405177603,
|
|
"grad_norm": 4.467525005340576,
|
|
"learning_rate": 6.156316845464351e-07,
|
|
"loss": 0.345,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 1.9337748344370862,
|
|
"grad_norm": 4.589630603790283,
|
|
"learning_rate": 6.151204782881835e-07,
|
|
"loss": 0.3393,
|
|
"step": 16060
|
|
},
|
|
{
|
|
"epoch": 1.9349789283564118,
|
|
"grad_norm": 4.475553035736084,
|
|
"learning_rate": 6.146091449233571e-07,
|
|
"loss": 0.3544,
|
|
"step": 16070
|
|
},
|
|
{
|
|
"epoch": 1.9361830222757375,
|
|
"grad_norm": 4.827112197875977,
|
|
"learning_rate": 6.140976850165283e-07,
|
|
"loss": 0.3447,
|
|
"step": 16080
|
|
},
|
|
{
|
|
"epoch": 1.937387116195063,
|
|
"grad_norm": 3.81062388420105,
|
|
"learning_rate": 6.135860991324092e-07,
|
|
"loss": 0.3493,
|
|
"step": 16090
|
|
},
|
|
{
|
|
"epoch": 1.938591210114389,
|
|
"grad_norm": 4.450663089752197,
|
|
"learning_rate": 6.130743878358505e-07,
|
|
"loss": 0.3601,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 1.9397953040337146,
|
|
"grad_norm": 3.878636598587036,
|
|
"learning_rate": 6.125625516918421e-07,
|
|
"loss": 0.3638,
|
|
"step": 16110
|
|
},
|
|
{
|
|
"epoch": 1.9409993979530404,
|
|
"grad_norm": 4.681748390197754,
|
|
"learning_rate": 6.120505912655114e-07,
|
|
"loss": 0.3542,
|
|
"step": 16120
|
|
},
|
|
{
|
|
"epoch": 1.942203491872366,
|
|
"grad_norm": 5.228558540344238,
|
|
"learning_rate": 6.115385071221231e-07,
|
|
"loss": 0.3538,
|
|
"step": 16130
|
|
},
|
|
{
|
|
"epoch": 1.9434075857916917,
|
|
"grad_norm": 5.1694488525390625,
|
|
"learning_rate": 6.110262998270781e-07,
|
|
"loss": 0.3689,
|
|
"step": 16140
|
|
},
|
|
{
|
|
"epoch": 1.9446116797110173,
|
|
"grad_norm": 4.253943920135498,
|
|
"learning_rate": 6.10513969945914e-07,
|
|
"loss": 0.3518,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 1.9458157736303432,
|
|
"grad_norm": 4.636354446411133,
|
|
"learning_rate": 6.100015180443031e-07,
|
|
"loss": 0.3643,
|
|
"step": 16160
|
|
},
|
|
{
|
|
"epoch": 1.9470198675496688,
|
|
"grad_norm": 3.8941125869750977,
|
|
"learning_rate": 6.094889446880529e-07,
|
|
"loss": 0.3444,
|
|
"step": 16170
|
|
},
|
|
{
|
|
"epoch": 1.9482239614689947,
|
|
"grad_norm": 4.6928391456604,
|
|
"learning_rate": 6.089762504431046e-07,
|
|
"loss": 0.3541,
|
|
"step": 16180
|
|
},
|
|
{
|
|
"epoch": 1.9494280553883203,
|
|
"grad_norm": 4.19013786315918,
|
|
"learning_rate": 6.084634358755334e-07,
|
|
"loss": 0.357,
|
|
"step": 16190
|
|
},
|
|
{
|
|
"epoch": 1.950632149307646,
|
|
"grad_norm": 4.565307140350342,
|
|
"learning_rate": 6.079505015515465e-07,
|
|
"loss": 0.3419,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 1.9518362432269716,
|
|
"grad_norm": 5.345344543457031,
|
|
"learning_rate": 6.074374480374843e-07,
|
|
"loss": 0.3569,
|
|
"step": 16210
|
|
},
|
|
{
|
|
"epoch": 1.9530403371462974,
|
|
"grad_norm": 4.672290802001953,
|
|
"learning_rate": 6.069242758998181e-07,
|
|
"loss": 0.3564,
|
|
"step": 16220
|
|
},
|
|
{
|
|
"epoch": 1.954244431065623,
|
|
"grad_norm": 4.522906303405762,
|
|
"learning_rate": 6.064109857051505e-07,
|
|
"loss": 0.35,
|
|
"step": 16230
|
|
},
|
|
{
|
|
"epoch": 1.955448524984949,
|
|
"grad_norm": 4.692704200744629,
|
|
"learning_rate": 6.058975780202143e-07,
|
|
"loss": 0.334,
|
|
"step": 16240
|
|
},
|
|
{
|
|
"epoch": 1.9566526189042746,
|
|
"grad_norm": 4.350996971130371,
|
|
"learning_rate": 6.053840534118722e-07,
|
|
"loss": 0.3512,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 1.9578567128236002,
|
|
"grad_norm": 4.869346618652344,
|
|
"learning_rate": 6.04870412447116e-07,
|
|
"loss": 0.3415,
|
|
"step": 16260
|
|
},
|
|
{
|
|
"epoch": 1.9590608067429258,
|
|
"grad_norm": 4.5982818603515625,
|
|
"learning_rate": 6.043566556930655e-07,
|
|
"loss": 0.3697,
|
|
"step": 16270
|
|
},
|
|
{
|
|
"epoch": 1.9602649006622517,
|
|
"grad_norm": 4.133756637573242,
|
|
"learning_rate": 6.038427837169688e-07,
|
|
"loss": 0.3498,
|
|
"step": 16280
|
|
},
|
|
{
|
|
"epoch": 1.9614689945815773,
|
|
"grad_norm": 4.6877546310424805,
|
|
"learning_rate": 6.033287970862013e-07,
|
|
"loss": 0.3622,
|
|
"step": 16290
|
|
},
|
|
{
|
|
"epoch": 1.9626730885009032,
|
|
"grad_norm": 5.100693702697754,
|
|
"learning_rate": 6.028146963682648e-07,
|
|
"loss": 0.3571,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 1.9638771824202288,
|
|
"grad_norm": 5.0933685302734375,
|
|
"learning_rate": 6.023004821307867e-07,
|
|
"loss": 0.3247,
|
|
"step": 16310
|
|
},
|
|
{
|
|
"epoch": 1.9650812763395544,
|
|
"grad_norm": 3.7194926738739014,
|
|
"learning_rate": 6.017861549415207e-07,
|
|
"loss": 0.3519,
|
|
"step": 16320
|
|
},
|
|
{
|
|
"epoch": 1.96628537025888,
|
|
"grad_norm": 4.424744606018066,
|
|
"learning_rate": 6.012717153683442e-07,
|
|
"loss": 0.3401,
|
|
"step": 16330
|
|
},
|
|
{
|
|
"epoch": 1.967489464178206,
|
|
"grad_norm": 3.9198262691497803,
|
|
"learning_rate": 6.007571639792593e-07,
|
|
"loss": 0.3434,
|
|
"step": 16340
|
|
},
|
|
{
|
|
"epoch": 1.9686935580975318,
|
|
"grad_norm": 3.9350152015686035,
|
|
"learning_rate": 6.002425013423913e-07,
|
|
"loss": 0.3447,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 1.9698976520168574,
|
|
"grad_norm": 4.852246284484863,
|
|
"learning_rate": 5.997277280259885e-07,
|
|
"loss": 0.3457,
|
|
"step": 16360
|
|
},
|
|
{
|
|
"epoch": 1.971101745936183,
|
|
"grad_norm": 4.658691883087158,
|
|
"learning_rate": 5.992128445984212e-07,
|
|
"loss": 0.3692,
|
|
"step": 16370
|
|
},
|
|
{
|
|
"epoch": 1.9723058398555087,
|
|
"grad_norm": 4.637414932250977,
|
|
"learning_rate": 5.986978516281815e-07,
|
|
"loss": 0.3555,
|
|
"step": 16380
|
|
},
|
|
{
|
|
"epoch": 1.9735099337748343,
|
|
"grad_norm": 4.982326984405518,
|
|
"learning_rate": 5.981827496838822e-07,
|
|
"loss": 0.3526,
|
|
"step": 16390
|
|
},
|
|
{
|
|
"epoch": 1.9747140276941602,
|
|
"grad_norm": 4.729382514953613,
|
|
"learning_rate": 5.976675393342566e-07,
|
|
"loss": 0.3558,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 1.975918121613486,
|
|
"grad_norm": 4.774322509765625,
|
|
"learning_rate": 5.971522211481575e-07,
|
|
"loss": 0.358,
|
|
"step": 16410
|
|
},
|
|
{
|
|
"epoch": 1.9771222155328116,
|
|
"grad_norm": 4.948471546173096,
|
|
"learning_rate": 5.966367956945572e-07,
|
|
"loss": 0.359,
|
|
"step": 16420
|
|
},
|
|
{
|
|
"epoch": 1.9783263094521373,
|
|
"grad_norm": 4.0199198722839355,
|
|
"learning_rate": 5.961212635425459e-07,
|
|
"loss": 0.3423,
|
|
"step": 16430
|
|
},
|
|
{
|
|
"epoch": 1.979530403371463,
|
|
"grad_norm": 4.141156196594238,
|
|
"learning_rate": 5.956056252613319e-07,
|
|
"loss": 0.3475,
|
|
"step": 16440
|
|
},
|
|
{
|
|
"epoch": 1.9807344972907885,
|
|
"grad_norm": 4.316824913024902,
|
|
"learning_rate": 5.950898814202407e-07,
|
|
"loss": 0.3436,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 1.9819385912101144,
|
|
"grad_norm": 5.594763278961182,
|
|
"learning_rate": 5.945740325887144e-07,
|
|
"loss": 0.3435,
|
|
"step": 16460
|
|
},
|
|
{
|
|
"epoch": 1.9831426851294403,
|
|
"grad_norm": 4.995075702667236,
|
|
"learning_rate": 5.940580793363105e-07,
|
|
"loss": 0.3539,
|
|
"step": 16470
|
|
},
|
|
{
|
|
"epoch": 1.9843467790487659,
|
|
"grad_norm": 4.139880180358887,
|
|
"learning_rate": 5.935420222327028e-07,
|
|
"loss": 0.3544,
|
|
"step": 16480
|
|
},
|
|
{
|
|
"epoch": 1.9855508729680915,
|
|
"grad_norm": 3.917797088623047,
|
|
"learning_rate": 5.930258618476785e-07,
|
|
"loss": 0.3331,
|
|
"step": 16490
|
|
},
|
|
{
|
|
"epoch": 1.9867549668874172,
|
|
"grad_norm": 5.234194755554199,
|
|
"learning_rate": 5.9250959875114e-07,
|
|
"loss": 0.3477,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 1.9879590608067428,
|
|
"grad_norm": 4.324552059173584,
|
|
"learning_rate": 5.919932335131022e-07,
|
|
"loss": 0.341,
|
|
"step": 16510
|
|
},
|
|
{
|
|
"epoch": 1.9891631547260686,
|
|
"grad_norm": 5.321447849273682,
|
|
"learning_rate": 5.914767667036936e-07,
|
|
"loss": 0.3606,
|
|
"step": 16520
|
|
},
|
|
{
|
|
"epoch": 1.9903672486453945,
|
|
"grad_norm": 4.159404277801514,
|
|
"learning_rate": 5.90960198893154e-07,
|
|
"loss": 0.3484,
|
|
"step": 16530
|
|
},
|
|
{
|
|
"epoch": 1.9915713425647201,
|
|
"grad_norm": 4.632839202880859,
|
|
"learning_rate": 5.904435306518354e-07,
|
|
"loss": 0.35,
|
|
"step": 16540
|
|
},
|
|
{
|
|
"epoch": 1.9927754364840458,
|
|
"grad_norm": 4.1767168045043945,
|
|
"learning_rate": 5.899267625502004e-07,
|
|
"loss": 0.356,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 1.9939795304033714,
|
|
"grad_norm": 4.770878314971924,
|
|
"learning_rate": 5.894098951588218e-07,
|
|
"loss": 0.3338,
|
|
"step": 16560
|
|
},
|
|
{
|
|
"epoch": 1.995183624322697,
|
|
"grad_norm": 4.481430530548096,
|
|
"learning_rate": 5.888929290483821e-07,
|
|
"loss": 0.3569,
|
|
"step": 16570
|
|
},
|
|
{
|
|
"epoch": 1.9963877182420229,
|
|
"grad_norm": 4.496611595153809,
|
|
"learning_rate": 5.883758647896729e-07,
|
|
"loss": 0.3602,
|
|
"step": 16580
|
|
},
|
|
{
|
|
"epoch": 1.9975918121613487,
|
|
"grad_norm": 3.9505410194396973,
|
|
"learning_rate": 5.878587029535942e-07,
|
|
"loss": 0.3403,
|
|
"step": 16590
|
|
},
|
|
{
|
|
"epoch": 1.9987959060806744,
|
|
"grad_norm": 4.308087348937988,
|
|
"learning_rate": 5.873414441111532e-07,
|
|
"loss": 0.3556,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 4.440168857574463,
|
|
"learning_rate": 5.868240888334652e-07,
|
|
"loss": 0.3312,
|
|
"step": 16610
|
|
},
|
|
{
|
|
"epoch": 2.0012040939193256,
|
|
"grad_norm": 4.038889408111572,
|
|
"learning_rate": 5.863066376917508e-07,
|
|
"loss": 0.3224,
|
|
"step": 16620
|
|
},
|
|
{
|
|
"epoch": 2.0024081878386513,
|
|
"grad_norm": 4.833006381988525,
|
|
"learning_rate": 5.857890912573376e-07,
|
|
"loss": 0.3001,
|
|
"step": 16630
|
|
},
|
|
{
|
|
"epoch": 2.0036122817579773,
|
|
"grad_norm": 4.160131931304932,
|
|
"learning_rate": 5.852714501016572e-07,
|
|
"loss": 0.2985,
|
|
"step": 16640
|
|
},
|
|
{
|
|
"epoch": 2.004816375677303,
|
|
"grad_norm": 5.080901622772217,
|
|
"learning_rate": 5.84753714796247e-07,
|
|
"loss": 0.3228,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 2.0060204695966286,
|
|
"grad_norm": 4.37393856048584,
|
|
"learning_rate": 5.842358859127478e-07,
|
|
"loss": 0.3036,
|
|
"step": 16660
|
|
},
|
|
{
|
|
"epoch": 2.0072245635159542,
|
|
"grad_norm": 4.473939895629883,
|
|
"learning_rate": 5.837179640229032e-07,
|
|
"loss": 0.3135,
|
|
"step": 16670
|
|
},
|
|
{
|
|
"epoch": 2.00842865743528,
|
|
"grad_norm": 5.297366619110107,
|
|
"learning_rate": 5.831999496985605e-07,
|
|
"loss": 0.3059,
|
|
"step": 16680
|
|
},
|
|
{
|
|
"epoch": 2.0096327513546055,
|
|
"grad_norm": 5.174331188201904,
|
|
"learning_rate": 5.826818435116683e-07,
|
|
"loss": 0.3123,
|
|
"step": 16690
|
|
},
|
|
{
|
|
"epoch": 2.0108368452739316,
|
|
"grad_norm": 4.679065704345703,
|
|
"learning_rate": 5.821636460342769e-07,
|
|
"loss": 0.3232,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 2.012040939193257,
|
|
"grad_norm": 4.446617126464844,
|
|
"learning_rate": 5.816453578385375e-07,
|
|
"loss": 0.3063,
|
|
"step": 16710
|
|
},
|
|
{
|
|
"epoch": 2.013245033112583,
|
|
"grad_norm": 5.05123233795166,
|
|
"learning_rate": 5.811269794967014e-07,
|
|
"loss": 0.3095,
|
|
"step": 16720
|
|
},
|
|
{
|
|
"epoch": 2.0144491270319085,
|
|
"grad_norm": 4.649383544921875,
|
|
"learning_rate": 5.806085115811191e-07,
|
|
"loss": 0.309,
|
|
"step": 16730
|
|
},
|
|
{
|
|
"epoch": 2.015653220951234,
|
|
"grad_norm": 4.328246116638184,
|
|
"learning_rate": 5.800899546642406e-07,
|
|
"loss": 0.2981,
|
|
"step": 16740
|
|
},
|
|
{
|
|
"epoch": 2.0168573148705597,
|
|
"grad_norm": 4.504574775695801,
|
|
"learning_rate": 5.795713093186136e-07,
|
|
"loss": 0.3162,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 2.018061408789886,
|
|
"grad_norm": 4.636085033416748,
|
|
"learning_rate": 5.790525761168839e-07,
|
|
"loss": 0.318,
|
|
"step": 16760
|
|
},
|
|
{
|
|
"epoch": 2.0192655027092115,
|
|
"grad_norm": 5.4193291664123535,
|
|
"learning_rate": 5.785337556317938e-07,
|
|
"loss": 0.3216,
|
|
"step": 16770
|
|
},
|
|
{
|
|
"epoch": 2.020469596628537,
|
|
"grad_norm": 4.318239212036133,
|
|
"learning_rate": 5.780148484361826e-07,
|
|
"loss": 0.3018,
|
|
"step": 16780
|
|
},
|
|
{
|
|
"epoch": 2.0216736905478627,
|
|
"grad_norm": 4.4032087326049805,
|
|
"learning_rate": 5.774958551029847e-07,
|
|
"loss": 0.3078,
|
|
"step": 16790
|
|
},
|
|
{
|
|
"epoch": 2.0228777844671884,
|
|
"grad_norm": 4.946054458618164,
|
|
"learning_rate": 5.769767762052301e-07,
|
|
"loss": 0.3155,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 2.024081878386514,
|
|
"grad_norm": 4.1051344871521,
|
|
"learning_rate": 5.764576123160429e-07,
|
|
"loss": 0.3183,
|
|
"step": 16810
|
|
},
|
|
{
|
|
"epoch": 2.02528597230584,
|
|
"grad_norm": 4.6641459465026855,
|
|
"learning_rate": 5.759383640086415e-07,
|
|
"loss": 0.3063,
|
|
"step": 16820
|
|
},
|
|
{
|
|
"epoch": 2.0264900662251657,
|
|
"grad_norm": 4.728779315948486,
|
|
"learning_rate": 5.75419031856337e-07,
|
|
"loss": 0.3153,
|
|
"step": 16830
|
|
},
|
|
{
|
|
"epoch": 2.0276941601444913,
|
|
"grad_norm": 5.103392124176025,
|
|
"learning_rate": 5.748996164325331e-07,
|
|
"loss": 0.304,
|
|
"step": 16840
|
|
},
|
|
{
|
|
"epoch": 2.028898254063817,
|
|
"grad_norm": 5.283243656158447,
|
|
"learning_rate": 5.743801183107261e-07,
|
|
"loss": 0.3188,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 2.0301023479831426,
|
|
"grad_norm": 4.704992294311523,
|
|
"learning_rate": 5.73860538064503e-07,
|
|
"loss": 0.306,
|
|
"step": 16860
|
|
},
|
|
{
|
|
"epoch": 2.0313064419024682,
|
|
"grad_norm": 5.523532390594482,
|
|
"learning_rate": 5.733408762675414e-07,
|
|
"loss": 0.3164,
|
|
"step": 16870
|
|
},
|
|
{
|
|
"epoch": 2.0325105358217943,
|
|
"grad_norm": 4.29448127746582,
|
|
"learning_rate": 5.728211334936093e-07,
|
|
"loss": 0.3011,
|
|
"step": 16880
|
|
},
|
|
{
|
|
"epoch": 2.03371462974112,
|
|
"grad_norm": 4.910971164703369,
|
|
"learning_rate": 5.723013103165642e-07,
|
|
"loss": 0.3093,
|
|
"step": 16890
|
|
},
|
|
{
|
|
"epoch": 2.0349187236604456,
|
|
"grad_norm": 4.527739524841309,
|
|
"learning_rate": 5.717814073103519e-07,
|
|
"loss": 0.2994,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 2.036122817579771,
|
|
"grad_norm": 4.409666061401367,
|
|
"learning_rate": 5.712614250490064e-07,
|
|
"loss": 0.3165,
|
|
"step": 16910
|
|
},
|
|
{
|
|
"epoch": 2.037326911499097,
|
|
"grad_norm": 4.129342079162598,
|
|
"learning_rate": 5.707413641066497e-07,
|
|
"loss": 0.3159,
|
|
"step": 16920
|
|
},
|
|
{
|
|
"epoch": 2.0385310054184225,
|
|
"grad_norm": 4.361571788787842,
|
|
"learning_rate": 5.702212250574904e-07,
|
|
"loss": 0.3008,
|
|
"step": 16930
|
|
},
|
|
{
|
|
"epoch": 2.0397350993377485,
|
|
"grad_norm": 4.482879638671875,
|
|
"learning_rate": 5.697010084758232e-07,
|
|
"loss": 0.3169,
|
|
"step": 16940
|
|
},
|
|
{
|
|
"epoch": 2.040939193257074,
|
|
"grad_norm": 4.7954535484313965,
|
|
"learning_rate": 5.691807149360285e-07,
|
|
"loss": 0.3057,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 2.0421432871764,
|
|
"grad_norm": 4.840571403503418,
|
|
"learning_rate": 5.686603450125717e-07,
|
|
"loss": 0.2973,
|
|
"step": 16960
|
|
},
|
|
{
|
|
"epoch": 2.0433473810957254,
|
|
"grad_norm": 4.597223281860352,
|
|
"learning_rate": 5.681398992800024e-07,
|
|
"loss": 0.3144,
|
|
"step": 16970
|
|
},
|
|
{
|
|
"epoch": 2.044551475015051,
|
|
"grad_norm": 4.794790744781494,
|
|
"learning_rate": 5.676193783129542e-07,
|
|
"loss": 0.3087,
|
|
"step": 16980
|
|
},
|
|
{
|
|
"epoch": 2.0457555689343767,
|
|
"grad_norm": 4.340571403503418,
|
|
"learning_rate": 5.670987826861435e-07,
|
|
"loss": 0.3083,
|
|
"step": 16990
|
|
},
|
|
{
|
|
"epoch": 2.046959662853703,
|
|
"grad_norm": 4.629497051239014,
|
|
"learning_rate": 5.665781129743693e-07,
|
|
"loss": 0.3088,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 2.0481637567730284,
|
|
"grad_norm": 4.827451229095459,
|
|
"learning_rate": 5.660573697525121e-07,
|
|
"loss": 0.3039,
|
|
"step": 17010
|
|
},
|
|
{
|
|
"epoch": 2.049367850692354,
|
|
"grad_norm": 4.8336381912231445,
|
|
"learning_rate": 5.655365535955342e-07,
|
|
"loss": 0.306,
|
|
"step": 17020
|
|
},
|
|
{
|
|
"epoch": 2.0505719446116797,
|
|
"grad_norm": 5.4790940284729,
|
|
"learning_rate": 5.650156650784777e-07,
|
|
"loss": 0.3129,
|
|
"step": 17030
|
|
},
|
|
{
|
|
"epoch": 2.0517760385310053,
|
|
"grad_norm": 3.705552577972412,
|
|
"learning_rate": 5.64494704776465e-07,
|
|
"loss": 0.3062,
|
|
"step": 17040
|
|
},
|
|
{
|
|
"epoch": 2.052980132450331,
|
|
"grad_norm": 4.869053840637207,
|
|
"learning_rate": 5.639736732646976e-07,
|
|
"loss": 0.3169,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 2.054184226369657,
|
|
"grad_norm": 4.759436130523682,
|
|
"learning_rate": 5.634525711184556e-07,
|
|
"loss": 0.3129,
|
|
"step": 17060
|
|
},
|
|
{
|
|
"epoch": 2.0553883202889827,
|
|
"grad_norm": 4.388055324554443,
|
|
"learning_rate": 5.629313989130975e-07,
|
|
"loss": 0.3026,
|
|
"step": 17070
|
|
},
|
|
{
|
|
"epoch": 2.0565924142083083,
|
|
"grad_norm": 5.617096900939941,
|
|
"learning_rate": 5.624101572240587e-07,
|
|
"loss": 0.3064,
|
|
"step": 17080
|
|
},
|
|
{
|
|
"epoch": 2.057796508127634,
|
|
"grad_norm": 4.787253379821777,
|
|
"learning_rate": 5.618888466268513e-07,
|
|
"loss": 0.3174,
|
|
"step": 17090
|
|
},
|
|
{
|
|
"epoch": 2.0590006020469596,
|
|
"grad_norm": 4.347087383270264,
|
|
"learning_rate": 5.613674676970638e-07,
|
|
"loss": 0.3028,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 2.060204695966285,
|
|
"grad_norm": 4.601030349731445,
|
|
"learning_rate": 5.608460210103598e-07,
|
|
"loss": 0.3136,
|
|
"step": 17110
|
|
},
|
|
{
|
|
"epoch": 2.0614087898856113,
|
|
"grad_norm": 4.6767048835754395,
|
|
"learning_rate": 5.603245071424783e-07,
|
|
"loss": 0.3126,
|
|
"step": 17120
|
|
},
|
|
{
|
|
"epoch": 2.062612883804937,
|
|
"grad_norm": 5.636801719665527,
|
|
"learning_rate": 5.598029266692315e-07,
|
|
"loss": 0.3107,
|
|
"step": 17130
|
|
},
|
|
{
|
|
"epoch": 2.0638169777242625,
|
|
"grad_norm": 5.514817714691162,
|
|
"learning_rate": 5.592812801665061e-07,
|
|
"loss": 0.3191,
|
|
"step": 17140
|
|
},
|
|
{
|
|
"epoch": 2.065021071643588,
|
|
"grad_norm": 4.12761116027832,
|
|
"learning_rate": 5.587595682102611e-07,
|
|
"loss": 0.3119,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 2.066225165562914,
|
|
"grad_norm": 4.940089702606201,
|
|
"learning_rate": 5.582377913765283e-07,
|
|
"loss": 0.3072,
|
|
"step": 17160
|
|
},
|
|
{
|
|
"epoch": 2.0674292594822394,
|
|
"grad_norm": 4.235925674438477,
|
|
"learning_rate": 5.577159502414103e-07,
|
|
"loss": 0.3168,
|
|
"step": 17170
|
|
},
|
|
{
|
|
"epoch": 2.0686333534015655,
|
|
"grad_norm": 5.036463260650635,
|
|
"learning_rate": 5.57194045381082e-07,
|
|
"loss": 0.3236,
|
|
"step": 17180
|
|
},
|
|
{
|
|
"epoch": 2.069837447320891,
|
|
"grad_norm": 3.9009006023406982,
|
|
"learning_rate": 5.56672077371787e-07,
|
|
"loss": 0.3111,
|
|
"step": 17190
|
|
},
|
|
{
|
|
"epoch": 2.0710415412402168,
|
|
"grad_norm": 4.592634677886963,
|
|
"learning_rate": 5.5615004678984e-07,
|
|
"loss": 0.3001,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 2.0722456351595424,
|
|
"grad_norm": 4.5537004470825195,
|
|
"learning_rate": 5.556279542116242e-07,
|
|
"loss": 0.305,
|
|
"step": 17210
|
|
},
|
|
{
|
|
"epoch": 2.073449729078868,
|
|
"grad_norm": 4.557441711425781,
|
|
"learning_rate": 5.551058002135913e-07,
|
|
"loss": 0.2978,
|
|
"step": 17220
|
|
},
|
|
{
|
|
"epoch": 2.0746538229981937,
|
|
"grad_norm": 3.7024407386779785,
|
|
"learning_rate": 5.545835853722608e-07,
|
|
"loss": 0.3134,
|
|
"step": 17230
|
|
},
|
|
{
|
|
"epoch": 2.0758579169175198,
|
|
"grad_norm": 5.503789901733398,
|
|
"learning_rate": 5.540613102642195e-07,
|
|
"loss": 0.3217,
|
|
"step": 17240
|
|
},
|
|
{
|
|
"epoch": 2.0770620108368454,
|
|
"grad_norm": 4.864404678344727,
|
|
"learning_rate": 5.535389754661208e-07,
|
|
"loss": 0.2983,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 2.078266104756171,
|
|
"grad_norm": 5.232902526855469,
|
|
"learning_rate": 5.530165815546835e-07,
|
|
"loss": 0.3154,
|
|
"step": 17260
|
|
},
|
|
{
|
|
"epoch": 2.0794701986754967,
|
|
"grad_norm": 4.34998083114624,
|
|
"learning_rate": 5.524941291066923e-07,
|
|
"loss": 0.3078,
|
|
"step": 17270
|
|
},
|
|
{
|
|
"epoch": 2.0806742925948223,
|
|
"grad_norm": 4.243396282196045,
|
|
"learning_rate": 5.519716186989962e-07,
|
|
"loss": 0.2971,
|
|
"step": 17280
|
|
},
|
|
{
|
|
"epoch": 2.081878386514148,
|
|
"grad_norm": 4.376738548278809,
|
|
"learning_rate": 5.514490509085083e-07,
|
|
"loss": 0.3081,
|
|
"step": 17290
|
|
},
|
|
{
|
|
"epoch": 2.083082480433474,
|
|
"grad_norm": 4.597198486328125,
|
|
"learning_rate": 5.50926426312205e-07,
|
|
"loss": 0.3279,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 2.0842865743527996,
|
|
"grad_norm": 4.825913906097412,
|
|
"learning_rate": 5.504037454871258e-07,
|
|
"loss": 0.3164,
|
|
"step": 17310
|
|
},
|
|
{
|
|
"epoch": 2.0854906682721253,
|
|
"grad_norm": 4.312431812286377,
|
|
"learning_rate": 5.498810090103711e-07,
|
|
"loss": 0.29,
|
|
"step": 17320
|
|
},
|
|
{
|
|
"epoch": 2.086694762191451,
|
|
"grad_norm": 4.7181854248046875,
|
|
"learning_rate": 5.493582174591045e-07,
|
|
"loss": 0.2962,
|
|
"step": 17330
|
|
},
|
|
{
|
|
"epoch": 2.0878988561107765,
|
|
"grad_norm": 5.4123759269714355,
|
|
"learning_rate": 5.488353714105488e-07,
|
|
"loss": 0.3044,
|
|
"step": 17340
|
|
},
|
|
{
|
|
"epoch": 2.089102950030102,
|
|
"grad_norm": 4.742303371429443,
|
|
"learning_rate": 5.48312471441988e-07,
|
|
"loss": 0.287,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 2.0903070439494282,
|
|
"grad_norm": 3.8717334270477295,
|
|
"learning_rate": 5.477895181307651e-07,
|
|
"loss": 0.3205,
|
|
"step": 17360
|
|
},
|
|
{
|
|
"epoch": 2.091511137868754,
|
|
"grad_norm": 4.724112510681152,
|
|
"learning_rate": 5.472665120542824e-07,
|
|
"loss": 0.2851,
|
|
"step": 17370
|
|
},
|
|
{
|
|
"epoch": 2.0927152317880795,
|
|
"grad_norm": 5.797724723815918,
|
|
"learning_rate": 5.4674345379e-07,
|
|
"loss": 0.3136,
|
|
"step": 17380
|
|
},
|
|
{
|
|
"epoch": 2.093919325707405,
|
|
"grad_norm": 4.77787446975708,
|
|
"learning_rate": 5.462203439154361e-07,
|
|
"loss": 0.3059,
|
|
"step": 17390
|
|
},
|
|
{
|
|
"epoch": 2.0951234196267308,
|
|
"grad_norm": 4.670202732086182,
|
|
"learning_rate": 5.456971830081655e-07,
|
|
"loss": 0.3219,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 2.0963275135460564,
|
|
"grad_norm": 4.7208099365234375,
|
|
"learning_rate": 5.451739716458195e-07,
|
|
"loss": 0.3146,
|
|
"step": 17410
|
|
},
|
|
{
|
|
"epoch": 2.0975316074653825,
|
|
"grad_norm": 4.647831439971924,
|
|
"learning_rate": 5.446507104060851e-07,
|
|
"loss": 0.3266,
|
|
"step": 17420
|
|
},
|
|
{
|
|
"epoch": 2.098735701384708,
|
|
"grad_norm": 4.2992987632751465,
|
|
"learning_rate": 5.441273998667046e-07,
|
|
"loss": 0.3091,
|
|
"step": 17430
|
|
},
|
|
{
|
|
"epoch": 2.0999397953040337,
|
|
"grad_norm": 4.718204975128174,
|
|
"learning_rate": 5.436040406054742e-07,
|
|
"loss": 0.3103,
|
|
"step": 17440
|
|
},
|
|
{
|
|
"epoch": 2.1011438892233594,
|
|
"grad_norm": 4.716932773590088,
|
|
"learning_rate": 5.430806332002443e-07,
|
|
"loss": 0.3044,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 2.102347983142685,
|
|
"grad_norm": 4.856298923492432,
|
|
"learning_rate": 5.425571782289185e-07,
|
|
"loss": 0.3039,
|
|
"step": 17460
|
|
},
|
|
{
|
|
"epoch": 2.1035520770620106,
|
|
"grad_norm": 5.1161208152771,
|
|
"learning_rate": 5.420336762694524e-07,
|
|
"loss": 0.3014,
|
|
"step": 17470
|
|
},
|
|
{
|
|
"epoch": 2.1047561709813367,
|
|
"grad_norm": 4.895595550537109,
|
|
"learning_rate": 5.415101278998543e-07,
|
|
"loss": 0.3113,
|
|
"step": 17480
|
|
},
|
|
{
|
|
"epoch": 2.1059602649006623,
|
|
"grad_norm": 4.259979248046875,
|
|
"learning_rate": 5.409865336981832e-07,
|
|
"loss": 0.3158,
|
|
"step": 17490
|
|
},
|
|
{
|
|
"epoch": 2.107164358819988,
|
|
"grad_norm": 5.523928642272949,
|
|
"learning_rate": 5.404628942425484e-07,
|
|
"loss": 0.3293,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 2.1083684527393136,
|
|
"grad_norm": 5.490001201629639,
|
|
"learning_rate": 5.399392101111102e-07,
|
|
"loss": 0.3253,
|
|
"step": 17510
|
|
},
|
|
{
|
|
"epoch": 2.1095725466586392,
|
|
"grad_norm": 4.070251941680908,
|
|
"learning_rate": 5.39415481882077e-07,
|
|
"loss": 0.3341,
|
|
"step": 17520
|
|
},
|
|
{
|
|
"epoch": 2.110776640577965,
|
|
"grad_norm": 4.516000270843506,
|
|
"learning_rate": 5.388917101337069e-07,
|
|
"loss": 0.3115,
|
|
"step": 17530
|
|
},
|
|
{
|
|
"epoch": 2.111980734497291,
|
|
"grad_norm": 4.881539821624756,
|
|
"learning_rate": 5.383678954443056e-07,
|
|
"loss": 0.2962,
|
|
"step": 17540
|
|
},
|
|
{
|
|
"epoch": 2.1131848284166166,
|
|
"grad_norm": 4.361866474151611,
|
|
"learning_rate": 5.378440383922261e-07,
|
|
"loss": 0.2959,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 2.1143889223359422,
|
|
"grad_norm": 4.218469619750977,
|
|
"learning_rate": 5.373201395558683e-07,
|
|
"loss": 0.3004,
|
|
"step": 17560
|
|
},
|
|
{
|
|
"epoch": 2.115593016255268,
|
|
"grad_norm": 5.058506488800049,
|
|
"learning_rate": 5.367961995136782e-07,
|
|
"loss": 0.3177,
|
|
"step": 17570
|
|
},
|
|
{
|
|
"epoch": 2.1167971101745935,
|
|
"grad_norm": 5.340724468231201,
|
|
"learning_rate": 5.362722188441476e-07,
|
|
"loss": 0.3116,
|
|
"step": 17580
|
|
},
|
|
{
|
|
"epoch": 2.118001204093919,
|
|
"grad_norm": 4.867612361907959,
|
|
"learning_rate": 5.357481981258128e-07,
|
|
"loss": 0.3287,
|
|
"step": 17590
|
|
},
|
|
{
|
|
"epoch": 2.119205298013245,
|
|
"grad_norm": 4.499852180480957,
|
|
"learning_rate": 5.352241379372545e-07,
|
|
"loss": 0.3057,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 2.120409391932571,
|
|
"grad_norm": 5.446403980255127,
|
|
"learning_rate": 5.347000388570966e-07,
|
|
"loss": 0.3206,
|
|
"step": 17610
|
|
},
|
|
{
|
|
"epoch": 2.1216134858518965,
|
|
"grad_norm": 4.157654762268066,
|
|
"learning_rate": 5.341759014640067e-07,
|
|
"loss": 0.2985,
|
|
"step": 17620
|
|
},
|
|
{
|
|
"epoch": 2.122817579771222,
|
|
"grad_norm": 5.162617206573486,
|
|
"learning_rate": 5.336517263366939e-07,
|
|
"loss": 0.3057,
|
|
"step": 17630
|
|
},
|
|
{
|
|
"epoch": 2.1240216736905477,
|
|
"grad_norm": 4.874579906463623,
|
|
"learning_rate": 5.331275140539094e-07,
|
|
"loss": 0.3096,
|
|
"step": 17640
|
|
},
|
|
{
|
|
"epoch": 2.125225767609874,
|
|
"grad_norm": 4.7379350662231445,
|
|
"learning_rate": 5.326032651944453e-07,
|
|
"loss": 0.3178,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 2.1264298615291994,
|
|
"grad_norm": 4.660308361053467,
|
|
"learning_rate": 5.320789803371344e-07,
|
|
"loss": 0.3121,
|
|
"step": 17660
|
|
},
|
|
{
|
|
"epoch": 2.127633955448525,
|
|
"grad_norm": 4.264311790466309,
|
|
"learning_rate": 5.315546600608486e-07,
|
|
"loss": 0.3041,
|
|
"step": 17670
|
|
},
|
|
{
|
|
"epoch": 2.1288380493678507,
|
|
"grad_norm": 5.007218360900879,
|
|
"learning_rate": 5.310303049444995e-07,
|
|
"loss": 0.3133,
|
|
"step": 17680
|
|
},
|
|
{
|
|
"epoch": 2.1300421432871763,
|
|
"grad_norm": 4.878419399261475,
|
|
"learning_rate": 5.305059155670369e-07,
|
|
"loss": 0.307,
|
|
"step": 17690
|
|
},
|
|
{
|
|
"epoch": 2.131246237206502,
|
|
"grad_norm": 4.373286724090576,
|
|
"learning_rate": 5.299814925074485e-07,
|
|
"loss": 0.2988,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 2.1324503311258276,
|
|
"grad_norm": 4.705572128295898,
|
|
"learning_rate": 5.294570363447589e-07,
|
|
"loss": 0.3101,
|
|
"step": 17710
|
|
},
|
|
{
|
|
"epoch": 2.1336544250451537,
|
|
"grad_norm": 5.6706461906433105,
|
|
"learning_rate": 5.2893254765803e-07,
|
|
"loss": 0.3182,
|
|
"step": 17720
|
|
},
|
|
{
|
|
"epoch": 2.1348585189644793,
|
|
"grad_norm": 4.4038896560668945,
|
|
"learning_rate": 5.284080270263586e-07,
|
|
"loss": 0.3055,
|
|
"step": 17730
|
|
},
|
|
{
|
|
"epoch": 2.136062612883805,
|
|
"grad_norm": 4.746342658996582,
|
|
"learning_rate": 5.278834750288776e-07,
|
|
"loss": 0.3098,
|
|
"step": 17740
|
|
},
|
|
{
|
|
"epoch": 2.1372667068031306,
|
|
"grad_norm": 4.472485065460205,
|
|
"learning_rate": 5.273588922447543e-07,
|
|
"loss": 0.3192,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 2.138470800722456,
|
|
"grad_norm": 5.553606033325195,
|
|
"learning_rate": 5.268342792531897e-07,
|
|
"loss": 0.3328,
|
|
"step": 17760
|
|
},
|
|
{
|
|
"epoch": 2.1396748946417823,
|
|
"grad_norm": 5.298537731170654,
|
|
"learning_rate": 5.263096366334183e-07,
|
|
"loss": 0.3072,
|
|
"step": 17770
|
|
},
|
|
{
|
|
"epoch": 2.140878988561108,
|
|
"grad_norm": 4.98936128616333,
|
|
"learning_rate": 5.257849649647077e-07,
|
|
"loss": 0.3131,
|
|
"step": 17780
|
|
},
|
|
{
|
|
"epoch": 2.1420830824804336,
|
|
"grad_norm": 4.389891147613525,
|
|
"learning_rate": 5.252602648263569e-07,
|
|
"loss": 0.3142,
|
|
"step": 17790
|
|
},
|
|
{
|
|
"epoch": 2.143287176399759,
|
|
"grad_norm": 4.614076614379883,
|
|
"learning_rate": 5.24735536797697e-07,
|
|
"loss": 0.3075,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 2.144491270319085,
|
|
"grad_norm": 5.098964214324951,
|
|
"learning_rate": 5.242107814580893e-07,
|
|
"loss": 0.3125,
|
|
"step": 17810
|
|
},
|
|
{
|
|
"epoch": 2.1456953642384105,
|
|
"grad_norm": 4.502909183502197,
|
|
"learning_rate": 5.236859993869258e-07,
|
|
"loss": 0.2986,
|
|
"step": 17820
|
|
},
|
|
{
|
|
"epoch": 2.146899458157736,
|
|
"grad_norm": 5.02591609954834,
|
|
"learning_rate": 5.231611911636276e-07,
|
|
"loss": 0.294,
|
|
"step": 17830
|
|
},
|
|
{
|
|
"epoch": 2.148103552077062,
|
|
"grad_norm": 4.412136077880859,
|
|
"learning_rate": 5.226363573676447e-07,
|
|
"loss": 0.3085,
|
|
"step": 17840
|
|
},
|
|
{
|
|
"epoch": 2.149307645996388,
|
|
"grad_norm": 4.393168926239014,
|
|
"learning_rate": 5.221114985784558e-07,
|
|
"loss": 0.3145,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 2.1505117399157134,
|
|
"grad_norm": 4.741860389709473,
|
|
"learning_rate": 5.215866153755666e-07,
|
|
"loss": 0.3194,
|
|
"step": 17860
|
|
},
|
|
{
|
|
"epoch": 2.151715833835039,
|
|
"grad_norm": 4.4850006103515625,
|
|
"learning_rate": 5.210617083385101e-07,
|
|
"loss": 0.3015,
|
|
"step": 17870
|
|
},
|
|
{
|
|
"epoch": 2.1529199277543647,
|
|
"grad_norm": 5.466598033905029,
|
|
"learning_rate": 5.205367780468455e-07,
|
|
"loss": 0.311,
|
|
"step": 17880
|
|
},
|
|
{
|
|
"epoch": 2.1541240216736908,
|
|
"grad_norm": 5.164214611053467,
|
|
"learning_rate": 5.200118250801578e-07,
|
|
"loss": 0.3161,
|
|
"step": 17890
|
|
},
|
|
{
|
|
"epoch": 2.1553281155930164,
|
|
"grad_norm": 4.714061737060547,
|
|
"learning_rate": 5.194868500180567e-07,
|
|
"loss": 0.3171,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 2.156532209512342,
|
|
"grad_norm": 4.755367279052734,
|
|
"learning_rate": 5.189618534401768e-07,
|
|
"loss": 0.3059,
|
|
"step": 17910
|
|
},
|
|
{
|
|
"epoch": 2.1577363034316677,
|
|
"grad_norm": 4.605241298675537,
|
|
"learning_rate": 5.184368359261761e-07,
|
|
"loss": 0.3207,
|
|
"step": 17920
|
|
},
|
|
{
|
|
"epoch": 2.1589403973509933,
|
|
"grad_norm": 5.180820465087891,
|
|
"learning_rate": 5.179117980557357e-07,
|
|
"loss": 0.3097,
|
|
"step": 17930
|
|
},
|
|
{
|
|
"epoch": 2.160144491270319,
|
|
"grad_norm": 5.053746700286865,
|
|
"learning_rate": 5.173867404085594e-07,
|
|
"loss": 0.3208,
|
|
"step": 17940
|
|
},
|
|
{
|
|
"epoch": 2.1613485851896446,
|
|
"grad_norm": 4.809300899505615,
|
|
"learning_rate": 5.168616635643728e-07,
|
|
"loss": 0.3009,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 2.1625526791089706,
|
|
"grad_norm": 4.434291839599609,
|
|
"learning_rate": 5.163365681029224e-07,
|
|
"loss": 0.3118,
|
|
"step": 17960
|
|
},
|
|
{
|
|
"epoch": 2.1637567730282963,
|
|
"grad_norm": 3.94570255279541,
|
|
"learning_rate": 5.158114546039756e-07,
|
|
"loss": 0.3081,
|
|
"step": 17970
|
|
},
|
|
{
|
|
"epoch": 2.164960866947622,
|
|
"grad_norm": 4.972118854522705,
|
|
"learning_rate": 5.152863236473195e-07,
|
|
"loss": 0.3,
|
|
"step": 17980
|
|
},
|
|
{
|
|
"epoch": 2.1661649608669475,
|
|
"grad_norm": 5.422942161560059,
|
|
"learning_rate": 5.147611758127608e-07,
|
|
"loss": 0.3039,
|
|
"step": 17990
|
|
},
|
|
{
|
|
"epoch": 2.167369054786273,
|
|
"grad_norm": 4.45037317276001,
|
|
"learning_rate": 5.142360116801242e-07,
|
|
"loss": 0.3158,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 2.1685731487055993,
|
|
"grad_norm": 5.098633289337158,
|
|
"learning_rate": 5.137108318292533e-07,
|
|
"loss": 0.2949,
|
|
"step": 18010
|
|
},
|
|
{
|
|
"epoch": 2.169777242624925,
|
|
"grad_norm": 5.256601810455322,
|
|
"learning_rate": 5.131856368400082e-07,
|
|
"loss": 0.3037,
|
|
"step": 18020
|
|
},
|
|
{
|
|
"epoch": 2.1709813365442505,
|
|
"grad_norm": 5.189584732055664,
|
|
"learning_rate": 5.126604272922659e-07,
|
|
"loss": 0.3256,
|
|
"step": 18030
|
|
},
|
|
{
|
|
"epoch": 2.172185430463576,
|
|
"grad_norm": 4.259381294250488,
|
|
"learning_rate": 5.121352037659201e-07,
|
|
"loss": 0.3051,
|
|
"step": 18040
|
|
},
|
|
{
|
|
"epoch": 2.173389524382902,
|
|
"grad_norm": 4.795348644256592,
|
|
"learning_rate": 5.116099668408791e-07,
|
|
"loss": 0.3002,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 2.1745936183022274,
|
|
"grad_norm": 5.63735818862915,
|
|
"learning_rate": 5.110847170970665e-07,
|
|
"loss": 0.313,
|
|
"step": 18060
|
|
},
|
|
{
|
|
"epoch": 2.175797712221553,
|
|
"grad_norm": 6.581758975982666,
|
|
"learning_rate": 5.1055945511442e-07,
|
|
"loss": 0.3014,
|
|
"step": 18070
|
|
},
|
|
{
|
|
"epoch": 2.177001806140879,
|
|
"grad_norm": 5.026032447814941,
|
|
"learning_rate": 5.100341814728904e-07,
|
|
"loss": 0.3009,
|
|
"step": 18080
|
|
},
|
|
{
|
|
"epoch": 2.1782059000602048,
|
|
"grad_norm": 4.6837263107299805,
|
|
"learning_rate": 5.095088967524423e-07,
|
|
"loss": 0.3251,
|
|
"step": 18090
|
|
},
|
|
{
|
|
"epoch": 2.1794099939795304,
|
|
"grad_norm": 4.637839317321777,
|
|
"learning_rate": 5.089836015330513e-07,
|
|
"loss": 0.3177,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 2.180614087898856,
|
|
"grad_norm": 4.267435550689697,
|
|
"learning_rate": 5.084582963947057e-07,
|
|
"loss": 0.3003,
|
|
"step": 18110
|
|
},
|
|
{
|
|
"epoch": 2.1818181818181817,
|
|
"grad_norm": 4.481462001800537,
|
|
"learning_rate": 5.07932981917404e-07,
|
|
"loss": 0.3084,
|
|
"step": 18120
|
|
},
|
|
{
|
|
"epoch": 2.1830222757375077,
|
|
"grad_norm": 5.001600742340088,
|
|
"learning_rate": 5.074076586811554e-07,
|
|
"loss": 0.3117,
|
|
"step": 18130
|
|
},
|
|
{
|
|
"epoch": 2.1842263696568334,
|
|
"grad_norm": 4.785762310028076,
|
|
"learning_rate": 5.068823272659785e-07,
|
|
"loss": 0.3044,
|
|
"step": 18140
|
|
},
|
|
{
|
|
"epoch": 2.185430463576159,
|
|
"grad_norm": 4.241122245788574,
|
|
"learning_rate": 5.063569882519014e-07,
|
|
"loss": 0.3114,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 2.1866345574954846,
|
|
"grad_norm": 4.614393711090088,
|
|
"learning_rate": 5.0583164221896e-07,
|
|
"loss": 0.3143,
|
|
"step": 18160
|
|
},
|
|
{
|
|
"epoch": 2.1878386514148103,
|
|
"grad_norm": 5.790137767791748,
|
|
"learning_rate": 5.053062897471985e-07,
|
|
"loss": 0.3086,
|
|
"step": 18170
|
|
},
|
|
{
|
|
"epoch": 2.189042745334136,
|
|
"grad_norm": 5.027008056640625,
|
|
"learning_rate": 5.047809314166677e-07,
|
|
"loss": 0.2996,
|
|
"step": 18180
|
|
},
|
|
{
|
|
"epoch": 2.190246839253462,
|
|
"grad_norm": 4.725672245025635,
|
|
"learning_rate": 5.042555678074251e-07,
|
|
"loss": 0.3101,
|
|
"step": 18190
|
|
},
|
|
{
|
|
"epoch": 2.1914509331727876,
|
|
"grad_norm": 4.756001949310303,
|
|
"learning_rate": 5.037301994995342e-07,
|
|
"loss": 0.2892,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 2.1926550270921132,
|
|
"grad_norm": 3.9560751914978027,
|
|
"learning_rate": 5.032048270730634e-07,
|
|
"loss": 0.3118,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 2.193859121011439,
|
|
"grad_norm": 4.681294918060303,
|
|
"learning_rate": 5.026794511080859e-07,
|
|
"loss": 0.306,
|
|
"step": 18220
|
|
},
|
|
{
|
|
"epoch": 2.1950632149307645,
|
|
"grad_norm": 5.220909118652344,
|
|
"learning_rate": 5.021540721846787e-07,
|
|
"loss": 0.3089,
|
|
"step": 18230
|
|
},
|
|
{
|
|
"epoch": 2.19626730885009,
|
|
"grad_norm": 4.095883369445801,
|
|
"learning_rate": 5.016286908829218e-07,
|
|
"loss": 0.3179,
|
|
"step": 18240
|
|
},
|
|
{
|
|
"epoch": 2.197471402769416,
|
|
"grad_norm": 4.485768795013428,
|
|
"learning_rate": 5.011033077828982e-07,
|
|
"loss": 0.3037,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 2.198675496688742,
|
|
"grad_norm": 4.850970268249512,
|
|
"learning_rate": 5.00577923464693e-07,
|
|
"loss": 0.3098,
|
|
"step": 18260
|
|
},
|
|
{
|
|
"epoch": 2.1998795906080675,
|
|
"grad_norm": 4.3276848793029785,
|
|
"learning_rate": 5.000525385083919e-07,
|
|
"loss": 0.3117,
|
|
"step": 18270
|
|
},
|
|
{
|
|
"epoch": 2.201083684527393,
|
|
"grad_norm": 4.39775276184082,
|
|
"learning_rate": 4.995271534940823e-07,
|
|
"loss": 0.3185,
|
|
"step": 18280
|
|
},
|
|
{
|
|
"epoch": 2.2022877784467187,
|
|
"grad_norm": 4.972282409667969,
|
|
"learning_rate": 4.99001769001851e-07,
|
|
"loss": 0.3131,
|
|
"step": 18290
|
|
},
|
|
{
|
|
"epoch": 2.2034918723660444,
|
|
"grad_norm": 4.450355052947998,
|
|
"learning_rate": 4.984763856117842e-07,
|
|
"loss": 0.3052,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 2.2046959662853705,
|
|
"grad_norm": 4.771944046020508,
|
|
"learning_rate": 4.979510039039674e-07,
|
|
"loss": 0.3087,
|
|
"step": 18310
|
|
},
|
|
{
|
|
"epoch": 2.205900060204696,
|
|
"grad_norm": 4.077056407928467,
|
|
"learning_rate": 4.974256244584838e-07,
|
|
"loss": 0.2991,
|
|
"step": 18320
|
|
},
|
|
{
|
|
"epoch": 2.2071041541240217,
|
|
"grad_norm": 4.485861778259277,
|
|
"learning_rate": 4.969002478554139e-07,
|
|
"loss": 0.3117,
|
|
"step": 18330
|
|
},
|
|
{
|
|
"epoch": 2.2083082480433474,
|
|
"grad_norm": 4.26900053024292,
|
|
"learning_rate": 4.963748746748358e-07,
|
|
"loss": 0.299,
|
|
"step": 18340
|
|
},
|
|
{
|
|
"epoch": 2.209512341962673,
|
|
"grad_norm": 5.258630752563477,
|
|
"learning_rate": 4.958495054968235e-07,
|
|
"loss": 0.3109,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 2.2107164358819986,
|
|
"grad_norm": 5.4050774574279785,
|
|
"learning_rate": 4.953241409014459e-07,
|
|
"loss": 0.3263,
|
|
"step": 18360
|
|
},
|
|
{
|
|
"epoch": 2.2119205298013247,
|
|
"grad_norm": 4.431223392486572,
|
|
"learning_rate": 4.947987814687679e-07,
|
|
"loss": 0.3131,
|
|
"step": 18370
|
|
},
|
|
{
|
|
"epoch": 2.2131246237206503,
|
|
"grad_norm": 5.015274524688721,
|
|
"learning_rate": 4.942734277788481e-07,
|
|
"loss": 0.3122,
|
|
"step": 18380
|
|
},
|
|
{
|
|
"epoch": 2.214328717639976,
|
|
"grad_norm": 5.460362911224365,
|
|
"learning_rate": 4.937480804117392e-07,
|
|
"loss": 0.3049,
|
|
"step": 18390
|
|
},
|
|
{
|
|
"epoch": 2.2155328115593016,
|
|
"grad_norm": 4.469453811645508,
|
|
"learning_rate": 4.93222739947486e-07,
|
|
"loss": 0.3109,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 2.2167369054786272,
|
|
"grad_norm": 4.560921669006348,
|
|
"learning_rate": 4.926974069661265e-07,
|
|
"loss": 0.3155,
|
|
"step": 18410
|
|
},
|
|
{
|
|
"epoch": 2.217940999397953,
|
|
"grad_norm": 4.696376800537109,
|
|
"learning_rate": 4.921720820476904e-07,
|
|
"loss": 0.3256,
|
|
"step": 18420
|
|
},
|
|
{
|
|
"epoch": 2.219145093317279,
|
|
"grad_norm": 4.80272102355957,
|
|
"learning_rate": 4.916467657721984e-07,
|
|
"loss": 0.3172,
|
|
"step": 18430
|
|
},
|
|
{
|
|
"epoch": 2.2203491872366046,
|
|
"grad_norm": 4.686549663543701,
|
|
"learning_rate": 4.911214587196612e-07,
|
|
"loss": 0.3044,
|
|
"step": 18440
|
|
},
|
|
{
|
|
"epoch": 2.22155328115593,
|
|
"grad_norm": 4.5141921043396,
|
|
"learning_rate": 4.9059616147008e-07,
|
|
"loss": 0.296,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 2.222757375075256,
|
|
"grad_norm": 4.311396598815918,
|
|
"learning_rate": 4.900708746034446e-07,
|
|
"loss": 0.3052,
|
|
"step": 18460
|
|
},
|
|
{
|
|
"epoch": 2.2239614689945815,
|
|
"grad_norm": 4.644687175750732,
|
|
"learning_rate": 4.895455986997341e-07,
|
|
"loss": 0.3091,
|
|
"step": 18470
|
|
},
|
|
{
|
|
"epoch": 2.225165562913907,
|
|
"grad_norm": 4.708485126495361,
|
|
"learning_rate": 4.890203343389144e-07,
|
|
"loss": 0.3126,
|
|
"step": 18480
|
|
},
|
|
{
|
|
"epoch": 2.226369656833233,
|
|
"grad_norm": 4.648069381713867,
|
|
"learning_rate": 4.884950821009394e-07,
|
|
"loss": 0.3303,
|
|
"step": 18490
|
|
},
|
|
{
|
|
"epoch": 2.227573750752559,
|
|
"grad_norm": 5.3636555671691895,
|
|
"learning_rate": 4.8796984256575e-07,
|
|
"loss": 0.308,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 2.2287778446718844,
|
|
"grad_norm": 4.061014652252197,
|
|
"learning_rate": 4.874446163132719e-07,
|
|
"loss": 0.2957,
|
|
"step": 18510
|
|
},
|
|
{
|
|
"epoch": 2.22998193859121,
|
|
"grad_norm": 6.169346332550049,
|
|
"learning_rate": 4.869194039234169e-07,
|
|
"loss": 0.318,
|
|
"step": 18520
|
|
},
|
|
{
|
|
"epoch": 2.2311860325105357,
|
|
"grad_norm": 4.9474053382873535,
|
|
"learning_rate": 4.863942059760817e-07,
|
|
"loss": 0.3112,
|
|
"step": 18530
|
|
},
|
|
{
|
|
"epoch": 2.2323901264298613,
|
|
"grad_norm": 4.635356903076172,
|
|
"learning_rate": 4.858690230511465e-07,
|
|
"loss": 0.3006,
|
|
"step": 18540
|
|
},
|
|
{
|
|
"epoch": 2.2335942203491874,
|
|
"grad_norm": 4.872357368469238,
|
|
"learning_rate": 4.85343855728475e-07,
|
|
"loss": 0.315,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 2.234798314268513,
|
|
"grad_norm": 4.909818172454834,
|
|
"learning_rate": 4.848187045879141e-07,
|
|
"loss": 0.2983,
|
|
"step": 18560
|
|
},
|
|
{
|
|
"epoch": 2.2360024081878387,
|
|
"grad_norm": 5.507841110229492,
|
|
"learning_rate": 4.842935702092923e-07,
|
|
"loss": 0.2919,
|
|
"step": 18570
|
|
},
|
|
{
|
|
"epoch": 2.2372065021071643,
|
|
"grad_norm": 4.438649654388428,
|
|
"learning_rate": 4.837684531724202e-07,
|
|
"loss": 0.3012,
|
|
"step": 18580
|
|
},
|
|
{
|
|
"epoch": 2.23841059602649,
|
|
"grad_norm": 4.70427942276001,
|
|
"learning_rate": 4.832433540570885e-07,
|
|
"loss": 0.3076,
|
|
"step": 18590
|
|
},
|
|
{
|
|
"epoch": 2.2396146899458156,
|
|
"grad_norm": 4.81848669052124,
|
|
"learning_rate": 4.827182734430687e-07,
|
|
"loss": 0.3021,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 2.2408187838651417,
|
|
"grad_norm": 4.911860466003418,
|
|
"learning_rate": 4.821932119101116e-07,
|
|
"loss": 0.3109,
|
|
"step": 18610
|
|
},
|
|
{
|
|
"epoch": 2.2420228777844673,
|
|
"grad_norm": 5.092623233795166,
|
|
"learning_rate": 4.816681700379472e-07,
|
|
"loss": 0.3243,
|
|
"step": 18620
|
|
},
|
|
{
|
|
"epoch": 2.243226971703793,
|
|
"grad_norm": 4.224728584289551,
|
|
"learning_rate": 4.811431484062832e-07,
|
|
"loss": 0.3128,
|
|
"step": 18630
|
|
},
|
|
{
|
|
"epoch": 2.2444310656231186,
|
|
"grad_norm": 4.93331241607666,
|
|
"learning_rate": 4.806181475948057e-07,
|
|
"loss": 0.3147,
|
|
"step": 18640
|
|
},
|
|
{
|
|
"epoch": 2.245635159542444,
|
|
"grad_norm": 6.220354080200195,
|
|
"learning_rate": 4.800931681831773e-07,
|
|
"loss": 0.2964,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 2.24683925346177,
|
|
"grad_norm": 5.004923343658447,
|
|
"learning_rate": 4.795682107510375e-07,
|
|
"loss": 0.3172,
|
|
"step": 18660
|
|
},
|
|
{
|
|
"epoch": 2.248043347381096,
|
|
"grad_norm": 5.164400577545166,
|
|
"learning_rate": 4.790432758780005e-07,
|
|
"loss": 0.3063,
|
|
"step": 18670
|
|
},
|
|
{
|
|
"epoch": 2.2492474413004215,
|
|
"grad_norm": 5.098756313323975,
|
|
"learning_rate": 4.785183641436569e-07,
|
|
"loss": 0.3045,
|
|
"step": 18680
|
|
},
|
|
{
|
|
"epoch": 2.250451535219747,
|
|
"grad_norm": 4.363048553466797,
|
|
"learning_rate": 4.779934761275706e-07,
|
|
"loss": 0.3084,
|
|
"step": 18690
|
|
},
|
|
{
|
|
"epoch": 2.251655629139073,
|
|
"grad_norm": 5.233163833618164,
|
|
"learning_rate": 4.774686124092804e-07,
|
|
"loss": 0.316,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 2.2528597230583984,
|
|
"grad_norm": 4.870039463043213,
|
|
"learning_rate": 4.769437735682972e-07,
|
|
"loss": 0.3008,
|
|
"step": 18710
|
|
},
|
|
{
|
|
"epoch": 2.254063816977724,
|
|
"grad_norm": 5.44446325302124,
|
|
"learning_rate": 4.7641896018410506e-07,
|
|
"loss": 0.3139,
|
|
"step": 18720
|
|
},
|
|
{
|
|
"epoch": 2.25526791089705,
|
|
"grad_norm": 4.950879096984863,
|
|
"learning_rate": 4.758941728361599e-07,
|
|
"loss": 0.3108,
|
|
"step": 18730
|
|
},
|
|
{
|
|
"epoch": 2.2564720048163758,
|
|
"grad_norm": 4.887548446655273,
|
|
"learning_rate": 4.7536941210388895e-07,
|
|
"loss": 0.3195,
|
|
"step": 18740
|
|
},
|
|
{
|
|
"epoch": 2.2576760987357014,
|
|
"grad_norm": 6.180630207061768,
|
|
"learning_rate": 4.7484467856668946e-07,
|
|
"loss": 0.3112,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 2.258880192655027,
|
|
"grad_norm": 5.481302738189697,
|
|
"learning_rate": 4.743199728039294e-07,
|
|
"loss": 0.3124,
|
|
"step": 18760
|
|
},
|
|
{
|
|
"epoch": 2.2600842865743527,
|
|
"grad_norm": 4.6261677742004395,
|
|
"learning_rate": 4.737952953949457e-07,
|
|
"loss": 0.3058,
|
|
"step": 18770
|
|
},
|
|
{
|
|
"epoch": 2.2612883804936788,
|
|
"grad_norm": 4.097585201263428,
|
|
"learning_rate": 4.732706469190442e-07,
|
|
"loss": 0.3271,
|
|
"step": 18780
|
|
},
|
|
{
|
|
"epoch": 2.2624924744130044,
|
|
"grad_norm": 5.000282287597656,
|
|
"learning_rate": 4.7274602795549836e-07,
|
|
"loss": 0.317,
|
|
"step": 18790
|
|
},
|
|
{
|
|
"epoch": 2.26369656833233,
|
|
"grad_norm": 4.3350958824157715,
|
|
"learning_rate": 4.7222143908354943e-07,
|
|
"loss": 0.3083,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 2.2649006622516556,
|
|
"grad_norm": 4.336573123931885,
|
|
"learning_rate": 4.7169688088240555e-07,
|
|
"loss": 0.3139,
|
|
"step": 18810
|
|
},
|
|
{
|
|
"epoch": 2.2661047561709813,
|
|
"grad_norm": 4.1952900886535645,
|
|
"learning_rate": 4.7117235393124064e-07,
|
|
"loss": 0.294,
|
|
"step": 18820
|
|
},
|
|
{
|
|
"epoch": 2.267308850090307,
|
|
"grad_norm": 5.418072700500488,
|
|
"learning_rate": 4.7064785880919414e-07,
|
|
"loss": 0.3185,
|
|
"step": 18830
|
|
},
|
|
{
|
|
"epoch": 2.2685129440096325,
|
|
"grad_norm": 5.001430511474609,
|
|
"learning_rate": 4.701233960953708e-07,
|
|
"loss": 0.3108,
|
|
"step": 18840
|
|
},
|
|
{
|
|
"epoch": 2.2697170379289586,
|
|
"grad_norm": 5.28980827331543,
|
|
"learning_rate": 4.69598966368839e-07,
|
|
"loss": 0.3149,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 2.2709211318482843,
|
|
"grad_norm": 5.221833229064941,
|
|
"learning_rate": 4.6907457020863095e-07,
|
|
"loss": 0.3106,
|
|
"step": 18860
|
|
},
|
|
{
|
|
"epoch": 2.27212522576761,
|
|
"grad_norm": 4.259886264801025,
|
|
"learning_rate": 4.6855020819374196e-07,
|
|
"loss": 0.3159,
|
|
"step": 18870
|
|
},
|
|
{
|
|
"epoch": 2.2733293196869355,
|
|
"grad_norm": 5.210353851318359,
|
|
"learning_rate": 4.680258809031293e-07,
|
|
"loss": 0.306,
|
|
"step": 18880
|
|
},
|
|
{
|
|
"epoch": 2.274533413606261,
|
|
"grad_norm": 4.933556079864502,
|
|
"learning_rate": 4.6750158891571246e-07,
|
|
"loss": 0.2988,
|
|
"step": 18890
|
|
},
|
|
{
|
|
"epoch": 2.2757375075255872,
|
|
"grad_norm": 5.060166358947754,
|
|
"learning_rate": 4.669773328103712e-07,
|
|
"loss": 0.3298,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 2.276941601444913,
|
|
"grad_norm": 5.316260814666748,
|
|
"learning_rate": 4.664531131659461e-07,
|
|
"loss": 0.3193,
|
|
"step": 18910
|
|
},
|
|
{
|
|
"epoch": 2.2781456953642385,
|
|
"grad_norm": 4.371904373168945,
|
|
"learning_rate": 4.659289305612375e-07,
|
|
"loss": 0.3181,
|
|
"step": 18920
|
|
},
|
|
{
|
|
"epoch": 2.279349789283564,
|
|
"grad_norm": 4.114840984344482,
|
|
"learning_rate": 4.65404785575005e-07,
|
|
"loss": 0.3089,
|
|
"step": 18930
|
|
},
|
|
{
|
|
"epoch": 2.2805538832028898,
|
|
"grad_norm": 4.94135046005249,
|
|
"learning_rate": 4.64880678785966e-07,
|
|
"loss": 0.3158,
|
|
"step": 18940
|
|
},
|
|
{
|
|
"epoch": 2.2817579771222154,
|
|
"grad_norm": 5.033153057098389,
|
|
"learning_rate": 4.6435661077279633e-07,
|
|
"loss": 0.3087,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 2.282962071041541,
|
|
"grad_norm": 4.434708595275879,
|
|
"learning_rate": 4.638325821141289e-07,
|
|
"loss": 0.3031,
|
|
"step": 18960
|
|
},
|
|
{
|
|
"epoch": 2.284166164960867,
|
|
"grad_norm": 4.674195766448975,
|
|
"learning_rate": 4.6330859338855325e-07,
|
|
"loss": 0.3227,
|
|
"step": 18970
|
|
},
|
|
{
|
|
"epoch": 2.2853702588801927,
|
|
"grad_norm": 4.624505043029785,
|
|
"learning_rate": 4.6278464517461434e-07,
|
|
"loss": 0.2994,
|
|
"step": 18980
|
|
},
|
|
{
|
|
"epoch": 2.2865743527995184,
|
|
"grad_norm": 4.435290336608887,
|
|
"learning_rate": 4.622607380508129e-07,
|
|
"loss": 0.3125,
|
|
"step": 18990
|
|
},
|
|
{
|
|
"epoch": 2.287778446718844,
|
|
"grad_norm": 4.538943767547607,
|
|
"learning_rate": 4.6173687259560417e-07,
|
|
"loss": 0.3166,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 2.2889825406381696,
|
|
"grad_norm": 5.1769890785217285,
|
|
"learning_rate": 4.6121304938739754e-07,
|
|
"loss": 0.2978,
|
|
"step": 19010
|
|
},
|
|
{
|
|
"epoch": 2.2901866345574957,
|
|
"grad_norm": 4.897463321685791,
|
|
"learning_rate": 4.606892690045551e-07,
|
|
"loss": 0.2857,
|
|
"step": 19020
|
|
},
|
|
{
|
|
"epoch": 2.2913907284768213,
|
|
"grad_norm": 5.332199573516846,
|
|
"learning_rate": 4.601655320253924e-07,
|
|
"loss": 0.3082,
|
|
"step": 19030
|
|
},
|
|
{
|
|
"epoch": 2.292594822396147,
|
|
"grad_norm": 4.842720985412598,
|
|
"learning_rate": 4.5964183902817677e-07,
|
|
"loss": 0.3003,
|
|
"step": 19040
|
|
},
|
|
{
|
|
"epoch": 2.2937989163154726,
|
|
"grad_norm": 4.277060031890869,
|
|
"learning_rate": 4.5911819059112724e-07,
|
|
"loss": 0.3027,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 2.2950030102347982,
|
|
"grad_norm": 4.499503135681152,
|
|
"learning_rate": 4.5859458729241287e-07,
|
|
"loss": 0.311,
|
|
"step": 19060
|
|
},
|
|
{
|
|
"epoch": 2.296207104154124,
|
|
"grad_norm": 5.2861762046813965,
|
|
"learning_rate": 4.580710297101537e-07,
|
|
"loss": 0.3197,
|
|
"step": 19070
|
|
},
|
|
{
|
|
"epoch": 2.2974111980734495,
|
|
"grad_norm": 4.3773112297058105,
|
|
"learning_rate": 4.5754751842241905e-07,
|
|
"loss": 0.3113,
|
|
"step": 19080
|
|
},
|
|
{
|
|
"epoch": 2.2986152919927756,
|
|
"grad_norm": 4.447787284851074,
|
|
"learning_rate": 4.5702405400722703e-07,
|
|
"loss": 0.3037,
|
|
"step": 19090
|
|
},
|
|
{
|
|
"epoch": 2.299819385912101,
|
|
"grad_norm": 5.014771938323975,
|
|
"learning_rate": 4.5650063704254395e-07,
|
|
"loss": 0.3018,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 2.301023479831427,
|
|
"grad_norm": 4.333285331726074,
|
|
"learning_rate": 4.55977268106284e-07,
|
|
"loss": 0.3176,
|
|
"step": 19110
|
|
},
|
|
{
|
|
"epoch": 2.3022275737507525,
|
|
"grad_norm": 6.291433334350586,
|
|
"learning_rate": 4.5545394777630786e-07,
|
|
"loss": 0.3335,
|
|
"step": 19120
|
|
},
|
|
{
|
|
"epoch": 2.303431667670078,
|
|
"grad_norm": 4.657562255859375,
|
|
"learning_rate": 4.5493067663042325e-07,
|
|
"loss": 0.3059,
|
|
"step": 19130
|
|
},
|
|
{
|
|
"epoch": 2.304635761589404,
|
|
"grad_norm": 4.472227573394775,
|
|
"learning_rate": 4.544074552463829e-07,
|
|
"loss": 0.3074,
|
|
"step": 19140
|
|
},
|
|
{
|
|
"epoch": 2.30583985550873,
|
|
"grad_norm": 5.011964797973633,
|
|
"learning_rate": 4.5388428420188486e-07,
|
|
"loss": 0.3036,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 2.3070439494280555,
|
|
"grad_norm": 5.620879173278809,
|
|
"learning_rate": 4.533611640745718e-07,
|
|
"loss": 0.31,
|
|
"step": 19160
|
|
},
|
|
{
|
|
"epoch": 2.308248043347381,
|
|
"grad_norm": 5.25240421295166,
|
|
"learning_rate": 4.5283809544202996e-07,
|
|
"loss": 0.328,
|
|
"step": 19170
|
|
},
|
|
{
|
|
"epoch": 2.3094521372667067,
|
|
"grad_norm": 4.3917317390441895,
|
|
"learning_rate": 4.5231507888178856e-07,
|
|
"loss": 0.3129,
|
|
"step": 19180
|
|
},
|
|
{
|
|
"epoch": 2.3106562311860324,
|
|
"grad_norm": 4.568994998931885,
|
|
"learning_rate": 4.517921149713196e-07,
|
|
"loss": 0.3057,
|
|
"step": 19190
|
|
},
|
|
{
|
|
"epoch": 2.311860325105358,
|
|
"grad_norm": 4.5026726722717285,
|
|
"learning_rate": 4.512692042880372e-07,
|
|
"loss": 0.2997,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 2.313064419024684,
|
|
"grad_norm": 3.986133098602295,
|
|
"learning_rate": 4.507463474092959e-07,
|
|
"loss": 0.2952,
|
|
"step": 19210
|
|
},
|
|
{
|
|
"epoch": 2.3142685129440097,
|
|
"grad_norm": 4.367317199707031,
|
|
"learning_rate": 4.5022354491239145e-07,
|
|
"loss": 0.3036,
|
|
"step": 19220
|
|
},
|
|
{
|
|
"epoch": 2.3154726068633353,
|
|
"grad_norm": 5.649072170257568,
|
|
"learning_rate": 4.497007973745595e-07,
|
|
"loss": 0.3173,
|
|
"step": 19230
|
|
},
|
|
{
|
|
"epoch": 2.316676700782661,
|
|
"grad_norm": 5.655643463134766,
|
|
"learning_rate": 4.4917810537297514e-07,
|
|
"loss": 0.327,
|
|
"step": 19240
|
|
},
|
|
{
|
|
"epoch": 2.3178807947019866,
|
|
"grad_norm": 5.137732982635498,
|
|
"learning_rate": 4.4865546948475147e-07,
|
|
"loss": 0.3065,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 2.3190848886213127,
|
|
"grad_norm": 4.715443134307861,
|
|
"learning_rate": 4.481328902869404e-07,
|
|
"loss": 0.3207,
|
|
"step": 19260
|
|
},
|
|
{
|
|
"epoch": 2.3202889825406383,
|
|
"grad_norm": 3.9082722663879395,
|
|
"learning_rate": 4.476103683565308e-07,
|
|
"loss": 0.3074,
|
|
"step": 19270
|
|
},
|
|
{
|
|
"epoch": 2.321493076459964,
|
|
"grad_norm": 4.448252201080322,
|
|
"learning_rate": 4.4708790427044887e-07,
|
|
"loss": 0.3063,
|
|
"step": 19280
|
|
},
|
|
{
|
|
"epoch": 2.3226971703792896,
|
|
"grad_norm": 4.547604560852051,
|
|
"learning_rate": 4.465654986055559e-07,
|
|
"loss": 0.3098,
|
|
"step": 19290
|
|
},
|
|
{
|
|
"epoch": 2.323901264298615,
|
|
"grad_norm": 5.669996738433838,
|
|
"learning_rate": 4.460431519386497e-07,
|
|
"loss": 0.3188,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 2.325105358217941,
|
|
"grad_norm": 5.271092891693115,
|
|
"learning_rate": 4.4552086484646246e-07,
|
|
"loss": 0.2948,
|
|
"step": 19310
|
|
},
|
|
{
|
|
"epoch": 2.3263094521372665,
|
|
"grad_norm": 5.6719231605529785,
|
|
"learning_rate": 4.4499863790566087e-07,
|
|
"loss": 0.3089,
|
|
"step": 19320
|
|
},
|
|
{
|
|
"epoch": 2.3275135460565926,
|
|
"grad_norm": 5.9080657958984375,
|
|
"learning_rate": 4.444764716928447e-07,
|
|
"loss": 0.3195,
|
|
"step": 19330
|
|
},
|
|
{
|
|
"epoch": 2.328717639975918,
|
|
"grad_norm": 5.201897144317627,
|
|
"learning_rate": 4.43954366784547e-07,
|
|
"loss": 0.2979,
|
|
"step": 19340
|
|
},
|
|
{
|
|
"epoch": 2.329921733895244,
|
|
"grad_norm": 4.319961071014404,
|
|
"learning_rate": 4.4343232375723343e-07,
|
|
"loss": 0.3059,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 2.3311258278145695,
|
|
"grad_norm": 4.492523670196533,
|
|
"learning_rate": 4.4291034318730086e-07,
|
|
"loss": 0.2941,
|
|
"step": 19360
|
|
},
|
|
{
|
|
"epoch": 2.332329921733895,
|
|
"grad_norm": 5.589833736419678,
|
|
"learning_rate": 4.4238842565107715e-07,
|
|
"loss": 0.3089,
|
|
"step": 19370
|
|
},
|
|
{
|
|
"epoch": 2.333534015653221,
|
|
"grad_norm": 4.234698295593262,
|
|
"learning_rate": 4.4186657172482105e-07,
|
|
"loss": 0.3012,
|
|
"step": 19380
|
|
},
|
|
{
|
|
"epoch": 2.334738109572547,
|
|
"grad_norm": 4.777867317199707,
|
|
"learning_rate": 4.413447819847206e-07,
|
|
"loss": 0.3083,
|
|
"step": 19390
|
|
},
|
|
{
|
|
"epoch": 2.3359422034918724,
|
|
"grad_norm": 5.0551533699035645,
|
|
"learning_rate": 4.4082305700689334e-07,
|
|
"loss": 0.3056,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 2.337146297411198,
|
|
"grad_norm": 4.407803535461426,
|
|
"learning_rate": 4.40301397367385e-07,
|
|
"loss": 0.3137,
|
|
"step": 19410
|
|
},
|
|
{
|
|
"epoch": 2.3383503913305237,
|
|
"grad_norm": 4.408458709716797,
|
|
"learning_rate": 4.3977980364216925e-07,
|
|
"loss": 0.3234,
|
|
"step": 19420
|
|
},
|
|
{
|
|
"epoch": 2.3395544852498493,
|
|
"grad_norm": 5.100025653839111,
|
|
"learning_rate": 4.392582764071471e-07,
|
|
"loss": 0.3053,
|
|
"step": 19430
|
|
},
|
|
{
|
|
"epoch": 2.340758579169175,
|
|
"grad_norm": 4.870809078216553,
|
|
"learning_rate": 4.3873681623814634e-07,
|
|
"loss": 0.2973,
|
|
"step": 19440
|
|
},
|
|
{
|
|
"epoch": 2.341962673088501,
|
|
"grad_norm": 5.078246116638184,
|
|
"learning_rate": 4.3821542371092e-07,
|
|
"loss": 0.3042,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 2.3431667670078267,
|
|
"grad_norm": 4.400288105010986,
|
|
"learning_rate": 4.3769409940114706e-07,
|
|
"loss": 0.3012,
|
|
"step": 19460
|
|
},
|
|
{
|
|
"epoch": 2.3443708609271523,
|
|
"grad_norm": 5.289750576019287,
|
|
"learning_rate": 4.3717284388443123e-07,
|
|
"loss": 0.3149,
|
|
"step": 19470
|
|
},
|
|
{
|
|
"epoch": 2.345574954846478,
|
|
"grad_norm": 4.133148670196533,
|
|
"learning_rate": 4.3665165773629955e-07,
|
|
"loss": 0.311,
|
|
"step": 19480
|
|
},
|
|
{
|
|
"epoch": 2.3467790487658036,
|
|
"grad_norm": 4.689704418182373,
|
|
"learning_rate": 4.361305415322032e-07,
|
|
"loss": 0.2985,
|
|
"step": 19490
|
|
},
|
|
{
|
|
"epoch": 2.3479831426851296,
|
|
"grad_norm": 5.3425822257995605,
|
|
"learning_rate": 4.35609495847516e-07,
|
|
"loss": 0.3252,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 2.3491872366044553,
|
|
"grad_norm": 4.8020524978637695,
|
|
"learning_rate": 4.350885212575338e-07,
|
|
"loss": 0.3017,
|
|
"step": 19510
|
|
},
|
|
{
|
|
"epoch": 2.350391330523781,
|
|
"grad_norm": 3.823481798171997,
|
|
"learning_rate": 4.345676183374737e-07,
|
|
"loss": 0.3163,
|
|
"step": 19520
|
|
},
|
|
{
|
|
"epoch": 2.3515954244431065,
|
|
"grad_norm": 5.067866802215576,
|
|
"learning_rate": 4.3404678766247393e-07,
|
|
"loss": 0.2985,
|
|
"step": 19530
|
|
},
|
|
{
|
|
"epoch": 2.352799518362432,
|
|
"grad_norm": 4.470125198364258,
|
|
"learning_rate": 4.335260298075931e-07,
|
|
"loss": 0.3215,
|
|
"step": 19540
|
|
},
|
|
{
|
|
"epoch": 2.354003612281758,
|
|
"grad_norm": 4.854072093963623,
|
|
"learning_rate": 4.330053453478094e-07,
|
|
"loss": 0.3139,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 2.3552077062010834,
|
|
"grad_norm": 4.061732292175293,
|
|
"learning_rate": 4.3248473485801943e-07,
|
|
"loss": 0.2944,
|
|
"step": 19560
|
|
},
|
|
{
|
|
"epoch": 2.3564118001204095,
|
|
"grad_norm": 4.881399154663086,
|
|
"learning_rate": 4.319641989130387e-07,
|
|
"loss": 0.2958,
|
|
"step": 19570
|
|
},
|
|
{
|
|
"epoch": 2.357615894039735,
|
|
"grad_norm": 4.650146007537842,
|
|
"learning_rate": 4.3144373808760026e-07,
|
|
"loss": 0.3092,
|
|
"step": 19580
|
|
},
|
|
{
|
|
"epoch": 2.358819987959061,
|
|
"grad_norm": 5.014580249786377,
|
|
"learning_rate": 4.3092335295635444e-07,
|
|
"loss": 0.3143,
|
|
"step": 19590
|
|
},
|
|
{
|
|
"epoch": 2.3600240818783864,
|
|
"grad_norm": 5.064713478088379,
|
|
"learning_rate": 4.304030440938673e-07,
|
|
"loss": 0.3106,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 2.361228175797712,
|
|
"grad_norm": 4.044290065765381,
|
|
"learning_rate": 4.298828120746213e-07,
|
|
"loss": 0.3024,
|
|
"step": 19610
|
|
},
|
|
{
|
|
"epoch": 2.362432269717038,
|
|
"grad_norm": 5.447383403778076,
|
|
"learning_rate": 4.29362657473014e-07,
|
|
"loss": 0.3147,
|
|
"step": 19620
|
|
},
|
|
{
|
|
"epoch": 2.3636363636363638,
|
|
"grad_norm": 4.447105884552002,
|
|
"learning_rate": 4.2884258086335745e-07,
|
|
"loss": 0.303,
|
|
"step": 19630
|
|
},
|
|
{
|
|
"epoch": 2.3648404575556894,
|
|
"grad_norm": 4.2513957023620605,
|
|
"learning_rate": 4.2832258281987724e-07,
|
|
"loss": 0.3107,
|
|
"step": 19640
|
|
},
|
|
{
|
|
"epoch": 2.366044551475015,
|
|
"grad_norm": 5.619822025299072,
|
|
"learning_rate": 4.2780266391671277e-07,
|
|
"loss": 0.3212,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 2.3672486453943407,
|
|
"grad_norm": 5.056023597717285,
|
|
"learning_rate": 4.272828247279155e-07,
|
|
"loss": 0.298,
|
|
"step": 19660
|
|
},
|
|
{
|
|
"epoch": 2.3684527393136663,
|
|
"grad_norm": 4.584505558013916,
|
|
"learning_rate": 4.267630658274495e-07,
|
|
"loss": 0.3069,
|
|
"step": 19670
|
|
},
|
|
{
|
|
"epoch": 2.3696568332329924,
|
|
"grad_norm": 5.227287292480469,
|
|
"learning_rate": 4.2624338778918964e-07,
|
|
"loss": 0.296,
|
|
"step": 19680
|
|
},
|
|
{
|
|
"epoch": 2.370860927152318,
|
|
"grad_norm": 4.425261974334717,
|
|
"learning_rate": 4.2572379118692155e-07,
|
|
"loss": 0.3093,
|
|
"step": 19690
|
|
},
|
|
{
|
|
"epoch": 2.3720650210716436,
|
|
"grad_norm": 4.10771369934082,
|
|
"learning_rate": 4.2520427659434134e-07,
|
|
"loss": 0.295,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 2.3732691149909693,
|
|
"grad_norm": 4.561648845672607,
|
|
"learning_rate": 4.2468484458505456e-07,
|
|
"loss": 0.3006,
|
|
"step": 19710
|
|
},
|
|
{
|
|
"epoch": 2.374473208910295,
|
|
"grad_norm": 3.9050345420837402,
|
|
"learning_rate": 4.241654957325748e-07,
|
|
"loss": 0.3016,
|
|
"step": 19720
|
|
},
|
|
{
|
|
"epoch": 2.3756773028296205,
|
|
"grad_norm": 5.106329917907715,
|
|
"learning_rate": 4.2364623061032477e-07,
|
|
"loss": 0.3043,
|
|
"step": 19730
|
|
},
|
|
{
|
|
"epoch": 2.3768813967489466,
|
|
"grad_norm": 6.0447211265563965,
|
|
"learning_rate": 4.231270497916343e-07,
|
|
"loss": 0.3114,
|
|
"step": 19740
|
|
},
|
|
{
|
|
"epoch": 2.3780854906682722,
|
|
"grad_norm": 4.171956539154053,
|
|
"learning_rate": 4.2260795384974037e-07,
|
|
"loss": 0.3033,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 2.379289584587598,
|
|
"grad_norm": 4.500546932220459,
|
|
"learning_rate": 4.2208894335778573e-07,
|
|
"loss": 0.3066,
|
|
"step": 19760
|
|
},
|
|
{
|
|
"epoch": 2.3804936785069235,
|
|
"grad_norm": 5.30014181137085,
|
|
"learning_rate": 4.215700188888192e-07,
|
|
"loss": 0.3008,
|
|
"step": 19770
|
|
},
|
|
{
|
|
"epoch": 2.381697772426249,
|
|
"grad_norm": 4.23181676864624,
|
|
"learning_rate": 4.2105118101579497e-07,
|
|
"loss": 0.2925,
|
|
"step": 19780
|
|
},
|
|
{
|
|
"epoch": 2.3829018663455748,
|
|
"grad_norm": 4.446700096130371,
|
|
"learning_rate": 4.205324303115706e-07,
|
|
"loss": 0.3142,
|
|
"step": 19790
|
|
},
|
|
{
|
|
"epoch": 2.384105960264901,
|
|
"grad_norm": 5.344078063964844,
|
|
"learning_rate": 4.2001376734890824e-07,
|
|
"loss": 0.3053,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 2.3853100541842265,
|
|
"grad_norm": 5.066955089569092,
|
|
"learning_rate": 4.1949519270047295e-07,
|
|
"loss": 0.3071,
|
|
"step": 19810
|
|
},
|
|
{
|
|
"epoch": 2.386514148103552,
|
|
"grad_norm": 4.834653377532959,
|
|
"learning_rate": 4.1897670693883255e-07,
|
|
"loss": 0.3039,
|
|
"step": 19820
|
|
},
|
|
{
|
|
"epoch": 2.3877182420228777,
|
|
"grad_norm": 4.982695579528809,
|
|
"learning_rate": 4.1845831063645586e-07,
|
|
"loss": 0.3007,
|
|
"step": 19830
|
|
},
|
|
{
|
|
"epoch": 2.3889223359422034,
|
|
"grad_norm": 5.261125564575195,
|
|
"learning_rate": 4.1794000436571374e-07,
|
|
"loss": 0.3121,
|
|
"step": 19840
|
|
},
|
|
{
|
|
"epoch": 2.390126429861529,
|
|
"grad_norm": 5.1389570236206055,
|
|
"learning_rate": 4.174217886988775e-07,
|
|
"loss": 0.3058,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 2.391330523780855,
|
|
"grad_norm": 4.307366371154785,
|
|
"learning_rate": 4.169036642081183e-07,
|
|
"loss": 0.3008,
|
|
"step": 19860
|
|
},
|
|
{
|
|
"epoch": 2.3925346177001807,
|
|
"grad_norm": 5.068446636199951,
|
|
"learning_rate": 4.163856314655064e-07,
|
|
"loss": 0.3145,
|
|
"step": 19870
|
|
},
|
|
{
|
|
"epoch": 2.3937387116195064,
|
|
"grad_norm": 5.377712249755859,
|
|
"learning_rate": 4.1586769104301124e-07,
|
|
"loss": 0.3047,
|
|
"step": 19880
|
|
},
|
|
{
|
|
"epoch": 2.394942805538832,
|
|
"grad_norm": 5.161853313446045,
|
|
"learning_rate": 4.153498435124999e-07,
|
|
"loss": 0.3111,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 2.3961468994581576,
|
|
"grad_norm": 4.217031002044678,
|
|
"learning_rate": 4.1483208944573745e-07,
|
|
"loss": 0.2886,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 2.3973509933774833,
|
|
"grad_norm": 4.948873996734619,
|
|
"learning_rate": 4.1431442941438486e-07,
|
|
"loss": 0.3138,
|
|
"step": 19910
|
|
},
|
|
{
|
|
"epoch": 2.3985550872968093,
|
|
"grad_norm": 5.304249286651611,
|
|
"learning_rate": 4.1379686399000016e-07,
|
|
"loss": 0.3013,
|
|
"step": 19920
|
|
},
|
|
{
|
|
"epoch": 2.399759181216135,
|
|
"grad_norm": 5.372039318084717,
|
|
"learning_rate": 4.132793937440365e-07,
|
|
"loss": 0.316,
|
|
"step": 19930
|
|
},
|
|
{
|
|
"epoch": 2.4009632751354606,
|
|
"grad_norm": 5.1526265144348145,
|
|
"learning_rate": 4.127620192478421e-07,
|
|
"loss": 0.3177,
|
|
"step": 19940
|
|
},
|
|
{
|
|
"epoch": 2.4021673690547862,
|
|
"grad_norm": 4.650707244873047,
|
|
"learning_rate": 4.122447410726591e-07,
|
|
"loss": 0.3014,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 2.403371462974112,
|
|
"grad_norm": 4.576737403869629,
|
|
"learning_rate": 4.1172755978962395e-07,
|
|
"loss": 0.3069,
|
|
"step": 19960
|
|
},
|
|
{
|
|
"epoch": 2.4045755568934375,
|
|
"grad_norm": 5.201079845428467,
|
|
"learning_rate": 4.1121047596976534e-07,
|
|
"loss": 0.3151,
|
|
"step": 19970
|
|
},
|
|
{
|
|
"epoch": 2.4057796508127636,
|
|
"grad_norm": 4.859030723571777,
|
|
"learning_rate": 4.1069349018400503e-07,
|
|
"loss": 0.298,
|
|
"step": 19980
|
|
},
|
|
{
|
|
"epoch": 2.406983744732089,
|
|
"grad_norm": 5.44400691986084,
|
|
"learning_rate": 4.101766030031562e-07,
|
|
"loss": 0.303,
|
|
"step": 19990
|
|
},
|
|
{
|
|
"epoch": 2.408187838651415,
|
|
"grad_norm": 4.533078193664551,
|
|
"learning_rate": 4.0965981499792307e-07,
|
|
"loss": 0.3055,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 2.4093919325707405,
|
|
"grad_norm": 5.147141456604004,
|
|
"learning_rate": 4.0914312673890054e-07,
|
|
"loss": 0.3141,
|
|
"step": 20010
|
|
},
|
|
{
|
|
"epoch": 2.410596026490066,
|
|
"grad_norm": 4.530623912811279,
|
|
"learning_rate": 4.0862653879657373e-07,
|
|
"loss": 0.3205,
|
|
"step": 20020
|
|
},
|
|
{
|
|
"epoch": 2.411800120409392,
|
|
"grad_norm": 4.804474830627441,
|
|
"learning_rate": 4.08110051741316e-07,
|
|
"loss": 0.3113,
|
|
"step": 20030
|
|
},
|
|
{
|
|
"epoch": 2.413004214328718,
|
|
"grad_norm": 4.642183780670166,
|
|
"learning_rate": 4.0759366614339015e-07,
|
|
"loss": 0.3115,
|
|
"step": 20040
|
|
},
|
|
{
|
|
"epoch": 2.4142083082480434,
|
|
"grad_norm": 4.975921630859375,
|
|
"learning_rate": 4.0707738257294685e-07,
|
|
"loss": 0.3165,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 2.415412402167369,
|
|
"grad_norm": 4.621540546417236,
|
|
"learning_rate": 4.065612016000241e-07,
|
|
"loss": 0.2914,
|
|
"step": 20060
|
|
},
|
|
{
|
|
"epoch": 2.4166164960866947,
|
|
"grad_norm": 4.194451808929443,
|
|
"learning_rate": 4.060451237945462e-07,
|
|
"loss": 0.3035,
|
|
"step": 20070
|
|
},
|
|
{
|
|
"epoch": 2.4178205900060203,
|
|
"grad_norm": 4.82729959487915,
|
|
"learning_rate": 4.05529149726324e-07,
|
|
"loss": 0.3068,
|
|
"step": 20080
|
|
},
|
|
{
|
|
"epoch": 2.419024683925346,
|
|
"grad_norm": 5.17459774017334,
|
|
"learning_rate": 4.050132799650538e-07,
|
|
"loss": 0.3092,
|
|
"step": 20090
|
|
},
|
|
{
|
|
"epoch": 2.420228777844672,
|
|
"grad_norm": 5.787187576293945,
|
|
"learning_rate": 4.0449751508031666e-07,
|
|
"loss": 0.3168,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 2.4214328717639977,
|
|
"grad_norm": 4.466209411621094,
|
|
"learning_rate": 4.039818556415775e-07,
|
|
"loss": 0.296,
|
|
"step": 20110
|
|
},
|
|
{
|
|
"epoch": 2.4226369656833233,
|
|
"grad_norm": 4.929852485656738,
|
|
"learning_rate": 4.034663022181852e-07,
|
|
"loss": 0.3135,
|
|
"step": 20120
|
|
},
|
|
{
|
|
"epoch": 2.423841059602649,
|
|
"grad_norm": 4.523739337921143,
|
|
"learning_rate": 4.029508553793718e-07,
|
|
"loss": 0.288,
|
|
"step": 20130
|
|
},
|
|
{
|
|
"epoch": 2.4250451535219746,
|
|
"grad_norm": 7.000367641448975,
|
|
"learning_rate": 4.0243551569425095e-07,
|
|
"loss": 0.3105,
|
|
"step": 20140
|
|
},
|
|
{
|
|
"epoch": 2.4262492474413007,
|
|
"grad_norm": 6.229575157165527,
|
|
"learning_rate": 4.019202837318185e-07,
|
|
"loss": 0.3166,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 2.4274533413606263,
|
|
"grad_norm": 5.243337154388428,
|
|
"learning_rate": 4.0140516006095134e-07,
|
|
"loss": 0.3046,
|
|
"step": 20160
|
|
},
|
|
{
|
|
"epoch": 2.428657435279952,
|
|
"grad_norm": 4.598159313201904,
|
|
"learning_rate": 4.0089014525040685e-07,
|
|
"loss": 0.3064,
|
|
"step": 20170
|
|
},
|
|
{
|
|
"epoch": 2.4298615291992776,
|
|
"grad_norm": 4.482394695281982,
|
|
"learning_rate": 4.003752398688218e-07,
|
|
"loss": 0.3097,
|
|
"step": 20180
|
|
},
|
|
{
|
|
"epoch": 2.431065623118603,
|
|
"grad_norm": 5.39198637008667,
|
|
"learning_rate": 3.9986044448471244e-07,
|
|
"loss": 0.3112,
|
|
"step": 20190
|
|
},
|
|
{
|
|
"epoch": 2.432269717037929,
|
|
"grad_norm": 4.356963634490967,
|
|
"learning_rate": 3.9934575966647375e-07,
|
|
"loss": 0.3006,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 2.4334738109572545,
|
|
"grad_norm": 4.211975574493408,
|
|
"learning_rate": 3.9883118598237837e-07,
|
|
"loss": 0.2989,
|
|
"step": 20210
|
|
},
|
|
{
|
|
"epoch": 2.4346779048765805,
|
|
"grad_norm": 5.301422119140625,
|
|
"learning_rate": 3.9831672400057605e-07,
|
|
"loss": 0.3178,
|
|
"step": 20220
|
|
},
|
|
{
|
|
"epoch": 2.435881998795906,
|
|
"grad_norm": 4.181766510009766,
|
|
"learning_rate": 3.978023742890937e-07,
|
|
"loss": 0.3066,
|
|
"step": 20230
|
|
},
|
|
{
|
|
"epoch": 2.437086092715232,
|
|
"grad_norm": 5.18208122253418,
|
|
"learning_rate": 3.9728813741583383e-07,
|
|
"loss": 0.3001,
|
|
"step": 20240
|
|
},
|
|
{
|
|
"epoch": 2.4382901866345574,
|
|
"grad_norm": 5.382752418518066,
|
|
"learning_rate": 3.967740139485748e-07,
|
|
"loss": 0.3088,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 2.439494280553883,
|
|
"grad_norm": 5.215182304382324,
|
|
"learning_rate": 3.9626000445496934e-07,
|
|
"loss": 0.2882,
|
|
"step": 20260
|
|
},
|
|
{
|
|
"epoch": 2.440698374473209,
|
|
"grad_norm": 5.133399963378906,
|
|
"learning_rate": 3.957461095025444e-07,
|
|
"loss": 0.3303,
|
|
"step": 20270
|
|
},
|
|
{
|
|
"epoch": 2.4419024683925348,
|
|
"grad_norm": 5.194669246673584,
|
|
"learning_rate": 3.952323296587007e-07,
|
|
"loss": 0.3172,
|
|
"step": 20280
|
|
},
|
|
{
|
|
"epoch": 2.4431065623118604,
|
|
"grad_norm": 4.95144510269165,
|
|
"learning_rate": 3.947186654907119e-07,
|
|
"loss": 0.3138,
|
|
"step": 20290
|
|
},
|
|
{
|
|
"epoch": 2.444310656231186,
|
|
"grad_norm": 5.0588812828063965,
|
|
"learning_rate": 3.9420511756572346e-07,
|
|
"loss": 0.3058,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 2.4455147501505117,
|
|
"grad_norm": 5.033606052398682,
|
|
"learning_rate": 3.936916864507529e-07,
|
|
"loss": 0.3161,
|
|
"step": 20310
|
|
},
|
|
{
|
|
"epoch": 2.4467188440698373,
|
|
"grad_norm": 5.006187915802002,
|
|
"learning_rate": 3.9317837271268876e-07,
|
|
"loss": 0.2993,
|
|
"step": 20320
|
|
},
|
|
{
|
|
"epoch": 2.447922937989163,
|
|
"grad_norm": 4.955638408660889,
|
|
"learning_rate": 3.926651769182901e-07,
|
|
"loss": 0.3023,
|
|
"step": 20330
|
|
},
|
|
{
|
|
"epoch": 2.449127031908489,
|
|
"grad_norm": 4.786928653717041,
|
|
"learning_rate": 3.9215209963418513e-07,
|
|
"loss": 0.3207,
|
|
"step": 20340
|
|
},
|
|
{
|
|
"epoch": 2.4503311258278146,
|
|
"grad_norm": 4.456767559051514,
|
|
"learning_rate": 3.9163914142687177e-07,
|
|
"loss": 0.3142,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 2.4515352197471403,
|
|
"grad_norm": 5.671106338500977,
|
|
"learning_rate": 3.911263028627164e-07,
|
|
"loss": 0.3125,
|
|
"step": 20360
|
|
},
|
|
{
|
|
"epoch": 2.452739313666466,
|
|
"grad_norm": 5.525556564331055,
|
|
"learning_rate": 3.9061358450795344e-07,
|
|
"loss": 0.2972,
|
|
"step": 20370
|
|
},
|
|
{
|
|
"epoch": 2.4539434075857915,
|
|
"grad_norm": 4.18988561630249,
|
|
"learning_rate": 3.9010098692868397e-07,
|
|
"loss": 0.2971,
|
|
"step": 20380
|
|
},
|
|
{
|
|
"epoch": 2.4551475015051176,
|
|
"grad_norm": 5.705048561096191,
|
|
"learning_rate": 3.895885106908763e-07,
|
|
"loss": 0.3094,
|
|
"step": 20390
|
|
},
|
|
{
|
|
"epoch": 2.4563515954244433,
|
|
"grad_norm": 5.453742980957031,
|
|
"learning_rate": 3.890761563603647e-07,
|
|
"loss": 0.3079,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 2.457555689343769,
|
|
"grad_norm": 4.007357120513916,
|
|
"learning_rate": 3.885639245028488e-07,
|
|
"loss": 0.3119,
|
|
"step": 20410
|
|
},
|
|
{
|
|
"epoch": 2.4587597832630945,
|
|
"grad_norm": 5.247729301452637,
|
|
"learning_rate": 3.8805181568389255e-07,
|
|
"loss": 0.3047,
|
|
"step": 20420
|
|
},
|
|
{
|
|
"epoch": 2.45996387718242,
|
|
"grad_norm": 4.143746852874756,
|
|
"learning_rate": 3.8753983046892465e-07,
|
|
"loss": 0.3062,
|
|
"step": 20430
|
|
},
|
|
{
|
|
"epoch": 2.461167971101746,
|
|
"grad_norm": 4.356471538543701,
|
|
"learning_rate": 3.8702796942323736e-07,
|
|
"loss": 0.3095,
|
|
"step": 20440
|
|
},
|
|
{
|
|
"epoch": 2.4623720650210714,
|
|
"grad_norm": 4.553625106811523,
|
|
"learning_rate": 3.8651623311198516e-07,
|
|
"loss": 0.3117,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 2.4635761589403975,
|
|
"grad_norm": 4.882122039794922,
|
|
"learning_rate": 3.860046221001855e-07,
|
|
"loss": 0.322,
|
|
"step": 20460
|
|
},
|
|
{
|
|
"epoch": 2.464780252859723,
|
|
"grad_norm": 5.218991756439209,
|
|
"learning_rate": 3.854931369527172e-07,
|
|
"loss": 0.3138,
|
|
"step": 20470
|
|
},
|
|
{
|
|
"epoch": 2.4659843467790488,
|
|
"grad_norm": 5.427024841308594,
|
|
"learning_rate": 3.849817782343201e-07,
|
|
"loss": 0.3125,
|
|
"step": 20480
|
|
},
|
|
{
|
|
"epoch": 2.4671884406983744,
|
|
"grad_norm": 4.729675769805908,
|
|
"learning_rate": 3.8447054650959447e-07,
|
|
"loss": 0.2925,
|
|
"step": 20490
|
|
},
|
|
{
|
|
"epoch": 2.4683925346177,
|
|
"grad_norm": 5.330557346343994,
|
|
"learning_rate": 3.8395944234300053e-07,
|
|
"loss": 0.2968,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 2.469596628537026,
|
|
"grad_norm": 4.960201740264893,
|
|
"learning_rate": 3.834484662988573e-07,
|
|
"loss": 0.3147,
|
|
"step": 20510
|
|
},
|
|
{
|
|
"epoch": 2.4708007224563517,
|
|
"grad_norm": 4.888551235198975,
|
|
"learning_rate": 3.829376189413427e-07,
|
|
"loss": 0.3098,
|
|
"step": 20520
|
|
},
|
|
{
|
|
"epoch": 2.4720048163756774,
|
|
"grad_norm": 4.717561721801758,
|
|
"learning_rate": 3.824269008344924e-07,
|
|
"loss": 0.3018,
|
|
"step": 20530
|
|
},
|
|
{
|
|
"epoch": 2.473208910295003,
|
|
"grad_norm": 4.666635990142822,
|
|
"learning_rate": 3.8191631254219927e-07,
|
|
"loss": 0.2942,
|
|
"step": 20540
|
|
},
|
|
{
|
|
"epoch": 2.4744130042143286,
|
|
"grad_norm": 5.138599872589111,
|
|
"learning_rate": 3.8140585462821296e-07,
|
|
"loss": 0.2922,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 2.4756170981336543,
|
|
"grad_norm": 5.150256633758545,
|
|
"learning_rate": 3.808955276561395e-07,
|
|
"loss": 0.3039,
|
|
"step": 20560
|
|
},
|
|
{
|
|
"epoch": 2.47682119205298,
|
|
"grad_norm": 5.677982807159424,
|
|
"learning_rate": 3.8038533218943954e-07,
|
|
"loss": 0.2928,
|
|
"step": 20570
|
|
},
|
|
{
|
|
"epoch": 2.478025285972306,
|
|
"grad_norm": 4.552664756774902,
|
|
"learning_rate": 3.798752687914292e-07,
|
|
"loss": 0.3108,
|
|
"step": 20580
|
|
},
|
|
{
|
|
"epoch": 2.4792293798916316,
|
|
"grad_norm": 4.48048210144043,
|
|
"learning_rate": 3.7936533802527855e-07,
|
|
"loss": 0.3159,
|
|
"step": 20590
|
|
},
|
|
{
|
|
"epoch": 2.4804334738109572,
|
|
"grad_norm": 4.3352370262146,
|
|
"learning_rate": 3.7885554045401147e-07,
|
|
"loss": 0.3079,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 2.481637567730283,
|
|
"grad_norm": 4.1587653160095215,
|
|
"learning_rate": 3.783458766405042e-07,
|
|
"loss": 0.3036,
|
|
"step": 20610
|
|
},
|
|
{
|
|
"epoch": 2.4828416616496085,
|
|
"grad_norm": 4.668213844299316,
|
|
"learning_rate": 3.7783634714748584e-07,
|
|
"loss": 0.3003,
|
|
"step": 20620
|
|
},
|
|
{
|
|
"epoch": 2.4840457555689346,
|
|
"grad_norm": 4.186696529388428,
|
|
"learning_rate": 3.7732695253753697e-07,
|
|
"loss": 0.3192,
|
|
"step": 20630
|
|
},
|
|
{
|
|
"epoch": 2.48524984948826,
|
|
"grad_norm": 4.841115951538086,
|
|
"learning_rate": 3.7681769337308954e-07,
|
|
"loss": 0.3064,
|
|
"step": 20640
|
|
},
|
|
{
|
|
"epoch": 2.486453943407586,
|
|
"grad_norm": 4.4625020027160645,
|
|
"learning_rate": 3.7630857021642514e-07,
|
|
"loss": 0.3059,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 2.4876580373269115,
|
|
"grad_norm": 4.459711074829102,
|
|
"learning_rate": 3.757995836296761e-07,
|
|
"loss": 0.2925,
|
|
"step": 20660
|
|
},
|
|
{
|
|
"epoch": 2.488862131246237,
|
|
"grad_norm": 4.983307361602783,
|
|
"learning_rate": 3.7529073417482345e-07,
|
|
"loss": 0.2961,
|
|
"step": 20670
|
|
},
|
|
{
|
|
"epoch": 2.4900662251655628,
|
|
"grad_norm": 4.813161373138428,
|
|
"learning_rate": 3.747820224136973e-07,
|
|
"loss": 0.3138,
|
|
"step": 20680
|
|
},
|
|
{
|
|
"epoch": 2.4912703190848884,
|
|
"grad_norm": 4.922794342041016,
|
|
"learning_rate": 3.742734489079748e-07,
|
|
"loss": 0.3219,
|
|
"step": 20690
|
|
},
|
|
{
|
|
"epoch": 2.4924744130042145,
|
|
"grad_norm": 5.428676128387451,
|
|
"learning_rate": 3.737650142191814e-07,
|
|
"loss": 0.3077,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 2.49367850692354,
|
|
"grad_norm": 4.670940399169922,
|
|
"learning_rate": 3.7325671890868895e-07,
|
|
"loss": 0.3035,
|
|
"step": 20710
|
|
},
|
|
{
|
|
"epoch": 2.4948826008428657,
|
|
"grad_norm": 4.245230674743652,
|
|
"learning_rate": 3.727485635377153e-07,
|
|
"loss": 0.3102,
|
|
"step": 20720
|
|
},
|
|
{
|
|
"epoch": 2.4960866947621914,
|
|
"grad_norm": 4.281071186065674,
|
|
"learning_rate": 3.7224054866732366e-07,
|
|
"loss": 0.2848,
|
|
"step": 20730
|
|
},
|
|
{
|
|
"epoch": 2.497290788681517,
|
|
"grad_norm": 4.969486236572266,
|
|
"learning_rate": 3.717326748584227e-07,
|
|
"loss": 0.3109,
|
|
"step": 20740
|
|
},
|
|
{
|
|
"epoch": 2.498494882600843,
|
|
"grad_norm": 6.3518500328063965,
|
|
"learning_rate": 3.712249426717647e-07,
|
|
"loss": 0.321,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 2.4996989765201687,
|
|
"grad_norm": 4.896385192871094,
|
|
"learning_rate": 3.707173526679458e-07,
|
|
"loss": 0.3096,
|
|
"step": 20760
|
|
},
|
|
{
|
|
"epoch": 2.5009030704394943,
|
|
"grad_norm": 4.546391487121582,
|
|
"learning_rate": 3.702099054074054e-07,
|
|
"loss": 0.3153,
|
|
"step": 20770
|
|
},
|
|
{
|
|
"epoch": 2.50210716435882,
|
|
"grad_norm": 4.817781925201416,
|
|
"learning_rate": 3.6970260145042475e-07,
|
|
"loss": 0.3072,
|
|
"step": 20780
|
|
},
|
|
{
|
|
"epoch": 2.5033112582781456,
|
|
"grad_norm": 4.495319366455078,
|
|
"learning_rate": 3.691954413571276e-07,
|
|
"loss": 0.316,
|
|
"step": 20790
|
|
},
|
|
{
|
|
"epoch": 2.5045153521974717,
|
|
"grad_norm": 4.200586318969727,
|
|
"learning_rate": 3.6868842568747826e-07,
|
|
"loss": 0.3146,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 2.505719446116797,
|
|
"grad_norm": 5.999356269836426,
|
|
"learning_rate": 3.681815550012816e-07,
|
|
"loss": 0.3087,
|
|
"step": 20810
|
|
},
|
|
{
|
|
"epoch": 2.506923540036123,
|
|
"grad_norm": 4.140690326690674,
|
|
"learning_rate": 3.676748298581828e-07,
|
|
"loss": 0.2786,
|
|
"step": 20820
|
|
},
|
|
{
|
|
"epoch": 2.5081276339554486,
|
|
"grad_norm": 4.519384384155273,
|
|
"learning_rate": 3.6716825081766634e-07,
|
|
"loss": 0.3073,
|
|
"step": 20830
|
|
},
|
|
{
|
|
"epoch": 2.509331727874774,
|
|
"grad_norm": 4.580509185791016,
|
|
"learning_rate": 3.6666181843905477e-07,
|
|
"loss": 0.3224,
|
|
"step": 20840
|
|
},
|
|
{
|
|
"epoch": 2.5105358217941,
|
|
"grad_norm": 4.371671676635742,
|
|
"learning_rate": 3.661555332815092e-07,
|
|
"loss": 0.303,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 2.5117399157134255,
|
|
"grad_norm": 5.235719680786133,
|
|
"learning_rate": 3.656493959040283e-07,
|
|
"loss": 0.3104,
|
|
"step": 20860
|
|
},
|
|
{
|
|
"epoch": 2.5129440096327516,
|
|
"grad_norm": 5.564718246459961,
|
|
"learning_rate": 3.651434068654474e-07,
|
|
"loss": 0.3111,
|
|
"step": 20870
|
|
},
|
|
{
|
|
"epoch": 2.514148103552077,
|
|
"grad_norm": 4.76020622253418,
|
|
"learning_rate": 3.646375667244378e-07,
|
|
"loss": 0.3153,
|
|
"step": 20880
|
|
},
|
|
{
|
|
"epoch": 2.515352197471403,
|
|
"grad_norm": 4.534407138824463,
|
|
"learning_rate": 3.6413187603950667e-07,
|
|
"loss": 0.305,
|
|
"step": 20890
|
|
},
|
|
{
|
|
"epoch": 2.5165562913907285,
|
|
"grad_norm": 5.413814067840576,
|
|
"learning_rate": 3.636263353689962e-07,
|
|
"loss": 0.3088,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 2.517760385310054,
|
|
"grad_norm": 5.003753185272217,
|
|
"learning_rate": 3.6312094527108307e-07,
|
|
"loss": 0.3146,
|
|
"step": 20910
|
|
},
|
|
{
|
|
"epoch": 2.51896447922938,
|
|
"grad_norm": 5.368070125579834,
|
|
"learning_rate": 3.6261570630377713e-07,
|
|
"loss": 0.3131,
|
|
"step": 20920
|
|
},
|
|
{
|
|
"epoch": 2.5201685731487053,
|
|
"grad_norm": 5.054159641265869,
|
|
"learning_rate": 3.621106190249219e-07,
|
|
"loss": 0.2967,
|
|
"step": 20930
|
|
},
|
|
{
|
|
"epoch": 2.5213726670680314,
|
|
"grad_norm": 5.523135185241699,
|
|
"learning_rate": 3.616056839921932e-07,
|
|
"loss": 0.3154,
|
|
"step": 20940
|
|
},
|
|
{
|
|
"epoch": 2.522576760987357,
|
|
"grad_norm": 5.352376937866211,
|
|
"learning_rate": 3.6110090176309914e-07,
|
|
"loss": 0.3033,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 2.5237808549066827,
|
|
"grad_norm": 3.677163600921631,
|
|
"learning_rate": 3.605962728949783e-07,
|
|
"loss": 0.3198,
|
|
"step": 20960
|
|
},
|
|
{
|
|
"epoch": 2.5249849488260083,
|
|
"grad_norm": 4.4316840171813965,
|
|
"learning_rate": 3.6009179794500067e-07,
|
|
"loss": 0.304,
|
|
"step": 20970
|
|
},
|
|
{
|
|
"epoch": 2.526189042745334,
|
|
"grad_norm": 4.927300453186035,
|
|
"learning_rate": 3.5958747747016603e-07,
|
|
"loss": 0.3221,
|
|
"step": 20980
|
|
},
|
|
{
|
|
"epoch": 2.52739313666466,
|
|
"grad_norm": 5.448822975158691,
|
|
"learning_rate": 3.590833120273038e-07,
|
|
"loss": 0.3186,
|
|
"step": 20990
|
|
},
|
|
{
|
|
"epoch": 2.5285972305839857,
|
|
"grad_norm": 4.188570022583008,
|
|
"learning_rate": 3.5857930217307163e-07,
|
|
"loss": 0.3015,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 2.5298013245033113,
|
|
"grad_norm": 4.157015323638916,
|
|
"learning_rate": 3.580754484639561e-07,
|
|
"loss": 0.2909,
|
|
"step": 21010
|
|
},
|
|
{
|
|
"epoch": 2.531005418422637,
|
|
"grad_norm": 4.773519992828369,
|
|
"learning_rate": 3.5757175145627107e-07,
|
|
"loss": 0.3034,
|
|
"step": 21020
|
|
},
|
|
{
|
|
"epoch": 2.5322095123419626,
|
|
"grad_norm": 5.435080051422119,
|
|
"learning_rate": 3.570682117061573e-07,
|
|
"loss": 0.3148,
|
|
"step": 21030
|
|
},
|
|
{
|
|
"epoch": 2.5334136062612886,
|
|
"grad_norm": 4.959787368774414,
|
|
"learning_rate": 3.56564829769582e-07,
|
|
"loss": 0.3115,
|
|
"step": 21040
|
|
},
|
|
{
|
|
"epoch": 2.534617700180614,
|
|
"grad_norm": 4.7358880043029785,
|
|
"learning_rate": 3.5606160620233815e-07,
|
|
"loss": 0.3078,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 2.53582179409994,
|
|
"grad_norm": 4.220034599304199,
|
|
"learning_rate": 3.5555854156004404e-07,
|
|
"loss": 0.298,
|
|
"step": 21060
|
|
},
|
|
{
|
|
"epoch": 2.5370258880192655,
|
|
"grad_norm": 4.433871746063232,
|
|
"learning_rate": 3.550556363981422e-07,
|
|
"loss": 0.2809,
|
|
"step": 21070
|
|
},
|
|
{
|
|
"epoch": 2.538229981938591,
|
|
"grad_norm": 4.491239070892334,
|
|
"learning_rate": 3.5455289127189907e-07,
|
|
"loss": 0.3179,
|
|
"step": 21080
|
|
},
|
|
{
|
|
"epoch": 2.539434075857917,
|
|
"grad_norm": 4.969503879547119,
|
|
"learning_rate": 3.540503067364047e-07,
|
|
"loss": 0.3018,
|
|
"step": 21090
|
|
},
|
|
{
|
|
"epoch": 2.5406381697772424,
|
|
"grad_norm": 4.266849040985107,
|
|
"learning_rate": 3.535478833465717e-07,
|
|
"loss": 0.3121,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 2.5418422636965685,
|
|
"grad_norm": 4.8507771492004395,
|
|
"learning_rate": 3.5304562165713435e-07,
|
|
"loss": 0.317,
|
|
"step": 21110
|
|
},
|
|
{
|
|
"epoch": 2.543046357615894,
|
|
"grad_norm": 4.610383987426758,
|
|
"learning_rate": 3.525435222226491e-07,
|
|
"loss": 0.3083,
|
|
"step": 21120
|
|
},
|
|
{
|
|
"epoch": 2.54425045153522,
|
|
"grad_norm": 4.408012390136719,
|
|
"learning_rate": 3.5204158559749275e-07,
|
|
"loss": 0.3141,
|
|
"step": 21130
|
|
},
|
|
{
|
|
"epoch": 2.5454545454545454,
|
|
"grad_norm": 5.178010940551758,
|
|
"learning_rate": 3.5153981233586274e-07,
|
|
"loss": 0.3106,
|
|
"step": 21140
|
|
},
|
|
{
|
|
"epoch": 2.546658639373871,
|
|
"grad_norm": 4.6306681632995605,
|
|
"learning_rate": 3.5103820299177535e-07,
|
|
"loss": 0.3086,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 2.547862733293197,
|
|
"grad_norm": 5.366611003875732,
|
|
"learning_rate": 3.505367581190668e-07,
|
|
"loss": 0.2985,
|
|
"step": 21160
|
|
},
|
|
{
|
|
"epoch": 2.5490668272125223,
|
|
"grad_norm": 5.572306156158447,
|
|
"learning_rate": 3.5003547827139125e-07,
|
|
"loss": 0.2976,
|
|
"step": 21170
|
|
},
|
|
{
|
|
"epoch": 2.5502709211318484,
|
|
"grad_norm": 5.326085090637207,
|
|
"learning_rate": 3.495343640022209e-07,
|
|
"loss": 0.2971,
|
|
"step": 21180
|
|
},
|
|
{
|
|
"epoch": 2.551475015051174,
|
|
"grad_norm": 7.600101947784424,
|
|
"learning_rate": 3.4903341586484456e-07,
|
|
"loss": 0.2961,
|
|
"step": 21190
|
|
},
|
|
{
|
|
"epoch": 2.5526791089704997,
|
|
"grad_norm": 4.568670272827148,
|
|
"learning_rate": 3.4853263441236834e-07,
|
|
"loss": 0.3142,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 2.5538832028898253,
|
|
"grad_norm": 4.9445695877075195,
|
|
"learning_rate": 3.480320201977138e-07,
|
|
"loss": 0.2988,
|
|
"step": 21210
|
|
},
|
|
{
|
|
"epoch": 2.555087296809151,
|
|
"grad_norm": 5.26786994934082,
|
|
"learning_rate": 3.475315737736183e-07,
|
|
"loss": 0.3074,
|
|
"step": 21220
|
|
},
|
|
{
|
|
"epoch": 2.556291390728477,
|
|
"grad_norm": 4.316328525543213,
|
|
"learning_rate": 3.4703129569263323e-07,
|
|
"loss": 0.2917,
|
|
"step": 21230
|
|
},
|
|
{
|
|
"epoch": 2.5574954846478026,
|
|
"grad_norm": 4.018758773803711,
|
|
"learning_rate": 3.465311865071248e-07,
|
|
"loss": 0.2967,
|
|
"step": 21240
|
|
},
|
|
{
|
|
"epoch": 2.5586995785671283,
|
|
"grad_norm": 5.121528625488281,
|
|
"learning_rate": 3.460312467692725e-07,
|
|
"loss": 0.3061,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 2.559903672486454,
|
|
"grad_norm": 4.710129261016846,
|
|
"learning_rate": 3.4553147703106886e-07,
|
|
"loss": 0.3074,
|
|
"step": 21260
|
|
},
|
|
{
|
|
"epoch": 2.5611077664057795,
|
|
"grad_norm": 4.447737216949463,
|
|
"learning_rate": 3.4503187784431825e-07,
|
|
"loss": 0.3062,
|
|
"step": 21270
|
|
},
|
|
{
|
|
"epoch": 2.5623118603251056,
|
|
"grad_norm": 4.8179612159729,
|
|
"learning_rate": 3.445324497606372e-07,
|
|
"loss": 0.3007,
|
|
"step": 21280
|
|
},
|
|
{
|
|
"epoch": 2.563515954244431,
|
|
"grad_norm": 4.53162956237793,
|
|
"learning_rate": 3.440331933314532e-07,
|
|
"loss": 0.3103,
|
|
"step": 21290
|
|
},
|
|
{
|
|
"epoch": 2.564720048163757,
|
|
"grad_norm": 4.889903545379639,
|
|
"learning_rate": 3.435341091080042e-07,
|
|
"loss": 0.3109,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 2.5659241420830825,
|
|
"grad_norm": 4.858291149139404,
|
|
"learning_rate": 3.430351976413378e-07,
|
|
"loss": 0.3191,
|
|
"step": 21310
|
|
},
|
|
{
|
|
"epoch": 2.567128236002408,
|
|
"grad_norm": 4.58107852935791,
|
|
"learning_rate": 3.425364594823114e-07,
|
|
"loss": 0.2853,
|
|
"step": 21320
|
|
},
|
|
{
|
|
"epoch": 2.5683323299217338,
|
|
"grad_norm": 5.6206207275390625,
|
|
"learning_rate": 3.420378951815903e-07,
|
|
"loss": 0.3081,
|
|
"step": 21330
|
|
},
|
|
{
|
|
"epoch": 2.5695364238410594,
|
|
"grad_norm": 5.069255352020264,
|
|
"learning_rate": 3.4153950528964866e-07,
|
|
"loss": 0.3034,
|
|
"step": 21340
|
|
},
|
|
{
|
|
"epoch": 2.5707405177603855,
|
|
"grad_norm": 5.086771488189697,
|
|
"learning_rate": 3.4104129035676743e-07,
|
|
"loss": 0.318,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 2.571944611679711,
|
|
"grad_norm": 5.416161060333252,
|
|
"learning_rate": 3.4054325093303447e-07,
|
|
"loss": 0.3062,
|
|
"step": 21360
|
|
},
|
|
{
|
|
"epoch": 2.5731487055990367,
|
|
"grad_norm": 4.536307334899902,
|
|
"learning_rate": 3.4004538756834415e-07,
|
|
"loss": 0.3028,
|
|
"step": 21370
|
|
},
|
|
{
|
|
"epoch": 2.5743527995183624,
|
|
"grad_norm": 4.512822151184082,
|
|
"learning_rate": 3.3954770081239657e-07,
|
|
"loss": 0.3046,
|
|
"step": 21380
|
|
},
|
|
{
|
|
"epoch": 2.575556893437688,
|
|
"grad_norm": 5.5262322425842285,
|
|
"learning_rate": 3.39050191214696e-07,
|
|
"loss": 0.3012,
|
|
"step": 21390
|
|
},
|
|
{
|
|
"epoch": 2.576760987357014,
|
|
"grad_norm": 5.3342509269714355,
|
|
"learning_rate": 3.38552859324552e-07,
|
|
"loss": 0.3046,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 2.5779650812763397,
|
|
"grad_norm": 4.271503925323486,
|
|
"learning_rate": 3.380557056910778e-07,
|
|
"loss": 0.3097,
|
|
"step": 21410
|
|
},
|
|
{
|
|
"epoch": 2.5791691751956654,
|
|
"grad_norm": 4.600352764129639,
|
|
"learning_rate": 3.375587308631891e-07,
|
|
"loss": 0.3094,
|
|
"step": 21420
|
|
},
|
|
{
|
|
"epoch": 2.580373269114991,
|
|
"grad_norm": 4.630692958831787,
|
|
"learning_rate": 3.3706193538960493e-07,
|
|
"loss": 0.3117,
|
|
"step": 21430
|
|
},
|
|
{
|
|
"epoch": 2.5815773630343166,
|
|
"grad_norm": 4.425769329071045,
|
|
"learning_rate": 3.3656531981884604e-07,
|
|
"loss": 0.3097,
|
|
"step": 21440
|
|
},
|
|
{
|
|
"epoch": 2.5827814569536423,
|
|
"grad_norm": 4.963135242462158,
|
|
"learning_rate": 3.3606888469923474e-07,
|
|
"loss": 0.3079,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 2.583985550872968,
|
|
"grad_norm": 5.204167366027832,
|
|
"learning_rate": 3.3557263057889344e-07,
|
|
"loss": 0.2965,
|
|
"step": 21460
|
|
},
|
|
{
|
|
"epoch": 2.585189644792294,
|
|
"grad_norm": 4.431160926818848,
|
|
"learning_rate": 3.3507655800574554e-07,
|
|
"loss": 0.2973,
|
|
"step": 21470
|
|
},
|
|
{
|
|
"epoch": 2.5863937387116196,
|
|
"grad_norm": 5.386955261230469,
|
|
"learning_rate": 3.345806675275134e-07,
|
|
"loss": 0.3035,
|
|
"step": 21480
|
|
},
|
|
{
|
|
"epoch": 2.5875978326309452,
|
|
"grad_norm": 4.363948345184326,
|
|
"learning_rate": 3.340849596917189e-07,
|
|
"loss": 0.2848,
|
|
"step": 21490
|
|
},
|
|
{
|
|
"epoch": 2.588801926550271,
|
|
"grad_norm": 4.813036918640137,
|
|
"learning_rate": 3.3358943504568147e-07,
|
|
"loss": 0.3086,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 2.5900060204695965,
|
|
"grad_norm": 4.847212791442871,
|
|
"learning_rate": 3.3309409413651895e-07,
|
|
"loss": 0.2939,
|
|
"step": 21510
|
|
},
|
|
{
|
|
"epoch": 2.5912101143889226,
|
|
"grad_norm": 6.291325569152832,
|
|
"learning_rate": 3.3259893751114606e-07,
|
|
"loss": 0.3117,
|
|
"step": 21520
|
|
},
|
|
{
|
|
"epoch": 2.592414208308248,
|
|
"grad_norm": 5.317537307739258,
|
|
"learning_rate": 3.321039657162742e-07,
|
|
"loss": 0.3222,
|
|
"step": 21530
|
|
},
|
|
{
|
|
"epoch": 2.593618302227574,
|
|
"grad_norm": 4.0502190589904785,
|
|
"learning_rate": 3.3160917929841027e-07,
|
|
"loss": 0.2994,
|
|
"step": 21540
|
|
},
|
|
{
|
|
"epoch": 2.5948223961468995,
|
|
"grad_norm": 5.079105377197266,
|
|
"learning_rate": 3.3111457880385686e-07,
|
|
"loss": 0.3002,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 2.596026490066225,
|
|
"grad_norm": 5.073225975036621,
|
|
"learning_rate": 3.3062016477871147e-07,
|
|
"loss": 0.2969,
|
|
"step": 21560
|
|
},
|
|
{
|
|
"epoch": 2.5972305839855507,
|
|
"grad_norm": 5.702369689941406,
|
|
"learning_rate": 3.3012593776886524e-07,
|
|
"loss": 0.3229,
|
|
"step": 21570
|
|
},
|
|
{
|
|
"epoch": 2.5984346779048764,
|
|
"grad_norm": 5.685046672821045,
|
|
"learning_rate": 3.296318983200028e-07,
|
|
"loss": 0.3149,
|
|
"step": 21580
|
|
},
|
|
{
|
|
"epoch": 2.5996387718242024,
|
|
"grad_norm": 5.351219654083252,
|
|
"learning_rate": 3.2913804697760244e-07,
|
|
"loss": 0.3116,
|
|
"step": 21590
|
|
},
|
|
{
|
|
"epoch": 2.600842865743528,
|
|
"grad_norm": 4.610897541046143,
|
|
"learning_rate": 3.286443842869338e-07,
|
|
"loss": 0.3092,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 2.6020469596628537,
|
|
"grad_norm": 4.982673168182373,
|
|
"learning_rate": 3.2815091079305895e-07,
|
|
"loss": 0.2942,
|
|
"step": 21610
|
|
},
|
|
{
|
|
"epoch": 2.6032510535821793,
|
|
"grad_norm": 5.005990982055664,
|
|
"learning_rate": 3.2765762704083067e-07,
|
|
"loss": 0.311,
|
|
"step": 21620
|
|
},
|
|
{
|
|
"epoch": 2.604455147501505,
|
|
"grad_norm": 4.512310028076172,
|
|
"learning_rate": 3.271645335748923e-07,
|
|
"loss": 0.3267,
|
|
"step": 21630
|
|
},
|
|
{
|
|
"epoch": 2.605659241420831,
|
|
"grad_norm": 4.117137432098389,
|
|
"learning_rate": 3.2667163093967716e-07,
|
|
"loss": 0.3003,
|
|
"step": 21640
|
|
},
|
|
{
|
|
"epoch": 2.6068633353401567,
|
|
"grad_norm": 5.019242763519287,
|
|
"learning_rate": 3.2617891967940806e-07,
|
|
"loss": 0.2979,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 2.6080674292594823,
|
|
"grad_norm": 4.304302215576172,
|
|
"learning_rate": 3.2568640033809597e-07,
|
|
"loss": 0.3009,
|
|
"step": 21660
|
|
},
|
|
{
|
|
"epoch": 2.609271523178808,
|
|
"grad_norm": 5.543119430541992,
|
|
"learning_rate": 3.2519407345954043e-07,
|
|
"loss": 0.3085,
|
|
"step": 21670
|
|
},
|
|
{
|
|
"epoch": 2.6104756170981336,
|
|
"grad_norm": 4.892364025115967,
|
|
"learning_rate": 3.247019395873283e-07,
|
|
"loss": 0.2965,
|
|
"step": 21680
|
|
},
|
|
{
|
|
"epoch": 2.611679711017459,
|
|
"grad_norm": 3.9560534954071045,
|
|
"learning_rate": 3.242099992648336e-07,
|
|
"loss": 0.2994,
|
|
"step": 21690
|
|
},
|
|
{
|
|
"epoch": 2.612883804936785,
|
|
"grad_norm": 4.653574466705322,
|
|
"learning_rate": 3.2371825303521604e-07,
|
|
"loss": 0.3072,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 2.614087898856111,
|
|
"grad_norm": 4.340296268463135,
|
|
"learning_rate": 3.232267014414216e-07,
|
|
"loss": 0.2965,
|
|
"step": 21710
|
|
},
|
|
{
|
|
"epoch": 2.6152919927754366,
|
|
"grad_norm": 3.889099597930908,
|
|
"learning_rate": 3.2273534502618136e-07,
|
|
"loss": 0.3212,
|
|
"step": 21720
|
|
},
|
|
{
|
|
"epoch": 2.616496086694762,
|
|
"grad_norm": 4.952009201049805,
|
|
"learning_rate": 3.2224418433201033e-07,
|
|
"loss": 0.3121,
|
|
"step": 21730
|
|
},
|
|
{
|
|
"epoch": 2.617700180614088,
|
|
"grad_norm": 5.229816913604736,
|
|
"learning_rate": 3.2175321990120797e-07,
|
|
"loss": 0.304,
|
|
"step": 21740
|
|
},
|
|
{
|
|
"epoch": 2.6189042745334135,
|
|
"grad_norm": 4.951354503631592,
|
|
"learning_rate": 3.2126245227585693e-07,
|
|
"loss": 0.3024,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 2.6201083684527395,
|
|
"grad_norm": 5.034163475036621,
|
|
"learning_rate": 3.2077188199782257e-07,
|
|
"loss": 0.3057,
|
|
"step": 21760
|
|
},
|
|
{
|
|
"epoch": 2.621312462372065,
|
|
"grad_norm": 5.984414100646973,
|
|
"learning_rate": 3.20281509608752e-07,
|
|
"loss": 0.3209,
|
|
"step": 21770
|
|
},
|
|
{
|
|
"epoch": 2.622516556291391,
|
|
"grad_norm": 4.373472213745117,
|
|
"learning_rate": 3.1979133565007434e-07,
|
|
"loss": 0.2947,
|
|
"step": 21780
|
|
},
|
|
{
|
|
"epoch": 2.6237206502107164,
|
|
"grad_norm": 4.750053405761719,
|
|
"learning_rate": 3.193013606629994e-07,
|
|
"loss": 0.3196,
|
|
"step": 21790
|
|
},
|
|
{
|
|
"epoch": 2.624924744130042,
|
|
"grad_norm": 4.528110027313232,
|
|
"learning_rate": 3.188115851885174e-07,
|
|
"loss": 0.3053,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 2.6261288380493677,
|
|
"grad_norm": 4.8642072677612305,
|
|
"learning_rate": 3.1832200976739786e-07,
|
|
"loss": 0.3328,
|
|
"step": 21810
|
|
},
|
|
{
|
|
"epoch": 2.6273329319686933,
|
|
"grad_norm": 4.624762535095215,
|
|
"learning_rate": 3.1783263494019e-07,
|
|
"loss": 0.3123,
|
|
"step": 21820
|
|
},
|
|
{
|
|
"epoch": 2.6285370258880194,
|
|
"grad_norm": 4.700741767883301,
|
|
"learning_rate": 3.1734346124722135e-07,
|
|
"loss": 0.3011,
|
|
"step": 21830
|
|
},
|
|
{
|
|
"epoch": 2.629741119807345,
|
|
"grad_norm": 5.0118021965026855,
|
|
"learning_rate": 3.1685448922859716e-07,
|
|
"loss": 0.3163,
|
|
"step": 21840
|
|
},
|
|
{
|
|
"epoch": 2.6309452137266707,
|
|
"grad_norm": 5.321165084838867,
|
|
"learning_rate": 3.1636571942420014e-07,
|
|
"loss": 0.3019,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 2.6321493076459963,
|
|
"grad_norm": 5.864070892333984,
|
|
"learning_rate": 3.1587715237368996e-07,
|
|
"loss": 0.3027,
|
|
"step": 21860
|
|
},
|
|
{
|
|
"epoch": 2.633353401565322,
|
|
"grad_norm": 4.458745956420898,
|
|
"learning_rate": 3.1538878861650194e-07,
|
|
"loss": 0.3152,
|
|
"step": 21870
|
|
},
|
|
{
|
|
"epoch": 2.634557495484648,
|
|
"grad_norm": 4.945919036865234,
|
|
"learning_rate": 3.149006286918474e-07,
|
|
"loss": 0.3238,
|
|
"step": 21880
|
|
},
|
|
{
|
|
"epoch": 2.6357615894039736,
|
|
"grad_norm": 4.671433448791504,
|
|
"learning_rate": 3.144126731387126e-07,
|
|
"loss": 0.2941,
|
|
"step": 21890
|
|
},
|
|
{
|
|
"epoch": 2.6369656833232993,
|
|
"grad_norm": 5.389127731323242,
|
|
"learning_rate": 3.1392492249585744e-07,
|
|
"loss": 0.3223,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 2.638169777242625,
|
|
"grad_norm": 5.42547607421875,
|
|
"learning_rate": 3.134373773018165e-07,
|
|
"loss": 0.305,
|
|
"step": 21910
|
|
},
|
|
{
|
|
"epoch": 2.6393738711619505,
|
|
"grad_norm": 5.633350849151611,
|
|
"learning_rate": 3.129500380948973e-07,
|
|
"loss": 0.296,
|
|
"step": 21920
|
|
},
|
|
{
|
|
"epoch": 2.640577965081276,
|
|
"grad_norm": 4.668237209320068,
|
|
"learning_rate": 3.1246290541317937e-07,
|
|
"loss": 0.3032,
|
|
"step": 21930
|
|
},
|
|
{
|
|
"epoch": 2.641782059000602,
|
|
"grad_norm": 4.56117057800293,
|
|
"learning_rate": 3.119759797945147e-07,
|
|
"loss": 0.3036,
|
|
"step": 21940
|
|
},
|
|
{
|
|
"epoch": 2.642986152919928,
|
|
"grad_norm": 5.208002090454102,
|
|
"learning_rate": 3.114892617765266e-07,
|
|
"loss": 0.2983,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 2.6441902468392535,
|
|
"grad_norm": 4.775214195251465,
|
|
"learning_rate": 3.110027518966094e-07,
|
|
"loss": 0.3104,
|
|
"step": 21960
|
|
},
|
|
{
|
|
"epoch": 2.645394340758579,
|
|
"grad_norm": 4.55642032623291,
|
|
"learning_rate": 3.1051645069192675e-07,
|
|
"loss": 0.3162,
|
|
"step": 21970
|
|
},
|
|
{
|
|
"epoch": 2.646598434677905,
|
|
"grad_norm": 4.810263156890869,
|
|
"learning_rate": 3.1003035869941295e-07,
|
|
"loss": 0.2958,
|
|
"step": 21980
|
|
},
|
|
{
|
|
"epoch": 2.6478025285972304,
|
|
"grad_norm": 4.988792896270752,
|
|
"learning_rate": 3.0954447645577063e-07,
|
|
"loss": 0.308,
|
|
"step": 21990
|
|
},
|
|
{
|
|
"epoch": 2.6490066225165565,
|
|
"grad_norm": 4.394057273864746,
|
|
"learning_rate": 3.0905880449747134e-07,
|
|
"loss": 0.2995,
|
|
"step": 22000
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 33220,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 4,
|
|
"save_steps": 1000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 5.886664442836628e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|