11573 lines
280 KiB
JSON
11573 lines
280 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1647,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0006074411541381929,
|
|
"grad_norm": 4.187357914113359,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.3011,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0012148823082763858,
|
|
"grad_norm": 4.498512475534671,
|
|
"learning_rate": 6.060606060606061e-08,
|
|
"loss": 1.3126,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0018223234624145787,
|
|
"grad_norm": 4.0434971741932495,
|
|
"learning_rate": 1.2121212121212122e-07,
|
|
"loss": 1.2803,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0024297646165527716,
|
|
"grad_norm": 4.162307471665992,
|
|
"learning_rate": 1.8181818181818183e-07,
|
|
"loss": 1.3162,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0030372057706909645,
|
|
"grad_norm": 4.278843392925189,
|
|
"learning_rate": 2.4242424242424244e-07,
|
|
"loss": 1.3803,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0036446469248291574,
|
|
"grad_norm": 4.123864416498828,
|
|
"learning_rate": 3.0303030303030305e-07,
|
|
"loss": 1.3463,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.00425208807896735,
|
|
"grad_norm": 4.304171763132361,
|
|
"learning_rate": 3.6363636363636366e-07,
|
|
"loss": 1.3397,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.004859529233105543,
|
|
"grad_norm": 3.9277126463319307,
|
|
"learning_rate": 4.242424242424243e-07,
|
|
"loss": 1.3189,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.005466970387243736,
|
|
"grad_norm": 3.922407631596546,
|
|
"learning_rate": 4.848484848484849e-07,
|
|
"loss": 1.2963,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.006074411541381929,
|
|
"grad_norm": 3.895403645466344,
|
|
"learning_rate": 5.454545454545455e-07,
|
|
"loss": 1.2897,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.006681852695520122,
|
|
"grad_norm": 4.0159782701678814,
|
|
"learning_rate": 6.060606060606061e-07,
|
|
"loss": 1.2556,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.007289293849658315,
|
|
"grad_norm": 4.3312492479849976,
|
|
"learning_rate": 6.666666666666667e-07,
|
|
"loss": 1.2826,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.007896735003796507,
|
|
"grad_norm": 3.8648934637566277,
|
|
"learning_rate": 7.272727272727273e-07,
|
|
"loss": 1.3033,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.0085041761579347,
|
|
"grad_norm": 4.286961260584155,
|
|
"learning_rate": 7.878787878787879e-07,
|
|
"loss": 1.3182,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.009111617312072893,
|
|
"grad_norm": 3.9563681075623696,
|
|
"learning_rate": 8.484848484848486e-07,
|
|
"loss": 1.2456,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.009719058466211086,
|
|
"grad_norm": 4.609717367893727,
|
|
"learning_rate": 9.090909090909091e-07,
|
|
"loss": 1.3312,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.010326499620349278,
|
|
"grad_norm": 3.8182264142948212,
|
|
"learning_rate": 9.696969696969698e-07,
|
|
"loss": 1.2702,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.010933940774487472,
|
|
"grad_norm": 3.9030184249626765,
|
|
"learning_rate": 1.0303030303030304e-06,
|
|
"loss": 1.2982,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.011541381928625664,
|
|
"grad_norm": 3.630670067928241,
|
|
"learning_rate": 1.090909090909091e-06,
|
|
"loss": 1.2928,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.012148823082763858,
|
|
"grad_norm": 3.627122940350712,
|
|
"learning_rate": 1.1515151515151516e-06,
|
|
"loss": 1.3399,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.01275626423690205,
|
|
"grad_norm": 3.4178400978584498,
|
|
"learning_rate": 1.2121212121212122e-06,
|
|
"loss": 1.201,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.013363705391040244,
|
|
"grad_norm": 3.8805835742800308,
|
|
"learning_rate": 1.2727272727272728e-06,
|
|
"loss": 1.3176,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.013971146545178436,
|
|
"grad_norm": 5.15609913207104,
|
|
"learning_rate": 1.3333333333333334e-06,
|
|
"loss": 1.2253,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.01457858769931663,
|
|
"grad_norm": 3.361843882497272,
|
|
"learning_rate": 1.3939393939393942e-06,
|
|
"loss": 1.2849,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.015186028853454821,
|
|
"grad_norm": 3.419552970347977,
|
|
"learning_rate": 1.4545454545454546e-06,
|
|
"loss": 1.0856,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.015793470007593013,
|
|
"grad_norm": 4.990297184335331,
|
|
"learning_rate": 1.5151515151515152e-06,
|
|
"loss": 1.1542,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.01640091116173121,
|
|
"grad_norm": 3.6910946706226024,
|
|
"learning_rate": 1.5757575757575759e-06,
|
|
"loss": 1.1332,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.0170083523158694,
|
|
"grad_norm": 3.2707017108314105,
|
|
"learning_rate": 1.6363636363636365e-06,
|
|
"loss": 1.0859,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.017615793470007593,
|
|
"grad_norm": 3.6848753932789022,
|
|
"learning_rate": 1.6969696969696973e-06,
|
|
"loss": 1.1462,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.018223234624145785,
|
|
"grad_norm": 3.539975633137712,
|
|
"learning_rate": 1.7575757575757577e-06,
|
|
"loss": 1.1017,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.018830675778283977,
|
|
"grad_norm": 5.020856655134608,
|
|
"learning_rate": 1.8181818181818183e-06,
|
|
"loss": 1.1052,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.019438116932422173,
|
|
"grad_norm": 2.8322427452989403,
|
|
"learning_rate": 1.878787878787879e-06,
|
|
"loss": 1.0702,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.020045558086560365,
|
|
"grad_norm": 11.978063076522911,
|
|
"learning_rate": 1.9393939393939395e-06,
|
|
"loss": 1.0783,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.020652999240698557,
|
|
"grad_norm": 3.0620204118757615,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 0.9578,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.02126044039483675,
|
|
"grad_norm": 2.6396729812241992,
|
|
"learning_rate": 2.0606060606060607e-06,
|
|
"loss": 1.0325,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.021867881548974944,
|
|
"grad_norm": 2.605944141300131,
|
|
"learning_rate": 2.1212121212121216e-06,
|
|
"loss": 0.9263,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.022475322703113136,
|
|
"grad_norm": 2.253997875811574,
|
|
"learning_rate": 2.181818181818182e-06,
|
|
"loss": 0.9275,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.023082763857251328,
|
|
"grad_norm": 2.713661141656442,
|
|
"learning_rate": 2.2424242424242428e-06,
|
|
"loss": 0.9474,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.02369020501138952,
|
|
"grad_norm": 3.6449984569878833,
|
|
"learning_rate": 2.303030303030303e-06,
|
|
"loss": 0.9734,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.024297646165527716,
|
|
"grad_norm": 2.810830857915748,
|
|
"learning_rate": 2.363636363636364e-06,
|
|
"loss": 0.8991,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.024905087319665908,
|
|
"grad_norm": 2.567407308011712,
|
|
"learning_rate": 2.4242424242424244e-06,
|
|
"loss": 0.8963,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.0255125284738041,
|
|
"grad_norm": 2.637128320235312,
|
|
"learning_rate": 2.4848484848484848e-06,
|
|
"loss": 0.8778,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.026119969627942292,
|
|
"grad_norm": 3.0903128789155754,
|
|
"learning_rate": 2.5454545454545456e-06,
|
|
"loss": 0.8738,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.026727410782080487,
|
|
"grad_norm": 2.6726103064757214,
|
|
"learning_rate": 2.6060606060606064e-06,
|
|
"loss": 0.8107,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.02733485193621868,
|
|
"grad_norm": 2.4879173840412006,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.8709,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.02794229309035687,
|
|
"grad_norm": 2.7470444516164294,
|
|
"learning_rate": 2.7272727272727272e-06,
|
|
"loss": 0.8008,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.028549734244495063,
|
|
"grad_norm": 3.0832323670913566,
|
|
"learning_rate": 2.7878787878787885e-06,
|
|
"loss": 0.8585,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.02915717539863326,
|
|
"grad_norm": 2.3479372726052334,
|
|
"learning_rate": 2.848484848484849e-06,
|
|
"loss": 0.799,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.02976461655277145,
|
|
"grad_norm": 3.333892504748674,
|
|
"learning_rate": 2.9090909090909093e-06,
|
|
"loss": 0.7643,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.030372057706909643,
|
|
"grad_norm": 4.005716160822946,
|
|
"learning_rate": 2.96969696969697e-06,
|
|
"loss": 0.7075,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.030979498861047835,
|
|
"grad_norm": 2.9637475493256393,
|
|
"learning_rate": 3.0303030303030305e-06,
|
|
"loss": 0.752,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.03158694001518603,
|
|
"grad_norm": 6.448028143148418,
|
|
"learning_rate": 3.090909090909091e-06,
|
|
"loss": 0.7298,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.03219438116932422,
|
|
"grad_norm": 2.5226109840726267,
|
|
"learning_rate": 3.1515151515151517e-06,
|
|
"loss": 0.6652,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.03280182232346242,
|
|
"grad_norm": 2.493123673145167,
|
|
"learning_rate": 3.2121212121212125e-06,
|
|
"loss": 0.6731,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.033409263477600606,
|
|
"grad_norm": 2.605987670786732,
|
|
"learning_rate": 3.272727272727273e-06,
|
|
"loss": 0.7381,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.0340167046317388,
|
|
"grad_norm": 2.29936704850675,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.6703,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.03462414578587699,
|
|
"grad_norm": 5.095296867406457,
|
|
"learning_rate": 3.3939393939393946e-06,
|
|
"loss": 0.6278,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.035231586940015186,
|
|
"grad_norm": 4.103295631128622,
|
|
"learning_rate": 3.454545454545455e-06,
|
|
"loss": 0.6501,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.03583902809415338,
|
|
"grad_norm": 3.266586264244061,
|
|
"learning_rate": 3.5151515151515154e-06,
|
|
"loss": 0.6557,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.03644646924829157,
|
|
"grad_norm": 1.996000103297209,
|
|
"learning_rate": 3.575757575757576e-06,
|
|
"loss": 0.605,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.037053910402429766,
|
|
"grad_norm": 2.616817368318823,
|
|
"learning_rate": 3.6363636363636366e-06,
|
|
"loss": 0.6313,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.037661351556567954,
|
|
"grad_norm": 2.977002184120951,
|
|
"learning_rate": 3.6969696969696974e-06,
|
|
"loss": 0.6025,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.03826879271070615,
|
|
"grad_norm": 1.8839479352203694,
|
|
"learning_rate": 3.757575757575758e-06,
|
|
"loss": 0.5803,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.038876233864844345,
|
|
"grad_norm": 1.8084677913612675,
|
|
"learning_rate": 3.818181818181819e-06,
|
|
"loss": 0.6036,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.039483675018982534,
|
|
"grad_norm": 3.298041724732867,
|
|
"learning_rate": 3.878787878787879e-06,
|
|
"loss": 0.5619,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.04009111617312073,
|
|
"grad_norm": 3.210051259243264,
|
|
"learning_rate": 3.93939393939394e-06,
|
|
"loss": 0.6117,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.040698557327258925,
|
|
"grad_norm": 1.7548473190282035,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.6115,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.04130599848139711,
|
|
"grad_norm": 4.027003074298192,
|
|
"learning_rate": 4.060606060606061e-06,
|
|
"loss": 0.555,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.04191343963553531,
|
|
"grad_norm": 1.7049695945671344,
|
|
"learning_rate": 4.1212121212121215e-06,
|
|
"loss": 0.5281,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.0425208807896735,
|
|
"grad_norm": 1.9295753107848288,
|
|
"learning_rate": 4.181818181818182e-06,
|
|
"loss": 0.4976,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.04312832194381169,
|
|
"grad_norm": 5.6724940525137315,
|
|
"learning_rate": 4.242424242424243e-06,
|
|
"loss": 0.5599,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.04373576309794989,
|
|
"grad_norm": 2.5165077043031925,
|
|
"learning_rate": 4.303030303030303e-06,
|
|
"loss": 0.5043,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.04434320425208808,
|
|
"grad_norm": 2.7600416718167438,
|
|
"learning_rate": 4.363636363636364e-06,
|
|
"loss": 0.5835,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.04495064540622627,
|
|
"grad_norm": 4.577709098881687,
|
|
"learning_rate": 4.424242424242425e-06,
|
|
"loss": 0.5495,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.04555808656036447,
|
|
"grad_norm": 3.185211056113932,
|
|
"learning_rate": 4.4848484848484855e-06,
|
|
"loss": 0.5371,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.046165527714502656,
|
|
"grad_norm": 1.7788256477959088,
|
|
"learning_rate": 4.5454545454545455e-06,
|
|
"loss": 0.4778,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.04677296886864085,
|
|
"grad_norm": 2.0763650855771254,
|
|
"learning_rate": 4.606060606060606e-06,
|
|
"loss": 0.4913,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.04738041002277904,
|
|
"grad_norm": 2.1625612516573605,
|
|
"learning_rate": 4.666666666666667e-06,
|
|
"loss": 0.453,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.047987851176917236,
|
|
"grad_norm": 2.186861003935316,
|
|
"learning_rate": 4.727272727272728e-06,
|
|
"loss": 0.559,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.04859529233105543,
|
|
"grad_norm": 2.7651744089253056,
|
|
"learning_rate": 4.787878787878788e-06,
|
|
"loss": 0.4996,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.04920273348519362,
|
|
"grad_norm": 1.8551809178974643,
|
|
"learning_rate": 4.848484848484849e-06,
|
|
"loss": 0.5501,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.049810174639331815,
|
|
"grad_norm": 1.8080650695028413,
|
|
"learning_rate": 4.90909090909091e-06,
|
|
"loss": 0.5637,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.05041761579347001,
|
|
"grad_norm": 1.830828959106882,
|
|
"learning_rate": 4.9696969696969696e-06,
|
|
"loss": 0.4904,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.0510250569476082,
|
|
"grad_norm": 1.7016759327222268,
|
|
"learning_rate": 5.030303030303031e-06,
|
|
"loss": 0.485,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.051632498101746395,
|
|
"grad_norm": 1.5379148608918123,
|
|
"learning_rate": 5.090909090909091e-06,
|
|
"loss": 0.4898,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.052239939255884583,
|
|
"grad_norm": 1.8661306362602696,
|
|
"learning_rate": 5.151515151515152e-06,
|
|
"loss": 0.4896,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.05284738041002278,
|
|
"grad_norm": 2.0602919860599482,
|
|
"learning_rate": 5.212121212121213e-06,
|
|
"loss": 0.4677,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.053454821564160974,
|
|
"grad_norm": 1.772568608105656,
|
|
"learning_rate": 5.272727272727273e-06,
|
|
"loss": 0.4233,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.05406226271829916,
|
|
"grad_norm": 1.6245327242364143,
|
|
"learning_rate": 5.333333333333334e-06,
|
|
"loss": 0.4784,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.05466970387243736,
|
|
"grad_norm": 1.4521657126765681,
|
|
"learning_rate": 5.3939393939393945e-06,
|
|
"loss": 0.4646,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.05527714502657555,
|
|
"grad_norm": 6.093340972704043,
|
|
"learning_rate": 5.4545454545454545e-06,
|
|
"loss": 0.4966,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.05588458618071374,
|
|
"grad_norm": 3.72118634286479,
|
|
"learning_rate": 5.515151515151515e-06,
|
|
"loss": 0.5535,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.05649202733485194,
|
|
"grad_norm": 2.058084505025063,
|
|
"learning_rate": 5.575757575757577e-06,
|
|
"loss": 0.4948,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.05709946848899013,
|
|
"grad_norm": 1.6056271512619456,
|
|
"learning_rate": 5.636363636363636e-06,
|
|
"loss": 0.4807,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.05770690964312832,
|
|
"grad_norm": 2.583298049219201,
|
|
"learning_rate": 5.696969696969698e-06,
|
|
"loss": 0.4591,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.05831435079726652,
|
|
"grad_norm": 1.5715554881698062,
|
|
"learning_rate": 5.7575757575757586e-06,
|
|
"loss": 0.477,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.058921791951404706,
|
|
"grad_norm": 2.8917057921549425,
|
|
"learning_rate": 5.8181818181818185e-06,
|
|
"loss": 0.5028,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.0595292331055429,
|
|
"grad_norm": 1.620684043715009,
|
|
"learning_rate": 5.878787878787879e-06,
|
|
"loss": 0.4828,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.06013667425968109,
|
|
"grad_norm": 1.375161799406732,
|
|
"learning_rate": 5.93939393939394e-06,
|
|
"loss": 0.4249,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.060744115413819286,
|
|
"grad_norm": 1.4172846210611314,
|
|
"learning_rate": 6e-06,
|
|
"loss": 0.3987,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.06135155656795748,
|
|
"grad_norm": 1.8383152351112293,
|
|
"learning_rate": 6.060606060606061e-06,
|
|
"loss": 0.459,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.06195899772209567,
|
|
"grad_norm": 1.609009063333299,
|
|
"learning_rate": 6.121212121212121e-06,
|
|
"loss": 0.4439,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.06256643887623387,
|
|
"grad_norm": 2.8231836394185734,
|
|
"learning_rate": 6.181818181818182e-06,
|
|
"loss": 0.529,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.06317388003037205,
|
|
"grad_norm": 1.4932765424378436,
|
|
"learning_rate": 6.2424242424242434e-06,
|
|
"loss": 0.4752,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.06378132118451026,
|
|
"grad_norm": 1.4265072751982764,
|
|
"learning_rate": 6.303030303030303e-06,
|
|
"loss": 0.4559,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.06438876233864844,
|
|
"grad_norm": 1.7232201589379095,
|
|
"learning_rate": 6.363636363636364e-06,
|
|
"loss": 0.4081,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.06499620349278663,
|
|
"grad_norm": 1.9090397829519035,
|
|
"learning_rate": 6.424242424242425e-06,
|
|
"loss": 0.4906,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.06560364464692484,
|
|
"grad_norm": 5.566791403419221,
|
|
"learning_rate": 6.484848484848485e-06,
|
|
"loss": 0.5131,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.06621108580106302,
|
|
"grad_norm": 2.1515254306159153,
|
|
"learning_rate": 6.545454545454546e-06,
|
|
"loss": 0.4312,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.06681852695520121,
|
|
"grad_norm": 1.53243371760272,
|
|
"learning_rate": 6.606060606060607e-06,
|
|
"loss": 0.4083,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.0674259681093394,
|
|
"grad_norm": 1.5854107306879113,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 0.4338,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.0680334092634776,
|
|
"grad_norm": 1.7854671288961894,
|
|
"learning_rate": 6.7272727272727275e-06,
|
|
"loss": 0.4259,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.06864085041761579,
|
|
"grad_norm": 1.603406823497176,
|
|
"learning_rate": 6.787878787878789e-06,
|
|
"loss": 0.4359,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.06924829157175398,
|
|
"grad_norm": 1.4707223980643627,
|
|
"learning_rate": 6.848484848484849e-06,
|
|
"loss": 0.4215,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.06985573272589218,
|
|
"grad_norm": 1.6534737889131494,
|
|
"learning_rate": 6.90909090909091e-06,
|
|
"loss": 0.4414,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.07046317388003037,
|
|
"grad_norm": 1.532739371631904,
|
|
"learning_rate": 6.969696969696971e-06,
|
|
"loss": 0.4297,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.07107061503416856,
|
|
"grad_norm": 1.4297734448243231,
|
|
"learning_rate": 7.030303030303031e-06,
|
|
"loss": 0.4479,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.07167805618830676,
|
|
"grad_norm": 1.4036156057391842,
|
|
"learning_rate": 7.0909090909090916e-06,
|
|
"loss": 0.4278,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.07228549734244495,
|
|
"grad_norm": 1.6693103514376786,
|
|
"learning_rate": 7.151515151515152e-06,
|
|
"loss": 0.4375,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.07289293849658314,
|
|
"grad_norm": 1.8164935073923107,
|
|
"learning_rate": 7.212121212121212e-06,
|
|
"loss": 0.3987,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.07350037965072134,
|
|
"grad_norm": 1.6026752209068431,
|
|
"learning_rate": 7.272727272727273e-06,
|
|
"loss": 0.4095,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.07410782080485953,
|
|
"grad_norm": 1.652701468229036,
|
|
"learning_rate": 7.333333333333333e-06,
|
|
"loss": 0.4165,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.07471526195899772,
|
|
"grad_norm": 1.5322103898261112,
|
|
"learning_rate": 7.393939393939395e-06,
|
|
"loss": 0.4534,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.07532270311313591,
|
|
"grad_norm": 1.5253210647459732,
|
|
"learning_rate": 7.454545454545456e-06,
|
|
"loss": 0.4315,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.07593014426727411,
|
|
"grad_norm": 1.774830478931673,
|
|
"learning_rate": 7.515151515151516e-06,
|
|
"loss": 0.4044,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.0765375854214123,
|
|
"grad_norm": 1.887705476184069,
|
|
"learning_rate": 7.5757575757575764e-06,
|
|
"loss": 0.4153,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.07714502657555049,
|
|
"grad_norm": 2.38958143214645,
|
|
"learning_rate": 7.636363636363638e-06,
|
|
"loss": 0.3912,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.07775246772968869,
|
|
"grad_norm": 1.6032007121408827,
|
|
"learning_rate": 7.696969696969696e-06,
|
|
"loss": 0.3989,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.07835990888382688,
|
|
"grad_norm": 1.3782945465938736,
|
|
"learning_rate": 7.757575757575758e-06,
|
|
"loss": 0.4357,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.07896735003796507,
|
|
"grad_norm": 1.332527605688991,
|
|
"learning_rate": 7.81818181818182e-06,
|
|
"loss": 0.3616,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.07957479119210327,
|
|
"grad_norm": 1.8131621493966852,
|
|
"learning_rate": 7.87878787878788e-06,
|
|
"loss": 0.4498,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.08018223234624146,
|
|
"grad_norm": 4.745812951213747,
|
|
"learning_rate": 7.93939393939394e-06,
|
|
"loss": 0.4247,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.08078967350037965,
|
|
"grad_norm": 2.294695141070265,
|
|
"learning_rate": 8.000000000000001e-06,
|
|
"loss": 0.3744,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.08139711465451785,
|
|
"grad_norm": 1.6356983798797735,
|
|
"learning_rate": 8.060606060606061e-06,
|
|
"loss": 0.4373,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.08200455580865604,
|
|
"grad_norm": 1.9911745307305873,
|
|
"learning_rate": 8.121212121212121e-06,
|
|
"loss": 0.4294,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.08261199696279423,
|
|
"grad_norm": 1.3737076521844085,
|
|
"learning_rate": 8.181818181818183e-06,
|
|
"loss": 0.4106,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.08321943811693243,
|
|
"grad_norm": 1.5919226800718935,
|
|
"learning_rate": 8.242424242424243e-06,
|
|
"loss": 0.4537,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.08382687927107062,
|
|
"grad_norm": 1.4783198785793517,
|
|
"learning_rate": 8.303030303030305e-06,
|
|
"loss": 0.3788,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.0844343204252088,
|
|
"grad_norm": 1.6410317800155916,
|
|
"learning_rate": 8.363636363636365e-06,
|
|
"loss": 0.3867,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.085041761579347,
|
|
"grad_norm": 1.6735556190365386,
|
|
"learning_rate": 8.424242424242425e-06,
|
|
"loss": 0.4355,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.0856492027334852,
|
|
"grad_norm": 1.5607650619109157,
|
|
"learning_rate": 8.484848484848486e-06,
|
|
"loss": 0.4044,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.08625664388762339,
|
|
"grad_norm": 1.4477035531105342,
|
|
"learning_rate": 8.545454545454546e-06,
|
|
"loss": 0.4603,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.08686408504176157,
|
|
"grad_norm": 2.315589018866609,
|
|
"learning_rate": 8.606060606060606e-06,
|
|
"loss": 0.3701,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.08747152619589978,
|
|
"grad_norm": 1.282913362768846,
|
|
"learning_rate": 8.666666666666668e-06,
|
|
"loss": 0.4052,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.08807896735003796,
|
|
"grad_norm": 1.441795861741466,
|
|
"learning_rate": 8.727272727272728e-06,
|
|
"loss": 0.3743,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.08868640850417615,
|
|
"grad_norm": 1.6705399490739978,
|
|
"learning_rate": 8.787878787878788e-06,
|
|
"loss": 0.4156,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.08929384965831436,
|
|
"grad_norm": 1.3022330139774325,
|
|
"learning_rate": 8.84848484848485e-06,
|
|
"loss": 0.403,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.08990129081245254,
|
|
"grad_norm": 1.3289138423968723,
|
|
"learning_rate": 8.90909090909091e-06,
|
|
"loss": 0.4241,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.09050873196659073,
|
|
"grad_norm": 1.9110745404266152,
|
|
"learning_rate": 8.969696969696971e-06,
|
|
"loss": 0.3843,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.09111617312072894,
|
|
"grad_norm": 1.4735930505959485,
|
|
"learning_rate": 9.030303030303031e-06,
|
|
"loss": 0.3963,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.09172361427486712,
|
|
"grad_norm": 1.542399354189347,
|
|
"learning_rate": 9.090909090909091e-06,
|
|
"loss": 0.3909,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.09233105542900531,
|
|
"grad_norm": 3.7339191436590244,
|
|
"learning_rate": 9.151515151515153e-06,
|
|
"loss": 0.3864,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.0929384965831435,
|
|
"grad_norm": 1.3925338316644487,
|
|
"learning_rate": 9.212121212121213e-06,
|
|
"loss": 0.3766,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.0935459377372817,
|
|
"grad_norm": 1.4555873503122925,
|
|
"learning_rate": 9.272727272727273e-06,
|
|
"loss": 0.352,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.09415337889141989,
|
|
"grad_norm": 1.5097093818651814,
|
|
"learning_rate": 9.333333333333334e-06,
|
|
"loss": 0.3938,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.09476082004555808,
|
|
"grad_norm": 1.5316324465377855,
|
|
"learning_rate": 9.393939393939396e-06,
|
|
"loss": 0.3722,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.09536826119969628,
|
|
"grad_norm": 1.3962340669474915,
|
|
"learning_rate": 9.454545454545456e-06,
|
|
"loss": 0.4142,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.09597570235383447,
|
|
"grad_norm": 1.2746895088209749,
|
|
"learning_rate": 9.515151515151516e-06,
|
|
"loss": 0.3723,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.09658314350797266,
|
|
"grad_norm": 1.818570241099621,
|
|
"learning_rate": 9.575757575757576e-06,
|
|
"loss": 0.4198,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.09719058466211086,
|
|
"grad_norm": 1.2766595136162489,
|
|
"learning_rate": 9.636363636363638e-06,
|
|
"loss": 0.3832,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.09779802581624905,
|
|
"grad_norm": 1.2291199390553869,
|
|
"learning_rate": 9.696969696969698e-06,
|
|
"loss": 0.3825,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.09840546697038724,
|
|
"grad_norm": 1.316405861184458,
|
|
"learning_rate": 9.757575757575758e-06,
|
|
"loss": 0.3418,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.09901290812452544,
|
|
"grad_norm": 1.9801803743837283,
|
|
"learning_rate": 9.81818181818182e-06,
|
|
"loss": 0.3554,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.09962034927866363,
|
|
"grad_norm": 1.3735171287256052,
|
|
"learning_rate": 9.87878787878788e-06,
|
|
"loss": 0.4433,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.10022779043280182,
|
|
"grad_norm": 1.548447451053059,
|
|
"learning_rate": 9.939393939393939e-06,
|
|
"loss": 0.3988,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.10083523158694002,
|
|
"grad_norm": 1.9424963562307047,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4318,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.10144267274107821,
|
|
"grad_norm": 1.4796993703531827,
|
|
"learning_rate": 9.999988765773283e-06,
|
|
"loss": 0.3901,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.1020501138952164,
|
|
"grad_norm": 2.718931345910638,
|
|
"learning_rate": 9.99995506314361e-06,
|
|
"loss": 0.3733,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.10265755504935459,
|
|
"grad_norm": 1.5993151857382264,
|
|
"learning_rate": 9.999898892262433e-06,
|
|
"loss": 0.3702,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.10326499620349279,
|
|
"grad_norm": 2.0474737975378914,
|
|
"learning_rate": 9.99982025338217e-06,
|
|
"loss": 0.3925,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.10387243735763098,
|
|
"grad_norm": 1.36801258921155,
|
|
"learning_rate": 9.999719146856191e-06,
|
|
"loss": 0.3601,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.10447987851176917,
|
|
"grad_norm": 2.62979787342245,
|
|
"learning_rate": 9.999595573138845e-06,
|
|
"loss": 0.3676,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.10508731966590737,
|
|
"grad_norm": 1.651611235990868,
|
|
"learning_rate": 9.99944953278543e-06,
|
|
"loss": 0.3896,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.10569476082004556,
|
|
"grad_norm": 1.537437823682991,
|
|
"learning_rate": 9.99928102645221e-06,
|
|
"loss": 0.3931,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.10630220197418375,
|
|
"grad_norm": 1.483529775109398,
|
|
"learning_rate": 9.999090054896397e-06,
|
|
"loss": 0.4084,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.10690964312832195,
|
|
"grad_norm": 1.846243435186876,
|
|
"learning_rate": 9.99887661897616e-06,
|
|
"loss": 0.4019,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.10751708428246014,
|
|
"grad_norm": 1.9400365455774309,
|
|
"learning_rate": 9.998640719650609e-06,
|
|
"loss": 0.353,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.10812452543659833,
|
|
"grad_norm": 1.6331047828876988,
|
|
"learning_rate": 9.99838235797981e-06,
|
|
"loss": 0.3934,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.10873196659073653,
|
|
"grad_norm": 1.3201503553736853,
|
|
"learning_rate": 9.998101535124758e-06,
|
|
"loss": 0.3784,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.10933940774487472,
|
|
"grad_norm": 1.5036507542871547,
|
|
"learning_rate": 9.997798252347382e-06,
|
|
"loss": 0.3829,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.1099468488990129,
|
|
"grad_norm": 1.8873362444241242,
|
|
"learning_rate": 9.997472511010543e-06,
|
|
"loss": 0.3468,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.1105542900531511,
|
|
"grad_norm": 1.7817085438588727,
|
|
"learning_rate": 9.99712431257802e-06,
|
|
"loss": 0.3942,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.1111617312072893,
|
|
"grad_norm": 1.4579688480639608,
|
|
"learning_rate": 9.99675365861451e-06,
|
|
"loss": 0.3493,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.11176917236142749,
|
|
"grad_norm": 1.3333541821120902,
|
|
"learning_rate": 9.996360550785619e-06,
|
|
"loss": 0.3748,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.11237661351556567,
|
|
"grad_norm": 1.4462662425656174,
|
|
"learning_rate": 9.995944990857848e-06,
|
|
"loss": 0.3929,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.11298405466970388,
|
|
"grad_norm": 2.8011059160646785,
|
|
"learning_rate": 9.9955069806986e-06,
|
|
"loss": 0.3832,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.11359149582384206,
|
|
"grad_norm": 1.795646163613219,
|
|
"learning_rate": 9.995046522276152e-06,
|
|
"loss": 0.3726,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.11419893697798025,
|
|
"grad_norm": 1.6154980564971173,
|
|
"learning_rate": 9.994563617659665e-06,
|
|
"loss": 0.4186,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.11480637813211846,
|
|
"grad_norm": 4.098449801468239,
|
|
"learning_rate": 9.994058269019163e-06,
|
|
"loss": 0.3649,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.11541381928625664,
|
|
"grad_norm": 2.0366760130819452,
|
|
"learning_rate": 9.993530478625524e-06,
|
|
"loss": 0.3889,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.11602126044039483,
|
|
"grad_norm": 1.3683556243295762,
|
|
"learning_rate": 9.992980248850476e-06,
|
|
"loss": 0.3505,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.11662870159453304,
|
|
"grad_norm": 1.631109364001529,
|
|
"learning_rate": 9.992407582166582e-06,
|
|
"loss": 0.3716,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.11723614274867122,
|
|
"grad_norm": 1.2863914949680042,
|
|
"learning_rate": 9.99181248114723e-06,
|
|
"loss": 0.353,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.11784358390280941,
|
|
"grad_norm": 2.3770367523137512,
|
|
"learning_rate": 9.991194948466615e-06,
|
|
"loss": 0.3212,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.11845102505694761,
|
|
"grad_norm": 1.251810621948992,
|
|
"learning_rate": 9.990554986899745e-06,
|
|
"loss": 0.3383,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.1190584662110858,
|
|
"grad_norm": 1.3642089088744158,
|
|
"learning_rate": 9.989892599322404e-06,
|
|
"loss": 0.4176,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.11966590736522399,
|
|
"grad_norm": 1.6922372698689354,
|
|
"learning_rate": 9.98920778871116e-06,
|
|
"loss": 0.3655,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.12027334851936218,
|
|
"grad_norm": 3.407996768283883,
|
|
"learning_rate": 9.988500558143337e-06,
|
|
"loss": 0.3844,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.12088078967350038,
|
|
"grad_norm": 1.609402206597202,
|
|
"learning_rate": 9.987770910797014e-06,
|
|
"loss": 0.4128,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.12148823082763857,
|
|
"grad_norm": 1.3140316757802264,
|
|
"learning_rate": 9.987018849950996e-06,
|
|
"loss": 0.3962,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.12209567198177676,
|
|
"grad_norm": 1.7469090493735866,
|
|
"learning_rate": 9.986244378984817e-06,
|
|
"loss": 0.3691,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.12270311313591496,
|
|
"grad_norm": 1.33925380250546,
|
|
"learning_rate": 9.985447501378706e-06,
|
|
"loss": 0.3566,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.12331055429005315,
|
|
"grad_norm": 1.809637543111782,
|
|
"learning_rate": 9.984628220713587e-06,
|
|
"loss": 0.33,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.12391799544419134,
|
|
"grad_norm": 8.810976676413983,
|
|
"learning_rate": 9.983786540671052e-06,
|
|
"loss": 0.3745,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.12452543659832954,
|
|
"grad_norm": 1.5678141251242255,
|
|
"learning_rate": 9.98292246503335e-06,
|
|
"loss": 0.3863,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.12513287775246773,
|
|
"grad_norm": 1.2444290855149283,
|
|
"learning_rate": 9.982035997683372e-06,
|
|
"loss": 0.3449,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.12574031890660592,
|
|
"grad_norm": 1.262107824746014,
|
|
"learning_rate": 9.981127142604628e-06,
|
|
"loss": 0.3553,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.1263477600607441,
|
|
"grad_norm": 2.261971867644215,
|
|
"learning_rate": 9.980195903881231e-06,
|
|
"loss": 0.3929,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.1269552012148823,
|
|
"grad_norm": 1.8939164327645688,
|
|
"learning_rate": 9.979242285697878e-06,
|
|
"loss": 0.3894,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.1275626423690205,
|
|
"grad_norm": 1.3109993122933488,
|
|
"learning_rate": 9.978266292339838e-06,
|
|
"loss": 0.3462,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.1281700835231587,
|
|
"grad_norm": 1.2689263598262492,
|
|
"learning_rate": 9.97726792819292e-06,
|
|
"loss": 0.3756,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.1287775246772969,
|
|
"grad_norm": 1.413911853511702,
|
|
"learning_rate": 9.976247197743465e-06,
|
|
"loss": 0.3517,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.12938496583143508,
|
|
"grad_norm": 1.1477594568471046,
|
|
"learning_rate": 9.975204105578318e-06,
|
|
"loss": 0.3524,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.12999240698557327,
|
|
"grad_norm": 1.3268983856893921,
|
|
"learning_rate": 9.974138656384815e-06,
|
|
"loss": 0.3703,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.13059984813971146,
|
|
"grad_norm": 1.6085096632167164,
|
|
"learning_rate": 9.973050854950756e-06,
|
|
"loss": 0.4107,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.13120728929384967,
|
|
"grad_norm": 1.2710644494087506,
|
|
"learning_rate": 9.97194070616438e-06,
|
|
"loss": 0.3704,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.13181473044798786,
|
|
"grad_norm": 1.7007435080900732,
|
|
"learning_rate": 9.970808215014357e-06,
|
|
"loss": 0.3616,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.13242217160212605,
|
|
"grad_norm": 2.8035141258704965,
|
|
"learning_rate": 9.969653386589749e-06,
|
|
"loss": 0.3476,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.13302961275626424,
|
|
"grad_norm": 1.4530105606551262,
|
|
"learning_rate": 9.968476226079997e-06,
|
|
"loss": 0.3658,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.13363705391040243,
|
|
"grad_norm": 1.365931702075457,
|
|
"learning_rate": 9.967276738774897e-06,
|
|
"loss": 0.3559,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.13424449506454061,
|
|
"grad_norm": 2.4105147491908845,
|
|
"learning_rate": 9.966054930064577e-06,
|
|
"loss": 0.3464,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.1348519362186788,
|
|
"grad_norm": 1.460590918478701,
|
|
"learning_rate": 9.964810805439464e-06,
|
|
"loss": 0.3835,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.13545937737281702,
|
|
"grad_norm": 1.4592160813082191,
|
|
"learning_rate": 9.96354437049027e-06,
|
|
"loss": 0.3649,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.1360668185269552,
|
|
"grad_norm": 1.3706484221475523,
|
|
"learning_rate": 9.962255630907964e-06,
|
|
"loss": 0.306,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.1366742596810934,
|
|
"grad_norm": 1.9923965820088605,
|
|
"learning_rate": 9.96094459248374e-06,
|
|
"loss": 0.4094,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.13728170083523158,
|
|
"grad_norm": 1.212563998181567,
|
|
"learning_rate": 9.959611261108999e-06,
|
|
"loss": 0.3601,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.13788914198936977,
|
|
"grad_norm": 1.0692124626260124,
|
|
"learning_rate": 9.95825564277532e-06,
|
|
"loss": 0.3532,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.13849658314350796,
|
|
"grad_norm": 1.1033511924540673,
|
|
"learning_rate": 9.956877743574437e-06,
|
|
"loss": 0.3384,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.13910402429764618,
|
|
"grad_norm": 1.2707398286483838,
|
|
"learning_rate": 9.955477569698197e-06,
|
|
"loss": 0.3367,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.13971146545178437,
|
|
"grad_norm": 1.405772249647566,
|
|
"learning_rate": 9.954055127438554e-06,
|
|
"loss": 0.3673,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.14031890660592256,
|
|
"grad_norm": 1.8473504788016004,
|
|
"learning_rate": 9.952610423187516e-06,
|
|
"loss": 0.4095,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.14092634776006074,
|
|
"grad_norm": 1.4016757128131039,
|
|
"learning_rate": 9.951143463437145e-06,
|
|
"loss": 0.3503,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.14153378891419893,
|
|
"grad_norm": 1.3608962633812787,
|
|
"learning_rate": 9.949654254779499e-06,
|
|
"loss": 0.3897,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.14214123006833712,
|
|
"grad_norm": 1.2731854733891155,
|
|
"learning_rate": 9.948142803906623e-06,
|
|
"loss": 0.3596,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.1427486712224753,
|
|
"grad_norm": 2.744204155126872,
|
|
"learning_rate": 9.946609117610508e-06,
|
|
"loss": 0.3311,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.14335611237661353,
|
|
"grad_norm": 1.4126443556826698,
|
|
"learning_rate": 9.94505320278307e-06,
|
|
"loss": 0.3417,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.14396355353075171,
|
|
"grad_norm": 1.8771416206071374,
|
|
"learning_rate": 9.943475066416105e-06,
|
|
"loss": 0.3246,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.1445709946848899,
|
|
"grad_norm": 1.662514482004372,
|
|
"learning_rate": 9.94187471560127e-06,
|
|
"loss": 0.336,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.1451784358390281,
|
|
"grad_norm": 1.3252537752081341,
|
|
"learning_rate": 9.940252157530048e-06,
|
|
"loss": 0.3728,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.14578587699316628,
|
|
"grad_norm": 1.4379575767599655,
|
|
"learning_rate": 9.938607399493714e-06,
|
|
"loss": 0.3349,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.14639331814730447,
|
|
"grad_norm": 1.7085189808679144,
|
|
"learning_rate": 9.936940448883299e-06,
|
|
"loss": 0.3732,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.14700075930144268,
|
|
"grad_norm": 1.3475867031172948,
|
|
"learning_rate": 9.935251313189564e-06,
|
|
"loss": 0.3614,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.14760820045558087,
|
|
"grad_norm": 1.3757718265613308,
|
|
"learning_rate": 9.933540000002966e-06,
|
|
"loss": 0.3495,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.14821564160971906,
|
|
"grad_norm": 2.5569808145548016,
|
|
"learning_rate": 9.931806517013612e-06,
|
|
"loss": 0.3846,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.14882308276385725,
|
|
"grad_norm": 2.3675332462526724,
|
|
"learning_rate": 9.930050872011242e-06,
|
|
"loss": 0.3927,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.14943052391799544,
|
|
"grad_norm": 1.257087872816816,
|
|
"learning_rate": 9.92827307288518e-06,
|
|
"loss": 0.347,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.15003796507213363,
|
|
"grad_norm": 1.1999532829094337,
|
|
"learning_rate": 9.926473127624306e-06,
|
|
"loss": 0.3099,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.15064540622627182,
|
|
"grad_norm": 1.3143658832484064,
|
|
"learning_rate": 9.924651044317017e-06,
|
|
"loss": 0.3476,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.15125284738041003,
|
|
"grad_norm": 1.2916975919820823,
|
|
"learning_rate": 9.922806831151192e-06,
|
|
"loss": 0.3829,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.15186028853454822,
|
|
"grad_norm": 1.3792160543399081,
|
|
"learning_rate": 9.920940496414153e-06,
|
|
"loss": 0.3414,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.1524677296886864,
|
|
"grad_norm": 1.552844179932476,
|
|
"learning_rate": 9.919052048492633e-06,
|
|
"loss": 0.329,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.1530751708428246,
|
|
"grad_norm": 1.2281468240128537,
|
|
"learning_rate": 9.917141495872733e-06,
|
|
"loss": 0.3112,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.1536826119969628,
|
|
"grad_norm": 1.2302352301120831,
|
|
"learning_rate": 9.915208847139883e-06,
|
|
"loss": 0.3576,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.15429005315110098,
|
|
"grad_norm": 1.618659315298819,
|
|
"learning_rate": 9.913254110978812e-06,
|
|
"loss": 0.3669,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.1548974943052392,
|
|
"grad_norm": 1.8288474552445757,
|
|
"learning_rate": 9.911277296173498e-06,
|
|
"loss": 0.3572,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.15550493545937738,
|
|
"grad_norm": 1.1302723791543823,
|
|
"learning_rate": 9.909278411607134e-06,
|
|
"loss": 0.3432,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.15611237661351557,
|
|
"grad_norm": 1.1877481410718438,
|
|
"learning_rate": 9.90725746626209e-06,
|
|
"loss": 0.3096,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.15671981776765376,
|
|
"grad_norm": 1.168952450453289,
|
|
"learning_rate": 9.90521446921987e-06,
|
|
"loss": 0.3067,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.15732725892179195,
|
|
"grad_norm": 1.3157190721989047,
|
|
"learning_rate": 9.903149429661072e-06,
|
|
"loss": 0.3666,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.15793470007593013,
|
|
"grad_norm": 1.1390654533947697,
|
|
"learning_rate": 9.90106235686534e-06,
|
|
"loss": 0.3506,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.15854214123006835,
|
|
"grad_norm": 4.00051251257636,
|
|
"learning_rate": 9.89895326021134e-06,
|
|
"loss": 0.3249,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.15914958238420654,
|
|
"grad_norm": 1.2234262044458446,
|
|
"learning_rate": 9.896822149176695e-06,
|
|
"loss": 0.3318,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.15975702353834473,
|
|
"grad_norm": 1.3897253859010028,
|
|
"learning_rate": 9.894669033337962e-06,
|
|
"loss": 0.396,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.16036446469248292,
|
|
"grad_norm": 1.7242712965980627,
|
|
"learning_rate": 9.892493922370575e-06,
|
|
"loss": 0.3188,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.1609719058466211,
|
|
"grad_norm": 6.4297649243853225,
|
|
"learning_rate": 9.89029682604881e-06,
|
|
"loss": 0.3379,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.1615793470007593,
|
|
"grad_norm": 1.1537471226413523,
|
|
"learning_rate": 9.888077754245741e-06,
|
|
"loss": 0.3493,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.16218678815489748,
|
|
"grad_norm": 1.6875687617066326,
|
|
"learning_rate": 9.88583671693319e-06,
|
|
"loss": 0.3608,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.1627942293090357,
|
|
"grad_norm": 1.2568745648537023,
|
|
"learning_rate": 9.883573724181683e-06,
|
|
"loss": 0.3795,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.1634016704631739,
|
|
"grad_norm": 3.273702543597702,
|
|
"learning_rate": 9.881288786160413e-06,
|
|
"loss": 0.3669,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.16400911161731208,
|
|
"grad_norm": 1.6681243106945143,
|
|
"learning_rate": 9.878981913137178e-06,
|
|
"loss": 0.3045,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.16461655277145026,
|
|
"grad_norm": 1.3040049020132651,
|
|
"learning_rate": 9.87665311547836e-06,
|
|
"loss": 0.3748,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.16522399392558845,
|
|
"grad_norm": 1.4186566442108688,
|
|
"learning_rate": 9.87430240364885e-06,
|
|
"loss": 0.317,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.16583143507972664,
|
|
"grad_norm": 1.2927229360317918,
|
|
"learning_rate": 9.871929788212022e-06,
|
|
"loss": 0.3444,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.16643887623386486,
|
|
"grad_norm": 1.2231558908365099,
|
|
"learning_rate": 9.869535279829674e-06,
|
|
"loss": 0.3606,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.16704631738800305,
|
|
"grad_norm": 1.9640328612851339,
|
|
"learning_rate": 9.867118889261988e-06,
|
|
"loss": 0.3473,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.16765375854214123,
|
|
"grad_norm": 1.627402530642274,
|
|
"learning_rate": 9.864680627367476e-06,
|
|
"loss": 0.3278,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.16826119969627942,
|
|
"grad_norm": 1.2427133272428312,
|
|
"learning_rate": 9.862220505102933e-06,
|
|
"loss": 0.3521,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.1688686408504176,
|
|
"grad_norm": 1.3565240591384948,
|
|
"learning_rate": 9.859738533523384e-06,
|
|
"loss": 0.319,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.1694760820045558,
|
|
"grad_norm": 1.2107487526492342,
|
|
"learning_rate": 9.857234723782044e-06,
|
|
"loss": 0.3352,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.170083523158694,
|
|
"grad_norm": 1.3180319945310117,
|
|
"learning_rate": 9.854709087130261e-06,
|
|
"loss": 0.3139,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.1706909643128322,
|
|
"grad_norm": 3.2813293101863916,
|
|
"learning_rate": 9.852161634917463e-06,
|
|
"loss": 0.3349,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.1712984054669704,
|
|
"grad_norm": 1.245803428057021,
|
|
"learning_rate": 9.849592378591113e-06,
|
|
"loss": 0.3077,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.17190584662110858,
|
|
"grad_norm": 1.223977954013305,
|
|
"learning_rate": 9.847001329696653e-06,
|
|
"loss": 0.3069,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.17251328777524677,
|
|
"grad_norm": 1.3069739682646992,
|
|
"learning_rate": 9.844388499877457e-06,
|
|
"loss": 0.3291,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.17312072892938496,
|
|
"grad_norm": 1.131190253610025,
|
|
"learning_rate": 9.841753900874774e-06,
|
|
"loss": 0.3289,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.17372817008352315,
|
|
"grad_norm": 1.34440810872577,
|
|
"learning_rate": 9.839097544527674e-06,
|
|
"loss": 0.3267,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.17433561123766136,
|
|
"grad_norm": 1.3644537366382798,
|
|
"learning_rate": 9.836419442773004e-06,
|
|
"loss": 0.3443,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.17494305239179955,
|
|
"grad_norm": 1.2569085833757287,
|
|
"learning_rate": 9.833719607645325e-06,
|
|
"loss": 0.3241,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.17555049354593774,
|
|
"grad_norm": 1.2077036555513847,
|
|
"learning_rate": 9.830998051276858e-06,
|
|
"loss": 0.3541,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.17615793470007593,
|
|
"grad_norm": 3.9279665228825187,
|
|
"learning_rate": 9.82825478589744e-06,
|
|
"loss": 0.3666,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.17676537585421412,
|
|
"grad_norm": 1.3633793584504903,
|
|
"learning_rate": 9.825489823834454e-06,
|
|
"loss": 0.3162,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.1773728170083523,
|
|
"grad_norm": 1.190852630500219,
|
|
"learning_rate": 9.822703177512783e-06,
|
|
"loss": 0.3281,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.1779802581624905,
|
|
"grad_norm": 1.032612295087311,
|
|
"learning_rate": 9.819894859454756e-06,
|
|
"loss": 0.2902,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.1785876993166287,
|
|
"grad_norm": 2.2666599075970058,
|
|
"learning_rate": 9.817064882280085e-06,
|
|
"loss": 0.3872,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.1791951404707669,
|
|
"grad_norm": 1.5056944572723148,
|
|
"learning_rate": 9.814213258705813e-06,
|
|
"loss": 0.4009,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.1798025816249051,
|
|
"grad_norm": 1.2750096010881427,
|
|
"learning_rate": 9.811340001546252e-06,
|
|
"loss": 0.335,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.18041002277904328,
|
|
"grad_norm": 1.3167579603123851,
|
|
"learning_rate": 9.808445123712934e-06,
|
|
"loss": 0.3789,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.18101746393318147,
|
|
"grad_norm": 1.2917298455538913,
|
|
"learning_rate": 9.805528638214543e-06,
|
|
"loss": 0.365,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.18162490508731965,
|
|
"grad_norm": 1.2249270654309992,
|
|
"learning_rate": 9.802590558156863e-06,
|
|
"loss": 0.3267,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.18223234624145787,
|
|
"grad_norm": 1.28147771791881,
|
|
"learning_rate": 9.799630896742716e-06,
|
|
"loss": 0.3258,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.18283978739559606,
|
|
"grad_norm": 2.076161195627259,
|
|
"learning_rate": 9.796649667271905e-06,
|
|
"loss": 0.3588,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.18344722854973425,
|
|
"grad_norm": 1.1215708430697366,
|
|
"learning_rate": 9.793646883141155e-06,
|
|
"loss": 0.32,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.18405466970387244,
|
|
"grad_norm": 1.2637743993804484,
|
|
"learning_rate": 9.790622557844047e-06,
|
|
"loss": 0.3561,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.18466211085801063,
|
|
"grad_norm": 2.4961674050461635,
|
|
"learning_rate": 9.787576704970965e-06,
|
|
"loss": 0.343,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.1852695520121488,
|
|
"grad_norm": 1.8062025541980924,
|
|
"learning_rate": 9.784509338209026e-06,
|
|
"loss": 0.339,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.185876993166287,
|
|
"grad_norm": 1.1705663644423028,
|
|
"learning_rate": 9.781420471342035e-06,
|
|
"loss": 0.3204,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.18648443432042522,
|
|
"grad_norm": 1.3501675244896367,
|
|
"learning_rate": 9.778310118250397e-06,
|
|
"loss": 0.3598,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.1870918754745634,
|
|
"grad_norm": 1.2093391302114258,
|
|
"learning_rate": 9.77517829291108e-06,
|
|
"loss": 0.3397,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.1876993166287016,
|
|
"grad_norm": 1.3119917853957324,
|
|
"learning_rate": 9.772025009397538e-06,
|
|
"loss": 0.3291,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.18830675778283978,
|
|
"grad_norm": 3.4574677223030217,
|
|
"learning_rate": 9.768850281879651e-06,
|
|
"loss": 0.3297,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.18891419893697797,
|
|
"grad_norm": 1.3155763470245156,
|
|
"learning_rate": 9.765654124623664e-06,
|
|
"loss": 0.3317,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.18952164009111616,
|
|
"grad_norm": 1.2868677150111685,
|
|
"learning_rate": 9.762436551992117e-06,
|
|
"loss": 0.3545,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.19012908124525438,
|
|
"grad_norm": 1.2047620112147,
|
|
"learning_rate": 9.759197578443787e-06,
|
|
"loss": 0.3282,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.19073652239939257,
|
|
"grad_norm": 1.341450099932963,
|
|
"learning_rate": 9.755937218533622e-06,
|
|
"loss": 0.348,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.19134396355353075,
|
|
"grad_norm": 1.3549028797747085,
|
|
"learning_rate": 9.752655486912666e-06,
|
|
"loss": 0.3258,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.19195140470766894,
|
|
"grad_norm": 1.2140305820046362,
|
|
"learning_rate": 9.74935239832801e-06,
|
|
"loss": 0.3441,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.19255884586180713,
|
|
"grad_norm": 1.3240982666908445,
|
|
"learning_rate": 9.746027967622709e-06,
|
|
"loss": 0.3322,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.19316628701594532,
|
|
"grad_norm": 1.1866493550648762,
|
|
"learning_rate": 9.742682209735727e-06,
|
|
"loss": 0.3387,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.19377372817008354,
|
|
"grad_norm": 1.4708765807963506,
|
|
"learning_rate": 9.739315139701868e-06,
|
|
"loss": 0.3234,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.19438116932422173,
|
|
"grad_norm": 1.0795492879673514,
|
|
"learning_rate": 9.735926772651703e-06,
|
|
"loss": 0.3182,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.19498861047835991,
|
|
"grad_norm": 1.1995704100657156,
|
|
"learning_rate": 9.732517123811502e-06,
|
|
"loss": 0.3267,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.1955960516324981,
|
|
"grad_norm": 1.2807722640717565,
|
|
"learning_rate": 9.729086208503174e-06,
|
|
"loss": 0.3439,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.1962034927866363,
|
|
"grad_norm": 1.1439191799968789,
|
|
"learning_rate": 9.725634042144192e-06,
|
|
"loss": 0.3035,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.19681093394077448,
|
|
"grad_norm": 1.123574240810596,
|
|
"learning_rate": 9.722160640247523e-06,
|
|
"loss": 0.3402,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.19741837509491267,
|
|
"grad_norm": 1.9809732054403608,
|
|
"learning_rate": 9.71866601842156e-06,
|
|
"loss": 0.3596,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.19802581624905088,
|
|
"grad_norm": 3.2675821795654474,
|
|
"learning_rate": 9.715150192370054e-06,
|
|
"loss": 0.3378,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.19863325740318907,
|
|
"grad_norm": 1.1513389970747174,
|
|
"learning_rate": 9.71161317789204e-06,
|
|
"loss": 0.312,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.19924069855732726,
|
|
"grad_norm": 1.0732590413444016,
|
|
"learning_rate": 9.708054990881763e-06,
|
|
"loss": 0.3028,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.19984813971146545,
|
|
"grad_norm": 1.130054869998939,
|
|
"learning_rate": 9.70447564732862e-06,
|
|
"loss": 0.3161,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.20045558086560364,
|
|
"grad_norm": 1.4279288053692063,
|
|
"learning_rate": 9.700875163317072e-06,
|
|
"loss": 0.3159,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.20106302201974183,
|
|
"grad_norm": 1.4180293061625155,
|
|
"learning_rate": 9.69725355502658e-06,
|
|
"loss": 0.3555,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.20167046317388004,
|
|
"grad_norm": 1.5620650901784414,
|
|
"learning_rate": 9.693610838731532e-06,
|
|
"loss": 0.3256,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.20227790432801823,
|
|
"grad_norm": 1.3488433094850794,
|
|
"learning_rate": 9.689947030801168e-06,
|
|
"loss": 0.358,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.20288534548215642,
|
|
"grad_norm": 1.2086835793396953,
|
|
"learning_rate": 9.686262147699507e-06,
|
|
"loss": 0.3648,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.2034927866362946,
|
|
"grad_norm": 1.0080244863547254,
|
|
"learning_rate": 9.682556205985274e-06,
|
|
"loss": 0.3197,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.2041002277904328,
|
|
"grad_norm": 1.0405594952124566,
|
|
"learning_rate": 9.678829222311827e-06,
|
|
"loss": 0.304,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.204707668944571,
|
|
"grad_norm": 1.669499151030841,
|
|
"learning_rate": 9.675081213427076e-06,
|
|
"loss": 0.3282,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.20531511009870917,
|
|
"grad_norm": 1.162339657589905,
|
|
"learning_rate": 9.671312196173413e-06,
|
|
"loss": 0.328,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.2059225512528474,
|
|
"grad_norm": 1.1111055689988498,
|
|
"learning_rate": 9.667522187487635e-06,
|
|
"loss": 0.3352,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.20652999240698558,
|
|
"grad_norm": 1.3779787045612117,
|
|
"learning_rate": 9.663711204400872e-06,
|
|
"loss": 0.3575,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.20713743356112377,
|
|
"grad_norm": 1.6323496019886752,
|
|
"learning_rate": 9.659879264038499e-06,
|
|
"loss": 0.365,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.20774487471526196,
|
|
"grad_norm": 1.493535511167361,
|
|
"learning_rate": 9.656026383620076e-06,
|
|
"loss": 0.3445,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.20835231586940015,
|
|
"grad_norm": 1.3056823613349453,
|
|
"learning_rate": 9.65215258045925e-06,
|
|
"loss": 0.2948,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.20895975702353833,
|
|
"grad_norm": 1.0670662511449958,
|
|
"learning_rate": 9.6482578719637e-06,
|
|
"loss": 0.3139,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.20956719817767655,
|
|
"grad_norm": 1.3642861541498819,
|
|
"learning_rate": 9.644342275635036e-06,
|
|
"loss": 0.3015,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.21017463933181474,
|
|
"grad_norm": 1.0747742911930387,
|
|
"learning_rate": 9.640405809068743e-06,
|
|
"loss": 0.3228,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.21078208048595293,
|
|
"grad_norm": 1.1565608956431175,
|
|
"learning_rate": 9.636448489954077e-06,
|
|
"loss": 0.307,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.21138952164009112,
|
|
"grad_norm": 1.195151098550731,
|
|
"learning_rate": 9.632470336074009e-06,
|
|
"loss": 0.3284,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.2119969627942293,
|
|
"grad_norm": 1.1885220245152495,
|
|
"learning_rate": 9.628471365305134e-06,
|
|
"loss": 0.3437,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.2126044039483675,
|
|
"grad_norm": 1.0344142275699475,
|
|
"learning_rate": 9.624451595617588e-06,
|
|
"loss": 0.3185,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.21321184510250568,
|
|
"grad_norm": 1.2656391323297032,
|
|
"learning_rate": 9.620411045074972e-06,
|
|
"loss": 0.3626,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.2138192862566439,
|
|
"grad_norm": 1.0752778164280428,
|
|
"learning_rate": 9.616349731834271e-06,
|
|
"loss": 0.3225,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.2144267274107821,
|
|
"grad_norm": 1.2178645720798402,
|
|
"learning_rate": 9.612267674145772e-06,
|
|
"loss": 0.3534,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.21503416856492027,
|
|
"grad_norm": 1.4072309869153488,
|
|
"learning_rate": 9.608164890352977e-06,
|
|
"loss": 0.3459,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.21564160971905846,
|
|
"grad_norm": 1.1875602285502396,
|
|
"learning_rate": 9.604041398892528e-06,
|
|
"loss": 0.3288,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.21624905087319665,
|
|
"grad_norm": 1.2188563316023242,
|
|
"learning_rate": 9.599897218294122e-06,
|
|
"loss": 0.3509,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.21685649202733484,
|
|
"grad_norm": 1.1569315648201919,
|
|
"learning_rate": 9.595732367180422e-06,
|
|
"loss": 0.3173,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.21746393318147306,
|
|
"grad_norm": 1.5018233135579402,
|
|
"learning_rate": 9.591546864266983e-06,
|
|
"loss": 0.3507,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.21807137433561125,
|
|
"grad_norm": 1.0272557252775882,
|
|
"learning_rate": 9.58734072836216e-06,
|
|
"loss": 0.3001,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.21867881548974943,
|
|
"grad_norm": 1.245040066414171,
|
|
"learning_rate": 9.583113978367026e-06,
|
|
"loss": 0.2957,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.21928625664388762,
|
|
"grad_norm": 1.3275806437802142,
|
|
"learning_rate": 9.578866633275289e-06,
|
|
"loss": 0.3383,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.2198936977980258,
|
|
"grad_norm": 1.1727768085477153,
|
|
"learning_rate": 9.574598712173202e-06,
|
|
"loss": 0.2735,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.220501138952164,
|
|
"grad_norm": 1.22146926918798,
|
|
"learning_rate": 9.570310234239483e-06,
|
|
"loss": 0.3166,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.2211085801063022,
|
|
"grad_norm": 1.2281212541536195,
|
|
"learning_rate": 9.56600121874523e-06,
|
|
"loss": 0.3249,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.2217160212604404,
|
|
"grad_norm": 2.3299644415456036,
|
|
"learning_rate": 9.561671685053818e-06,
|
|
"loss": 0.3467,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.2223234624145786,
|
|
"grad_norm": 1.1619894211736224,
|
|
"learning_rate": 9.557321652620839e-06,
|
|
"loss": 0.3077,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.22293090356871678,
|
|
"grad_norm": 1.1786921246836153,
|
|
"learning_rate": 9.55295114099399e-06,
|
|
"loss": 0.3294,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.22353834472285497,
|
|
"grad_norm": 1.1859186341534969,
|
|
"learning_rate": 9.548560169812997e-06,
|
|
"loss": 0.3167,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.22414578587699316,
|
|
"grad_norm": 1.1441591110703015,
|
|
"learning_rate": 9.544148758809528e-06,
|
|
"loss": 0.3193,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.22475322703113135,
|
|
"grad_norm": 1.1128313603192685,
|
|
"learning_rate": 9.539716927807102e-06,
|
|
"loss": 0.3093,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.22536066818526956,
|
|
"grad_norm": 1.4910675750318487,
|
|
"learning_rate": 9.535264696720993e-06,
|
|
"loss": 0.3253,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.22596810933940775,
|
|
"grad_norm": 1.189666585401165,
|
|
"learning_rate": 9.530792085558151e-06,
|
|
"loss": 0.3558,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.22657555049354594,
|
|
"grad_norm": 1.2061368174942724,
|
|
"learning_rate": 9.526299114417108e-06,
|
|
"loss": 0.3253,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.22718299164768413,
|
|
"grad_norm": 1.0468374477355344,
|
|
"learning_rate": 9.521785803487888e-06,
|
|
"loss": 0.3178,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.22779043280182232,
|
|
"grad_norm": 1.2500589524530483,
|
|
"learning_rate": 9.517252173051912e-06,
|
|
"loss": 0.3066,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.2283978739559605,
|
|
"grad_norm": 2.396102762626989,
|
|
"learning_rate": 9.512698243481914e-06,
|
|
"loss": 0.3087,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.22900531511009872,
|
|
"grad_norm": 13.857607430818112,
|
|
"learning_rate": 9.508124035241843e-06,
|
|
"loss": 0.3001,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.2296127562642369,
|
|
"grad_norm": 1.5821099485242913,
|
|
"learning_rate": 9.50352956888678e-06,
|
|
"loss": 0.3393,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.2302201974183751,
|
|
"grad_norm": 1.2074450897701234,
|
|
"learning_rate": 9.498914865062831e-06,
|
|
"loss": 0.334,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.2308276385725133,
|
|
"grad_norm": 1.1721133873450802,
|
|
"learning_rate": 9.49427994450705e-06,
|
|
"loss": 0.3285,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.23143507972665148,
|
|
"grad_norm": 1.3430574212868829,
|
|
"learning_rate": 9.489624828047336e-06,
|
|
"loss": 0.3137,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.23204252088078967,
|
|
"grad_norm": 1.5915750295490272,
|
|
"learning_rate": 9.484949536602343e-06,
|
|
"loss": 0.3505,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.23264996203492785,
|
|
"grad_norm": 1.440309732703764,
|
|
"learning_rate": 9.480254091181385e-06,
|
|
"loss": 0.3441,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.23325740318906607,
|
|
"grad_norm": 1.2699897467088066,
|
|
"learning_rate": 9.47553851288434e-06,
|
|
"loss": 0.328,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.23386484434320426,
|
|
"grad_norm": 1.118564432611521,
|
|
"learning_rate": 9.470802822901558e-06,
|
|
"loss": 0.2914,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.23447228549734245,
|
|
"grad_norm": 2.267610070857606,
|
|
"learning_rate": 9.466047042513767e-06,
|
|
"loss": 0.3194,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.23507972665148064,
|
|
"grad_norm": 1.1933730736468051,
|
|
"learning_rate": 9.461271193091971e-06,
|
|
"loss": 0.3329,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.23568716780561882,
|
|
"grad_norm": 1.2144519426747942,
|
|
"learning_rate": 9.45647529609736e-06,
|
|
"loss": 0.3295,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.236294608959757,
|
|
"grad_norm": 1.3580165469389258,
|
|
"learning_rate": 9.451659373081214e-06,
|
|
"loss": 0.3447,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.23690205011389523,
|
|
"grad_norm": 1.1100635616742127,
|
|
"learning_rate": 9.4468234456848e-06,
|
|
"loss": 0.3337,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.23750949126803342,
|
|
"grad_norm": 1.3428354458732323,
|
|
"learning_rate": 9.44196753563928e-06,
|
|
"loss": 0.2838,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.2381169324221716,
|
|
"grad_norm": 1.3921865014793011,
|
|
"learning_rate": 9.437091664765611e-06,
|
|
"loss": 0.3256,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.2387243735763098,
|
|
"grad_norm": 1.0558795045496834,
|
|
"learning_rate": 9.43219585497445e-06,
|
|
"loss": 0.2924,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.23933181473044798,
|
|
"grad_norm": 1.0852216391713467,
|
|
"learning_rate": 9.427280128266049e-06,
|
|
"loss": 0.3159,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.23993925588458617,
|
|
"grad_norm": 1.2663888095002538,
|
|
"learning_rate": 9.422344506730168e-06,
|
|
"loss": 0.3223,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.24054669703872436,
|
|
"grad_norm": 1.2598248186127823,
|
|
"learning_rate": 9.41738901254596e-06,
|
|
"loss": 0.3114,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.24115413819286258,
|
|
"grad_norm": 1.2212737926468238,
|
|
"learning_rate": 9.412413667981884e-06,
|
|
"loss": 0.365,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.24176157934700077,
|
|
"grad_norm": 1.1959984881869385,
|
|
"learning_rate": 9.4074184953956e-06,
|
|
"loss": 0.3723,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.24236902050113895,
|
|
"grad_norm": 1.291676435173795,
|
|
"learning_rate": 9.402403517233867e-06,
|
|
"loss": 0.3455,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.24297646165527714,
|
|
"grad_norm": 1.1960840171763427,
|
|
"learning_rate": 9.397368756032445e-06,
|
|
"loss": 0.3453,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.24358390280941533,
|
|
"grad_norm": 1.0826676561819115,
|
|
"learning_rate": 9.392314234415999e-06,
|
|
"loss": 0.3047,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.24419134396355352,
|
|
"grad_norm": 1.1755710795468963,
|
|
"learning_rate": 9.38723997509798e-06,
|
|
"loss": 0.313,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.24479878511769174,
|
|
"grad_norm": 1.3499086111109924,
|
|
"learning_rate": 9.38214600088054e-06,
|
|
"loss": 0.3285,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.24540622627182992,
|
|
"grad_norm": 1.3200873501528223,
|
|
"learning_rate": 9.37703233465443e-06,
|
|
"loss": 0.369,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.2460136674259681,
|
|
"grad_norm": 1.3826162413858059,
|
|
"learning_rate": 9.371898999398876e-06,
|
|
"loss": 0.3527,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.2466211085801063,
|
|
"grad_norm": 1.2278576280498825,
|
|
"learning_rate": 9.366746018181503e-06,
|
|
"loss": 0.3277,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.2472285497342445,
|
|
"grad_norm": 1.1329545797723328,
|
|
"learning_rate": 9.361573414158215e-06,
|
|
"loss": 0.3229,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.24783599088838268,
|
|
"grad_norm": 1.0477461146627898,
|
|
"learning_rate": 9.356381210573092e-06,
|
|
"loss": 0.2919,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.24844343204252087,
|
|
"grad_norm": 2.4554214547877704,
|
|
"learning_rate": 9.351169430758293e-06,
|
|
"loss": 0.3438,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.24905087319665908,
|
|
"grad_norm": 3.5352794640669685,
|
|
"learning_rate": 9.345938098133946e-06,
|
|
"loss": 0.3262,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.24965831435079727,
|
|
"grad_norm": 2.021698412413005,
|
|
"learning_rate": 9.340687236208037e-06,
|
|
"loss": 0.3011,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.25026575550493546,
|
|
"grad_norm": 2.2039087798756465,
|
|
"learning_rate": 9.33541686857632e-06,
|
|
"loss": 0.3663,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.25087319665907365,
|
|
"grad_norm": 1.2067385283486605,
|
|
"learning_rate": 9.330127018922195e-06,
|
|
"loss": 0.3212,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.25148063781321184,
|
|
"grad_norm": 1.2316558886892726,
|
|
"learning_rate": 9.324817711016609e-06,
|
|
"loss": 0.3419,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.25208807896735,
|
|
"grad_norm": 1.4875274199476636,
|
|
"learning_rate": 9.31948896871795e-06,
|
|
"loss": 0.3348,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.2526955201214882,
|
|
"grad_norm": 1.1115019195847107,
|
|
"learning_rate": 9.31414081597194e-06,
|
|
"loss": 0.3512,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.2533029612756264,
|
|
"grad_norm": 1.2295268471367569,
|
|
"learning_rate": 9.30877327681152e-06,
|
|
"loss": 0.3503,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.2539104024297646,
|
|
"grad_norm": 1.0786831844077154,
|
|
"learning_rate": 9.303386375356752e-06,
|
|
"loss": 0.3162,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.25451784358390284,
|
|
"grad_norm": 1.0475351605214382,
|
|
"learning_rate": 9.297980135814706e-06,
|
|
"loss": 0.3103,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.255125284738041,
|
|
"grad_norm": 1.0774726768233147,
|
|
"learning_rate": 9.292554582479349e-06,
|
|
"loss": 0.3187,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.2557327258921792,
|
|
"grad_norm": 1.1341288746675326,
|
|
"learning_rate": 9.28710973973144e-06,
|
|
"loss": 0.3267,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.2563401670463174,
|
|
"grad_norm": 1.7271345636020505,
|
|
"learning_rate": 9.281645632038417e-06,
|
|
"loss": 0.3329,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.2569476082004556,
|
|
"grad_norm": 1.1865979526898363,
|
|
"learning_rate": 9.276162283954293e-06,
|
|
"loss": 0.3148,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.2575550493545938,
|
|
"grad_norm": 1.050767524282803,
|
|
"learning_rate": 9.270659720119533e-06,
|
|
"loss": 0.3431,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.25816249050873197,
|
|
"grad_norm": 1.196771620264985,
|
|
"learning_rate": 9.265137965260962e-06,
|
|
"loss": 0.3422,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.25876993166287016,
|
|
"grad_norm": 1.0955500339958768,
|
|
"learning_rate": 9.259597044191635e-06,
|
|
"loss": 0.3195,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.25937737281700834,
|
|
"grad_norm": 1.115960598799573,
|
|
"learning_rate": 9.254036981810741e-06,
|
|
"loss": 0.3238,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.25998481397114653,
|
|
"grad_norm": 1.0005694364136173,
|
|
"learning_rate": 9.248457803103476e-06,
|
|
"loss": 0.309,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.2605922551252847,
|
|
"grad_norm": 1.0726223391150986,
|
|
"learning_rate": 9.242859533140947e-06,
|
|
"loss": 0.3031,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.2611996962794229,
|
|
"grad_norm": 1.1657341953685123,
|
|
"learning_rate": 9.237242197080045e-06,
|
|
"loss": 0.2901,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.2618071374335611,
|
|
"grad_norm": 1.013927670160624,
|
|
"learning_rate": 9.231605820163343e-06,
|
|
"loss": 0.2932,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.26241457858769934,
|
|
"grad_norm": 1.1498383770847378,
|
|
"learning_rate": 9.225950427718974e-06,
|
|
"loss": 0.3333,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.26302201974183753,
|
|
"grad_norm": 1.2571593199046411,
|
|
"learning_rate": 9.220276045160524e-06,
|
|
"loss": 0.3098,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.2636294608959757,
|
|
"grad_norm": 1.004792456164235,
|
|
"learning_rate": 9.21458269798691e-06,
|
|
"loss": 0.2914,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.2642369020501139,
|
|
"grad_norm": 1.0509819009913075,
|
|
"learning_rate": 9.208870411782276e-06,
|
|
"loss": 0.3191,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.2648443432042521,
|
|
"grad_norm": 1.0808764700945872,
|
|
"learning_rate": 9.203139212215868e-06,
|
|
"loss": 0.3397,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.2654517843583903,
|
|
"grad_norm": 1.9069945576857954,
|
|
"learning_rate": 9.197389125041925e-06,
|
|
"loss": 0.3696,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.2660592255125285,
|
|
"grad_norm": 0.9868084753868711,
|
|
"learning_rate": 9.191620176099559e-06,
|
|
"loss": 0.2926,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.26666666666666666,
|
|
"grad_norm": 1.236174799741463,
|
|
"learning_rate": 9.185832391312644e-06,
|
|
"loss": 0.3532,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.26727410782080485,
|
|
"grad_norm": 1.085032203623591,
|
|
"learning_rate": 9.180025796689692e-06,
|
|
"loss": 0.3313,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.26788154897494304,
|
|
"grad_norm": 0.9582279787310294,
|
|
"learning_rate": 9.174200418323746e-06,
|
|
"loss": 0.2886,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.26848899012908123,
|
|
"grad_norm": 0.9169437747174185,
|
|
"learning_rate": 9.168356282392253e-06,
|
|
"loss": 0.2921,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.2690964312832194,
|
|
"grad_norm": 1.0775595515767766,
|
|
"learning_rate": 9.16249341515695e-06,
|
|
"loss": 0.3391,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.2697038724373576,
|
|
"grad_norm": 1.0089375719934834,
|
|
"learning_rate": 9.156611842963753e-06,
|
|
"loss": 0.3159,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.27031131359149585,
|
|
"grad_norm": 1.2417432186550288,
|
|
"learning_rate": 9.150711592242627e-06,
|
|
"loss": 0.3654,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.27091875474563404,
|
|
"grad_norm": 1.0274792842367653,
|
|
"learning_rate": 9.144792689507471e-06,
|
|
"loss": 0.3107,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.2715261958997722,
|
|
"grad_norm": 1.1641760376218897,
|
|
"learning_rate": 9.138855161356006e-06,
|
|
"loss": 0.3219,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.2721336370539104,
|
|
"grad_norm": 1.038602346175002,
|
|
"learning_rate": 9.132899034469648e-06,
|
|
"loss": 0.3262,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.2727410782080486,
|
|
"grad_norm": 1.0193170343688194,
|
|
"learning_rate": 9.126924335613385e-06,
|
|
"loss": 0.2947,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.2733485193621868,
|
|
"grad_norm": 1.0412898987806527,
|
|
"learning_rate": 9.120931091635669e-06,
|
|
"loss": 0.2982,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.273955960516325,
|
|
"grad_norm": 1.0086545373941327,
|
|
"learning_rate": 9.114919329468283e-06,
|
|
"loss": 0.3189,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.27456340167046317,
|
|
"grad_norm": 0.9279048193591232,
|
|
"learning_rate": 9.108889076126226e-06,
|
|
"loss": 0.2653,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.27517084282460136,
|
|
"grad_norm": 0.995776916618753,
|
|
"learning_rate": 9.102840358707594e-06,
|
|
"loss": 0.2785,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.27577828397873955,
|
|
"grad_norm": 1.1209448523987642,
|
|
"learning_rate": 9.09677320439345e-06,
|
|
"loss": 0.3484,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.27638572513287774,
|
|
"grad_norm": 1.0086795340037955,
|
|
"learning_rate": 9.090687640447709e-06,
|
|
"loss": 0.3039,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.2769931662870159,
|
|
"grad_norm": 1.0635853354673632,
|
|
"learning_rate": 9.084583694217012e-06,
|
|
"loss": 0.3368,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.2776006074411541,
|
|
"grad_norm": 1.3855504059825723,
|
|
"learning_rate": 9.07846139313061e-06,
|
|
"loss": 0.3416,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.27820804859529236,
|
|
"grad_norm": 1.0574619979291167,
|
|
"learning_rate": 9.072320764700223e-06,
|
|
"loss": 0.2921,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.27881548974943055,
|
|
"grad_norm": 0.9945293438865139,
|
|
"learning_rate": 9.066161836519942e-06,
|
|
"loss": 0.2738,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.27942293090356873,
|
|
"grad_norm": 1.0200515276803628,
|
|
"learning_rate": 9.059984636266082e-06,
|
|
"loss": 0.3244,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.2800303720577069,
|
|
"grad_norm": 0.9552331626040478,
|
|
"learning_rate": 9.053789191697072e-06,
|
|
"loss": 0.2867,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.2806378132118451,
|
|
"grad_norm": 0.9682413028760446,
|
|
"learning_rate": 9.047575530653324e-06,
|
|
"loss": 0.2914,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.2812452543659833,
|
|
"grad_norm": 1.0309908640635839,
|
|
"learning_rate": 9.041343681057106e-06,
|
|
"loss": 0.2882,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.2818526955201215,
|
|
"grad_norm": 1.038796502390337,
|
|
"learning_rate": 9.035093670912424e-06,
|
|
"loss": 0.2814,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.2824601366742597,
|
|
"grad_norm": 1.1394035890796435,
|
|
"learning_rate": 9.028825528304892e-06,
|
|
"loss": 0.3444,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.28306757782839786,
|
|
"grad_norm": 1.4464664575484871,
|
|
"learning_rate": 9.022539281401601e-06,
|
|
"loss": 0.3403,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.28367501898253605,
|
|
"grad_norm": 1.725019753808846,
|
|
"learning_rate": 9.016234958451002e-06,
|
|
"loss": 0.3225,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.28428246013667424,
|
|
"grad_norm": 1.390207557379254,
|
|
"learning_rate": 9.009912587782772e-06,
|
|
"loss": 0.2972,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.28488990129081243,
|
|
"grad_norm": 1.1656375132150465,
|
|
"learning_rate": 9.00357219780769e-06,
|
|
"loss": 0.3025,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.2854973424449506,
|
|
"grad_norm": 2.458767731063712,
|
|
"learning_rate": 8.997213817017508e-06,
|
|
"loss": 0.3368,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.28610478359908886,
|
|
"grad_norm": 1.0665249956470793,
|
|
"learning_rate": 8.990837473984818e-06,
|
|
"loss": 0.3208,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.28671222475322705,
|
|
"grad_norm": 1.1467648154057113,
|
|
"learning_rate": 8.984443197362938e-06,
|
|
"loss": 0.2963,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.28731966590736524,
|
|
"grad_norm": 1.188817694944298,
|
|
"learning_rate": 8.978031015885767e-06,
|
|
"loss": 0.3049,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.28792710706150343,
|
|
"grad_norm": 0.9664646995536351,
|
|
"learning_rate": 8.971600958367668e-06,
|
|
"loss": 0.2873,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.2885345482156416,
|
|
"grad_norm": 1.0103760175347862,
|
|
"learning_rate": 8.965153053703325e-06,
|
|
"loss": 0.2933,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.2891419893697798,
|
|
"grad_norm": 0.969093412655753,
|
|
"learning_rate": 8.958687330867634e-06,
|
|
"loss": 0.3211,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.289749430523918,
|
|
"grad_norm": 1.025864636561286,
|
|
"learning_rate": 8.952203818915548e-06,
|
|
"loss": 0.3216,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.2903568716780562,
|
|
"grad_norm": 1.109916705902771,
|
|
"learning_rate": 8.94570254698197e-06,
|
|
"loss": 0.2862,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.29096431283219437,
|
|
"grad_norm": 0.9713323047580559,
|
|
"learning_rate": 8.939183544281597e-06,
|
|
"loss": 0.3105,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.29157175398633256,
|
|
"grad_norm": 1.081777308673117,
|
|
"learning_rate": 8.932646840108818e-06,
|
|
"loss": 0.3272,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.29217919514047075,
|
|
"grad_norm": 0.967596641805163,
|
|
"learning_rate": 8.926092463837557e-06,
|
|
"loss": 0.2761,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.29278663629460894,
|
|
"grad_norm": 1.155987695983836,
|
|
"learning_rate": 8.919520444921153e-06,
|
|
"loss": 0.3064,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.2933940774487471,
|
|
"grad_norm": 1.3696382864069947,
|
|
"learning_rate": 8.912930812892228e-06,
|
|
"loss": 0.2865,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.29400151860288537,
|
|
"grad_norm": 1.0400446945814443,
|
|
"learning_rate": 8.906323597362547e-06,
|
|
"loss": 0.2686,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.29460895975702356,
|
|
"grad_norm": 1.0325058068789128,
|
|
"learning_rate": 8.899698828022895e-06,
|
|
"loss": 0.2879,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.29521640091116175,
|
|
"grad_norm": 1.0137176891852828,
|
|
"learning_rate": 8.893056534642938e-06,
|
|
"loss": 0.3086,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.29582384206529994,
|
|
"grad_norm": 1.2122910237107214,
|
|
"learning_rate": 8.886396747071085e-06,
|
|
"loss": 0.3277,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.2964312832194381,
|
|
"grad_norm": 1.073984642104128,
|
|
"learning_rate": 8.879719495234363e-06,
|
|
"loss": 0.3181,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.2970387243735763,
|
|
"grad_norm": 1.095543075839678,
|
|
"learning_rate": 8.873024809138272e-06,
|
|
"loss": 0.3102,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.2976461655277145,
|
|
"grad_norm": 1.01242570236515,
|
|
"learning_rate": 8.866312718866669e-06,
|
|
"loss": 0.2998,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.2982536066818527,
|
|
"grad_norm": 0.9488282118596906,
|
|
"learning_rate": 8.859583254581604e-06,
|
|
"loss": 0.3099,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.2988610478359909,
|
|
"grad_norm": 0.9722813663051023,
|
|
"learning_rate": 8.852836446523213e-06,
|
|
"loss": 0.3386,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.29946848899012907,
|
|
"grad_norm": 1.0326428240517584,
|
|
"learning_rate": 8.846072325009562e-06,
|
|
"loss": 0.2987,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.30007593014426726,
|
|
"grad_norm": 1.0268489993356678,
|
|
"learning_rate": 8.83929092043652e-06,
|
|
"loss": 0.3282,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.30068337129840544,
|
|
"grad_norm": 1.0504404390194961,
|
|
"learning_rate": 8.832492263277624e-06,
|
|
"loss": 0.331,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.30129081245254363,
|
|
"grad_norm": 1.0223568612198137,
|
|
"learning_rate": 8.825676384083936e-06,
|
|
"loss": 0.3073,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.3018982536066819,
|
|
"grad_norm": 0.9798177174047413,
|
|
"learning_rate": 8.818843313483907e-06,
|
|
"loss": 0.2886,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.30250569476082007,
|
|
"grad_norm": 1.0532247166652768,
|
|
"learning_rate": 8.811993082183243e-06,
|
|
"loss": 0.2974,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.30311313591495825,
|
|
"grad_norm": 0.975121772940988,
|
|
"learning_rate": 8.805125720964766e-06,
|
|
"loss": 0.2997,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.30372057706909644,
|
|
"grad_norm": 1.137742002586217,
|
|
"learning_rate": 8.798241260688273e-06,
|
|
"loss": 0.3188,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.30432801822323463,
|
|
"grad_norm": 2.779671078767294,
|
|
"learning_rate": 8.791339732290398e-06,
|
|
"loss": 0.3156,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.3049354593773728,
|
|
"grad_norm": 1.2822627424235575,
|
|
"learning_rate": 8.784421166784476e-06,
|
|
"loss": 0.3006,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.305542900531511,
|
|
"grad_norm": 0.9483433674131612,
|
|
"learning_rate": 8.7774855952604e-06,
|
|
"loss": 0.2795,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.3061503416856492,
|
|
"grad_norm": 1.0109525806584068,
|
|
"learning_rate": 8.770533048884483e-06,
|
|
"loss": 0.3045,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.3067577828397874,
|
|
"grad_norm": 0.9625557196195801,
|
|
"learning_rate": 8.763563558899317e-06,
|
|
"loss": 0.2759,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.3073652239939256,
|
|
"grad_norm": 1.0453305774145718,
|
|
"learning_rate": 8.756577156623636e-06,
|
|
"loss": 0.3117,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.30797266514806376,
|
|
"grad_norm": 0.8875823603586328,
|
|
"learning_rate": 8.749573873452169e-06,
|
|
"loss": 0.2716,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.30858010630220195,
|
|
"grad_norm": 0.968540908249718,
|
|
"learning_rate": 8.742553740855507e-06,
|
|
"loss": 0.2851,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.30918754745634014,
|
|
"grad_norm": 0.9903789827417551,
|
|
"learning_rate": 8.735516790379952e-06,
|
|
"loss": 0.2897,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.3097949886104784,
|
|
"grad_norm": 1.0711211077943226,
|
|
"learning_rate": 8.728463053647382e-06,
|
|
"loss": 0.2584,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.31040242976461657,
|
|
"grad_norm": 1.0473520524330442,
|
|
"learning_rate": 8.721392562355113e-06,
|
|
"loss": 0.3144,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.31100987091875476,
|
|
"grad_norm": 1.0396429716589337,
|
|
"learning_rate": 8.71430534827574e-06,
|
|
"loss": 0.3046,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.31161731207289295,
|
|
"grad_norm": 2.22911229260033,
|
|
"learning_rate": 8.707201443257015e-06,
|
|
"loss": 0.3096,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.31222475322703114,
|
|
"grad_norm": 1.0461555035207433,
|
|
"learning_rate": 8.700080879221689e-06,
|
|
"loss": 0.3344,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.3128321943811693,
|
|
"grad_norm": 1.0243099223034418,
|
|
"learning_rate": 8.692943688167371e-06,
|
|
"loss": 0.3317,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.3134396355353075,
|
|
"grad_norm": 1.0001264307440594,
|
|
"learning_rate": 8.685789902166395e-06,
|
|
"loss": 0.3035,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.3140470766894457,
|
|
"grad_norm": 1.067152048322313,
|
|
"learning_rate": 8.67861955336566e-06,
|
|
"loss": 0.305,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.3146545178435839,
|
|
"grad_norm": 1.030162408720675,
|
|
"learning_rate": 8.671432673986493e-06,
|
|
"loss": 0.3161,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.3152619589977221,
|
|
"grad_norm": 0.9760695402458939,
|
|
"learning_rate": 8.664229296324514e-06,
|
|
"loss": 0.3157,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.31586940015186027,
|
|
"grad_norm": 0.9705147238493051,
|
|
"learning_rate": 8.657009452749466e-06,
|
|
"loss": 0.3048,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.31647684130599846,
|
|
"grad_norm": 0.8938796766666607,
|
|
"learning_rate": 8.649773175705099e-06,
|
|
"loss": 0.2668,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.3170842824601367,
|
|
"grad_norm": 1.011233752497999,
|
|
"learning_rate": 8.642520497709001e-06,
|
|
"loss": 0.3098,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.3176917236142749,
|
|
"grad_norm": 1.137929526930306,
|
|
"learning_rate": 8.635251451352463e-06,
|
|
"loss": 0.3015,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.3182991647684131,
|
|
"grad_norm": 1.0130770256069037,
|
|
"learning_rate": 8.627966069300332e-06,
|
|
"loss": 0.3245,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.31890660592255127,
|
|
"grad_norm": 1.1354507509321872,
|
|
"learning_rate": 8.620664384290863e-06,
|
|
"loss": 0.3039,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.31951404707668946,
|
|
"grad_norm": 1.2939991366637584,
|
|
"learning_rate": 8.613346429135567e-06,
|
|
"loss": 0.3078,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.32012148823082764,
|
|
"grad_norm": 1.1684109323065632,
|
|
"learning_rate": 8.606012236719073e-06,
|
|
"loss": 0.3385,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.32072892938496583,
|
|
"grad_norm": 1.9632990216219541,
|
|
"learning_rate": 8.598661839998972e-06,
|
|
"loss": 0.2775,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.321336370539104,
|
|
"grad_norm": 1.0999059016474897,
|
|
"learning_rate": 8.591295272005674e-06,
|
|
"loss": 0.2942,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.3219438116932422,
|
|
"grad_norm": 1.1924126987732822,
|
|
"learning_rate": 8.583912565842258e-06,
|
|
"loss": 0.2957,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.3225512528473804,
|
|
"grad_norm": 0.9830056635559057,
|
|
"learning_rate": 8.576513754684318e-06,
|
|
"loss": 0.2871,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.3231586940015186,
|
|
"grad_norm": 1.003723254647249,
|
|
"learning_rate": 8.569098871779828e-06,
|
|
"loss": 0.3159,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.3237661351556568,
|
|
"grad_norm": 1.1546050485627521,
|
|
"learning_rate": 8.561667950448973e-06,
|
|
"loss": 0.3274,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.32437357630979496,
|
|
"grad_norm": 7.898389008169903,
|
|
"learning_rate": 8.554221024084019e-06,
|
|
"loss": 0.2923,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.3249810174639332,
|
|
"grad_norm": 1.066007060725776,
|
|
"learning_rate": 8.546758126149148e-06,
|
|
"loss": 0.3172,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.3255884586180714,
|
|
"grad_norm": 1.0571265266645584,
|
|
"learning_rate": 8.539279290180315e-06,
|
|
"loss": 0.3294,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.3261958997722096,
|
|
"grad_norm": 1.1006676423033956,
|
|
"learning_rate": 8.531784549785098e-06,
|
|
"loss": 0.3524,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.3268033409263478,
|
|
"grad_norm": 1.1991261856518842,
|
|
"learning_rate": 8.524273938642539e-06,
|
|
"loss": 0.3158,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.32741078208048596,
|
|
"grad_norm": 1.0695433507748728,
|
|
"learning_rate": 8.516747490503001e-06,
|
|
"loss": 0.3318,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.32801822323462415,
|
|
"grad_norm": 1.009828314645369,
|
|
"learning_rate": 8.509205239188017e-06,
|
|
"loss": 0.3034,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.32862566438876234,
|
|
"grad_norm": 1.0614942016734776,
|
|
"learning_rate": 8.501647218590127e-06,
|
|
"loss": 0.3249,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.32923310554290053,
|
|
"grad_norm": 1.0535816660890713,
|
|
"learning_rate": 8.494073462672743e-06,
|
|
"loss": 0.3245,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.3298405466970387,
|
|
"grad_norm": 1.1116235063360127,
|
|
"learning_rate": 8.486484005469977e-06,
|
|
"loss": 0.3111,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.3304479878511769,
|
|
"grad_norm": 1.2499481577520084,
|
|
"learning_rate": 8.478878881086505e-06,
|
|
"loss": 0.2774,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.3310554290053151,
|
|
"grad_norm": 1.2434383006954242,
|
|
"learning_rate": 8.471258123697403e-06,
|
|
"loss": 0.3591,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.3316628701594533,
|
|
"grad_norm": 2.629111128401328,
|
|
"learning_rate": 8.463621767547998e-06,
|
|
"loss": 0.2964,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.33227031131359147,
|
|
"grad_norm": 1.0085205462907783,
|
|
"learning_rate": 8.455969846953711e-06,
|
|
"loss": 0.2782,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.3328777524677297,
|
|
"grad_norm": 1.0091177434087597,
|
|
"learning_rate": 8.448302396299906e-06,
|
|
"loss": 0.2923,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.3334851936218679,
|
|
"grad_norm": 0.9548710150038249,
|
|
"learning_rate": 8.440619450041736e-06,
|
|
"loss": 0.256,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.3340926347760061,
|
|
"grad_norm": 1.0061487620910332,
|
|
"learning_rate": 8.432921042703985e-06,
|
|
"loss": 0.2978,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.3347000759301443,
|
|
"grad_norm": 1.0205034766401575,
|
|
"learning_rate": 8.425207208880914e-06,
|
|
"loss": 0.3307,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.33530751708428247,
|
|
"grad_norm": 0.9995329253562294,
|
|
"learning_rate": 8.417477983236107e-06,
|
|
"loss": 0.3149,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.33591495823842066,
|
|
"grad_norm": 0.9454465727784129,
|
|
"learning_rate": 8.409733400502311e-06,
|
|
"loss": 0.3152,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.33652239939255885,
|
|
"grad_norm": 1.1810082686737633,
|
|
"learning_rate": 8.401973495481289e-06,
|
|
"loss": 0.2706,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.33712984054669703,
|
|
"grad_norm": 0.8806178339494458,
|
|
"learning_rate": 8.39419830304365e-06,
|
|
"loss": 0.2847,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.3377372817008352,
|
|
"grad_norm": 1.1386249132431194,
|
|
"learning_rate": 8.386407858128707e-06,
|
|
"loss": 0.2851,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.3383447228549734,
|
|
"grad_norm": 1.028806142818622,
|
|
"learning_rate": 8.378602195744308e-06,
|
|
"loss": 0.3078,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.3389521640091116,
|
|
"grad_norm": 1.0457874244613665,
|
|
"learning_rate": 8.370781350966683e-06,
|
|
"loss": 0.3397,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.3395596051632498,
|
|
"grad_norm": 0.8684256285368641,
|
|
"learning_rate": 8.362945358940295e-06,
|
|
"loss": 0.2842,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.340167046317388,
|
|
"grad_norm": 0.9167915425454639,
|
|
"learning_rate": 8.355094254877665e-06,
|
|
"loss": 0.2658,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.3407744874715262,
|
|
"grad_norm": 1.0174829618995838,
|
|
"learning_rate": 8.347228074059227e-06,
|
|
"loss": 0.3266,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.3413819286256644,
|
|
"grad_norm": 0.960163727054609,
|
|
"learning_rate": 8.339346851833163e-06,
|
|
"loss": 0.2889,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.3419893697798026,
|
|
"grad_norm": 0.9190612974975497,
|
|
"learning_rate": 8.33145062361525e-06,
|
|
"loss": 0.2993,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.3425968109339408,
|
|
"grad_norm": 1.260688931755127,
|
|
"learning_rate": 8.323539424888695e-06,
|
|
"loss": 0.3011,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.343204252088079,
|
|
"grad_norm": 0.8862857315713791,
|
|
"learning_rate": 8.315613291203977e-06,
|
|
"loss": 0.2745,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.34381169324221716,
|
|
"grad_norm": 1.534384818793313,
|
|
"learning_rate": 8.30767225817869e-06,
|
|
"loss": 0.3015,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.34441913439635535,
|
|
"grad_norm": 1.0421030425662317,
|
|
"learning_rate": 8.299716361497377e-06,
|
|
"loss": 0.2937,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.34502657555049354,
|
|
"grad_norm": 1.078821559929711,
|
|
"learning_rate": 8.291745636911382e-06,
|
|
"loss": 0.3104,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.34563401670463173,
|
|
"grad_norm": 1.0058600245745741,
|
|
"learning_rate": 8.283760120238672e-06,
|
|
"loss": 0.3077,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.3462414578587699,
|
|
"grad_norm": 0.8608607521318042,
|
|
"learning_rate": 8.27575984736369e-06,
|
|
"loss": 0.2701,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.3468488990129081,
|
|
"grad_norm": 1.0527010355534043,
|
|
"learning_rate": 8.26774485423719e-06,
|
|
"loss": 0.3196,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.3474563401670463,
|
|
"grad_norm": 0.92875430774757,
|
|
"learning_rate": 8.259715176876069e-06,
|
|
"loss": 0.2782,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.3480637813211845,
|
|
"grad_norm": 1.3921020583644004,
|
|
"learning_rate": 8.251670851363214e-06,
|
|
"loss": 0.3346,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.34867122247532273,
|
|
"grad_norm": 0.9590626440697687,
|
|
"learning_rate": 8.243611913847337e-06,
|
|
"loss": 0.2824,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.3492786636294609,
|
|
"grad_norm": 0.9520186243816708,
|
|
"learning_rate": 8.235538400542809e-06,
|
|
"loss": 0.27,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.3498861047835991,
|
|
"grad_norm": 0.9591343436311952,
|
|
"learning_rate": 8.2274503477295e-06,
|
|
"loss": 0.2719,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.3504935459377373,
|
|
"grad_norm": 1.0279410178241586,
|
|
"learning_rate": 8.21934779175262e-06,
|
|
"loss": 0.3191,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.3511009870918755,
|
|
"grad_norm": 0.941703751218152,
|
|
"learning_rate": 8.211230769022552e-06,
|
|
"loss": 0.302,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.35170842824601367,
|
|
"grad_norm": 0.9536297735317896,
|
|
"learning_rate": 8.203099316014679e-06,
|
|
"loss": 0.3236,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.35231586940015186,
|
|
"grad_norm": 1.0851175895827145,
|
|
"learning_rate": 8.19495346926924e-06,
|
|
"loss": 0.3174,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.35292331055429005,
|
|
"grad_norm": 0.9375847401656001,
|
|
"learning_rate": 8.18679326539115e-06,
|
|
"loss": 0.2984,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.35353075170842824,
|
|
"grad_norm": 1.2513870142868544,
|
|
"learning_rate": 8.178618741049841e-06,
|
|
"loss": 0.3142,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.3541381928625664,
|
|
"grad_norm": 1.0326942039827636,
|
|
"learning_rate": 8.170429932979097e-06,
|
|
"loss": 0.3118,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.3547456340167046,
|
|
"grad_norm": 0.97117910690105,
|
|
"learning_rate": 8.162226877976886e-06,
|
|
"loss": 0.3114,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.3553530751708428,
|
|
"grad_norm": 1.105458493777876,
|
|
"learning_rate": 8.154009612905205e-06,
|
|
"loss": 0.3252,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.355960516324981,
|
|
"grad_norm": 1.1341020674749815,
|
|
"learning_rate": 8.145778174689897e-06,
|
|
"loss": 0.3388,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.35656795747911924,
|
|
"grad_norm": 0.8971554688120575,
|
|
"learning_rate": 8.137532600320502e-06,
|
|
"loss": 0.2955,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.3571753986332574,
|
|
"grad_norm": 1.0151980524067494,
|
|
"learning_rate": 8.129272926850079e-06,
|
|
"loss": 0.2949,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.3577828397873956,
|
|
"grad_norm": 0.941672453155367,
|
|
"learning_rate": 8.120999191395048e-06,
|
|
"loss": 0.2819,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.3583902809415338,
|
|
"grad_norm": 1.057217581283722,
|
|
"learning_rate": 8.112711431135014e-06,
|
|
"loss": 0.288,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.358997722095672,
|
|
"grad_norm": 1.375861607230768,
|
|
"learning_rate": 8.10440968331261e-06,
|
|
"loss": 0.2897,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.3596051632498102,
|
|
"grad_norm": 6.874880808118071,
|
|
"learning_rate": 8.096093985233323e-06,
|
|
"loss": 0.3182,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.36021260440394837,
|
|
"grad_norm": 1.0459697501354848,
|
|
"learning_rate": 8.087764374265325e-06,
|
|
"loss": 0.3171,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.36082004555808656,
|
|
"grad_norm": 1.0424410896636964,
|
|
"learning_rate": 8.079420887839316e-06,
|
|
"loss": 0.2841,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.36142748671222474,
|
|
"grad_norm": 0.9398782517670305,
|
|
"learning_rate": 8.071063563448341e-06,
|
|
"loss": 0.2975,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.36203492786636293,
|
|
"grad_norm": 0.9740196516759212,
|
|
"learning_rate": 8.062692438647628e-06,
|
|
"loss": 0.3001,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.3626423690205011,
|
|
"grad_norm": 1.1401753407703934,
|
|
"learning_rate": 8.054307551054427e-06,
|
|
"loss": 0.3006,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.3632498101746393,
|
|
"grad_norm": 1.5471172495911225,
|
|
"learning_rate": 8.045908938347828e-06,
|
|
"loss": 0.2829,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.3638572513287775,
|
|
"grad_norm": 1.024433089085474,
|
|
"learning_rate": 8.037496638268599e-06,
|
|
"loss": 0.3338,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.36446469248291574,
|
|
"grad_norm": 1.0602623846991535,
|
|
"learning_rate": 8.029070688619013e-06,
|
|
"loss": 0.2817,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.36507213363705393,
|
|
"grad_norm": 0.9832965892055614,
|
|
"learning_rate": 8.020631127262681e-06,
|
|
"loss": 0.2928,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.3656795747911921,
|
|
"grad_norm": 1.0377272606893748,
|
|
"learning_rate": 8.012177992124385e-06,
|
|
"loss": 0.3163,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.3662870159453303,
|
|
"grad_norm": 1.162756936534878,
|
|
"learning_rate": 8.003711321189895e-06,
|
|
"loss": 0.3026,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.3668944570994685,
|
|
"grad_norm": 0.9081590107485684,
|
|
"learning_rate": 7.995231152505815e-06,
|
|
"loss": 0.278,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.3675018982536067,
|
|
"grad_norm": 1.0285104632076432,
|
|
"learning_rate": 7.986737524179398e-06,
|
|
"loss": 0.3198,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.3681093394077449,
|
|
"grad_norm": 1.0089602950227836,
|
|
"learning_rate": 7.978230474378383e-06,
|
|
"loss": 0.2896,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.36871678056188306,
|
|
"grad_norm": 0.9253892326788151,
|
|
"learning_rate": 7.96971004133082e-06,
|
|
"loss": 0.2973,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.36932422171602125,
|
|
"grad_norm": 0.9373958997004501,
|
|
"learning_rate": 7.961176263324902e-06,
|
|
"loss": 0.2702,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.36993166287015944,
|
|
"grad_norm": 1.0340689976239543,
|
|
"learning_rate": 7.952629178708783e-06,
|
|
"loss": 0.3086,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.3705391040242976,
|
|
"grad_norm": 0.9434565199275107,
|
|
"learning_rate": 7.944068825890424e-06,
|
|
"loss": 0.2844,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.3711465451784358,
|
|
"grad_norm": 2.0206497983922165,
|
|
"learning_rate": 7.935495243337397e-06,
|
|
"loss": 0.2996,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.371753986332574,
|
|
"grad_norm": 1.220826721106363,
|
|
"learning_rate": 7.92690846957673e-06,
|
|
"loss": 0.2564,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.37236142748671225,
|
|
"grad_norm": 0.9495640406098673,
|
|
"learning_rate": 7.918308543194735e-06,
|
|
"loss": 0.2789,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.37296886864085044,
|
|
"grad_norm": 1.1839526557503743,
|
|
"learning_rate": 7.909695502836814e-06,
|
|
"loss": 0.3291,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.3735763097949886,
|
|
"grad_norm": 0.9422030510378584,
|
|
"learning_rate": 7.90106938720731e-06,
|
|
"loss": 0.2658,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.3741837509491268,
|
|
"grad_norm": 2.6158876200316943,
|
|
"learning_rate": 7.892430235069317e-06,
|
|
"loss": 0.3267,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.374791192103265,
|
|
"grad_norm": 1.0569339252672818,
|
|
"learning_rate": 7.883778085244514e-06,
|
|
"loss": 0.3302,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.3753986332574032,
|
|
"grad_norm": 0.9481970640626258,
|
|
"learning_rate": 7.875112976612984e-06,
|
|
"loss": 0.2825,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.3760060744115414,
|
|
"grad_norm": 0.9971575781640729,
|
|
"learning_rate": 7.866434948113046e-06,
|
|
"loss": 0.2988,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.37661351556567957,
|
|
"grad_norm": 0.9807473222056035,
|
|
"learning_rate": 7.857744038741076e-06,
|
|
"loss": 0.3192,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.37722095671981776,
|
|
"grad_norm": 1.0243743769936524,
|
|
"learning_rate": 7.849040287551331e-06,
|
|
"loss": 0.3149,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.37782839787395595,
|
|
"grad_norm": 0.9586280652231142,
|
|
"learning_rate": 7.84032373365578e-06,
|
|
"loss": 0.2815,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.37843583902809413,
|
|
"grad_norm": 0.9369075735191363,
|
|
"learning_rate": 7.831594416223916e-06,
|
|
"loss": 0.3008,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.3790432801822323,
|
|
"grad_norm": 3.308019410340958,
|
|
"learning_rate": 7.822852374482597e-06,
|
|
"loss": 0.3148,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.37965072133637057,
|
|
"grad_norm": 1.164444173629437,
|
|
"learning_rate": 7.814097647715848e-06,
|
|
"loss": 0.3058,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.38025816249050876,
|
|
"grad_norm": 1.4177016659749746,
|
|
"learning_rate": 7.805330275264707e-06,
|
|
"loss": 0.2889,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.38086560364464694,
|
|
"grad_norm": 1.4357393277200918,
|
|
"learning_rate": 7.796550296527032e-06,
|
|
"loss": 0.2636,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.38147304479878513,
|
|
"grad_norm": 1.0253772001951136,
|
|
"learning_rate": 7.787757750957335e-06,
|
|
"loss": 0.3026,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.3820804859529233,
|
|
"grad_norm": 0.926193976032409,
|
|
"learning_rate": 7.778952678066591e-06,
|
|
"loss": 0.2613,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.3826879271070615,
|
|
"grad_norm": 0.9522660951838047,
|
|
"learning_rate": 7.77013511742208e-06,
|
|
"loss": 0.3146,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.3832953682611997,
|
|
"grad_norm": 1.0242396609306572,
|
|
"learning_rate": 7.761305108647188e-06,
|
|
"loss": 0.2957,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.3839028094153379,
|
|
"grad_norm": 0.9802706086421458,
|
|
"learning_rate": 7.752462691421245e-06,
|
|
"loss": 0.2947,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.3845102505694761,
|
|
"grad_norm": 0.9593635598943385,
|
|
"learning_rate": 7.743607905479338e-06,
|
|
"loss": 0.3063,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.38511769172361426,
|
|
"grad_norm": 0.9149349282493551,
|
|
"learning_rate": 7.734740790612137e-06,
|
|
"loss": 0.2824,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.38572513287775245,
|
|
"grad_norm": 0.8104078792665621,
|
|
"learning_rate": 7.72586138666571e-06,
|
|
"loss": 0.2546,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.38633257403189064,
|
|
"grad_norm": 1.0286565476756544,
|
|
"learning_rate": 7.716969733541357e-06,
|
|
"loss": 0.2704,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.38694001518602883,
|
|
"grad_norm": 0.8490863136577027,
|
|
"learning_rate": 7.708065871195413e-06,
|
|
"loss": 0.2606,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.3875474563401671,
|
|
"grad_norm": 1.0903672375251494,
|
|
"learning_rate": 7.699149839639086e-06,
|
|
"loss": 0.3175,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.38815489749430526,
|
|
"grad_norm": 1.4642223666112006,
|
|
"learning_rate": 7.690221678938258e-06,
|
|
"loss": 0.3641,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.38876233864844345,
|
|
"grad_norm": 0.9534979437652094,
|
|
"learning_rate": 7.681281429213328e-06,
|
|
"loss": 0.2731,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.38936977980258164,
|
|
"grad_norm": 1.1167627986990094,
|
|
"learning_rate": 7.672329130639007e-06,
|
|
"loss": 0.2791,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.38997722095671983,
|
|
"grad_norm": 1.1634813414266154,
|
|
"learning_rate": 7.663364823444157e-06,
|
|
"loss": 0.3173,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.390584662110858,
|
|
"grad_norm": 0.9674791602076483,
|
|
"learning_rate": 7.654388547911605e-06,
|
|
"loss": 0.3198,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.3911921032649962,
|
|
"grad_norm": 0.9326003350894531,
|
|
"learning_rate": 7.645400344377953e-06,
|
|
"loss": 0.2446,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.3917995444191344,
|
|
"grad_norm": 1.0664043566671932,
|
|
"learning_rate": 7.63640025323341e-06,
|
|
"loss": 0.2897,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.3924069855732726,
|
|
"grad_norm": 1.1042940465708586,
|
|
"learning_rate": 7.627388314921602e-06,
|
|
"loss": 0.2964,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.39301442672741077,
|
|
"grad_norm": 0.9931594882104945,
|
|
"learning_rate": 7.61836456993939e-06,
|
|
"loss": 0.28,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.39362186788154896,
|
|
"grad_norm": 1.471343399649596,
|
|
"learning_rate": 7.609329058836694e-06,
|
|
"loss": 0.3354,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.39422930903568715,
|
|
"grad_norm": 1.368059808767781,
|
|
"learning_rate": 7.600281822216307e-06,
|
|
"loss": 0.312,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.39483675018982534,
|
|
"grad_norm": 0.9867176052292568,
|
|
"learning_rate": 7.59122290073371e-06,
|
|
"loss": 0.2691,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.3954441913439636,
|
|
"grad_norm": 1.0007304468451435,
|
|
"learning_rate": 7.582152335096896e-06,
|
|
"loss": 0.2817,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.39605163249810177,
|
|
"grad_norm": 1.0529002222158175,
|
|
"learning_rate": 7.5730701660661795e-06,
|
|
"loss": 0.2804,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.39665907365223996,
|
|
"grad_norm": 0.9111676203769294,
|
|
"learning_rate": 7.563976434454021e-06,
|
|
"loss": 0.2674,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.39726651480637815,
|
|
"grad_norm": 0.928422868586772,
|
|
"learning_rate": 7.554871181124836e-06,
|
|
"loss": 0.2842,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.39787395596051633,
|
|
"grad_norm": 0.9776630589483184,
|
|
"learning_rate": 7.5457544469948164e-06,
|
|
"loss": 0.2891,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.3984813971146545,
|
|
"grad_norm": 0.9696610718502006,
|
|
"learning_rate": 7.536626273031747e-06,
|
|
"loss": 0.2815,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.3990888382687927,
|
|
"grad_norm": 0.9352484358853785,
|
|
"learning_rate": 7.5274867002548154e-06,
|
|
"loss": 0.2666,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.3996962794229309,
|
|
"grad_norm": 0.9663340246117754,
|
|
"learning_rate": 7.5183357697344395e-06,
|
|
"loss": 0.2834,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.4003037205770691,
|
|
"grad_norm": 1.0770927677521116,
|
|
"learning_rate": 7.509173522592066e-06,
|
|
"loss": 0.3175,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.4009111617312073,
|
|
"grad_norm": 0.9005571117374582,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 0.2918,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.40151860288534547,
|
|
"grad_norm": 1.053393785111227,
|
|
"learning_rate": 7.4908152431812175e-06,
|
|
"loss": 0.2636,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.40212604403948365,
|
|
"grad_norm": 0.9355577937726377,
|
|
"learning_rate": 7.481619293409173e-06,
|
|
"loss": 0.2656,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.40273348519362184,
|
|
"grad_norm": 1.067579553680483,
|
|
"learning_rate": 7.472412192007619e-06,
|
|
"loss": 0.2734,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.4033409263477601,
|
|
"grad_norm": 0.8813079944231941,
|
|
"learning_rate": 7.4631939803504215e-06,
|
|
"loss": 0.2411,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.4039483675018983,
|
|
"grad_norm": 1.008354055836425,
|
|
"learning_rate": 7.453964699861376e-06,
|
|
"loss": 0.2621,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.40455580865603646,
|
|
"grad_norm": 1.3145794725051865,
|
|
"learning_rate": 7.44472439201401e-06,
|
|
"loss": 0.3353,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.40516324981017465,
|
|
"grad_norm": 1.0499020867013482,
|
|
"learning_rate": 7.435473098331411e-06,
|
|
"loss": 0.309,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.40577069096431284,
|
|
"grad_norm": 1.0498570111550733,
|
|
"learning_rate": 7.426210860386032e-06,
|
|
"loss": 0.2863,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.40637813211845103,
|
|
"grad_norm": 0.9381854841212456,
|
|
"learning_rate": 7.416937719799502e-06,
|
|
"loss": 0.3162,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.4069855732725892,
|
|
"grad_norm": 1.0356224890807904,
|
|
"learning_rate": 7.407653718242449e-06,
|
|
"loss": 0.2835,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.4075930144267274,
|
|
"grad_norm": 1.1471319493901884,
|
|
"learning_rate": 7.398358897434303e-06,
|
|
"loss": 0.2995,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.4082004555808656,
|
|
"grad_norm": 0.9661407586663348,
|
|
"learning_rate": 7.3890532991431174e-06,
|
|
"loss": 0.2815,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.4088078967350038,
|
|
"grad_norm": 1.236474905505436,
|
|
"learning_rate": 7.379736965185369e-06,
|
|
"loss": 0.3244,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.409415337889142,
|
|
"grad_norm": 1.0600796964601655,
|
|
"learning_rate": 7.370409937425781e-06,
|
|
"loss": 0.2994,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.41002277904328016,
|
|
"grad_norm": 0.9816189966958595,
|
|
"learning_rate": 7.361072257777132e-06,
|
|
"loss": 0.3046,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.41063022019741835,
|
|
"grad_norm": 1.1164647876294425,
|
|
"learning_rate": 7.3517239682000675e-06,
|
|
"loss": 0.2676,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.4112376613515566,
|
|
"grad_norm": 0.836806710584883,
|
|
"learning_rate": 7.342365110702907e-06,
|
|
"loss": 0.2233,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.4118451025056948,
|
|
"grad_norm": 1.0083900885239343,
|
|
"learning_rate": 7.332995727341462e-06,
|
|
"loss": 0.3282,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.41245254365983297,
|
|
"grad_norm": 0.8982701658241349,
|
|
"learning_rate": 7.323615860218844e-06,
|
|
"loss": 0.2366,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.41305998481397116,
|
|
"grad_norm": 0.8535635256156215,
|
|
"learning_rate": 7.314225551485273e-06,
|
|
"loss": 0.2424,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.41366742596810935,
|
|
"grad_norm": 1.122301845672972,
|
|
"learning_rate": 7.304824843337893e-06,
|
|
"loss": 0.3075,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.41427486712224754,
|
|
"grad_norm": 0.9413071187408356,
|
|
"learning_rate": 7.295413778020579e-06,
|
|
"loss": 0.2862,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.4148823082763857,
|
|
"grad_norm": 1.0778162018478468,
|
|
"learning_rate": 7.285992397823747e-06,
|
|
"loss": 0.3039,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.4154897494305239,
|
|
"grad_norm": 1.0058545387610198,
|
|
"learning_rate": 7.276560745084167e-06,
|
|
"loss": 0.2982,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.4160971905846621,
|
|
"grad_norm": 1.247146796633824,
|
|
"learning_rate": 7.267118862184767e-06,
|
|
"loss": 0.2769,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.4167046317388003,
|
|
"grad_norm": 0.9580540977436537,
|
|
"learning_rate": 7.257666791554448e-06,
|
|
"loss": 0.3057,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.4173120728929385,
|
|
"grad_norm": 4.047797878525422,
|
|
"learning_rate": 7.248204575667893e-06,
|
|
"loss": 0.2686,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.41791951404707667,
|
|
"grad_norm": 0.9842953093021652,
|
|
"learning_rate": 7.2387322570453724e-06,
|
|
"loss": 0.2819,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.41852695520121486,
|
|
"grad_norm": 0.9798887505619451,
|
|
"learning_rate": 7.229249878252558e-06,
|
|
"loss": 0.2659,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.4191343963553531,
|
|
"grad_norm": 1.0112521392212896,
|
|
"learning_rate": 7.219757481900325e-06,
|
|
"loss": 0.2878,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.4197418375094913,
|
|
"grad_norm": 0.9383147067402705,
|
|
"learning_rate": 7.210255110644569e-06,
|
|
"loss": 0.3096,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.4203492786636295,
|
|
"grad_norm": 0.9751290815844371,
|
|
"learning_rate": 7.2007428071860045e-06,
|
|
"loss": 0.3036,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.42095671981776767,
|
|
"grad_norm": 0.954940864157478,
|
|
"learning_rate": 7.191220614269981e-06,
|
|
"loss": 0.2748,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.42156416097190585,
|
|
"grad_norm": 0.955968898320391,
|
|
"learning_rate": 7.181688574686292e-06,
|
|
"loss": 0.294,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.42217160212604404,
|
|
"grad_norm": 1.0024277031805326,
|
|
"learning_rate": 7.17214673126897e-06,
|
|
"loss": 0.2817,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.42277904328018223,
|
|
"grad_norm": 1.1088991455813102,
|
|
"learning_rate": 7.162595126896111e-06,
|
|
"loss": 0.2699,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.4233864844343204,
|
|
"grad_norm": 0.9068874217571249,
|
|
"learning_rate": 7.15303380448967e-06,
|
|
"loss": 0.2666,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.4239939255884586,
|
|
"grad_norm": 0.9846540050512566,
|
|
"learning_rate": 7.143462807015271e-06,
|
|
"loss": 0.2996,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.4246013667425968,
|
|
"grad_norm": 1.2920144608956823,
|
|
"learning_rate": 7.133882177482019e-06,
|
|
"loss": 0.283,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.425208807896735,
|
|
"grad_norm": 0.9844981703237418,
|
|
"learning_rate": 7.1242919589422974e-06,
|
|
"loss": 0.283,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.4258162490508732,
|
|
"grad_norm": 1.0792092917236789,
|
|
"learning_rate": 7.114692194491583e-06,
|
|
"loss": 0.2771,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.42642369020501136,
|
|
"grad_norm": 1.1366050037820674,
|
|
"learning_rate": 7.105082927268247e-06,
|
|
"loss": 0.3462,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.4270311313591496,
|
|
"grad_norm": 1.9892569077431685,
|
|
"learning_rate": 7.095464200453366e-06,
|
|
"loss": 0.2657,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.4276385725132878,
|
|
"grad_norm": 1.030861638233516,
|
|
"learning_rate": 7.085836057270521e-06,
|
|
"loss": 0.3113,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.428246013667426,
|
|
"grad_norm": 1.070680183715172,
|
|
"learning_rate": 7.07619854098561e-06,
|
|
"loss": 0.2622,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.4288534548215642,
|
|
"grad_norm": 1.0086658824371333,
|
|
"learning_rate": 7.066551694906651e-06,
|
|
"loss": 0.254,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.42946089597570236,
|
|
"grad_norm": 1.2178606474849707,
|
|
"learning_rate": 7.056895562383585e-06,
|
|
"loss": 0.283,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.43006833712984055,
|
|
"grad_norm": 1.0905334429821039,
|
|
"learning_rate": 7.047230186808085e-06,
|
|
"loss": 0.2979,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.43067577828397874,
|
|
"grad_norm": 1.0513698337032553,
|
|
"learning_rate": 7.0375556116133605e-06,
|
|
"loss": 0.3149,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.4312832194381169,
|
|
"grad_norm": 0.905817609480293,
|
|
"learning_rate": 7.027871880273959e-06,
|
|
"loss": 0.267,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.4318906605922551,
|
|
"grad_norm": 0.8779355405635809,
|
|
"learning_rate": 7.018179036305574e-06,
|
|
"loss": 0.2777,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.4324981017463933,
|
|
"grad_norm": 0.9741991469020347,
|
|
"learning_rate": 7.008477123264849e-06,
|
|
"loss": 0.2881,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.4331055429005315,
|
|
"grad_norm": 1.0958805822729734,
|
|
"learning_rate": 6.9987661847491786e-06,
|
|
"loss": 0.2688,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.4337129840546697,
|
|
"grad_norm": 0.9812819976899622,
|
|
"learning_rate": 6.989046264396516e-06,
|
|
"loss": 0.3073,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.43432042520880787,
|
|
"grad_norm": 0.9280917573243459,
|
|
"learning_rate": 6.9793174058851805e-06,
|
|
"loss": 0.2942,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.4349278663629461,
|
|
"grad_norm": 0.9208122228856829,
|
|
"learning_rate": 6.96957965293365e-06,
|
|
"loss": 0.2603,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.4355353075170843,
|
|
"grad_norm": 1.0481626035767557,
|
|
"learning_rate": 6.959833049300376e-06,
|
|
"loss": 0.2659,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.4361427486712225,
|
|
"grad_norm": 0.8888068906740508,
|
|
"learning_rate": 6.9500776387835785e-06,
|
|
"loss": 0.2626,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.4367501898253607,
|
|
"grad_norm": 0.9005255044425835,
|
|
"learning_rate": 6.940313465221057e-06,
|
|
"loss": 0.283,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.43735763097949887,
|
|
"grad_norm": 0.9313053275341852,
|
|
"learning_rate": 6.9305405724899876e-06,
|
|
"loss": 0.2878,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.43796507213363706,
|
|
"grad_norm": 1.0001487266577187,
|
|
"learning_rate": 6.920759004506723e-06,
|
|
"loss": 0.2381,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.43857251328777525,
|
|
"grad_norm": 1.104902424962282,
|
|
"learning_rate": 6.91096880522661e-06,
|
|
"loss": 0.3125,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.43917995444191343,
|
|
"grad_norm": 0.9710695360675384,
|
|
"learning_rate": 6.90117001864377e-06,
|
|
"loss": 0.2778,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.4397873955960516,
|
|
"grad_norm": 0.8790773492913548,
|
|
"learning_rate": 6.891362688790925e-06,
|
|
"loss": 0.2741,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.4403948367501898,
|
|
"grad_norm": 1.27448461099361,
|
|
"learning_rate": 6.8815468597391785e-06,
|
|
"loss": 0.2961,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.441002277904328,
|
|
"grad_norm": 1.8340194618418497,
|
|
"learning_rate": 6.871722575597829e-06,
|
|
"loss": 0.2806,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.4416097190584662,
|
|
"grad_norm": 0.9529710613769301,
|
|
"learning_rate": 6.8618898805141744e-06,
|
|
"loss": 0.2788,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.4422171602126044,
|
|
"grad_norm": 1.1180921584509682,
|
|
"learning_rate": 6.8520488186733e-06,
|
|
"loss": 0.2916,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.4428246013667426,
|
|
"grad_norm": 0.9569903924698052,
|
|
"learning_rate": 6.8421994342979e-06,
|
|
"loss": 0.297,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.4434320425208808,
|
|
"grad_norm": 0.9663109439234678,
|
|
"learning_rate": 6.832341771648057e-06,
|
|
"loss": 0.2934,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.444039483675019,
|
|
"grad_norm": 0.9525035732233259,
|
|
"learning_rate": 6.822475875021057e-06,
|
|
"loss": 0.2924,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.4446469248291572,
|
|
"grad_norm": 1.6736139762109075,
|
|
"learning_rate": 6.812601788751192e-06,
|
|
"loss": 0.2875,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.4452543659832954,
|
|
"grad_norm": 1.3961577247630492,
|
|
"learning_rate": 6.802719557209547e-06,
|
|
"loss": 0.2723,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.44586180713743356,
|
|
"grad_norm": 1.0192583283909253,
|
|
"learning_rate": 6.792829224803816e-06,
|
|
"loss": 0.2902,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.44646924829157175,
|
|
"grad_norm": 1.403077195941947,
|
|
"learning_rate": 6.782930835978094e-06,
|
|
"loss": 0.3298,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.44707668944570994,
|
|
"grad_norm": 0.912658050114097,
|
|
"learning_rate": 6.773024435212678e-06,
|
|
"loss": 0.2654,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.44768413059984813,
|
|
"grad_norm": 1.0482352218310602,
|
|
"learning_rate": 6.76311006702387e-06,
|
|
"loss": 0.27,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.4482915717539863,
|
|
"grad_norm": 0.8572502039817809,
|
|
"learning_rate": 6.753187775963773e-06,
|
|
"loss": 0.245,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.4488990129081245,
|
|
"grad_norm": 1.0913308805109663,
|
|
"learning_rate": 6.743257606620094e-06,
|
|
"loss": 0.2551,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.4495064540622627,
|
|
"grad_norm": 0.9439876776084938,
|
|
"learning_rate": 6.733319603615941e-06,
|
|
"loss": 0.274,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.45011389521640094,
|
|
"grad_norm": 0.9593594109115189,
|
|
"learning_rate": 6.723373811609628e-06,
|
|
"loss": 0.2698,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.4507213363705391,
|
|
"grad_norm": 1.065483676595223,
|
|
"learning_rate": 6.713420275294467e-06,
|
|
"loss": 0.3096,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.4513287775246773,
|
|
"grad_norm": 1.0245095563483146,
|
|
"learning_rate": 6.703459039398571e-06,
|
|
"loss": 0.3101,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.4519362186788155,
|
|
"grad_norm": 0.8544225145366473,
|
|
"learning_rate": 6.693490148684654e-06,
|
|
"loss": 0.2478,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.4525436598329537,
|
|
"grad_norm": 1.1603691969888617,
|
|
"learning_rate": 6.683513647949826e-06,
|
|
"loss": 0.3075,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.4531511009870919,
|
|
"grad_norm": 1.052592239520445,
|
|
"learning_rate": 6.673529582025398e-06,
|
|
"loss": 0.2737,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.45375854214123007,
|
|
"grad_norm": 3.117949544460077,
|
|
"learning_rate": 6.66353799577667e-06,
|
|
"loss": 0.2791,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.45436598329536826,
|
|
"grad_norm": 0.9613711534282726,
|
|
"learning_rate": 6.653538934102743e-06,
|
|
"loss": 0.3014,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.45497342444950645,
|
|
"grad_norm": 0.9748105893226726,
|
|
"learning_rate": 6.643532441936307e-06,
|
|
"loss": 0.2749,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.45558086560364464,
|
|
"grad_norm": 0.981233876336576,
|
|
"learning_rate": 6.633518564243442e-06,
|
|
"loss": 0.2971,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.4561883067577828,
|
|
"grad_norm": 0.9457213271619488,
|
|
"learning_rate": 6.6234973460234184e-06,
|
|
"loss": 0.2868,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.456795747911921,
|
|
"grad_norm": 2.686802516445107,
|
|
"learning_rate": 6.6134688323084884e-06,
|
|
"loss": 0.2731,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.4574031890660592,
|
|
"grad_norm": 0.9223972209238886,
|
|
"learning_rate": 6.603433068163694e-06,
|
|
"loss": 0.2616,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.45801063022019745,
|
|
"grad_norm": 1.025121154265423,
|
|
"learning_rate": 6.593390098686653e-06,
|
|
"loss": 0.2907,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.45861807137433563,
|
|
"grad_norm": 0.9622665588756997,
|
|
"learning_rate": 6.583339969007364e-06,
|
|
"loss": 0.3044,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.4592255125284738,
|
|
"grad_norm": 0.9057032918476721,
|
|
"learning_rate": 6.573282724288001e-06,
|
|
"loss": 0.2728,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.459832953682612,
|
|
"grad_norm": 0.9234610842082313,
|
|
"learning_rate": 6.563218409722712e-06,
|
|
"loss": 0.276,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.4604403948367502,
|
|
"grad_norm": 0.958933012871059,
|
|
"learning_rate": 6.553147070537413e-06,
|
|
"loss": 0.2777,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.4610478359908884,
|
|
"grad_norm": 1.0680318113393608,
|
|
"learning_rate": 6.543068751989585e-06,
|
|
"loss": 0.2765,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.4616552771450266,
|
|
"grad_norm": 0.9152338969224387,
|
|
"learning_rate": 6.532983499368078e-06,
|
|
"loss": 0.2931,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.46226271829916477,
|
|
"grad_norm": 0.9026579775576341,
|
|
"learning_rate": 6.522891357992895e-06,
|
|
"loss": 0.2519,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.46287015945330295,
|
|
"grad_norm": 0.9283253808655062,
|
|
"learning_rate": 6.512792373215e-06,
|
|
"loss": 0.2804,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.46347760060744114,
|
|
"grad_norm": 0.9173677882111563,
|
|
"learning_rate": 6.502686590416105e-06,
|
|
"loss": 0.2734,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.46408504176157933,
|
|
"grad_norm": 0.9717106384462386,
|
|
"learning_rate": 6.492574055008474e-06,
|
|
"loss": 0.2671,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.4646924829157175,
|
|
"grad_norm": 1.0114399277783983,
|
|
"learning_rate": 6.482454812434711e-06,
|
|
"loss": 0.2843,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.4652999240698557,
|
|
"grad_norm": 0.9171006340811538,
|
|
"learning_rate": 6.472328908167562e-06,
|
|
"loss": 0.2744,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.46590736522399395,
|
|
"grad_norm": 1.0070946523066167,
|
|
"learning_rate": 6.4621963877097105e-06,
|
|
"loss": 0.2838,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.46651480637813214,
|
|
"grad_norm": 0.9699071928177333,
|
|
"learning_rate": 6.452057296593568e-06,
|
|
"loss": 0.3075,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.46712224753227033,
|
|
"grad_norm": 1.2714437302384922,
|
|
"learning_rate": 6.441911680381074e-06,
|
|
"loss": 0.2803,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.4677296886864085,
|
|
"grad_norm": 0.8642721638499515,
|
|
"learning_rate": 6.431759584663492e-06,
|
|
"loss": 0.2594,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.4683371298405467,
|
|
"grad_norm": 0.9642319769554307,
|
|
"learning_rate": 6.421601055061195e-06,
|
|
"loss": 0.294,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.4689445709946849,
|
|
"grad_norm": 0.9718176611982676,
|
|
"learning_rate": 6.411436137223479e-06,
|
|
"loss": 0.276,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.4695520121488231,
|
|
"grad_norm": 1.044555593128521,
|
|
"learning_rate": 6.401264876828335e-06,
|
|
"loss": 0.2739,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.47015945330296127,
|
|
"grad_norm": 1.2599360050994348,
|
|
"learning_rate": 6.391087319582264e-06,
|
|
"loss": 0.2689,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.47076689445709946,
|
|
"grad_norm": 1.0049522316698594,
|
|
"learning_rate": 6.38090351122006e-06,
|
|
"loss": 0.2523,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.47137433561123765,
|
|
"grad_norm": 0.966627950881477,
|
|
"learning_rate": 6.370713497504607e-06,
|
|
"loss": 0.2443,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.47198177676537584,
|
|
"grad_norm": 0.9792802121829564,
|
|
"learning_rate": 6.360517324226676e-06,
|
|
"loss": 0.2783,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.472589217919514,
|
|
"grad_norm": 0.8715410479118867,
|
|
"learning_rate": 6.350315037204714e-06,
|
|
"loss": 0.272,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.4731966590736522,
|
|
"grad_norm": 1.0001665140104699,
|
|
"learning_rate": 6.340106682284645e-06,
|
|
"loss": 0.2838,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.47380410022779046,
|
|
"grad_norm": 0.934121158767379,
|
|
"learning_rate": 6.329892305339659e-06,
|
|
"loss": 0.2557,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.47441154138192865,
|
|
"grad_norm": 1.00237931674153,
|
|
"learning_rate": 6.319671952270004e-06,
|
|
"loss": 0.2729,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.47501898253606684,
|
|
"grad_norm": 1.222988126809238,
|
|
"learning_rate": 6.309445669002787e-06,
|
|
"loss": 0.2493,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.475626423690205,
|
|
"grad_norm": 1.107165014150917,
|
|
"learning_rate": 6.299213501491761e-06,
|
|
"loss": 0.3008,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.4762338648443432,
|
|
"grad_norm": 1.2066643126228012,
|
|
"learning_rate": 6.288975495717124e-06,
|
|
"loss": 0.2867,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.4768413059984814,
|
|
"grad_norm": 0.9028058894006271,
|
|
"learning_rate": 6.2787316976853045e-06,
|
|
"loss": 0.2495,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.4774487471526196,
|
|
"grad_norm": 0.8790390490726189,
|
|
"learning_rate": 6.268482153428763e-06,
|
|
"loss": 0.2348,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.4780561883067578,
|
|
"grad_norm": 0.9822253223369787,
|
|
"learning_rate": 6.258226909005783e-06,
|
|
"loss": 0.2809,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.47866362946089597,
|
|
"grad_norm": 0.9996010163514261,
|
|
"learning_rate": 6.247966010500258e-06,
|
|
"loss": 0.2794,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.47927107061503416,
|
|
"grad_norm": 0.95106120425031,
|
|
"learning_rate": 6.237699504021495e-06,
|
|
"loss": 0.2892,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.47987851176917234,
|
|
"grad_norm": 0.9498966653557624,
|
|
"learning_rate": 6.227427435703997e-06,
|
|
"loss": 0.277,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.48048595292331053,
|
|
"grad_norm": 0.9516961785802474,
|
|
"learning_rate": 6.217149851707261e-06,
|
|
"loss": 0.253,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.4810933940774487,
|
|
"grad_norm": 0.948579051817591,
|
|
"learning_rate": 6.206866798215571e-06,
|
|
"loss": 0.2675,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.48170083523158697,
|
|
"grad_norm": 0.8987027506170783,
|
|
"learning_rate": 6.1965783214377895e-06,
|
|
"loss": 0.2498,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.48230827638572515,
|
|
"grad_norm": 0.8985403214008115,
|
|
"learning_rate": 6.186284467607149e-06,
|
|
"loss": 0.2594,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.48291571753986334,
|
|
"grad_norm": 0.9277532386845908,
|
|
"learning_rate": 6.175985282981042e-06,
|
|
"loss": 0.2644,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.48352315869400153,
|
|
"grad_norm": 1.1995838839316881,
|
|
"learning_rate": 6.165680813840822e-06,
|
|
"loss": 0.2546,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.4841305998481397,
|
|
"grad_norm": 1.801894016231239,
|
|
"learning_rate": 6.155371106491584e-06,
|
|
"loss": 0.3234,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.4847380410022779,
|
|
"grad_norm": 0.906615804627482,
|
|
"learning_rate": 6.1450562072619635e-06,
|
|
"loss": 0.245,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.4853454821564161,
|
|
"grad_norm": 1.0302579157248837,
|
|
"learning_rate": 6.134736162503929e-06,
|
|
"loss": 0.2631,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.4859529233105543,
|
|
"grad_norm": 0.9384658833738736,
|
|
"learning_rate": 6.124411018592568e-06,
|
|
"loss": 0.2632,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.4865603644646925,
|
|
"grad_norm": 0.8947013802717417,
|
|
"learning_rate": 6.114080821925885e-06,
|
|
"loss": 0.272,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.48716780561883066,
|
|
"grad_norm": 1.3316960693705406,
|
|
"learning_rate": 6.103745618924587e-06,
|
|
"loss": 0.2577,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.48777524677296885,
|
|
"grad_norm": 1.0028338947085624,
|
|
"learning_rate": 6.09340545603188e-06,
|
|
"loss": 0.2925,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.48838268792710704,
|
|
"grad_norm": 1.0004427933236308,
|
|
"learning_rate": 6.0830603797132574e-06,
|
|
"loss": 0.2582,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.48899012908124523,
|
|
"grad_norm": 0.9812737917561756,
|
|
"learning_rate": 6.072710436456293e-06,
|
|
"loss": 0.2832,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.4895975702353835,
|
|
"grad_norm": 0.9548200362731749,
|
|
"learning_rate": 6.0623556727704306e-06,
|
|
"loss": 0.2676,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.49020501138952166,
|
|
"grad_norm": 0.9929722264569331,
|
|
"learning_rate": 6.051996135186774e-06,
|
|
"loss": 0.289,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.49081245254365985,
|
|
"grad_norm": 1.2854841849196001,
|
|
"learning_rate": 6.041631870257882e-06,
|
|
"loss": 0.2847,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.49141989369779804,
|
|
"grad_norm": 1.0000863726047369,
|
|
"learning_rate": 6.0312629245575534e-06,
|
|
"loss": 0.3195,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.4920273348519362,
|
|
"grad_norm": 0.9562458448178358,
|
|
"learning_rate": 6.020889344680627e-06,
|
|
"loss": 0.3136,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.4926347760060744,
|
|
"grad_norm": 3.4103757955936653,
|
|
"learning_rate": 6.010511177242757e-06,
|
|
"loss": 0.285,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.4932422171602126,
|
|
"grad_norm": 0.9236674911746163,
|
|
"learning_rate": 6.000128468880223e-06,
|
|
"loss": 0.2651,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.4938496583143508,
|
|
"grad_norm": 1.50715645288713,
|
|
"learning_rate": 5.989741266249701e-06,
|
|
"loss": 0.2961,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.494457099468489,
|
|
"grad_norm": 1.2283119233067459,
|
|
"learning_rate": 5.979349616028067e-06,
|
|
"loss": 0.3049,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.49506454062262717,
|
|
"grad_norm": 1.0356567298104182,
|
|
"learning_rate": 5.9689535649121855e-06,
|
|
"loss": 0.2891,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.49567198177676536,
|
|
"grad_norm": 0.9560994912289477,
|
|
"learning_rate": 5.958553159618693e-06,
|
|
"loss": 0.2619,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.49627942293090355,
|
|
"grad_norm": 0.8976018878445121,
|
|
"learning_rate": 5.948148446883794e-06,
|
|
"loss": 0.2753,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.49688686408504174,
|
|
"grad_norm": 1.057276020274219,
|
|
"learning_rate": 5.937739473463047e-06,
|
|
"loss": 0.3255,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.49749430523918,
|
|
"grad_norm": 0.9249628177128568,
|
|
"learning_rate": 5.927326286131162e-06,
|
|
"loss": 0.2774,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.49810174639331817,
|
|
"grad_norm": 0.9503742934078119,
|
|
"learning_rate": 5.916908931681781e-06,
|
|
"loss": 0.2771,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.49870918754745636,
|
|
"grad_norm": 1.2103474150033977,
|
|
"learning_rate": 5.906487456927273e-06,
|
|
"loss": 0.2949,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.49931662870159454,
|
|
"grad_norm": 0.9494133739592079,
|
|
"learning_rate": 5.896061908698521e-06,
|
|
"loss": 0.2771,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.49992406985573273,
|
|
"grad_norm": 0.9595078564844419,
|
|
"learning_rate": 5.885632333844714e-06,
|
|
"loss": 0.2746,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.5005315110098709,
|
|
"grad_norm": 0.8889046754711648,
|
|
"learning_rate": 5.8751987792331365e-06,
|
|
"loss": 0.2728,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.5011389521640092,
|
|
"grad_norm": 0.9984221504514925,
|
|
"learning_rate": 5.864761291748956e-06,
|
|
"loss": 0.2669,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.5017463933181473,
|
|
"grad_norm": 1.0642385661388791,
|
|
"learning_rate": 5.854319918295012e-06,
|
|
"loss": 0.2711,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.5023538344722855,
|
|
"grad_norm": 1.1473823060409172,
|
|
"learning_rate": 5.843874705791607e-06,
|
|
"loss": 0.2463,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.5029612756264237,
|
|
"grad_norm": 1.0570678499864414,
|
|
"learning_rate": 5.833425701176294e-06,
|
|
"loss": 0.3234,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.5035687167805619,
|
|
"grad_norm": 1.1140589963258467,
|
|
"learning_rate": 5.82297295140367e-06,
|
|
"loss": 0.2757,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.5041761579347,
|
|
"grad_norm": 0.8357461454559573,
|
|
"learning_rate": 5.812516503445158e-06,
|
|
"loss": 0.2555,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.5047835990888383,
|
|
"grad_norm": 0.9851929754213505,
|
|
"learning_rate": 5.8020564042888015e-06,
|
|
"loss": 0.2864,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.5053910402429764,
|
|
"grad_norm": 0.9649129922182244,
|
|
"learning_rate": 5.79159270093905e-06,
|
|
"loss": 0.2871,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.5059984813971147,
|
|
"grad_norm": 0.9056292022966355,
|
|
"learning_rate": 5.781125440416552e-06,
|
|
"loss": 0.2611,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.5066059225512528,
|
|
"grad_norm": 1.2561528927314698,
|
|
"learning_rate": 5.770654669757935e-06,
|
|
"loss": 0.2938,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.507213363705391,
|
|
"grad_norm": 1.429458180902475,
|
|
"learning_rate": 5.760180436015604e-06,
|
|
"loss": 0.2726,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.5078208048595292,
|
|
"grad_norm": 0.9977501941214085,
|
|
"learning_rate": 5.749702786257529e-06,
|
|
"loss": 0.2808,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.5084282460136674,
|
|
"grad_norm": 0.8542206729805362,
|
|
"learning_rate": 5.739221767567025e-06,
|
|
"loss": 0.2515,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.5090356871678057,
|
|
"grad_norm": 1.1569449493080222,
|
|
"learning_rate": 5.7287374270425475e-06,
|
|
"loss": 0.2764,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.5096431283219438,
|
|
"grad_norm": 0.9444050762828973,
|
|
"learning_rate": 5.718249811797482e-06,
|
|
"loss": 0.2895,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.510250569476082,
|
|
"grad_norm": 0.9584586543782557,
|
|
"learning_rate": 5.707758968959923e-06,
|
|
"loss": 0.2746,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.5108580106302202,
|
|
"grad_norm": 0.8938350182024917,
|
|
"learning_rate": 5.69726494567248e-06,
|
|
"loss": 0.2646,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.5114654517843584,
|
|
"grad_norm": 0.9952500187835128,
|
|
"learning_rate": 5.686767789092041e-06,
|
|
"loss": 0.2927,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.5120728929384966,
|
|
"grad_norm": 0.8980910835024705,
|
|
"learning_rate": 5.676267546389587e-06,
|
|
"loss": 0.255,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.5126803340926348,
|
|
"grad_norm": 0.9195976062449588,
|
|
"learning_rate": 5.6657642647499545e-06,
|
|
"loss": 0.2825,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.5132877752467729,
|
|
"grad_norm": 1.0411762343769737,
|
|
"learning_rate": 5.655257991371646e-06,
|
|
"loss": 0.2614,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.5138952164009112,
|
|
"grad_norm": 0.8901603514925267,
|
|
"learning_rate": 5.644748773466606e-06,
|
|
"loss": 0.2739,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.5145026575550493,
|
|
"grad_norm": 0.995174149677262,
|
|
"learning_rate": 5.6342366582600035e-06,
|
|
"loss": 0.3136,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.5151100987091876,
|
|
"grad_norm": 1.7141964167900545,
|
|
"learning_rate": 5.62372169299004e-06,
|
|
"loss": 0.2931,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.5157175398633257,
|
|
"grad_norm": 0.9986737322206383,
|
|
"learning_rate": 5.613203924907711e-06,
|
|
"loss": 0.2635,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.5163249810174639,
|
|
"grad_norm": 0.9184311409988306,
|
|
"learning_rate": 5.6026834012766155e-06,
|
|
"loss": 0.2523,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.5169324221716022,
|
|
"grad_norm": 1.0089379913752443,
|
|
"learning_rate": 5.592160169372734e-06,
|
|
"loss": 0.2884,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.5175398633257403,
|
|
"grad_norm": 0.9856271821475499,
|
|
"learning_rate": 5.581634276484211e-06,
|
|
"loss": 0.2701,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.5181473044798786,
|
|
"grad_norm": 1.4723310483980634,
|
|
"learning_rate": 5.571105769911159e-06,
|
|
"loss": 0.3305,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.5187547456340167,
|
|
"grad_norm": 0.972595176753459,
|
|
"learning_rate": 5.560574696965425e-06,
|
|
"loss": 0.2401,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.5193621867881549,
|
|
"grad_norm": 0.9359549675099497,
|
|
"learning_rate": 5.550041104970398e-06,
|
|
"loss": 0.2647,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.5199696279422931,
|
|
"grad_norm": 1.0700686033849647,
|
|
"learning_rate": 5.539505041260779e-06,
|
|
"loss": 0.2867,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.5205770690964313,
|
|
"grad_norm": 1.8194028377094806,
|
|
"learning_rate": 5.528966553182379e-06,
|
|
"loss": 0.2492,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.5211845102505694,
|
|
"grad_norm": 0.9354512112441699,
|
|
"learning_rate": 5.518425688091906e-06,
|
|
"loss": 0.2945,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.5217919514047077,
|
|
"grad_norm": 0.8575059431791342,
|
|
"learning_rate": 5.507882493356745e-06,
|
|
"loss": 0.2579,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.5223993925588458,
|
|
"grad_norm": 0.8801723522103503,
|
|
"learning_rate": 5.497337016354757e-06,
|
|
"loss": 0.2843,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.5230068337129841,
|
|
"grad_norm": 1.055054705643407,
|
|
"learning_rate": 5.486789304474047e-06,
|
|
"loss": 0.2463,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.5236142748671222,
|
|
"grad_norm": 0.9847077421418023,
|
|
"learning_rate": 5.476239405112775e-06,
|
|
"loss": 0.2961,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.5242217160212604,
|
|
"grad_norm": 0.9528909479486961,
|
|
"learning_rate": 5.465687365678921e-06,
|
|
"loss": 0.2883,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.5248291571753987,
|
|
"grad_norm": 0.9331008172836965,
|
|
"learning_rate": 5.45513323359009e-06,
|
|
"loss": 0.3138,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.5254365983295368,
|
|
"grad_norm": 0.9666089596639151,
|
|
"learning_rate": 5.444577056273284e-06,
|
|
"loss": 0.2755,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.5260440394836751,
|
|
"grad_norm": 0.9347956099649323,
|
|
"learning_rate": 5.434018881164702e-06,
|
|
"loss": 0.2701,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.5266514806378132,
|
|
"grad_norm": 0.8850204653085014,
|
|
"learning_rate": 5.423458755709516e-06,
|
|
"loss": 0.284,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.5272589217919514,
|
|
"grad_norm": 0.846363898916957,
|
|
"learning_rate": 5.412896727361663e-06,
|
|
"loss": 0.2381,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.5278663629460896,
|
|
"grad_norm": 1.0043627806351294,
|
|
"learning_rate": 5.402332843583631e-06,
|
|
"loss": 0.2748,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.5284738041002278,
|
|
"grad_norm": 1.0846231794328964,
|
|
"learning_rate": 5.391767151846247e-06,
|
|
"loss": 0.2717,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.529081245254366,
|
|
"grad_norm": 1.046609605860445,
|
|
"learning_rate": 5.381199699628459e-06,
|
|
"loss": 0.2982,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.5296886864085042,
|
|
"grad_norm": 0.912932730088245,
|
|
"learning_rate": 5.370630534417133e-06,
|
|
"loss": 0.2531,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.5302961275626423,
|
|
"grad_norm": 1.1500626604620934,
|
|
"learning_rate": 5.360059703706823e-06,
|
|
"loss": 0.2995,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.5309035687167806,
|
|
"grad_norm": 1.0117429899245785,
|
|
"learning_rate": 5.349487254999579e-06,
|
|
"loss": 0.2959,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.5315110098709187,
|
|
"grad_norm": 1.2473392199840632,
|
|
"learning_rate": 5.3389132358047115e-06,
|
|
"loss": 0.28,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.532118451025057,
|
|
"grad_norm": 0.9342083750338547,
|
|
"learning_rate": 5.328337693638591e-06,
|
|
"loss": 0.2856,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.5327258921791952,
|
|
"grad_norm": 0.9455670305571291,
|
|
"learning_rate": 5.317760676024436e-06,
|
|
"loss": 0.2757,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.5333333333333333,
|
|
"grad_norm": 1.7595753273077446,
|
|
"learning_rate": 5.307182230492089e-06,
|
|
"loss": 0.2757,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.5339407744874716,
|
|
"grad_norm": 0.8967029314384648,
|
|
"learning_rate": 5.296602404577814e-06,
|
|
"loss": 0.2455,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.5345482156416097,
|
|
"grad_norm": 1.3247032268401917,
|
|
"learning_rate": 5.286021245824075e-06,
|
|
"loss": 0.2947,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.535155656795748,
|
|
"grad_norm": 0.9423673536300602,
|
|
"learning_rate": 5.275438801779328e-06,
|
|
"loss": 0.2687,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.5357630979498861,
|
|
"grad_norm": 0.9635039934677604,
|
|
"learning_rate": 5.264855119997803e-06,
|
|
"loss": 0.283,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.5363705391040243,
|
|
"grad_norm": 0.8537857300482113,
|
|
"learning_rate": 5.254270248039291e-06,
|
|
"loss": 0.2448,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.5369779802581625,
|
|
"grad_norm": 0.983767837657037,
|
|
"learning_rate": 5.243684233468933e-06,
|
|
"loss": 0.285,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.5375854214123007,
|
|
"grad_norm": 0.8691004474077461,
|
|
"learning_rate": 5.233097123857004e-06,
|
|
"loss": 0.2556,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.5381928625664388,
|
|
"grad_norm": 0.9137454862081088,
|
|
"learning_rate": 5.222508966778702e-06,
|
|
"loss": 0.2484,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.5388003037205771,
|
|
"grad_norm": 0.9557993995814925,
|
|
"learning_rate": 5.211919809813927e-06,
|
|
"loss": 0.2568,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.5394077448747152,
|
|
"grad_norm": 0.9810092195544174,
|
|
"learning_rate": 5.201329700547077e-06,
|
|
"loss": 0.296,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.5400151860288535,
|
|
"grad_norm": 0.9230699355344197,
|
|
"learning_rate": 5.190738686566826e-06,
|
|
"loss": 0.2641,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.5406226271829917,
|
|
"grad_norm": 1.6906836035620436,
|
|
"learning_rate": 5.180146815465915e-06,
|
|
"loss": 0.28,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.5412300683371298,
|
|
"grad_norm": 0.9241916165908446,
|
|
"learning_rate": 5.169554134840937e-06,
|
|
"loss": 0.2646,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.5418375094912681,
|
|
"grad_norm": 0.9358728519319159,
|
|
"learning_rate": 5.158960692292122e-06,
|
|
"loss": 0.267,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.5424449506454062,
|
|
"grad_norm": 1.1048133481375857,
|
|
"learning_rate": 5.148366535423126e-06,
|
|
"loss": 0.2777,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.5430523917995445,
|
|
"grad_norm": 0.9378744728399504,
|
|
"learning_rate": 5.137771711840811e-06,
|
|
"loss": 0.2678,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.5436598329536826,
|
|
"grad_norm": 12.537370676924667,
|
|
"learning_rate": 5.1271762691550375e-06,
|
|
"loss": 0.2639,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.5442672741078208,
|
|
"grad_norm": 0.9266142790273743,
|
|
"learning_rate": 5.116580254978447e-06,
|
|
"loss": 0.2659,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.544874715261959,
|
|
"grad_norm": 0.8872145017815025,
|
|
"learning_rate": 5.1059837169262506e-06,
|
|
"loss": 0.2657,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.5454821564160972,
|
|
"grad_norm": 0.9596765024185803,
|
|
"learning_rate": 5.095386702616012e-06,
|
|
"loss": 0.2737,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.5460895975702353,
|
|
"grad_norm": 0.8505452588238741,
|
|
"learning_rate": 5.084789259667437e-06,
|
|
"loss": 0.2229,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.5466970387243736,
|
|
"grad_norm": 0.9742318710472475,
|
|
"learning_rate": 5.074191435702155e-06,
|
|
"loss": 0.2621,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.5473044798785117,
|
|
"grad_norm": 0.9174362261127138,
|
|
"learning_rate": 5.06359327834351e-06,
|
|
"loss": 0.2735,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.54791192103265,
|
|
"grad_norm": 1.0480082974980471,
|
|
"learning_rate": 5.05299483521634e-06,
|
|
"loss": 0.2804,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.5485193621867882,
|
|
"grad_norm": 0.9044085564794266,
|
|
"learning_rate": 5.0423961539467754e-06,
|
|
"loss": 0.251,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.5491268033409263,
|
|
"grad_norm": 1.0846067776083739,
|
|
"learning_rate": 5.031797282162007e-06,
|
|
"loss": 0.275,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.5497342444950646,
|
|
"grad_norm": 1.054126628964702,
|
|
"learning_rate": 5.021198267490088e-06,
|
|
"loss": 0.3109,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.5503416856492027,
|
|
"grad_norm": 0.9731663440580473,
|
|
"learning_rate": 5.010599157559713e-06,
|
|
"loss": 0.2744,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.550949126803341,
|
|
"grad_norm": 0.935385971022661,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.2833,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.5515565679574791,
|
|
"grad_norm": 0.95732647622787,
|
|
"learning_rate": 4.98940084244029e-06,
|
|
"loss": 0.2738,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.5521640091116173,
|
|
"grad_norm": 0.9778546392598653,
|
|
"learning_rate": 4.9788017325099134e-06,
|
|
"loss": 0.2902,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.5527714502657555,
|
|
"grad_norm": 1.161637726781862,
|
|
"learning_rate": 4.968202717837996e-06,
|
|
"loss": 0.2448,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.5533788914198937,
|
|
"grad_norm": 1.0325179748583595,
|
|
"learning_rate": 4.957603846053225e-06,
|
|
"loss": 0.2777,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.5539863325740318,
|
|
"grad_norm": 0.9385803768928881,
|
|
"learning_rate": 4.947005164783661e-06,
|
|
"loss": 0.252,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.5545937737281701,
|
|
"grad_norm": 0.8813294108193872,
|
|
"learning_rate": 4.936406721656492e-06,
|
|
"loss": 0.262,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.5552012148823082,
|
|
"grad_norm": 1.0450236387704728,
|
|
"learning_rate": 4.925808564297847e-06,
|
|
"loss": 0.2809,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.5558086560364465,
|
|
"grad_norm": 0.9743579657662962,
|
|
"learning_rate": 4.915210740332564e-06,
|
|
"loss": 0.2727,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.5564160971905847,
|
|
"grad_norm": 1.1211404996380052,
|
|
"learning_rate": 4.9046132973839895e-06,
|
|
"loss": 0.3337,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.5570235383447228,
|
|
"grad_norm": 0.9501582701592088,
|
|
"learning_rate": 4.894016283073753e-06,
|
|
"loss": 0.2598,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.5576309794988611,
|
|
"grad_norm": 1.023090140186835,
|
|
"learning_rate": 4.883419745021554e-06,
|
|
"loss": 0.2483,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.5582384206529992,
|
|
"grad_norm": 0.9242815154848782,
|
|
"learning_rate": 4.872823730844966e-06,
|
|
"loss": 0.255,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.5588458618071375,
|
|
"grad_norm": 1.0437928013824458,
|
|
"learning_rate": 4.862228288159191e-06,
|
|
"loss": 0.2804,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.5594533029612756,
|
|
"grad_norm": 0.928662371565127,
|
|
"learning_rate": 4.851633464576876e-06,
|
|
"loss": 0.2487,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.5600607441154138,
|
|
"grad_norm": 0.8860877007011291,
|
|
"learning_rate": 4.841039307707878e-06,
|
|
"loss": 0.2567,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.560668185269552,
|
|
"grad_norm": 0.9520319656600132,
|
|
"learning_rate": 4.8304458651590645e-06,
|
|
"loss": 0.2736,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.5612756264236902,
|
|
"grad_norm": 0.9185300392147626,
|
|
"learning_rate": 4.819853184534085e-06,
|
|
"loss": 0.2638,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.5618830675778284,
|
|
"grad_norm": 0.8949825038070447,
|
|
"learning_rate": 4.809261313433176e-06,
|
|
"loss": 0.2582,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.5624905087319666,
|
|
"grad_norm": 1.0105141831102669,
|
|
"learning_rate": 4.798670299452926e-06,
|
|
"loss": 0.2606,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.5630979498861047,
|
|
"grad_norm": 0.8759995193174664,
|
|
"learning_rate": 4.788080190186075e-06,
|
|
"loss": 0.2725,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.563705391040243,
|
|
"grad_norm": 0.9842928399442494,
|
|
"learning_rate": 4.7774910332213005e-06,
|
|
"loss": 0.2889,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.5643128321943812,
|
|
"grad_norm": 0.9982390837595454,
|
|
"learning_rate": 4.766902876142996e-06,
|
|
"loss": 0.2536,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.5649202733485194,
|
|
"grad_norm": 0.8982965289569348,
|
|
"learning_rate": 4.756315766531069e-06,
|
|
"loss": 0.2748,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.5655277145026576,
|
|
"grad_norm": 0.9053592532295419,
|
|
"learning_rate": 4.74572975196071e-06,
|
|
"loss": 0.2453,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.5661351556567957,
|
|
"grad_norm": 0.9728048806580342,
|
|
"learning_rate": 4.735144880002199e-06,
|
|
"loss": 0.2834,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.566742596810934,
|
|
"grad_norm": 1.0237268565258881,
|
|
"learning_rate": 4.724561198220672e-06,
|
|
"loss": 0.2525,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.5673500379650721,
|
|
"grad_norm": 0.8905565256751365,
|
|
"learning_rate": 4.713978754175926e-06,
|
|
"loss": 0.2698,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.5679574791192104,
|
|
"grad_norm": 0.9139326984298836,
|
|
"learning_rate": 4.703397595422188e-06,
|
|
"loss": 0.2674,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.5685649202733485,
|
|
"grad_norm": 0.8833013039618229,
|
|
"learning_rate": 4.692817769507912e-06,
|
|
"loss": 0.2684,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.5691723614274867,
|
|
"grad_norm": 0.9238763026108181,
|
|
"learning_rate": 4.682239323975566e-06,
|
|
"loss": 0.2558,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.5697798025816249,
|
|
"grad_norm": 1.5115130854908239,
|
|
"learning_rate": 4.671662306361409e-06,
|
|
"loss": 0.2935,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.5703872437357631,
|
|
"grad_norm": 0.9482923489278967,
|
|
"learning_rate": 4.66108676419529e-06,
|
|
"loss": 0.294,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.5709946848899012,
|
|
"grad_norm": 0.9477355385020175,
|
|
"learning_rate": 4.6505127450004216e-06,
|
|
"loss": 0.2632,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.5716021260440395,
|
|
"grad_norm": 0.9464692568004452,
|
|
"learning_rate": 4.6399402962931775e-06,
|
|
"loss": 0.2688,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.5722095671981777,
|
|
"grad_norm": 1.5567830309937185,
|
|
"learning_rate": 4.62936946558287e-06,
|
|
"loss": 0.2712,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.5728170083523159,
|
|
"grad_norm": 0.9424657036172921,
|
|
"learning_rate": 4.618800300371543e-06,
|
|
"loss": 0.2545,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.5734244495064541,
|
|
"grad_norm": 0.8823928171248179,
|
|
"learning_rate": 4.608232848153757e-06,
|
|
"loss": 0.2412,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.5740318906605922,
|
|
"grad_norm": 0.9999498692731567,
|
|
"learning_rate": 4.597667156416371e-06,
|
|
"loss": 0.2893,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.5746393318147305,
|
|
"grad_norm": 1.3850490330933427,
|
|
"learning_rate": 4.587103272638339e-06,
|
|
"loss": 0.272,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.5752467729688686,
|
|
"grad_norm": 0.9012990165306813,
|
|
"learning_rate": 4.576541244290484e-06,
|
|
"loss": 0.2735,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.5758542141230069,
|
|
"grad_norm": 0.9444183793088788,
|
|
"learning_rate": 4.565981118835299e-06,
|
|
"loss": 0.2747,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.576461655277145,
|
|
"grad_norm": 1.0837085618557278,
|
|
"learning_rate": 4.555422943726715e-06,
|
|
"loss": 0.318,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.5770690964312832,
|
|
"grad_norm": 0.9744715635599894,
|
|
"learning_rate": 4.5448667664099125e-06,
|
|
"loss": 0.2991,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.5776765375854214,
|
|
"grad_norm": 0.8967007719257041,
|
|
"learning_rate": 4.534312634321081e-06,
|
|
"loss": 0.2748,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.5782839787395596,
|
|
"grad_norm": 0.9164171208837589,
|
|
"learning_rate": 4.523760594887228e-06,
|
|
"loss": 0.246,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.5788914198936977,
|
|
"grad_norm": 0.8748749367095119,
|
|
"learning_rate": 4.513210695525954e-06,
|
|
"loss": 0.2521,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.579498861047836,
|
|
"grad_norm": 0.9504021763832875,
|
|
"learning_rate": 4.5026629836452445e-06,
|
|
"loss": 0.2965,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.5801063022019742,
|
|
"grad_norm": 1.0694432667746632,
|
|
"learning_rate": 4.492117506643256e-06,
|
|
"loss": 0.2487,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.5807137433561124,
|
|
"grad_norm": 0.9126259590301874,
|
|
"learning_rate": 4.481574311908096e-06,
|
|
"loss": 0.2714,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.5813211845102506,
|
|
"grad_norm": 0.8752331832756258,
|
|
"learning_rate": 4.471033446817623e-06,
|
|
"loss": 0.2645,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.5819286256643887,
|
|
"grad_norm": 0.9292223873298213,
|
|
"learning_rate": 4.460494958739223e-06,
|
|
"loss": 0.2827,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.582536066818527,
|
|
"grad_norm": 0.9249697636061491,
|
|
"learning_rate": 4.449958895029604e-06,
|
|
"loss": 0.2889,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.5831435079726651,
|
|
"grad_norm": 0.896078007157414,
|
|
"learning_rate": 4.439425303034576e-06,
|
|
"loss": 0.2438,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.5837509491268034,
|
|
"grad_norm": 0.994404825370418,
|
|
"learning_rate": 4.428894230088842e-06,
|
|
"loss": 0.2567,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.5843583902809415,
|
|
"grad_norm": 1.003722332588181,
|
|
"learning_rate": 4.418365723515791e-06,
|
|
"loss": 0.2777,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.5849658314350797,
|
|
"grad_norm": 0.9051722268130655,
|
|
"learning_rate": 4.407839830627269e-06,
|
|
"loss": 0.2723,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.5855732725892179,
|
|
"grad_norm": 0.9993245696949893,
|
|
"learning_rate": 4.397316598723385e-06,
|
|
"loss": 0.261,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.5861807137433561,
|
|
"grad_norm": 1.129318363010175,
|
|
"learning_rate": 4.38679607509229e-06,
|
|
"loss": 0.2785,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.5867881548974943,
|
|
"grad_norm": 0.9652429592155066,
|
|
"learning_rate": 4.376278307009962e-06,
|
|
"loss": 0.3026,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.5873955960516325,
|
|
"grad_norm": 1.08394459651315,
|
|
"learning_rate": 4.365763341739996e-06,
|
|
"loss": 0.2583,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.5880030372057707,
|
|
"grad_norm": 0.9919795857924707,
|
|
"learning_rate": 4.355251226533396e-06,
|
|
"loss": 0.2825,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.5886104783599089,
|
|
"grad_norm": 1.3252968659585784,
|
|
"learning_rate": 4.344742008628356e-06,
|
|
"loss": 0.2525,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.5892179195140471,
|
|
"grad_norm": 1.2622690111591564,
|
|
"learning_rate": 4.334235735250047e-06,
|
|
"loss": 0.2289,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.5898253606681853,
|
|
"grad_norm": 1.051902837554305,
|
|
"learning_rate": 4.3237324536104165e-06,
|
|
"loss": 0.2478,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.5904328018223235,
|
|
"grad_norm": 1.0527055472641202,
|
|
"learning_rate": 4.313232210907959e-06,
|
|
"loss": 0.2898,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.5910402429764616,
|
|
"grad_norm": 1.0101095079376434,
|
|
"learning_rate": 4.302735054327523e-06,
|
|
"loss": 0.2682,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.5916476841305999,
|
|
"grad_norm": 0.8621015161725616,
|
|
"learning_rate": 4.292241031040077e-06,
|
|
"loss": 0.2485,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.592255125284738,
|
|
"grad_norm": 0.8726005630346255,
|
|
"learning_rate": 4.28175018820252e-06,
|
|
"loss": 0.249,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.5928625664388762,
|
|
"grad_norm": 0.9751348393117523,
|
|
"learning_rate": 4.271262572957453e-06,
|
|
"loss": 0.2877,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.5934700075930144,
|
|
"grad_norm": 3.66261461720758,
|
|
"learning_rate": 4.2607782324329776e-06,
|
|
"loss": 0.3063,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.5940774487471526,
|
|
"grad_norm": 0.8943406986539186,
|
|
"learning_rate": 4.250297213742473e-06,
|
|
"loss": 0.2405,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.5946848899012908,
|
|
"grad_norm": 1.1389623298222618,
|
|
"learning_rate": 4.239819563984397e-06,
|
|
"loss": 0.304,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.595292331055429,
|
|
"grad_norm": 0.9274441396848276,
|
|
"learning_rate": 4.229345330242067e-06,
|
|
"loss": 0.2592,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.5958997722095672,
|
|
"grad_norm": 0.920248304426081,
|
|
"learning_rate": 4.21887455958345e-06,
|
|
"loss": 0.2656,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.5965072133637054,
|
|
"grad_norm": 1.0045840102748669,
|
|
"learning_rate": 4.2084072990609505e-06,
|
|
"loss": 0.2793,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.5971146545178436,
|
|
"grad_norm": 1.0001754794903,
|
|
"learning_rate": 4.1979435957111984e-06,
|
|
"loss": 0.2836,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.5977220956719818,
|
|
"grad_norm": 0.9044442095717217,
|
|
"learning_rate": 4.187483496554844e-06,
|
|
"loss": 0.2583,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.59832953682612,
|
|
"grad_norm": 0.9671571248544576,
|
|
"learning_rate": 4.17702704859633e-06,
|
|
"loss": 0.2675,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.5989369779802581,
|
|
"grad_norm": 1.0757021127620503,
|
|
"learning_rate": 4.166574298823707e-06,
|
|
"loss": 0.2676,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.5995444191343964,
|
|
"grad_norm": 0.9735187115759455,
|
|
"learning_rate": 4.156125294208396e-06,
|
|
"loss": 0.242,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.6001518602885345,
|
|
"grad_norm": 0.9405044204067678,
|
|
"learning_rate": 4.145680081704989e-06,
|
|
"loss": 0.2516,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.6007593014426728,
|
|
"grad_norm": 0.9651846089243521,
|
|
"learning_rate": 4.135238708251045e-06,
|
|
"loss": 0.2758,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.6013667425968109,
|
|
"grad_norm": 0.9646286486818011,
|
|
"learning_rate": 4.1248012207668635e-06,
|
|
"loss": 0.2565,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.6019741837509491,
|
|
"grad_norm": 1.003985298512368,
|
|
"learning_rate": 4.1143676661552876e-06,
|
|
"loss": 0.2828,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.6025816249050873,
|
|
"grad_norm": 1.3272162976579898,
|
|
"learning_rate": 4.103938091301479e-06,
|
|
"loss": 0.2374,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.6031890660592255,
|
|
"grad_norm": 1.029165825211411,
|
|
"learning_rate": 4.093512543072729e-06,
|
|
"loss": 0.256,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.6037965072133638,
|
|
"grad_norm": 1.015855525425474,
|
|
"learning_rate": 4.08309106831822e-06,
|
|
"loss": 0.2732,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.6044039483675019,
|
|
"grad_norm": 1.0983207807319615,
|
|
"learning_rate": 4.07267371386884e-06,
|
|
"loss": 0.2808,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.6050113895216401,
|
|
"grad_norm": 1.0434667435702822,
|
|
"learning_rate": 4.062260526536955e-06,
|
|
"loss": 0.2936,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.6056188306757783,
|
|
"grad_norm": 2.7705088228683183,
|
|
"learning_rate": 4.051851553116208e-06,
|
|
"loss": 0.2797,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.6062262718299165,
|
|
"grad_norm": 1.0175524502413011,
|
|
"learning_rate": 4.041446840381309e-06,
|
|
"loss": 0.2847,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.6068337129840546,
|
|
"grad_norm": 1.0263313633646869,
|
|
"learning_rate": 4.0310464350878145e-06,
|
|
"loss": 0.2803,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.6074411541381929,
|
|
"grad_norm": 0.9619358823368712,
|
|
"learning_rate": 4.0206503839719335e-06,
|
|
"loss": 0.2762,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.608048595292331,
|
|
"grad_norm": 1.1203236915172694,
|
|
"learning_rate": 4.0102587337503e-06,
|
|
"loss": 0.2813,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.6086560364464693,
|
|
"grad_norm": 0.9232892656793803,
|
|
"learning_rate": 3.999871531119779e-06,
|
|
"loss": 0.2791,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.6092634776006074,
|
|
"grad_norm": 1.2408650869198945,
|
|
"learning_rate": 3.989488822757244e-06,
|
|
"loss": 0.2529,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.6098709187547456,
|
|
"grad_norm": 1.3861837200018423,
|
|
"learning_rate": 3.9791106553193746e-06,
|
|
"loss": 0.2681,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.6104783599088838,
|
|
"grad_norm": 1.94554998345937,
|
|
"learning_rate": 3.968737075442449e-06,
|
|
"loss": 0.2774,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.611085801063022,
|
|
"grad_norm": 1.049688439654031,
|
|
"learning_rate": 3.9583681297421194e-06,
|
|
"loss": 0.2738,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.6116932422171603,
|
|
"grad_norm": 1.1130517893828742,
|
|
"learning_rate": 3.9480038648132285e-06,
|
|
"loss": 0.258,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.6123006833712984,
|
|
"grad_norm": 0.9091158603102817,
|
|
"learning_rate": 3.937644327229572e-06,
|
|
"loss": 0.256,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.6129081245254366,
|
|
"grad_norm": 0.8593163726216395,
|
|
"learning_rate": 3.927289563543709e-06,
|
|
"loss": 0.2139,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.6135155656795748,
|
|
"grad_norm": 2.2304132729314436,
|
|
"learning_rate": 3.916939620286743e-06,
|
|
"loss": 0.3042,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.614123006833713,
|
|
"grad_norm": 0.8846471003483667,
|
|
"learning_rate": 3.906594543968122e-06,
|
|
"loss": 0.2461,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.6147304479878511,
|
|
"grad_norm": 0.9578022421490531,
|
|
"learning_rate": 3.896254381075416e-06,
|
|
"loss": 0.2135,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.6153378891419894,
|
|
"grad_norm": 0.9488296987772777,
|
|
"learning_rate": 3.885919178074116e-06,
|
|
"loss": 0.2656,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.6159453302961275,
|
|
"grad_norm": 0.9095777905606226,
|
|
"learning_rate": 3.875588981407433e-06,
|
|
"loss": 0.2696,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.6165527714502658,
|
|
"grad_norm": 1.172457556031403,
|
|
"learning_rate": 3.865263837496072e-06,
|
|
"loss": 0.2807,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.6171602126044039,
|
|
"grad_norm": 0.880155690724818,
|
|
"learning_rate": 3.854943792738037e-06,
|
|
"loss": 0.2724,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.6177676537585421,
|
|
"grad_norm": 0.9726329271199242,
|
|
"learning_rate": 3.844628893508417e-06,
|
|
"loss": 0.2849,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.6183750949126803,
|
|
"grad_norm": 0.9428793397994366,
|
|
"learning_rate": 3.834319186159179e-06,
|
|
"loss": 0.2807,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.6189825360668185,
|
|
"grad_norm": 1.1315897196957132,
|
|
"learning_rate": 3.8240147170189575e-06,
|
|
"loss": 0.2674,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.6195899772209568,
|
|
"grad_norm": 0.9181683595405062,
|
|
"learning_rate": 3.8137155323928526e-06,
|
|
"loss": 0.2801,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.6201974183750949,
|
|
"grad_norm": 1.2695788430486319,
|
|
"learning_rate": 3.803421678562213e-06,
|
|
"loss": 0.2464,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.6208048595292331,
|
|
"grad_norm": 0.9830056869409215,
|
|
"learning_rate": 3.7931332017844302e-06,
|
|
"loss": 0.2219,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.6214123006833713,
|
|
"grad_norm": 0.9647382303431887,
|
|
"learning_rate": 3.7828501482927416e-06,
|
|
"loss": 0.2841,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.6220197418375095,
|
|
"grad_norm": 0.9717568307189096,
|
|
"learning_rate": 3.7725725642960047e-06,
|
|
"loss": 0.2977,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.6226271829916477,
|
|
"grad_norm": 0.9411703602062548,
|
|
"learning_rate": 3.7623004959785066e-06,
|
|
"loss": 0.2373,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.6232346241457859,
|
|
"grad_norm": 1.0071266637924543,
|
|
"learning_rate": 3.752033989499742e-06,
|
|
"loss": 0.2786,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.623842065299924,
|
|
"grad_norm": 0.9059441456779171,
|
|
"learning_rate": 3.7417730909942184e-06,
|
|
"loss": 0.231,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.6244495064540623,
|
|
"grad_norm": 1.012261264702401,
|
|
"learning_rate": 3.7315178465712364e-06,
|
|
"loss": 0.2623,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.6250569476082004,
|
|
"grad_norm": 0.9552757738267845,
|
|
"learning_rate": 3.721268302314698e-06,
|
|
"loss": 0.262,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.6256643887623387,
|
|
"grad_norm": 1.027708835953696,
|
|
"learning_rate": 3.7110245042828786e-06,
|
|
"loss": 0.2576,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.6262718299164769,
|
|
"grad_norm": 1.4132850004379782,
|
|
"learning_rate": 3.70078649850824e-06,
|
|
"loss": 0.2911,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.626879271070615,
|
|
"grad_norm": 1.299324195215338,
|
|
"learning_rate": 3.690554330997215e-06,
|
|
"loss": 0.2699,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.6274867122247533,
|
|
"grad_norm": 0.9195896367280948,
|
|
"learning_rate": 3.6803280477299975e-06,
|
|
"loss": 0.2449,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.6280941533788914,
|
|
"grad_norm": 1.2092094482741347,
|
|
"learning_rate": 3.670107694660343e-06,
|
|
"loss": 0.3003,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.6287015945330297,
|
|
"grad_norm": 0.9323263275420439,
|
|
"learning_rate": 3.659893317715355e-06,
|
|
"loss": 0.2712,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.6293090356871678,
|
|
"grad_norm": 0.9032020829812812,
|
|
"learning_rate": 3.6496849627952875e-06,
|
|
"loss": 0.2838,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.629916476841306,
|
|
"grad_norm": 0.9083525940287548,
|
|
"learning_rate": 3.639482675773324e-06,
|
|
"loss": 0.2729,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.6305239179954442,
|
|
"grad_norm": 0.9352716602967113,
|
|
"learning_rate": 3.6292865024953945e-06,
|
|
"loss": 0.2541,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.6311313591495824,
|
|
"grad_norm": 1.1060377092126286,
|
|
"learning_rate": 3.6190964887799418e-06,
|
|
"loss": 0.3177,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.6317388003037205,
|
|
"grad_norm": 0.9438466117610348,
|
|
"learning_rate": 3.6089126804177373e-06,
|
|
"loss": 0.2253,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.6323462414578588,
|
|
"grad_norm": 0.9463389610475652,
|
|
"learning_rate": 3.5987351231716665e-06,
|
|
"loss": 0.2484,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.6329536826119969,
|
|
"grad_norm": 1.0525074350281083,
|
|
"learning_rate": 3.5885638627765228e-06,
|
|
"loss": 0.2747,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.6335611237661352,
|
|
"grad_norm": 0.9145186747529931,
|
|
"learning_rate": 3.5783989449388063e-06,
|
|
"loss": 0.2631,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.6341685649202734,
|
|
"grad_norm": 0.8785607963190103,
|
|
"learning_rate": 3.568240415336509e-06,
|
|
"loss": 0.2438,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.6347760060744115,
|
|
"grad_norm": 0.9826184323548174,
|
|
"learning_rate": 3.5580883196189265e-06,
|
|
"loss": 0.2784,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.6353834472285498,
|
|
"grad_norm": 1.2684340321470944,
|
|
"learning_rate": 3.547942703406433e-06,
|
|
"loss": 0.2494,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.6359908883826879,
|
|
"grad_norm": 1.080171576140306,
|
|
"learning_rate": 3.5378036122902907e-06,
|
|
"loss": 0.2277,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.6365983295368262,
|
|
"grad_norm": 1.0670243919181694,
|
|
"learning_rate": 3.52767109183244e-06,
|
|
"loss": 0.2479,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.6372057706909643,
|
|
"grad_norm": 1.16999981733913,
|
|
"learning_rate": 3.5175451875652906e-06,
|
|
"loss": 0.3218,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.6378132118451025,
|
|
"grad_norm": 0.9540949950156941,
|
|
"learning_rate": 3.507425944991529e-06,
|
|
"loss": 0.2782,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.6384206529992407,
|
|
"grad_norm": 1.091171127154527,
|
|
"learning_rate": 3.4973134095838943e-06,
|
|
"loss": 0.2587,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.6390280941533789,
|
|
"grad_norm": 0.9498496318574927,
|
|
"learning_rate": 3.4872076267850015e-06,
|
|
"loss": 0.2541,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.639635535307517,
|
|
"grad_norm": 1.0314901642287697,
|
|
"learning_rate": 3.4771086420071053e-06,
|
|
"loss": 0.2664,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.6402429764616553,
|
|
"grad_norm": 0.9996172120430332,
|
|
"learning_rate": 3.4670165006319236e-06,
|
|
"loss": 0.2799,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.6408504176157934,
|
|
"grad_norm": 0.9350416152513497,
|
|
"learning_rate": 3.4569312480104157e-06,
|
|
"loss": 0.2829,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.6414578587699317,
|
|
"grad_norm": 1.1632073226641764,
|
|
"learning_rate": 3.4468529294625895e-06,
|
|
"loss": 0.2574,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.6420652999240699,
|
|
"grad_norm": 0.9447749084911037,
|
|
"learning_rate": 3.4367815902772917e-06,
|
|
"loss": 0.2562,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.642672741078208,
|
|
"grad_norm": 1.2070485411268692,
|
|
"learning_rate": 3.4267172757120005e-06,
|
|
"loss": 0.2635,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.6432801822323463,
|
|
"grad_norm": 0.9408532287008231,
|
|
"learning_rate": 3.416660030992639e-06,
|
|
"loss": 0.2631,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.6438876233864844,
|
|
"grad_norm": 1.279419634233724,
|
|
"learning_rate": 3.406609901313349e-06,
|
|
"loss": 0.2716,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.6444950645406227,
|
|
"grad_norm": 1.0437711278438688,
|
|
"learning_rate": 3.396566931836308e-06,
|
|
"loss": 0.2633,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.6451025056947608,
|
|
"grad_norm": 0.9477234648384033,
|
|
"learning_rate": 3.386531167691512e-06,
|
|
"loss": 0.2551,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.645709946848899,
|
|
"grad_norm": 0.8821252495312435,
|
|
"learning_rate": 3.3765026539765832e-06,
|
|
"loss": 0.2484,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.6463173880030372,
|
|
"grad_norm": 1.0630208248660034,
|
|
"learning_rate": 3.36648143575656e-06,
|
|
"loss": 0.2724,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.6469248291571754,
|
|
"grad_norm": 0.8969008349261947,
|
|
"learning_rate": 3.3564675580636946e-06,
|
|
"loss": 0.2544,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.6475322703113136,
|
|
"grad_norm": 0.8758051487969973,
|
|
"learning_rate": 3.3464610658972584e-06,
|
|
"loss": 0.2518,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.6481397114654518,
|
|
"grad_norm": 0.9068783910815723,
|
|
"learning_rate": 3.3364620042233316e-06,
|
|
"loss": 0.2362,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.6487471526195899,
|
|
"grad_norm": 0.9711243894474835,
|
|
"learning_rate": 3.326470417974604e-06,
|
|
"loss": 0.2417,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.6493545937737282,
|
|
"grad_norm": 0.9040626259172609,
|
|
"learning_rate": 3.3164863520501744e-06,
|
|
"loss": 0.2289,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.6499620349278664,
|
|
"grad_norm": 4.042527322341976,
|
|
"learning_rate": 3.3065098513153473e-06,
|
|
"loss": 0.2839,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.6505694760820045,
|
|
"grad_norm": 0.8905148331919976,
|
|
"learning_rate": 3.29654096060143e-06,
|
|
"loss": 0.2758,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.6511769172361428,
|
|
"grad_norm": 1.0792077197141197,
|
|
"learning_rate": 3.2865797247055354e-06,
|
|
"loss": 0.2662,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.6517843583902809,
|
|
"grad_norm": 0.9497844499158115,
|
|
"learning_rate": 3.2766261883903744e-06,
|
|
"loss": 0.2549,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.6523917995444192,
|
|
"grad_norm": 1.0939981168516681,
|
|
"learning_rate": 3.266680396384061e-06,
|
|
"loss": 0.293,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.6529992406985573,
|
|
"grad_norm": 2.703463930587898,
|
|
"learning_rate": 3.256742393379909e-06,
|
|
"loss": 0.225,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.6536066818526955,
|
|
"grad_norm": 0.8280179235274505,
|
|
"learning_rate": 3.2468122240362287e-06,
|
|
"loss": 0.224,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.6542141230068337,
|
|
"grad_norm": 1.439064209594826,
|
|
"learning_rate": 3.2368899329761316e-06,
|
|
"loss": 0.2607,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.6548215641609719,
|
|
"grad_norm": 0.8868653685170421,
|
|
"learning_rate": 3.226975564787322e-06,
|
|
"loss": 0.2276,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.6554290053151101,
|
|
"grad_norm": 1.0980898669737127,
|
|
"learning_rate": 3.2170691640219077e-06,
|
|
"loss": 0.2648,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.6560364464692483,
|
|
"grad_norm": 0.9994606567591712,
|
|
"learning_rate": 3.2071707751961838e-06,
|
|
"loss": 0.2785,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.6566438876233864,
|
|
"grad_norm": 1.0995716769114432,
|
|
"learning_rate": 3.197280442790455e-06,
|
|
"loss": 0.2503,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.6572513287775247,
|
|
"grad_norm": 0.9205922048045703,
|
|
"learning_rate": 3.187398211248811e-06,
|
|
"loss": 0.2367,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.6578587699316629,
|
|
"grad_norm": 0.9635183689443267,
|
|
"learning_rate": 3.1775241249789434e-06,
|
|
"loss": 0.254,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.6584662110858011,
|
|
"grad_norm": 0.924480671064993,
|
|
"learning_rate": 3.1676582283519454e-06,
|
|
"loss": 0.2279,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.6590736522399393,
|
|
"grad_norm": 0.906465381610937,
|
|
"learning_rate": 3.1578005657021004e-06,
|
|
"loss": 0.2285,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.6596810933940774,
|
|
"grad_norm": 0.92471932747414,
|
|
"learning_rate": 3.1479511813267006e-06,
|
|
"loss": 0.2655,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.6602885345482157,
|
|
"grad_norm": 1.0142140195825766,
|
|
"learning_rate": 3.1381101194858264e-06,
|
|
"loss": 0.2407,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.6608959757023538,
|
|
"grad_norm": 1.0686680921013536,
|
|
"learning_rate": 3.1282774244021717e-06,
|
|
"loss": 0.2604,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.661503416856492,
|
|
"grad_norm": 1.0244620367393154,
|
|
"learning_rate": 3.118453140260823e-06,
|
|
"loss": 0.284,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.6621108580106302,
|
|
"grad_norm": 1.0170876360692431,
|
|
"learning_rate": 3.1086373112090762e-06,
|
|
"loss": 0.2523,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.6627182991647684,
|
|
"grad_norm": 0.9802160451886665,
|
|
"learning_rate": 3.0988299813562304e-06,
|
|
"loss": 0.2783,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.6633257403189066,
|
|
"grad_norm": 0.9103056036600278,
|
|
"learning_rate": 3.089031194773392e-06,
|
|
"loss": 0.2502,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.6639331814730448,
|
|
"grad_norm": 1.359229570037281,
|
|
"learning_rate": 3.079240995493279e-06,
|
|
"loss": 0.2479,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.6645406226271829,
|
|
"grad_norm": 1.0565783940509939,
|
|
"learning_rate": 3.069459427510014e-06,
|
|
"loss": 0.2442,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.6651480637813212,
|
|
"grad_norm": 0.8822214330244847,
|
|
"learning_rate": 3.0596865347789444e-06,
|
|
"loss": 0.2722,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.6657555049354594,
|
|
"grad_norm": 0.9150480419339629,
|
|
"learning_rate": 3.049922361216422e-06,
|
|
"loss": 0.2425,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.6663629460895976,
|
|
"grad_norm": 0.9517674541707122,
|
|
"learning_rate": 3.040166950699626e-06,
|
|
"loss": 0.2496,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.6669703872437358,
|
|
"grad_norm": 0.9599880059742387,
|
|
"learning_rate": 3.0304203470663507e-06,
|
|
"loss": 0.2619,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.6675778283978739,
|
|
"grad_norm": 0.9460242432498148,
|
|
"learning_rate": 3.0206825941148203e-06,
|
|
"loss": 0.3065,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.6681852695520122,
|
|
"grad_norm": 1.4106793360221765,
|
|
"learning_rate": 3.0109537356034856e-06,
|
|
"loss": 0.2737,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.6687927107061503,
|
|
"grad_norm": 1.2737238922916891,
|
|
"learning_rate": 3.001233815250823e-06,
|
|
"loss": 0.2899,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.6694001518602886,
|
|
"grad_norm": 0.9517912503819469,
|
|
"learning_rate": 2.991522876735154e-06,
|
|
"loss": 0.2624,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.6700075930144267,
|
|
"grad_norm": 1.015322451634877,
|
|
"learning_rate": 2.981820963694427e-06,
|
|
"loss": 0.2301,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.6706150341685649,
|
|
"grad_norm": 1.4417460163504778,
|
|
"learning_rate": 2.9721281197260427e-06,
|
|
"loss": 0.2864,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.6712224753227031,
|
|
"grad_norm": 1.3786447145331062,
|
|
"learning_rate": 2.9624443883866403e-06,
|
|
"loss": 0.2441,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.6718299164768413,
|
|
"grad_norm": 1.0007659923579586,
|
|
"learning_rate": 2.9527698131919156e-06,
|
|
"loss": 0.2891,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.6724373576309794,
|
|
"grad_norm": 1.1633207582868845,
|
|
"learning_rate": 2.9431044376164165e-06,
|
|
"loss": 0.2978,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.6730447987851177,
|
|
"grad_norm": 1.032656472550036,
|
|
"learning_rate": 2.9334483050933506e-06,
|
|
"loss": 0.2507,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.6736522399392559,
|
|
"grad_norm": 0.9333208809560491,
|
|
"learning_rate": 2.9238014590143925e-06,
|
|
"loss": 0.2376,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.6742596810933941,
|
|
"grad_norm": 1.1289827469541969,
|
|
"learning_rate": 2.91416394272948e-06,
|
|
"loss": 0.2582,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.6748671222475323,
|
|
"grad_norm": 0.925760485434696,
|
|
"learning_rate": 2.904535799546636e-06,
|
|
"loss": 0.2177,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.6754745634016704,
|
|
"grad_norm": 0.9162175321455921,
|
|
"learning_rate": 2.894917072731753e-06,
|
|
"loss": 0.2607,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.6760820045558087,
|
|
"grad_norm": 1.0213577123976934,
|
|
"learning_rate": 2.8853078055084192e-06,
|
|
"loss": 0.2588,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.6766894457099468,
|
|
"grad_norm": 0.9140846537600611,
|
|
"learning_rate": 2.8757080410577042e-06,
|
|
"loss": 0.2701,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.6772968868640851,
|
|
"grad_norm": 0.9558677038661029,
|
|
"learning_rate": 2.866117822517982e-06,
|
|
"loss": 0.2078,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.6779043280182232,
|
|
"grad_norm": 1.6365480186656665,
|
|
"learning_rate": 2.8565371929847286e-06,
|
|
"loss": 0.2519,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.6785117691723614,
|
|
"grad_norm": 0.8999981416609766,
|
|
"learning_rate": 2.846966195510332e-06,
|
|
"loss": 0.2586,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.6791192103264996,
|
|
"grad_norm": 0.8986580797788825,
|
|
"learning_rate": 2.83740487310389e-06,
|
|
"loss": 0.2651,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.6797266514806378,
|
|
"grad_norm": 1.0174347095785217,
|
|
"learning_rate": 2.82785326873103e-06,
|
|
"loss": 0.2593,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.680334092634776,
|
|
"grad_norm": 0.922083211098202,
|
|
"learning_rate": 2.81831142531371e-06,
|
|
"loss": 0.2597,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.6809415337889142,
|
|
"grad_norm": 1.006983963737431,
|
|
"learning_rate": 2.8087793857300193e-06,
|
|
"loss": 0.2682,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.6815489749430524,
|
|
"grad_norm": 1.8659752629573085,
|
|
"learning_rate": 2.7992571928139984e-06,
|
|
"loss": 0.2481,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.6821564160971906,
|
|
"grad_norm": 0.9701288254765418,
|
|
"learning_rate": 2.7897448893554335e-06,
|
|
"loss": 0.2581,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.6827638572513288,
|
|
"grad_norm": 0.9122901540097156,
|
|
"learning_rate": 2.780242518099675e-06,
|
|
"loss": 0.2503,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.683371298405467,
|
|
"grad_norm": 0.9807303601001196,
|
|
"learning_rate": 2.7707501217474443e-06,
|
|
"loss": 0.2744,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.6839787395596052,
|
|
"grad_norm": 6.645271440733989,
|
|
"learning_rate": 2.761267742954629e-06,
|
|
"loss": 0.2524,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.6845861807137433,
|
|
"grad_norm": 1.534147172507746,
|
|
"learning_rate": 2.7517954243321097e-06,
|
|
"loss": 0.2659,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.6851936218678816,
|
|
"grad_norm": 0.9373688303596897,
|
|
"learning_rate": 2.7423332084455543e-06,
|
|
"loss": 0.2851,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.6858010630220197,
|
|
"grad_norm": 0.8831368522306644,
|
|
"learning_rate": 2.7328811378152355e-06,
|
|
"loss": 0.2557,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.686408504176158,
|
|
"grad_norm": 0.9679109580287109,
|
|
"learning_rate": 2.723439254915834e-06,
|
|
"loss": 0.275,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.6870159453302961,
|
|
"grad_norm": 1.2218248931218192,
|
|
"learning_rate": 2.714007602176254e-06,
|
|
"loss": 0.2413,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.6876233864844343,
|
|
"grad_norm": 0.9708101090046806,
|
|
"learning_rate": 2.704586221979422e-06,
|
|
"loss": 0.2645,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.6882308276385725,
|
|
"grad_norm": 1.2522386234048026,
|
|
"learning_rate": 2.695175156662107e-06,
|
|
"loss": 0.2574,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.6888382687927107,
|
|
"grad_norm": 0.8419129242667286,
|
|
"learning_rate": 2.6857744485147286e-06,
|
|
"loss": 0.2383,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.689445709946849,
|
|
"grad_norm": 1.0193831420490371,
|
|
"learning_rate": 2.6763841397811576e-06,
|
|
"loss": 0.2735,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.6900531511009871,
|
|
"grad_norm": 0.9241296407909637,
|
|
"learning_rate": 2.667004272658541e-06,
|
|
"loss": 0.2768,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.6906605922551253,
|
|
"grad_norm": 1.1359832523999245,
|
|
"learning_rate": 2.6576348892970947e-06,
|
|
"loss": 0.2636,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.6912680334092635,
|
|
"grad_norm": 0.8941017093322563,
|
|
"learning_rate": 2.6482760317999338e-06,
|
|
"loss": 0.2559,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.6918754745634017,
|
|
"grad_norm": 0.8968310666010292,
|
|
"learning_rate": 2.638927742222868e-06,
|
|
"loss": 0.2537,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.6924829157175398,
|
|
"grad_norm": 0.959662408955417,
|
|
"learning_rate": 2.629590062574221e-06,
|
|
"loss": 0.2656,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.6930903568716781,
|
|
"grad_norm": 0.9000247977135409,
|
|
"learning_rate": 2.6202630348146323e-06,
|
|
"loss": 0.2899,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.6936977980258162,
|
|
"grad_norm": 1.0079650585712254,
|
|
"learning_rate": 2.610946700856885e-06,
|
|
"loss": 0.267,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.6943052391799545,
|
|
"grad_norm": 0.8890790658066724,
|
|
"learning_rate": 2.6016411025656973e-06,
|
|
"loss": 0.2535,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.6949126803340926,
|
|
"grad_norm": 0.9198656469536414,
|
|
"learning_rate": 2.592346281757552e-06,
|
|
"loss": 0.2509,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.6955201214882308,
|
|
"grad_norm": 0.9738974660627011,
|
|
"learning_rate": 2.583062280200501e-06,
|
|
"loss": 0.2593,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.696127562642369,
|
|
"grad_norm": 0.9837219510435016,
|
|
"learning_rate": 2.5737891396139713e-06,
|
|
"loss": 0.255,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.6967350037965072,
|
|
"grad_norm": 0.9076420454349192,
|
|
"learning_rate": 2.5645269016685905e-06,
|
|
"loss": 0.2704,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.6973424449506455,
|
|
"grad_norm": 0.8898433106915349,
|
|
"learning_rate": 2.5552756079859904e-06,
|
|
"loss": 0.2594,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.6979498861047836,
|
|
"grad_norm": 0.9063509680084296,
|
|
"learning_rate": 2.5460353001386263e-06,
|
|
"loss": 0.2529,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.6985573272589218,
|
|
"grad_norm": 0.8948107859742076,
|
|
"learning_rate": 2.5368060196495785e-06,
|
|
"loss": 0.2564,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.69916476841306,
|
|
"grad_norm": 0.8945325429627021,
|
|
"learning_rate": 2.527587807992383e-06,
|
|
"loss": 0.2387,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.6997722095671982,
|
|
"grad_norm": 0.9769838924293288,
|
|
"learning_rate": 2.5183807065908296e-06,
|
|
"loss": 0.2542,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.7003796507213363,
|
|
"grad_norm": 0.9645249197834942,
|
|
"learning_rate": 2.5091847568187834e-06,
|
|
"loss": 0.2281,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.7009870918754746,
|
|
"grad_norm": 0.9496141254681564,
|
|
"learning_rate": 2.5000000000000015e-06,
|
|
"loss": 0.241,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.7015945330296127,
|
|
"grad_norm": 0.9380133404008738,
|
|
"learning_rate": 2.4908264774079355e-06,
|
|
"loss": 0.2605,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.702201974183751,
|
|
"grad_norm": 0.9074627496390306,
|
|
"learning_rate": 2.4816642302655634e-06,
|
|
"loss": 0.2541,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.7028094153378891,
|
|
"grad_norm": 1.0027152368026724,
|
|
"learning_rate": 2.4725132997451833e-06,
|
|
"loss": 0.2601,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.7034168564920273,
|
|
"grad_norm": 2.6289745813296284,
|
|
"learning_rate": 2.4633737269682546e-06,
|
|
"loss": 0.3022,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7040242976461655,
|
|
"grad_norm": 0.9370322619588107,
|
|
"learning_rate": 2.454245553005184e-06,
|
|
"loss": 0.2643,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.7046317388003037,
|
|
"grad_norm": 1.468995991623161,
|
|
"learning_rate": 2.445128818875166e-06,
|
|
"loss": 0.2852,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.705239179954442,
|
|
"grad_norm": 0.9901499615769476,
|
|
"learning_rate": 2.4360235655459804e-06,
|
|
"loss": 0.3014,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.7058466211085801,
|
|
"grad_norm": 0.9762587690316699,
|
|
"learning_rate": 2.4269298339338205e-06,
|
|
"loss": 0.2464,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.7064540622627183,
|
|
"grad_norm": 0.9823369995064071,
|
|
"learning_rate": 2.4178476649031057e-06,
|
|
"loss": 0.2611,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.7070615034168565,
|
|
"grad_norm": 1.040540505906759,
|
|
"learning_rate": 2.408777099266291e-06,
|
|
"loss": 0.2628,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.7076689445709947,
|
|
"grad_norm": 0.9626462256229749,
|
|
"learning_rate": 2.3997181777836955e-06,
|
|
"loss": 0.3069,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.7082763857251329,
|
|
"grad_norm": 1.2283451848928204,
|
|
"learning_rate": 2.3906709411633073e-06,
|
|
"loss": 0.2405,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.7088838268792711,
|
|
"grad_norm": 0.9137970515612295,
|
|
"learning_rate": 2.381635430060611e-06,
|
|
"loss": 0.28,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.7094912680334092,
|
|
"grad_norm": 0.9656636216601993,
|
|
"learning_rate": 2.3726116850783987e-06,
|
|
"loss": 0.2696,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.7100987091875475,
|
|
"grad_norm": 0.9461355671018838,
|
|
"learning_rate": 2.3635997467665905e-06,
|
|
"loss": 0.2527,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.7107061503416856,
|
|
"grad_norm": 0.8499788622610774,
|
|
"learning_rate": 2.354599655622049e-06,
|
|
"loss": 0.2425,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.7113135914958238,
|
|
"grad_norm": 0.9394600691367851,
|
|
"learning_rate": 2.3456114520883956e-06,
|
|
"loss": 0.2478,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.711921032649962,
|
|
"grad_norm": 0.9539174173321666,
|
|
"learning_rate": 2.3366351765558437e-06,
|
|
"loss": 0.2552,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.7125284738041002,
|
|
"grad_norm": 1.0414224878560323,
|
|
"learning_rate": 2.3276708693609947e-06,
|
|
"loss": 0.2798,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.7131359149582385,
|
|
"grad_norm": 0.9245170066700932,
|
|
"learning_rate": 2.318718570786675e-06,
|
|
"loss": 0.2463,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.7137433561123766,
|
|
"grad_norm": 0.9803347614971838,
|
|
"learning_rate": 2.309778321061742e-06,
|
|
"loss": 0.2416,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.7143507972665148,
|
|
"grad_norm": 0.9130379562604788,
|
|
"learning_rate": 2.3008501603609147e-06,
|
|
"loss": 0.275,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.714958238420653,
|
|
"grad_norm": 0.8761644255482913,
|
|
"learning_rate": 2.2919341288045853e-06,
|
|
"loss": 0.2502,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.7155656795747912,
|
|
"grad_norm": 0.9584496523002601,
|
|
"learning_rate": 2.283030266458644e-06,
|
|
"loss": 0.2754,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.7161731207289294,
|
|
"grad_norm": 0.8665475599966695,
|
|
"learning_rate": 2.2741386133342923e-06,
|
|
"loss": 0.2505,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.7167805618830676,
|
|
"grad_norm": 0.8576308093825102,
|
|
"learning_rate": 2.265259209387867e-06,
|
|
"loss": 0.2304,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.7173880030372057,
|
|
"grad_norm": 0.9335114757524509,
|
|
"learning_rate": 2.256392094520664e-06,
|
|
"loss": 0.2697,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.717995444191344,
|
|
"grad_norm": 1.2455184951743299,
|
|
"learning_rate": 2.2475373085787568e-06,
|
|
"loss": 0.2644,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.7186028853454821,
|
|
"grad_norm": 1.8108379220736726,
|
|
"learning_rate": 2.238694891352814e-06,
|
|
"loss": 0.2637,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.7192103264996204,
|
|
"grad_norm": 0.884996855921727,
|
|
"learning_rate": 2.229864882577921e-06,
|
|
"loss": 0.2303,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.7198177676537585,
|
|
"grad_norm": 0.9318262199049523,
|
|
"learning_rate": 2.2210473219334083e-06,
|
|
"loss": 0.255,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.7204252088078967,
|
|
"grad_norm": 0.8164042083609618,
|
|
"learning_rate": 2.2122422490426676e-06,
|
|
"loss": 0.2384,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.721032649962035,
|
|
"grad_norm": 1.0115748704170144,
|
|
"learning_rate": 2.203449703472969e-06,
|
|
"loss": 0.268,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.7216400911161731,
|
|
"grad_norm": 1.1883752167190924,
|
|
"learning_rate": 2.194669724735296e-06,
|
|
"loss": 0.2755,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.7222475322703114,
|
|
"grad_norm": 1.0308833660996168,
|
|
"learning_rate": 2.1859023522841543e-06,
|
|
"loss": 0.2327,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.7228549734244495,
|
|
"grad_norm": 0.9120682340740801,
|
|
"learning_rate": 2.1771476255174056e-06,
|
|
"loss": 0.2735,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.7234624145785877,
|
|
"grad_norm": 1.013727297073577,
|
|
"learning_rate": 2.1684055837760837e-06,
|
|
"loss": 0.2757,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.7240698557327259,
|
|
"grad_norm": 0.9458973783887059,
|
|
"learning_rate": 2.159676266344222e-06,
|
|
"loss": 0.268,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.7246772968868641,
|
|
"grad_norm": 1.3758680862531418,
|
|
"learning_rate": 2.1509597124486693e-06,
|
|
"loss": 0.2367,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.7252847380410022,
|
|
"grad_norm": 1.2033412446646528,
|
|
"learning_rate": 2.1422559612589266e-06,
|
|
"loss": 0.2964,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.7258921791951405,
|
|
"grad_norm": 1.0020458048011924,
|
|
"learning_rate": 2.1335650518869555e-06,
|
|
"loss": 0.2625,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.7264996203492786,
|
|
"grad_norm": 0.956244480904364,
|
|
"learning_rate": 2.124887023387017e-06,
|
|
"loss": 0.2974,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.7271070615034169,
|
|
"grad_norm": 0.969906166075625,
|
|
"learning_rate": 2.1162219147554884e-06,
|
|
"loss": 0.2858,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.727714502657555,
|
|
"grad_norm": 1.242369815476469,
|
|
"learning_rate": 2.1075697649306838e-06,
|
|
"loss": 0.2651,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.7283219438116932,
|
|
"grad_norm": 1.0002620329734284,
|
|
"learning_rate": 2.09893061279269e-06,
|
|
"loss": 0.2611,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.7289293849658315,
|
|
"grad_norm": 1.2055397902089622,
|
|
"learning_rate": 2.0903044971631854e-06,
|
|
"loss": 0.2498,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.7295368261199696,
|
|
"grad_norm": 0.9189460422388009,
|
|
"learning_rate": 2.0816914568052664e-06,
|
|
"loss": 0.2549,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.7301442672741079,
|
|
"grad_norm": 0.9278551686313573,
|
|
"learning_rate": 2.0730915304232692e-06,
|
|
"loss": 0.2753,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.730751708428246,
|
|
"grad_norm": 0.9259931376538301,
|
|
"learning_rate": 2.0645047566626057e-06,
|
|
"loss": 0.2429,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.7313591495823842,
|
|
"grad_norm": 1.0024167165948739,
|
|
"learning_rate": 2.055931174109579e-06,
|
|
"loss": 0.2923,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.7319665907365224,
|
|
"grad_norm": 0.8930847111145748,
|
|
"learning_rate": 2.0473708212912167e-06,
|
|
"loss": 0.2416,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.7325740318906606,
|
|
"grad_norm": 0.8864928181427251,
|
|
"learning_rate": 2.0388237366751005e-06,
|
|
"loss": 0.2538,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.7331814730447987,
|
|
"grad_norm": 0.8850193923988731,
|
|
"learning_rate": 2.030289958669181e-06,
|
|
"loss": 0.2649,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.733788914198937,
|
|
"grad_norm": 0.9739541510077735,
|
|
"learning_rate": 2.02176952562162e-06,
|
|
"loss": 0.2517,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.7343963553530751,
|
|
"grad_norm": 0.9799826210952297,
|
|
"learning_rate": 2.013262475820602e-06,
|
|
"loss": 0.2716,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.7350037965072134,
|
|
"grad_norm": 1.0600437940139478,
|
|
"learning_rate": 2.004768847494186e-06,
|
|
"loss": 0.2365,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.7356112376613515,
|
|
"grad_norm": 0.9207976884534176,
|
|
"learning_rate": 1.996288678810105e-06,
|
|
"loss": 0.2632,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.7362186788154897,
|
|
"grad_norm": 1.0260990612904581,
|
|
"learning_rate": 1.987822007875617e-06,
|
|
"loss": 0.2675,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.736826119969628,
|
|
"grad_norm": 1.0643126752862775,
|
|
"learning_rate": 1.979368872737319e-06,
|
|
"loss": 0.2282,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.7374335611237661,
|
|
"grad_norm": 1.002822192943713,
|
|
"learning_rate": 1.9709293113809876e-06,
|
|
"loss": 0.237,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.7380410022779044,
|
|
"grad_norm": 0.8146421380435214,
|
|
"learning_rate": 1.962503361731403e-06,
|
|
"loss": 0.2347,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.7386484434320425,
|
|
"grad_norm": 1.44451984462559,
|
|
"learning_rate": 1.954091061652172e-06,
|
|
"loss": 0.249,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.7392558845861807,
|
|
"grad_norm": 1.0048037676428558,
|
|
"learning_rate": 1.945692448945574e-06,
|
|
"loss": 0.2684,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.7398633257403189,
|
|
"grad_norm": 0.9134185361949188,
|
|
"learning_rate": 1.9373075613523728e-06,
|
|
"loss": 0.269,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.7404707668944571,
|
|
"grad_norm": 1.3119909261331324,
|
|
"learning_rate": 1.928936436551661e-06,
|
|
"loss": 0.2422,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.7410782080485953,
|
|
"grad_norm": 0.8520218708347329,
|
|
"learning_rate": 1.920579112160685e-06,
|
|
"loss": 0.2199,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.7416856492027335,
|
|
"grad_norm": 2.6495798687566086,
|
|
"learning_rate": 1.912235625734676e-06,
|
|
"loss": 0.2854,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.7422930903568716,
|
|
"grad_norm": 1.6880210137763603,
|
|
"learning_rate": 1.903906014766681e-06,
|
|
"loss": 0.2761,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.7429005315110099,
|
|
"grad_norm": 0.9375069285048585,
|
|
"learning_rate": 1.8955903166873924e-06,
|
|
"loss": 0.25,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.743507972665148,
|
|
"grad_norm": 1.239196974591738,
|
|
"learning_rate": 1.8872885688649879e-06,
|
|
"loss": 0.2876,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.7441154138192863,
|
|
"grad_norm": 0.9780811220285901,
|
|
"learning_rate": 1.8790008086049534e-06,
|
|
"loss": 0.255,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.7447228549734245,
|
|
"grad_norm": 0.9109861670066071,
|
|
"learning_rate": 1.8707270731499223e-06,
|
|
"loss": 0.2401,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.7453302961275626,
|
|
"grad_norm": 0.9572237400053779,
|
|
"learning_rate": 1.862467399679499e-06,
|
|
"loss": 0.2855,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 0.7459377372817009,
|
|
"grad_norm": 0.9835745008775473,
|
|
"learning_rate": 1.854221825310103e-06,
|
|
"loss": 0.2376,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.746545178435839,
|
|
"grad_norm": 0.9473916357579606,
|
|
"learning_rate": 1.8459903870947954e-06,
|
|
"loss": 0.277,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 0.7471526195899773,
|
|
"grad_norm": 0.8905602752123002,
|
|
"learning_rate": 1.8377731220231144e-06,
|
|
"loss": 0.2506,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.7477600607441154,
|
|
"grad_norm": 0.9604834177033152,
|
|
"learning_rate": 1.829570067020906e-06,
|
|
"loss": 0.2448,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 0.7483675018982536,
|
|
"grad_norm": 0.8845596702356572,
|
|
"learning_rate": 1.8213812589501611e-06,
|
|
"loss": 0.2547,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.7489749430523918,
|
|
"grad_norm": 0.9925482849261942,
|
|
"learning_rate": 1.813206734608851e-06,
|
|
"loss": 0.2603,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 0.74958238420653,
|
|
"grad_norm": 1.0974123543068024,
|
|
"learning_rate": 1.8050465307307602e-06,
|
|
"loss": 0.2461,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.7501898253606681,
|
|
"grad_norm": 0.9382971676426907,
|
|
"learning_rate": 1.7969006839853227e-06,
|
|
"loss": 0.2226,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.7507972665148064,
|
|
"grad_norm": 1.6609571035124198,
|
|
"learning_rate": 1.78876923097745e-06,
|
|
"loss": 0.2553,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.7514047076689445,
|
|
"grad_norm": 1.0951347816044532,
|
|
"learning_rate": 1.7806522082473809e-06,
|
|
"loss": 0.2549,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 0.7520121488230828,
|
|
"grad_norm": 0.9563113436198466,
|
|
"learning_rate": 1.7725496522704998e-06,
|
|
"loss": 0.2582,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.752619589977221,
|
|
"grad_norm": 0.9531971950924529,
|
|
"learning_rate": 1.7644615994571934e-06,
|
|
"loss": 0.2509,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 0.7532270311313591,
|
|
"grad_norm": 1.0124203802056453,
|
|
"learning_rate": 1.7563880861526656e-06,
|
|
"loss": 0.2444,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.7538344722854974,
|
|
"grad_norm": 0.9577538726500576,
|
|
"learning_rate": 1.748329148636787e-06,
|
|
"loss": 0.2236,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 0.7544419134396355,
|
|
"grad_norm": 0.9178846740365786,
|
|
"learning_rate": 1.7402848231239317e-06,
|
|
"loss": 0.2544,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.7550493545937738,
|
|
"grad_norm": 0.9382975967726378,
|
|
"learning_rate": 1.73225514576281e-06,
|
|
"loss": 0.2665,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 0.7556567957479119,
|
|
"grad_norm": 1.3363746570955906,
|
|
"learning_rate": 1.7242401526363095e-06,
|
|
"loss": 0.2745,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.7562642369020501,
|
|
"grad_norm": 1.3353177833317331,
|
|
"learning_rate": 1.7162398797613284e-06,
|
|
"loss": 0.251,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.7568716780561883,
|
|
"grad_norm": 1.0235724081052306,
|
|
"learning_rate": 1.70825436308862e-06,
|
|
"loss": 0.2699,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.7574791192103265,
|
|
"grad_norm": 0.9858229899690142,
|
|
"learning_rate": 1.7002836385026234e-06,
|
|
"loss": 0.2429,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 0.7580865603644646,
|
|
"grad_norm": 1.1427480361449285,
|
|
"learning_rate": 1.692327741821312e-06,
|
|
"loss": 0.2733,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.7586940015186029,
|
|
"grad_norm": 0.9835216641075001,
|
|
"learning_rate": 1.6843867087960252e-06,
|
|
"loss": 0.2671,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 0.7593014426727411,
|
|
"grad_norm": 0.9784659480769877,
|
|
"learning_rate": 1.676460575111306e-06,
|
|
"loss": 0.2515,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.7599088838268793,
|
|
"grad_norm": 0.9481282295943736,
|
|
"learning_rate": 1.6685493763847515e-06,
|
|
"loss": 0.259,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 0.7605163249810175,
|
|
"grad_norm": 1.0452063539012255,
|
|
"learning_rate": 1.6606531481668364e-06,
|
|
"loss": 0.2633,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.7611237661351556,
|
|
"grad_norm": 0.9745231850326872,
|
|
"learning_rate": 1.6527719259407743e-06,
|
|
"loss": 0.249,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 0.7617312072892939,
|
|
"grad_norm": 1.3873562933934764,
|
|
"learning_rate": 1.6449057451223354e-06,
|
|
"loss": 0.253,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.762338648443432,
|
|
"grad_norm": 1.1512772469160202,
|
|
"learning_rate": 1.6370546410597066e-06,
|
|
"loss": 0.2799,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.7629460895975703,
|
|
"grad_norm": 1.0250344913669225,
|
|
"learning_rate": 1.6292186490333172e-06,
|
|
"loss": 0.265,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.7635535307517084,
|
|
"grad_norm": 1.1545097210078017,
|
|
"learning_rate": 1.6213978042556938e-06,
|
|
"loss": 0.2319,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 0.7641609719058466,
|
|
"grad_norm": 0.9063231723472821,
|
|
"learning_rate": 1.6135921418712959e-06,
|
|
"loss": 0.2512,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.7647684130599848,
|
|
"grad_norm": 1.061377737003138,
|
|
"learning_rate": 1.6058016969563512e-06,
|
|
"loss": 0.2598,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 0.765375854214123,
|
|
"grad_norm": 0.9337491753620247,
|
|
"learning_rate": 1.5980265045187139e-06,
|
|
"loss": 0.2707,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.7659832953682612,
|
|
"grad_norm": 1.2607704019846233,
|
|
"learning_rate": 1.5902665994976896e-06,
|
|
"loss": 0.269,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 0.7665907365223994,
|
|
"grad_norm": 1.086835681982817,
|
|
"learning_rate": 1.5825220167638945e-06,
|
|
"loss": 0.2215,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.7671981776765376,
|
|
"grad_norm": 1.0948737291989328,
|
|
"learning_rate": 1.5747927911190858e-06,
|
|
"loss": 0.2713,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 0.7678056188306758,
|
|
"grad_norm": 1.102976831652949,
|
|
"learning_rate": 1.567078957296016e-06,
|
|
"loss": 0.266,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.768413059984814,
|
|
"grad_norm": 1.046644859411465,
|
|
"learning_rate": 1.5593805499582659e-06,
|
|
"loss": 0.2365,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.7690205011389522,
|
|
"grad_norm": 0.8613582389098838,
|
|
"learning_rate": 1.5516976037000941e-06,
|
|
"loss": 0.2188,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 0.7696279422930904,
|
|
"grad_norm": 1.7227164484553419,
|
|
"learning_rate": 1.544030153046291e-06,
|
|
"loss": 0.2567,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 0.7702353834472285,
|
|
"grad_norm": 2.92880493600265,
|
|
"learning_rate": 1.5363782324520033e-06,
|
|
"loss": 0.2803,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 0.7708428246013668,
|
|
"grad_norm": 1.0122911998142148,
|
|
"learning_rate": 1.528741876302598e-06,
|
|
"loss": 0.2772,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 0.7714502657555049,
|
|
"grad_norm": 1.3980473384689627,
|
|
"learning_rate": 1.5211211189134955e-06,
|
|
"loss": 0.2478,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.7720577069096431,
|
|
"grad_norm": 0.8508504199116661,
|
|
"learning_rate": 1.5135159945300232e-06,
|
|
"loss": 0.2401,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 0.7726651480637813,
|
|
"grad_norm": 0.9969962158012547,
|
|
"learning_rate": 1.5059265373272574e-06,
|
|
"loss": 0.2617,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 0.7732725892179195,
|
|
"grad_norm": 0.964226927135635,
|
|
"learning_rate": 1.4983527814098736e-06,
|
|
"loss": 0.267,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 0.7738800303720577,
|
|
"grad_norm": 1.0115140691606623,
|
|
"learning_rate": 1.4907947608119866e-06,
|
|
"loss": 0.2421,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 0.7744874715261959,
|
|
"grad_norm": 1.2895393161155704,
|
|
"learning_rate": 1.4832525094970007e-06,
|
|
"loss": 0.2452,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.7750949126803341,
|
|
"grad_norm": 0.8408524834828659,
|
|
"learning_rate": 1.475726061357463e-06,
|
|
"loss": 0.2166,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 0.7757023538344723,
|
|
"grad_norm": 1.333514308934424,
|
|
"learning_rate": 1.4682154502149025e-06,
|
|
"loss": 0.2415,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 0.7763097949886105,
|
|
"grad_norm": 1.036857242677457,
|
|
"learning_rate": 1.4607207098196851e-06,
|
|
"loss": 0.2569,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 0.7769172361427487,
|
|
"grad_norm": 7.103965440866741,
|
|
"learning_rate": 1.4532418738508525e-06,
|
|
"loss": 0.2648,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 0.7775246772968869,
|
|
"grad_norm": 0.8400952986765654,
|
|
"learning_rate": 1.4457789759159813e-06,
|
|
"loss": 0.2018,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.778132118451025,
|
|
"grad_norm": 1.4757073564314478,
|
|
"learning_rate": 1.4383320495510267e-06,
|
|
"loss": 0.2616,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 0.7787395596051633,
|
|
"grad_norm": 1.004482929976758,
|
|
"learning_rate": 1.430901128220174e-06,
|
|
"loss": 0.2529,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 0.7793470007593014,
|
|
"grad_norm": 1.0139377829258103,
|
|
"learning_rate": 1.4234862453156839e-06,
|
|
"loss": 0.2756,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 0.7799544419134397,
|
|
"grad_norm": 1.0100199779353403,
|
|
"learning_rate": 1.4160874341577447e-06,
|
|
"loss": 0.2484,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 0.7805618830675778,
|
|
"grad_norm": 1.1168401047776593,
|
|
"learning_rate": 1.4087047279943267e-06,
|
|
"loss": 0.2687,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.781169324221716,
|
|
"grad_norm": 0.9503234909845282,
|
|
"learning_rate": 1.4013381600010278e-06,
|
|
"loss": 0.2563,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 0.7817767653758542,
|
|
"grad_norm": 1.0368290840258114,
|
|
"learning_rate": 1.3939877632809279e-06,
|
|
"loss": 0.2866,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 0.7823842065299924,
|
|
"grad_norm": 1.0086813795279805,
|
|
"learning_rate": 1.3866535708644335e-06,
|
|
"loss": 0.2418,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 0.7829916476841307,
|
|
"grad_norm": 1.1754106526081984,
|
|
"learning_rate": 1.3793356157091387e-06,
|
|
"loss": 0.2582,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 0.7835990888382688,
|
|
"grad_norm": 0.9640662064683282,
|
|
"learning_rate": 1.3720339306996666e-06,
|
|
"loss": 0.2834,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.784206529992407,
|
|
"grad_norm": 1.741496452010621,
|
|
"learning_rate": 1.3647485486475376e-06,
|
|
"loss": 0.2374,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 0.7848139711465452,
|
|
"grad_norm": 1.0182189209813342,
|
|
"learning_rate": 1.3574795022910014e-06,
|
|
"loss": 0.2531,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 0.7854214123006834,
|
|
"grad_norm": 0.9760934213660039,
|
|
"learning_rate": 1.3502268242949025e-06,
|
|
"loss": 0.2575,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 0.7860288534548215,
|
|
"grad_norm": 1.8383703679855188,
|
|
"learning_rate": 1.3429905472505344e-06,
|
|
"loss": 0.2383,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 0.7866362946089598,
|
|
"grad_norm": 0.9502317083781607,
|
|
"learning_rate": 1.3357707036754875e-06,
|
|
"loss": 0.2585,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.7872437357630979,
|
|
"grad_norm": 0.9297333282490423,
|
|
"learning_rate": 1.3285673260135073e-06,
|
|
"loss": 0.2452,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 0.7878511769172362,
|
|
"grad_norm": 0.9116623980444865,
|
|
"learning_rate": 1.321380446634342e-06,
|
|
"loss": 0.2514,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 0.7884586180713743,
|
|
"grad_norm": 1.0165136386704785,
|
|
"learning_rate": 1.314210097833607e-06,
|
|
"loss": 0.2698,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 0.7890660592255125,
|
|
"grad_norm": 1.0097975118483586,
|
|
"learning_rate": 1.3070563118326295e-06,
|
|
"loss": 0.2623,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 0.7896735003796507,
|
|
"grad_norm": 0.9511823883591485,
|
|
"learning_rate": 1.2999191207783129e-06,
|
|
"loss": 0.2227,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.7902809415337889,
|
|
"grad_norm": 1.083790995951702,
|
|
"learning_rate": 1.2927985567429868e-06,
|
|
"loss": 0.2386,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 0.7908883826879272,
|
|
"grad_norm": 0.9289196009158714,
|
|
"learning_rate": 1.2856946517242608e-06,
|
|
"loss": 0.2299,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 0.7914958238420653,
|
|
"grad_norm": 0.9764047061340886,
|
|
"learning_rate": 1.27860743764489e-06,
|
|
"loss": 0.2536,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 0.7921032649962035,
|
|
"grad_norm": 1.066452400553731,
|
|
"learning_rate": 1.2715369463526173e-06,
|
|
"loss": 0.2485,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 0.7927107061503417,
|
|
"grad_norm": 0.8830887915707148,
|
|
"learning_rate": 1.2644832096200498e-06,
|
|
"loss": 0.241,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.7933181473044799,
|
|
"grad_norm": 0.8852127517971522,
|
|
"learning_rate": 1.257446259144494e-06,
|
|
"loss": 0.2236,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 0.793925588458618,
|
|
"grad_norm": 0.9428821852573767,
|
|
"learning_rate": 1.2504261265478324e-06,
|
|
"loss": 0.2552,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 0.7945330296127563,
|
|
"grad_norm": 1.0320142315688623,
|
|
"learning_rate": 1.2434228433763657e-06,
|
|
"loss": 0.2469,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 0.7951404707668944,
|
|
"grad_norm": 1.0170174001133827,
|
|
"learning_rate": 1.2364364411006841e-06,
|
|
"loss": 0.2437,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 0.7957479119210327,
|
|
"grad_norm": 1.0560205729362242,
|
|
"learning_rate": 1.2294669511155193e-06,
|
|
"loss": 0.2327,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.7963553530751708,
|
|
"grad_norm": 1.239053820039574,
|
|
"learning_rate": 1.2225144047396015e-06,
|
|
"loss": 0.2627,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 0.796962794229309,
|
|
"grad_norm": 1.0988583237532765,
|
|
"learning_rate": 1.215578833215526e-06,
|
|
"loss": 0.262,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 0.7975702353834472,
|
|
"grad_norm": 0.867925390100386,
|
|
"learning_rate": 1.2086602677096033e-06,
|
|
"loss": 0.2416,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 0.7981776765375854,
|
|
"grad_norm": 1.2397806394064825,
|
|
"learning_rate": 1.201758739311728e-06,
|
|
"loss": 0.2478,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 0.7987851176917237,
|
|
"grad_norm": 1.7686159599994773,
|
|
"learning_rate": 1.1948742790352342e-06,
|
|
"loss": 0.2663,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.7993925588458618,
|
|
"grad_norm": 1.23788413067218,
|
|
"learning_rate": 1.1880069178167586e-06,
|
|
"loss": 0.2271,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.899946412274731,
|
|
"learning_rate": 1.1811566865160961e-06,
|
|
"loss": 0.222,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 0.8006074411541382,
|
|
"grad_norm": 0.87270457464745,
|
|
"learning_rate": 1.1743236159160654e-06,
|
|
"loss": 0.2592,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 0.8012148823082764,
|
|
"grad_norm": 1.3219310442302556,
|
|
"learning_rate": 1.167507736722377e-06,
|
|
"loss": 0.266,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 0.8018223234624146,
|
|
"grad_norm": 0.9267745860014674,
|
|
"learning_rate": 1.1607090795634802e-06,
|
|
"loss": 0.249,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.8024297646165528,
|
|
"grad_norm": 1.145281359593592,
|
|
"learning_rate": 1.15392767499044e-06,
|
|
"loss": 0.273,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 0.8030372057706909,
|
|
"grad_norm": 0.9943238830649733,
|
|
"learning_rate": 1.1471635534767877e-06,
|
|
"loss": 0.2713,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 0.8036446469248292,
|
|
"grad_norm": 0.8925102247035651,
|
|
"learning_rate": 1.1404167454183957e-06,
|
|
"loss": 0.2509,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 0.8042520880789673,
|
|
"grad_norm": 2.8209341271672557,
|
|
"learning_rate": 1.133687281133331e-06,
|
|
"loss": 0.2414,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 0.8048595292331056,
|
|
"grad_norm": 0.8913737685748099,
|
|
"learning_rate": 1.1269751908617277e-06,
|
|
"loss": 0.2382,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.8054669703872437,
|
|
"grad_norm": 0.8955833156392411,
|
|
"learning_rate": 1.1202805047656406e-06,
|
|
"loss": 0.2336,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 0.8060744115413819,
|
|
"grad_norm": 1.1427272635049914,
|
|
"learning_rate": 1.113603252928917e-06,
|
|
"loss": 0.2576,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 0.8066818526955202,
|
|
"grad_norm": 0.9867069146988969,
|
|
"learning_rate": 1.1069434653570633e-06,
|
|
"loss": 0.2703,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 0.8072892938496583,
|
|
"grad_norm": 1.0341115309067341,
|
|
"learning_rate": 1.1003011719771046e-06,
|
|
"loss": 0.251,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 0.8078967350037966,
|
|
"grad_norm": 0.941968386464464,
|
|
"learning_rate": 1.0936764026374547e-06,
|
|
"loss": 0.2523,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.8085041761579347,
|
|
"grad_norm": 0.9109334482631996,
|
|
"learning_rate": 1.0870691871077738e-06,
|
|
"loss": 0.2573,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 0.8091116173120729,
|
|
"grad_norm": 1.1679903287794757,
|
|
"learning_rate": 1.0804795550788473e-06,
|
|
"loss": 0.2727,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 0.8097190584662111,
|
|
"grad_norm": 1.0262961821675425,
|
|
"learning_rate": 1.073907536162443e-06,
|
|
"loss": 0.2499,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 0.8103264996203493,
|
|
"grad_norm": 0.8638969582311489,
|
|
"learning_rate": 1.0673531598911824e-06,
|
|
"loss": 0.2077,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 0.8109339407744874,
|
|
"grad_norm": 1.065445241867707,
|
|
"learning_rate": 1.0608164557184042e-06,
|
|
"loss": 0.2733,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.8115413819286257,
|
|
"grad_norm": 1.615302331483808,
|
|
"learning_rate": 1.0542974530180327e-06,
|
|
"loss": 0.2712,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 0.8121488230827638,
|
|
"grad_norm": 0.9558232094508515,
|
|
"learning_rate": 1.0477961810844517e-06,
|
|
"loss": 0.281,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 0.8127562642369021,
|
|
"grad_norm": 1.0153735128244517,
|
|
"learning_rate": 1.0413126691323667e-06,
|
|
"loss": 0.2521,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 0.8133637053910402,
|
|
"grad_norm": 0.9566927051256368,
|
|
"learning_rate": 1.0348469462966753e-06,
|
|
"loss": 0.2869,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 0.8139711465451784,
|
|
"grad_norm": 1.2528160410336169,
|
|
"learning_rate": 1.0283990416323336e-06,
|
|
"loss": 0.2747,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.8145785876993167,
|
|
"grad_norm": 0.8811463059194917,
|
|
"learning_rate": 1.0219689841142343e-06,
|
|
"loss": 0.2071,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 0.8151860288534548,
|
|
"grad_norm": 1.9173571112957752,
|
|
"learning_rate": 1.0155568026370637e-06,
|
|
"loss": 0.2345,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 0.8157934700075931,
|
|
"grad_norm": 1.0402835209362526,
|
|
"learning_rate": 1.0091625260151827e-06,
|
|
"loss": 0.2435,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 0.8164009111617312,
|
|
"grad_norm": 1.6822710586636964,
|
|
"learning_rate": 1.0027861829824953e-06,
|
|
"loss": 0.287,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 0.8170083523158694,
|
|
"grad_norm": 1.397843369835043,
|
|
"learning_rate": 9.964278021923107e-07,
|
|
"loss": 0.2605,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.8176157934700076,
|
|
"grad_norm": 1.123716581875761,
|
|
"learning_rate": 9.900874122172294e-07,
|
|
"loss": 0.2647,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 0.8182232346241458,
|
|
"grad_norm": 1.0901754083400064,
|
|
"learning_rate": 9.83765041548998e-07,
|
|
"loss": 0.2707,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 0.818830675778284,
|
|
"grad_norm": 0.9517248980182025,
|
|
"learning_rate": 9.774607185984004e-07,
|
|
"loss": 0.2515,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 0.8194381169324222,
|
|
"grad_norm": 0.93504230650595,
|
|
"learning_rate": 9.711744716951093e-07,
|
|
"loss": 0.241,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 0.8200455580865603,
|
|
"grad_norm": 1.0424392516819492,
|
|
"learning_rate": 9.649063290875771e-07,
|
|
"loss": 0.2197,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.8206529992406986,
|
|
"grad_norm": 2.171189768685288,
|
|
"learning_rate": 9.586563189428954e-07,
|
|
"loss": 0.2367,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 0.8212604403948367,
|
|
"grad_norm": 0.9333141948132236,
|
|
"learning_rate": 9.524244693466773e-07,
|
|
"loss": 0.2391,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 0.8218678815489749,
|
|
"grad_norm": 0.986280542191797,
|
|
"learning_rate": 9.462108083029287e-07,
|
|
"loss": 0.247,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 0.8224753227031132,
|
|
"grad_norm": 1.1077079150850047,
|
|
"learning_rate": 9.400153637339182e-07,
|
|
"loss": 0.2908,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 0.8230827638572513,
|
|
"grad_norm": 1.3469195622589663,
|
|
"learning_rate": 9.338381634800597e-07,
|
|
"loss": 0.2264,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.8236902050113896,
|
|
"grad_norm": 2.3223661832113476,
|
|
"learning_rate": 9.276792352997782e-07,
|
|
"loss": 0.2334,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 0.8242976461655277,
|
|
"grad_norm": 0.965162694370609,
|
|
"learning_rate": 9.215386068693927e-07,
|
|
"loss": 0.2554,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 0.8249050873196659,
|
|
"grad_norm": 1.1093572084109473,
|
|
"learning_rate": 9.154163057829879e-07,
|
|
"loss": 0.2328,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 0.8255125284738041,
|
|
"grad_norm": 0.9658006597774278,
|
|
"learning_rate": 9.093123595522929e-07,
|
|
"loss": 0.2641,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 0.8261199696279423,
|
|
"grad_norm": 1.1141024465330946,
|
|
"learning_rate": 9.032267956065516e-07,
|
|
"loss": 0.2168,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.8267274107820805,
|
|
"grad_norm": 1.085834176055846,
|
|
"learning_rate": 8.971596412924067e-07,
|
|
"loss": 0.2665,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 0.8273348519362187,
|
|
"grad_norm": 1.0277054005618411,
|
|
"learning_rate": 8.911109238737748e-07,
|
|
"loss": 0.2654,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 0.8279422930903568,
|
|
"grad_norm": 1.1286512062535323,
|
|
"learning_rate": 8.850806705317183e-07,
|
|
"loss": 0.2572,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 0.8285497342444951,
|
|
"grad_norm": 0.9881387168493946,
|
|
"learning_rate": 8.790689083643328e-07,
|
|
"loss": 0.2762,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 0.8291571753986332,
|
|
"grad_norm": 1.7913780275956543,
|
|
"learning_rate": 8.730756643866157e-07,
|
|
"loss": 0.2728,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.8297646165527715,
|
|
"grad_norm": 1.0438179673664785,
|
|
"learning_rate": 8.671009655303531e-07,
|
|
"loss": 0.2876,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 0.8303720577069097,
|
|
"grad_norm": 1.1157108355581231,
|
|
"learning_rate": 8.611448386439936e-07,
|
|
"loss": 0.2582,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 0.8309794988610478,
|
|
"grad_norm": 1.5568021179946305,
|
|
"learning_rate": 8.552073104925296e-07,
|
|
"loss": 0.2428,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 0.8315869400151861,
|
|
"grad_norm": 1.0699739124750929,
|
|
"learning_rate": 8.492884077573749e-07,
|
|
"loss": 0.2568,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 0.8321943811693242,
|
|
"grad_norm": 0.7926743610930811,
|
|
"learning_rate": 8.433881570362484e-07,
|
|
"loss": 0.2176,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.8328018223234624,
|
|
"grad_norm": 1.225524035303792,
|
|
"learning_rate": 8.375065848430508e-07,
|
|
"loss": 0.274,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 0.8334092634776006,
|
|
"grad_norm": 0.9030010611168635,
|
|
"learning_rate": 8.316437176077491e-07,
|
|
"loss": 0.2649,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 0.8340167046317388,
|
|
"grad_norm": 0.8628776555924657,
|
|
"learning_rate": 8.257995816762559e-07,
|
|
"loss": 0.238,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 0.834624145785877,
|
|
"grad_norm": 0.9519929470187486,
|
|
"learning_rate": 8.199742033103091e-07,
|
|
"loss": 0.22,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 0.8352315869400152,
|
|
"grad_norm": 0.9316429752072123,
|
|
"learning_rate": 8.141676086873574e-07,
|
|
"loss": 0.2523,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.8358390280941533,
|
|
"grad_norm": 1.5230699643150925,
|
|
"learning_rate": 8.083798239004408e-07,
|
|
"loss": 0.2601,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 0.8364464692482916,
|
|
"grad_norm": 1.0766184599801747,
|
|
"learning_rate": 8.026108749580758e-07,
|
|
"loss": 0.2538,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 0.8370539104024297,
|
|
"grad_norm": 2.320789746346856,
|
|
"learning_rate": 7.968607877841333e-07,
|
|
"loss": 0.2844,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 0.837661351556568,
|
|
"grad_norm": 0.8792983215848907,
|
|
"learning_rate": 7.911295882177256e-07,
|
|
"loss": 0.236,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 0.8382687927107062,
|
|
"grad_norm": 1.6787572932160961,
|
|
"learning_rate": 7.854173020130906e-07,
|
|
"loss": 0.2403,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.8388762338648443,
|
|
"grad_norm": 0.9829023277192255,
|
|
"learning_rate": 7.79723954839477e-07,
|
|
"loss": 0.2287,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 0.8394836750189826,
|
|
"grad_norm": 1.7023786251385593,
|
|
"learning_rate": 7.740495722810271e-07,
|
|
"loss": 0.2435,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 0.8400911161731207,
|
|
"grad_norm": 0.9441864654332802,
|
|
"learning_rate": 7.683941798366578e-07,
|
|
"loss": 0.2906,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 0.840698557327259,
|
|
"grad_norm": 1.2877471436954095,
|
|
"learning_rate": 7.627578029199562e-07,
|
|
"loss": 0.2498,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 0.8413059984813971,
|
|
"grad_norm": 0.9842560572472016,
|
|
"learning_rate": 7.571404668590532e-07,
|
|
"loss": 0.2742,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.8419134396355353,
|
|
"grad_norm": 1.130499069526576,
|
|
"learning_rate": 7.515421968965242e-07,
|
|
"loss": 0.2285,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 0.8425208807896735,
|
|
"grad_norm": 0.9696805118746494,
|
|
"learning_rate": 7.459630181892608e-07,
|
|
"loss": 0.262,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 0.8431283219438117,
|
|
"grad_norm": 1.2144471408339115,
|
|
"learning_rate": 7.404029558083653e-07,
|
|
"loss": 0.2675,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 0.8437357630979498,
|
|
"grad_norm": 1.0391512842651585,
|
|
"learning_rate": 7.348620347390384e-07,
|
|
"loss": 0.2855,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 0.8443432042520881,
|
|
"grad_norm": 0.9328634429354515,
|
|
"learning_rate": 7.293402798804667e-07,
|
|
"loss": 0.2345,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.8449506454062262,
|
|
"grad_norm": 0.9723351719934986,
|
|
"learning_rate": 7.238377160457094e-07,
|
|
"loss": 0.2645,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 0.8455580865603645,
|
|
"grad_norm": 1.4147751762924923,
|
|
"learning_rate": 7.183543679615834e-07,
|
|
"loss": 0.2626,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 0.8461655277145027,
|
|
"grad_norm": 1.177367440845403,
|
|
"learning_rate": 7.128902602685617e-07,
|
|
"loss": 0.2709,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 0.8467729688686408,
|
|
"grad_norm": 1.0052526002592344,
|
|
"learning_rate": 7.074454175206524e-07,
|
|
"loss": 0.2464,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 0.8473804100227791,
|
|
"grad_norm": 0.977663222241663,
|
|
"learning_rate": 7.020198641852949e-07,
|
|
"loss": 0.2447,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.8479878511769172,
|
|
"grad_norm": 2.675337956474618,
|
|
"learning_rate": 6.966136246432492e-07,
|
|
"loss": 0.2647,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 0.8485952923310555,
|
|
"grad_norm": 0.9873856637443686,
|
|
"learning_rate": 6.912267231884817e-07,
|
|
"loss": 0.266,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 0.8492027334851936,
|
|
"grad_norm": 1.003763602141105,
|
|
"learning_rate": 6.858591840280627e-07,
|
|
"loss": 0.2891,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 0.8498101746393318,
|
|
"grad_norm": 0.917577761815308,
|
|
"learning_rate": 6.805110312820501e-07,
|
|
"loss": 0.2545,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 0.85041761579347,
|
|
"grad_norm": 0.8727291361789857,
|
|
"learning_rate": 6.751822889833926e-07,
|
|
"loss": 0.2522,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.8510250569476082,
|
|
"grad_norm": 0.9567515585475891,
|
|
"learning_rate": 6.698729810778065e-07,
|
|
"loss": 0.2411,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 0.8516324981017463,
|
|
"grad_norm": 0.8896081239860772,
|
|
"learning_rate": 6.645831314236817e-07,
|
|
"loss": 0.2484,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 0.8522399392558846,
|
|
"grad_norm": 1.1157181737389836,
|
|
"learning_rate": 6.593127637919633e-07,
|
|
"loss": 0.2852,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 0.8528473804100227,
|
|
"grad_norm": 0.9750689112401771,
|
|
"learning_rate": 6.540619018660555e-07,
|
|
"loss": 0.2512,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 0.853454821564161,
|
|
"grad_norm": 0.8887565653775829,
|
|
"learning_rate": 6.488305692417074e-07,
|
|
"loss": 0.2538,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.8540622627182992,
|
|
"grad_norm": 0.9449518854689749,
|
|
"learning_rate": 6.436187894269086e-07,
|
|
"loss": 0.2412,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 0.8546697038724373,
|
|
"grad_norm": 1.1922033805891283,
|
|
"learning_rate": 6.384265858417877e-07,
|
|
"loss": 0.2618,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 0.8552771450265756,
|
|
"grad_norm": 0.9056621667680124,
|
|
"learning_rate": 6.332539818184985e-07,
|
|
"loss": 0.2363,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 0.8558845861807137,
|
|
"grad_norm": 1.0040516816367477,
|
|
"learning_rate": 6.281010006011256e-07,
|
|
"loss": 0.2511,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 0.856492027334852,
|
|
"grad_norm": 0.9636326863778567,
|
|
"learning_rate": 6.229676653455719e-07,
|
|
"loss": 0.2861,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.8570994684889901,
|
|
"grad_norm": 1.267651631290491,
|
|
"learning_rate": 6.178539991194599e-07,
|
|
"loss": 0.2562,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 0.8577069096431283,
|
|
"grad_norm": 1.125217028031925,
|
|
"learning_rate": 6.127600249020216e-07,
|
|
"loss": 0.2394,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 0.8583143507972665,
|
|
"grad_norm": 1.16266685601348,
|
|
"learning_rate": 6.076857655840024e-07,
|
|
"loss": 0.2844,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 0.8589217919514047,
|
|
"grad_norm": 0.9618647822548747,
|
|
"learning_rate": 6.026312439675553e-07,
|
|
"loss": 0.2221,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 0.8595292331055429,
|
|
"grad_norm": 1.0967811765567483,
|
|
"learning_rate": 5.975964827661346e-07,
|
|
"loss": 0.2839,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.8601366742596811,
|
|
"grad_norm": 1.2542292575403695,
|
|
"learning_rate": 5.925815046044026e-07,
|
|
"loss": 0.283,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 0.8607441154138192,
|
|
"grad_norm": 0.9685662585305622,
|
|
"learning_rate": 5.875863320181175e-07,
|
|
"loss": 0.2386,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 0.8613515565679575,
|
|
"grad_norm": 1.2191630870814079,
|
|
"learning_rate": 5.826109874540409e-07,
|
|
"loss": 0.2672,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 0.8619589977220957,
|
|
"grad_norm": 0.9467425337398014,
|
|
"learning_rate": 5.776554932698325e-07,
|
|
"loss": 0.2645,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 0.8625664388762339,
|
|
"grad_norm": 3.223483400807002,
|
|
"learning_rate": 5.727198717339511e-07,
|
|
"loss": 0.2326,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.8631738800303721,
|
|
"grad_norm": 0.9512745639027146,
|
|
"learning_rate": 5.678041450255512e-07,
|
|
"loss": 0.2629,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 0.8637813211845102,
|
|
"grad_norm": 0.9320452148866075,
|
|
"learning_rate": 5.6290833523439e-07,
|
|
"loss": 0.2641,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 0.8643887623386485,
|
|
"grad_norm": 1.0278008843267301,
|
|
"learning_rate": 5.58032464360721e-07,
|
|
"loss": 0.2803,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 0.8649962034927866,
|
|
"grad_norm": 0.908323450955481,
|
|
"learning_rate": 5.531765543152002e-07,
|
|
"loss": 0.2356,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 0.8656036446469249,
|
|
"grad_norm": 0.8303574957373083,
|
|
"learning_rate": 5.483406269187869e-07,
|
|
"loss": 0.2189,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.866211085801063,
|
|
"grad_norm": 1.1970452420325983,
|
|
"learning_rate": 5.435247039026398e-07,
|
|
"loss": 0.2094,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 0.8668185269552012,
|
|
"grad_norm": 1.2257111130524938,
|
|
"learning_rate": 5.387288069080298e-07,
|
|
"loss": 0.231,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 0.8674259681093394,
|
|
"grad_norm": 0.9627292722754438,
|
|
"learning_rate": 5.33952957486234e-07,
|
|
"loss": 0.2333,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 0.8680334092634776,
|
|
"grad_norm": 1.032967615425608,
|
|
"learning_rate": 5.291971770984428e-07,
|
|
"loss": 0.2958,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 0.8686408504176157,
|
|
"grad_norm": 1.139677124417918,
|
|
"learning_rate": 5.244614871156612e-07,
|
|
"loss": 0.2405,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.869248291571754,
|
|
"grad_norm": 1.2580527412823377,
|
|
"learning_rate": 5.197459088186163e-07,
|
|
"loss": 0.221,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 0.8698557327258922,
|
|
"grad_norm": 1.1944284727855752,
|
|
"learning_rate": 5.150504633976572e-07,
|
|
"loss": 0.2859,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 0.8704631738800304,
|
|
"grad_norm": 1.0957916763809294,
|
|
"learning_rate": 5.103751719526639e-07,
|
|
"loss": 0.2239,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 0.8710706150341686,
|
|
"grad_norm": 1.0470854505908578,
|
|
"learning_rate": 5.057200554929509e-07,
|
|
"loss": 0.2574,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 0.8716780561883067,
|
|
"grad_norm": 1.1296719722218975,
|
|
"learning_rate": 5.010851349371704e-07,
|
|
"loss": 0.2639,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.872285497342445,
|
|
"grad_norm": 2.711592251059139,
|
|
"learning_rate": 4.964704311132224e-07,
|
|
"loss": 0.2488,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 0.8728929384965831,
|
|
"grad_norm": 0.953048159062841,
|
|
"learning_rate": 4.918759647581578e-07,
|
|
"loss": 0.2581,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 0.8735003796507214,
|
|
"grad_norm": 1.0707876735381872,
|
|
"learning_rate": 4.873017565180871e-07,
|
|
"loss": 0.2578,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 0.8741078208048595,
|
|
"grad_norm": 0.9374479476013973,
|
|
"learning_rate": 4.827478269480895e-07,
|
|
"loss": 0.2405,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 0.8747152619589977,
|
|
"grad_norm": 1.0277390378554292,
|
|
"learning_rate": 4.782141965121129e-07,
|
|
"loss": 0.2701,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.8753227031131359,
|
|
"grad_norm": 1.4882213121058918,
|
|
"learning_rate": 4.7370088558289175e-07,
|
|
"loss": 0.2886,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 0.8759301442672741,
|
|
"grad_norm": 1.3548392131624356,
|
|
"learning_rate": 4.6920791444184934e-07,
|
|
"loss": 0.2471,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 0.8765375854214122,
|
|
"grad_norm": 1.0466228565297642,
|
|
"learning_rate": 4.647353032790086e-07,
|
|
"loss": 0.2414,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 0.8771450265755505,
|
|
"grad_norm": 0.8743868205121337,
|
|
"learning_rate": 4.602830721928997e-07,
|
|
"loss": 0.2079,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 0.8777524677296887,
|
|
"grad_norm": 1.0599925032639006,
|
|
"learning_rate": 4.558512411904731e-07,
|
|
"loss": 0.2949,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.8783599088838269,
|
|
"grad_norm": 1.4979260055048251,
|
|
"learning_rate": 4.5143983018700485e-07,
|
|
"loss": 0.249,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 0.8789673500379651,
|
|
"grad_norm": 0.9462464867555567,
|
|
"learning_rate": 4.4704885900601236e-07,
|
|
"loss": 0.2422,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 0.8795747911921032,
|
|
"grad_norm": 1.0122245637859872,
|
|
"learning_rate": 4.4267834737916295e-07,
|
|
"loss": 0.2516,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 0.8801822323462415,
|
|
"grad_norm": 1.2775093153721113,
|
|
"learning_rate": 4.3832831494618255e-07,
|
|
"loss": 0.2585,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 0.8807896735003796,
|
|
"grad_norm": 0.9907820157094275,
|
|
"learning_rate": 4.33998781254773e-07,
|
|
"loss": 0.2508,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.8813971146545179,
|
|
"grad_norm": 0.9761789386358818,
|
|
"learning_rate": 4.2968976576051703e-07,
|
|
"loss": 0.2848,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 0.882004555808656,
|
|
"grad_norm": 0.8788756221973065,
|
|
"learning_rate": 4.2540128782679934e-07,
|
|
"loss": 0.2185,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 0.8826119969627942,
|
|
"grad_norm": 0.9350228380136899,
|
|
"learning_rate": 4.211333667247125e-07,
|
|
"loss": 0.2464,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 0.8832194381169324,
|
|
"grad_norm": 0.9813301382441217,
|
|
"learning_rate": 4.1688602163297564e-07,
|
|
"loss": 0.2666,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 0.8838268792710706,
|
|
"grad_norm": 0.9810743433744146,
|
|
"learning_rate": 4.126592716378408e-07,
|
|
"loss": 0.2296,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.8844343204252088,
|
|
"grad_norm": 1.2038878027224096,
|
|
"learning_rate": 4.0845313573301736e-07,
|
|
"loss": 0.2682,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 0.885041761579347,
|
|
"grad_norm": 0.9884971893945054,
|
|
"learning_rate": 4.042676328195788e-07,
|
|
"loss": 0.2643,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 0.8856492027334852,
|
|
"grad_norm": 5.323233068234899,
|
|
"learning_rate": 4.001027817058789e-07,
|
|
"loss": 0.238,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 0.8862566438876234,
|
|
"grad_norm": 0.9225650218982654,
|
|
"learning_rate": 3.959586011074729e-07,
|
|
"loss": 0.2155,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 0.8868640850417616,
|
|
"grad_norm": 0.9528679131681773,
|
|
"learning_rate": 3.9183510964702463e-07,
|
|
"loss": 0.2418,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.8874715261958998,
|
|
"grad_norm": 1.1303434388021751,
|
|
"learning_rate": 3.8773232585422924e-07,
|
|
"loss": 0.2297,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 0.888078967350038,
|
|
"grad_norm": 0.9900217922322905,
|
|
"learning_rate": 3.836502681657289e-07,
|
|
"loss": 0.2462,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 0.8886864085041761,
|
|
"grad_norm": 0.887907349960081,
|
|
"learning_rate": 3.795889549250292e-07,
|
|
"loss": 0.2171,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 0.8892938496583144,
|
|
"grad_norm": 0.9248553866957503,
|
|
"learning_rate": 3.755484043824131e-07,
|
|
"loss": 0.2243,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 0.8899012908124525,
|
|
"grad_norm": 0.8912025953132797,
|
|
"learning_rate": 3.715286346948671e-07,
|
|
"loss": 0.2149,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.8905087319665907,
|
|
"grad_norm": 0.8751189326641786,
|
|
"learning_rate": 3.675296639259912e-07,
|
|
"loss": 0.228,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 0.8911161731207289,
|
|
"grad_norm": 1.0691558945787711,
|
|
"learning_rate": 3.6355151004592414e-07,
|
|
"loss": 0.2233,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 0.8917236142748671,
|
|
"grad_norm": 1.3028028349470695,
|
|
"learning_rate": 3.595941909312595e-07,
|
|
"loss": 0.2603,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 0.8923310554290053,
|
|
"grad_norm": 0.8896967704695612,
|
|
"learning_rate": 3.5565772436496336e-07,
|
|
"loss": 0.2269,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 0.8929384965831435,
|
|
"grad_norm": 0.907343561878061,
|
|
"learning_rate": 3.517421280363004e-07,
|
|
"loss": 0.2477,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.8935459377372817,
|
|
"grad_norm": 0.9527250045159997,
|
|
"learning_rate": 3.4784741954074884e-07,
|
|
"loss": 0.2645,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 0.8941533788914199,
|
|
"grad_norm": 0.9958249751109337,
|
|
"learning_rate": 3.439736163799251e-07,
|
|
"loss": 0.2331,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 0.8947608200455581,
|
|
"grad_norm": 1.2873604817141377,
|
|
"learning_rate": 3.4012073596150106e-07,
|
|
"loss": 0.235,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 0.8953682611996963,
|
|
"grad_norm": 0.8961669297469559,
|
|
"learning_rate": 3.362887955991301e-07,
|
|
"loss": 0.2408,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 0.8959757023538345,
|
|
"grad_norm": 1.051005440352167,
|
|
"learning_rate": 3.3247781251236623e-07,
|
|
"loss": 0.256,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.8965831435079726,
|
|
"grad_norm": 1.396804063195131,
|
|
"learning_rate": 3.2868780382658895e-07,
|
|
"loss": 0.2259,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 0.8971905846621109,
|
|
"grad_norm": 0.8551324108863454,
|
|
"learning_rate": 3.2491878657292643e-07,
|
|
"loss": 0.2552,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 0.897798025816249,
|
|
"grad_norm": 0.9274670411652475,
|
|
"learning_rate": 3.2117077768817395e-07,
|
|
"loss": 0.2271,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 0.8984054669703873,
|
|
"grad_norm": 1.2221587711171944,
|
|
"learning_rate": 3.174437940147268e-07,
|
|
"loss": 0.2447,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 0.8990129081245254,
|
|
"grad_norm": 6.662057062986324,
|
|
"learning_rate": 3.1373785230049356e-07,
|
|
"loss": 0.2718,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.8996203492786636,
|
|
"grad_norm": 2.162458231338751,
|
|
"learning_rate": 3.1005296919883354e-07,
|
|
"loss": 0.2563,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 0.9002277904328019,
|
|
"grad_norm": 1.1484595739033663,
|
|
"learning_rate": 3.0638916126846885e-07,
|
|
"loss": 0.2488,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 0.90083523158694,
|
|
"grad_norm": 0.9027654629735503,
|
|
"learning_rate": 3.0274644497342133e-07,
|
|
"loss": 0.2304,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 0.9014426727410783,
|
|
"grad_norm": 0.950573848140933,
|
|
"learning_rate": 2.991248366829291e-07,
|
|
"loss": 0.2141,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 0.9020501138952164,
|
|
"grad_norm": 0.8792954327784936,
|
|
"learning_rate": 2.955243526713808e-07,
|
|
"loss": 0.2382,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.9026575550493546,
|
|
"grad_norm": 1.1214719838180265,
|
|
"learning_rate": 2.91945009118238e-07,
|
|
"loss": 0.2527,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 0.9032649962034928,
|
|
"grad_norm": 1.1337122451080186,
|
|
"learning_rate": 2.883868221079628e-07,
|
|
"loss": 0.3125,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 0.903872437357631,
|
|
"grad_norm": 1.149501989804444,
|
|
"learning_rate": 2.848498076299483e-07,
|
|
"loss": 0.2788,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 0.9044798785117691,
|
|
"grad_norm": 0.9593512296584564,
|
|
"learning_rate": 2.813339815784416e-07,
|
|
"loss": 0.2439,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 0.9050873196659074,
|
|
"grad_norm": 1.0889375186520727,
|
|
"learning_rate": 2.7783935975247867e-07,
|
|
"loss": 0.2679,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.9056947608200455,
|
|
"grad_norm": 1.0266400030570082,
|
|
"learning_rate": 2.743659578558089e-07,
|
|
"loss": 0.2375,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 0.9063022019741838,
|
|
"grad_norm": 0.9071334642534141,
|
|
"learning_rate": 2.7091379149682683e-07,
|
|
"loss": 0.2293,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 0.9069096431283219,
|
|
"grad_norm": 1.2936790169599448,
|
|
"learning_rate": 2.6748287618849957e-07,
|
|
"loss": 0.2409,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 0.9075170842824601,
|
|
"grad_norm": 1.0584178177194592,
|
|
"learning_rate": 2.6407322734829763e-07,
|
|
"loss": 0.236,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 0.9081245254365984,
|
|
"grad_norm": 1.3313437851181316,
|
|
"learning_rate": 2.6068486029813154e-07,
|
|
"loss": 0.2356,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.9087319665907365,
|
|
"grad_norm": 0.9005677622751922,
|
|
"learning_rate": 2.573177902642726e-07,
|
|
"loss": 0.2245,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 0.9093394077448748,
|
|
"grad_norm": 0.9376710431209911,
|
|
"learning_rate": 2.539720323772926e-07,
|
|
"loss": 0.2416,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 0.9099468488990129,
|
|
"grad_norm": 0.9311283280877473,
|
|
"learning_rate": 2.506476016719922e-07,
|
|
"loss": 0.2341,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 0.9105542900531511,
|
|
"grad_norm": 1.1243831646626379,
|
|
"learning_rate": 2.473445130873353e-07,
|
|
"loss": 0.2628,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 0.9111617312072893,
|
|
"grad_norm": 0.9100015428632143,
|
|
"learning_rate": 2.440627814663804e-07,
|
|
"loss": 0.235,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.9117691723614275,
|
|
"grad_norm": 1.8437664997799614,
|
|
"learning_rate": 2.4080242155621327e-07,
|
|
"loss": 0.2469,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 0.9123766135155656,
|
|
"grad_norm": 1.0290473471963233,
|
|
"learning_rate": 2.3756344800788421e-07,
|
|
"loss": 0.2474,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 0.9129840546697039,
|
|
"grad_norm": 1.1477504753716588,
|
|
"learning_rate": 2.343458753763378e-07,
|
|
"loss": 0.2242,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 0.913591495823842,
|
|
"grad_norm": 1.1198944079500255,
|
|
"learning_rate": 2.3114971812034981e-07,
|
|
"loss": 0.2504,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 0.9141989369779803,
|
|
"grad_norm": 0.945076046858483,
|
|
"learning_rate": 2.2797499060246253e-07,
|
|
"loss": 0.2517,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.9148063781321184,
|
|
"grad_norm": 1.285167047982773,
|
|
"learning_rate": 2.2482170708892083e-07,
|
|
"loss": 0.2333,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 0.9154138192862566,
|
|
"grad_norm": 0.9188875249417393,
|
|
"learning_rate": 2.2168988174960382e-07,
|
|
"loss": 0.242,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 0.9160212604403949,
|
|
"grad_norm": 0.9392029956247224,
|
|
"learning_rate": 2.1857952865796616e-07,
|
|
"loss": 0.2494,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 0.916628701594533,
|
|
"grad_norm": 3.5960021463661223,
|
|
"learning_rate": 2.1549066179097355e-07,
|
|
"loss": 0.2581,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 0.9172361427486713,
|
|
"grad_norm": 1.4581742375551667,
|
|
"learning_rate": 2.124232950290367e-07,
|
|
"loss": 0.2536,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.9178435839028094,
|
|
"grad_norm": 1.0527162495034155,
|
|
"learning_rate": 2.0937744215595467e-07,
|
|
"loss": 0.2409,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 0.9184510250569476,
|
|
"grad_norm": 1.4110685445772864,
|
|
"learning_rate": 2.0635311685884675e-07,
|
|
"loss": 0.2095,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 0.9190584662110858,
|
|
"grad_norm": 1.213907408406235,
|
|
"learning_rate": 2.0335033272809612e-07,
|
|
"loss": 0.2757,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 0.919665907365224,
|
|
"grad_norm": 2.0561680152756114,
|
|
"learning_rate": 2.0036910325728521e-07,
|
|
"loss": 0.2397,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 0.9202733485193622,
|
|
"grad_norm": 1.0091633854522606,
|
|
"learning_rate": 1.9740944184313882e-07,
|
|
"loss": 0.256,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.9208807896735004,
|
|
"grad_norm": 1.374619992070271,
|
|
"learning_rate": 1.9447136178545766e-07,
|
|
"loss": 0.2351,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 0.9214882308276385,
|
|
"grad_norm": 1.0149607815681039,
|
|
"learning_rate": 1.9155487628706672e-07,
|
|
"loss": 0.2149,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 0.9220956719817768,
|
|
"grad_norm": 1.5286222741924442,
|
|
"learning_rate": 1.8865999845374794e-07,
|
|
"loss": 0.2401,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 0.9227031131359149,
|
|
"grad_norm": 1.1238029435165344,
|
|
"learning_rate": 1.857867412941883e-07,
|
|
"loss": 0.2259,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 0.9233105542900532,
|
|
"grad_norm": 1.047113679889672,
|
|
"learning_rate": 1.8293511771991624e-07,
|
|
"loss": 0.2562,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.9239179954441914,
|
|
"grad_norm": 1.1015346889794326,
|
|
"learning_rate": 1.8010514054524531e-07,
|
|
"loss": 0.2496,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 0.9245254365983295,
|
|
"grad_norm": 0.9348867843858392,
|
|
"learning_rate": 1.7729682248721848e-07,
|
|
"loss": 0.2193,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 0.9251328777524678,
|
|
"grad_norm": 0.900662481617006,
|
|
"learning_rate": 1.7451017616554822e-07,
|
|
"loss": 0.2346,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 0.9257403189066059,
|
|
"grad_norm": 1.0079629512704111,
|
|
"learning_rate": 1.7174521410256162e-07,
|
|
"loss": 0.2739,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 0.9263477600607442,
|
|
"grad_norm": 1.0034552226211848,
|
|
"learning_rate": 1.69001948723142e-07,
|
|
"loss": 0.2709,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.9269552012148823,
|
|
"grad_norm": 1.1773428284591294,
|
|
"learning_rate": 1.6628039235467686e-07,
|
|
"loss": 0.2472,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 0.9275626423690205,
|
|
"grad_norm": 1.1087538756550075,
|
|
"learning_rate": 1.6358055722699662e-07,
|
|
"loss": 0.2376,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 0.9281700835231587,
|
|
"grad_norm": 3.5302378698596972,
|
|
"learning_rate": 1.6090245547232707e-07,
|
|
"loss": 0.2445,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 0.9287775246772969,
|
|
"grad_norm": 0.9318045452754465,
|
|
"learning_rate": 1.5824609912522825e-07,
|
|
"loss": 0.2495,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 0.929384965831435,
|
|
"grad_norm": 0.9925458437861561,
|
|
"learning_rate": 1.5561150012254446e-07,
|
|
"loss": 0.252,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.9299924069855733,
|
|
"grad_norm": 0.9204194360128435,
|
|
"learning_rate": 1.5299867030334815e-07,
|
|
"loss": 0.2544,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 0.9305998481397114,
|
|
"grad_norm": 1.4389165535169934,
|
|
"learning_rate": 1.5040762140888843e-07,
|
|
"loss": 0.2509,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 0.9312072892938497,
|
|
"grad_norm": 0.9464143937114549,
|
|
"learning_rate": 1.4783836508253823e-07,
|
|
"loss": 0.219,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 0.9318147304479879,
|
|
"grad_norm": 1.1584265502532431,
|
|
"learning_rate": 1.4529091286973994e-07,
|
|
"loss": 0.2584,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 0.932422171602126,
|
|
"grad_norm": 1.0767653117954572,
|
|
"learning_rate": 1.4276527621795655e-07,
|
|
"loss": 0.2477,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.9330296127562643,
|
|
"grad_norm": 1.1174557743113676,
|
|
"learning_rate": 1.402614664766172e-07,
|
|
"loss": 0.2515,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 0.9336370539104024,
|
|
"grad_norm": 0.9313798735305144,
|
|
"learning_rate": 1.3777949489706898e-07,
|
|
"loss": 0.231,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 0.9342444950645407,
|
|
"grad_norm": 1.1379310451818712,
|
|
"learning_rate": 1.353193726325247e-07,
|
|
"loss": 0.2503,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 0.9348519362186788,
|
|
"grad_norm": 1.1585882440499968,
|
|
"learning_rate": 1.3288111073801235e-07,
|
|
"loss": 0.2784,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 0.935459377372817,
|
|
"grad_norm": 1.5930803179580344,
|
|
"learning_rate": 1.3046472017032685e-07,
|
|
"loss": 0.2418,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.9360668185269552,
|
|
"grad_norm": 0.9991292646052891,
|
|
"learning_rate": 1.280702117879795e-07,
|
|
"loss": 0.2397,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 0.9366742596810934,
|
|
"grad_norm": 1.212096857283085,
|
|
"learning_rate": 1.2569759635115086e-07,
|
|
"loss": 0.2582,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 0.9372817008352315,
|
|
"grad_norm": 1.1250193762265426,
|
|
"learning_rate": 1.2334688452164122e-07,
|
|
"loss": 0.2575,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 0.9378891419893698,
|
|
"grad_norm": 0.971115660382781,
|
|
"learning_rate": 1.210180868628219e-07,
|
|
"loss": 0.271,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 0.9384965831435079,
|
|
"grad_norm": 0.907500253470022,
|
|
"learning_rate": 1.1871121383958961e-07,
|
|
"loss": 0.2392,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.9391040242976462,
|
|
"grad_norm": 1.47219686578771,
|
|
"learning_rate": 1.1642627581831767e-07,
|
|
"loss": 0.2533,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 0.9397114654517844,
|
|
"grad_norm": 1.4887563913664645,
|
|
"learning_rate": 1.1416328306681046e-07,
|
|
"loss": 0.2665,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 0.9403189066059225,
|
|
"grad_norm": 0.9182957946443633,
|
|
"learning_rate": 1.1192224575425848e-07,
|
|
"loss": 0.2233,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 0.9409263477600608,
|
|
"grad_norm": 1.0661000364774975,
|
|
"learning_rate": 1.0970317395119001e-07,
|
|
"loss": 0.2722,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 0.9415337889141989,
|
|
"grad_norm": 1.156185857708016,
|
|
"learning_rate": 1.0750607762942622e-07,
|
|
"loss": 0.2374,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.9421412300683372,
|
|
"grad_norm": 1.0021679642199284,
|
|
"learning_rate": 1.0533096666203946e-07,
|
|
"loss": 0.247,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 0.9427486712224753,
|
|
"grad_norm": 1.062302865690974,
|
|
"learning_rate": 1.0317785082330555e-07,
|
|
"loss": 0.2415,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 0.9433561123766135,
|
|
"grad_norm": 0.8324437312272753,
|
|
"learning_rate": 1.0104673978866164e-07,
|
|
"loss": 0.2131,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 0.9439635535307517,
|
|
"grad_norm": 1.2649813678192605,
|
|
"learning_rate": 9.89376431346606e-08,
|
|
"loss": 0.276,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 0.9445709946848899,
|
|
"grad_norm": 1.622757976163991,
|
|
"learning_rate": 9.685057033892998e-08,
|
|
"loss": 0.2582,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.945178435839028,
|
|
"grad_norm": 1.765578442579649,
|
|
"learning_rate": 9.478553078013042e-08,
|
|
"loss": 0.2553,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 0.9457858769931663,
|
|
"grad_norm": 1.4608500363168406,
|
|
"learning_rate": 9.274253373791064e-08,
|
|
"loss": 0.2555,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 0.9463933181473044,
|
|
"grad_norm": 0.8888287744971497,
|
|
"learning_rate": 9.072158839286748e-08,
|
|
"loss": 0.2405,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 0.9470007593014427,
|
|
"grad_norm": 1.269488158961429,
|
|
"learning_rate": 8.872270382650372e-08,
|
|
"loss": 0.2397,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 0.9476082004555809,
|
|
"grad_norm": 0.9140050155362377,
|
|
"learning_rate": 8.674588902118919e-08,
|
|
"loss": 0.2581,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.948215641609719,
|
|
"grad_norm": 0.9350253085292872,
|
|
"learning_rate": 8.479115286011752e-08,
|
|
"loss": 0.2578,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 0.9488230827638573,
|
|
"grad_norm": 1.294156199026534,
|
|
"learning_rate": 8.285850412726837e-08,
|
|
"loss": 0.2768,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 0.9494305239179954,
|
|
"grad_norm": 0.8781441810000316,
|
|
"learning_rate": 8.094795150736745e-08,
|
|
"loss": 0.2124,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 0.9500379650721337,
|
|
"grad_norm": 0.8821255230738889,
|
|
"learning_rate": 7.905950358584768e-08,
|
|
"loss": 0.2358,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 0.9506454062262718,
|
|
"grad_norm": 1.08985926239788,
|
|
"learning_rate": 7.719316884880922e-08,
|
|
"loss": 0.2615,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.95125284738041,
|
|
"grad_norm": 0.8755195395367136,
|
|
"learning_rate": 7.534895568298395e-08,
|
|
"loss": 0.2352,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 0.9518602885345482,
|
|
"grad_norm": 0.9952147011406434,
|
|
"learning_rate": 7.352687237569489e-08,
|
|
"loss": 0.2557,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 0.9524677296886864,
|
|
"grad_norm": 1.1520050119158871,
|
|
"learning_rate": 7.172692711482022e-08,
|
|
"loss": 0.2156,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 0.9530751708428246,
|
|
"grad_norm": 1.0057754256396354,
|
|
"learning_rate": 6.994912798875875e-08,
|
|
"loss": 0.2465,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 0.9536826119969628,
|
|
"grad_norm": 1.0040861530461729,
|
|
"learning_rate": 6.819348298638839e-08,
|
|
"loss": 0.2816,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.9542900531511009,
|
|
"grad_norm": 1.0700694831197364,
|
|
"learning_rate": 6.6459999997035e-08,
|
|
"loss": 0.2412,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 0.9548974943052392,
|
|
"grad_norm": 1.038954346833618,
|
|
"learning_rate": 6.474868681043578e-08,
|
|
"loss": 0.2782,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 0.9555049354593774,
|
|
"grad_norm": 0.9247657016066214,
|
|
"learning_rate": 6.305955111670204e-08,
|
|
"loss": 0.241,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 0.9561123766135156,
|
|
"grad_norm": 1.2125088257095862,
|
|
"learning_rate": 6.13926005062876e-08,
|
|
"loss": 0.2506,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 0.9567198177676538,
|
|
"grad_norm": 2.8028781405959005,
|
|
"learning_rate": 5.974784246995214e-08,
|
|
"loss": 0.2117,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.9573272589217919,
|
|
"grad_norm": 1.2280962513812903,
|
|
"learning_rate": 5.8125284398730666e-08,
|
|
"loss": 0.2237,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 0.9579347000759302,
|
|
"grad_norm": 1.2300450579760327,
|
|
"learning_rate": 5.6524933583896326e-08,
|
|
"loss": 0.2126,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 0.9585421412300683,
|
|
"grad_norm": 2.4999481290325116,
|
|
"learning_rate": 5.4946797216931524e-08,
|
|
"loss": 0.2545,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 0.9591495823842066,
|
|
"grad_norm": 6.486220633951538,
|
|
"learning_rate": 5.339088238949186e-08,
|
|
"loss": 0.2354,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 0.9597570235383447,
|
|
"grad_norm": 0.9307011705790993,
|
|
"learning_rate": 5.185719609337836e-08,
|
|
"loss": 0.2342,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.9603644646924829,
|
|
"grad_norm": 0.9393824630850843,
|
|
"learning_rate": 5.034574522050251e-08,
|
|
"loss": 0.2467,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 0.9609719058466211,
|
|
"grad_norm": 1.4403224099920036,
|
|
"learning_rate": 4.885653656285627e-08,
|
|
"loss": 0.254,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 0.9615793470007593,
|
|
"grad_norm": 0.8733649961719668,
|
|
"learning_rate": 4.73895768124838e-08,
|
|
"loss": 0.2441,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 0.9621867881548974,
|
|
"grad_norm": 1.197435869062917,
|
|
"learning_rate": 4.5944872561448084e-08,
|
|
"loss": 0.2331,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 0.9627942293090357,
|
|
"grad_norm": 1.096594381356183,
|
|
"learning_rate": 4.45224303018027e-08,
|
|
"loss": 0.2402,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.9634016704631739,
|
|
"grad_norm": 1.0250873227147124,
|
|
"learning_rate": 4.3122256425563444e-08,
|
|
"loss": 0.266,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 0.9640091116173121,
|
|
"grad_norm": 1.0372168523415322,
|
|
"learning_rate": 4.174435722467951e-08,
|
|
"loss": 0.2625,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 0.9646165527714503,
|
|
"grad_norm": 0.9433606458038362,
|
|
"learning_rate": 4.038873889100237e-08,
|
|
"loss": 0.2642,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 0.9652239939255884,
|
|
"grad_norm": 0.9807120841293877,
|
|
"learning_rate": 3.905540751626191e-08,
|
|
"loss": 0.2472,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 0.9658314350797267,
|
|
"grad_norm": 0.9083975877708611,
|
|
"learning_rate": 3.77443690920376e-08,
|
|
"loss": 0.2307,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.9664388762338648,
|
|
"grad_norm": 3.6391836768271273,
|
|
"learning_rate": 3.645562950973014e-08,
|
|
"loss": 0.2495,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 0.9670463173880031,
|
|
"grad_norm": 1.0037148121403654,
|
|
"learning_rate": 3.518919456053649e-08,
|
|
"loss": 0.2738,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 0.9676537585421412,
|
|
"grad_norm": 1.1951622922237168,
|
|
"learning_rate": 3.3945069935423234e-08,
|
|
"loss": 0.2449,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 0.9682611996962794,
|
|
"grad_norm": 1.5169082502067843,
|
|
"learning_rate": 3.2723261225102164e-08,
|
|
"loss": 0.2377,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 0.9688686408504176,
|
|
"grad_norm": 3.6717572091002997,
|
|
"learning_rate": 3.152377392000361e-08,
|
|
"loss": 0.2671,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.9694760820045558,
|
|
"grad_norm": 0.8724528062444226,
|
|
"learning_rate": 3.034661341025258e-08,
|
|
"loss": 0.231,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 0.970083523158694,
|
|
"grad_norm": 0.9374629987363168,
|
|
"learning_rate": 2.9191784985644345e-08,
|
|
"loss": 0.241,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 0.9706909643128322,
|
|
"grad_norm": 0.9282402240595091,
|
|
"learning_rate": 2.8059293835620006e-08,
|
|
"loss": 0.2349,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 0.9712984054669704,
|
|
"grad_norm": 1.2644746158155864,
|
|
"learning_rate": 2.6949145049245396e-08,
|
|
"loss": 0.249,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 0.9719058466211086,
|
|
"grad_norm": 0.9000862092444777,
|
|
"learning_rate": 2.5861343615184997e-08,
|
|
"loss": 0.245,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.9725132877752468,
|
|
"grad_norm": 1.368391294736083,
|
|
"learning_rate": 2.479589442168251e-08,
|
|
"loss": 0.2333,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 0.973120728929385,
|
|
"grad_norm": 1.29020328624742,
|
|
"learning_rate": 2.3752802256536423e-08,
|
|
"loss": 0.2683,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 0.9737281700835232,
|
|
"grad_norm": 0.9410886564376265,
|
|
"learning_rate": 2.2732071807081147e-08,
|
|
"loss": 0.2285,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 0.9743356112376613,
|
|
"grad_norm": 1.4761677232747976,
|
|
"learning_rate": 2.173370766016314e-08,
|
|
"loss": 0.2315,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 0.9749430523917996,
|
|
"grad_norm": 0.9501609497089889,
|
|
"learning_rate": 2.0757714302122035e-08,
|
|
"loss": 0.241,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.9755504935459377,
|
|
"grad_norm": 1.2314833591182839,
|
|
"learning_rate": 1.98040961187701e-08,
|
|
"loss": 0.2703,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 0.976157934700076,
|
|
"grad_norm": 1.0158036848314118,
|
|
"learning_rate": 1.8872857395372812e-08,
|
|
"loss": 0.2597,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 0.9767653758542141,
|
|
"grad_norm": 1.1694648264181446,
|
|
"learning_rate": 1.7964002316628316e-08,
|
|
"loss": 0.2916,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 0.9773728170083523,
|
|
"grad_norm": 1.0688476934543394,
|
|
"learning_rate": 1.7077534966650767e-08,
|
|
"loss": 0.2558,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 0.9779802581624905,
|
|
"grad_norm": 0.9549062578238298,
|
|
"learning_rate": 1.6213459328950355e-08,
|
|
"loss": 0.244,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.9785876993166287,
|
|
"grad_norm": 2.412110087204276,
|
|
"learning_rate": 1.537177928641498e-08,
|
|
"loss": 0.2462,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 0.979195140470767,
|
|
"grad_norm": 0.941429067459168,
|
|
"learning_rate": 1.4552498621295264e-08,
|
|
"loss": 0.2535,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 0.9798025816249051,
|
|
"grad_norm": 0.9371186125393489,
|
|
"learning_rate": 1.3755621015184018e-08,
|
|
"loss": 0.2515,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 0.9804100227790433,
|
|
"grad_norm": 1.2909491242156825,
|
|
"learning_rate": 1.2981150049004021e-08,
|
|
"loss": 0.2327,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 0.9810174639331815,
|
|
"grad_norm": 0.9804844053080275,
|
|
"learning_rate": 1.2229089202987487e-08,
|
|
"loss": 0.2317,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.9816249050873197,
|
|
"grad_norm": 1.0876245667656317,
|
|
"learning_rate": 1.1499441856663296e-08,
|
|
"loss": 0.2516,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 0.9822323462414578,
|
|
"grad_norm": 0.8612954321226812,
|
|
"learning_rate": 1.0792211288841447e-08,
|
|
"loss": 0.2599,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 0.9828397873955961,
|
|
"grad_norm": 0.888353089870358,
|
|
"learning_rate": 1.0107400677596413e-08,
|
|
"loss": 0.2547,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 0.9834472285497342,
|
|
"grad_norm": 0.9827706274173068,
|
|
"learning_rate": 9.44501310025603e-09,
|
|
"loss": 0.2519,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 0.9840546697038725,
|
|
"grad_norm": 0.9827464321021941,
|
|
"learning_rate": 8.805051533384846e-09,
|
|
"loss": 0.2315,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.9846621108580106,
|
|
"grad_norm": 0.9339543239161487,
|
|
"learning_rate": 8.187518852771914e-09,
|
|
"loss": 0.2636,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 0.9852695520121488,
|
|
"grad_norm": 0.8991352561684951,
|
|
"learning_rate": 7.59241783341913e-09,
|
|
"loss": 0.2535,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 0.985876993166287,
|
|
"grad_norm": 0.8964567756827048,
|
|
"learning_rate": 7.019751149525133e-09,
|
|
"loss": 0.2098,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 0.9864844343204252,
|
|
"grad_norm": 1.0511236807122393,
|
|
"learning_rate": 6.469521374477539e-09,
|
|
"loss": 0.2739,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 0.9870918754745635,
|
|
"grad_norm": 0.8742334390126567,
|
|
"learning_rate": 5.941730980839056e-09,
|
|
"loss": 0.2423,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.9876993166287016,
|
|
"grad_norm": 0.8982580571091601,
|
|
"learning_rate": 5.436382340335833e-09,
|
|
"loss": 0.2275,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 0.9883067577828398,
|
|
"grad_norm": 0.9802151698563373,
|
|
"learning_rate": 4.9534777238485764e-09,
|
|
"loss": 0.2458,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 0.988914198936978,
|
|
"grad_norm": 1.021763994953802,
|
|
"learning_rate": 4.493019301401447e-09,
|
|
"loss": 0.2413,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 0.9895216400911162,
|
|
"grad_norm": 1.4457966401308637,
|
|
"learning_rate": 4.055009142152066e-09,
|
|
"loss": 0.2717,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 0.9901290812452543,
|
|
"grad_norm": 0.9291088704888705,
|
|
"learning_rate": 3.6394492143820847e-09,
|
|
"loss": 0.2631,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.9907365223993926,
|
|
"grad_norm": 0.9333078223932707,
|
|
"learning_rate": 3.2463413854899594e-09,
|
|
"loss": 0.2356,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 0.9913439635535307,
|
|
"grad_norm": 0.8258221417650397,
|
|
"learning_rate": 2.875687421980966e-09,
|
|
"loss": 0.2435,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 0.991951404707669,
|
|
"grad_norm": 1.3664318253744514,
|
|
"learning_rate": 2.5274889894583156e-09,
|
|
"loss": 0.2455,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 0.9925588458618071,
|
|
"grad_norm": 1.152187566076446,
|
|
"learning_rate": 2.201747652618713e-09,
|
|
"loss": 0.2713,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 0.9931662870159453,
|
|
"grad_norm": 1.085926992818226,
|
|
"learning_rate": 1.8984648752429222e-09,
|
|
"loss": 0.2716,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.9937737281700835,
|
|
"grad_norm": 1.1962597289024564,
|
|
"learning_rate": 1.6176420201902132e-09,
|
|
"loss": 0.2427,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 0.9943811693242217,
|
|
"grad_norm": 0.9297404890182976,
|
|
"learning_rate": 1.3592803493905904e-09,
|
|
"loss": 0.2492,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 0.99498861047836,
|
|
"grad_norm": 1.4127019374082828,
|
|
"learning_rate": 1.1233810238425735e-09,
|
|
"loss": 0.2329,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 0.9955960516324981,
|
|
"grad_norm": 0.9889547616457451,
|
|
"learning_rate": 9.099451036048701e-10,
|
|
"loss": 0.2464,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 0.9962034927866363,
|
|
"grad_norm": 1.479970043099125,
|
|
"learning_rate": 7.189735477913795e-10,
|
|
"loss": 0.243,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.9968109339407745,
|
|
"grad_norm": 0.9559279964912788,
|
|
"learning_rate": 5.504672145700829e-10,
|
|
"loss": 0.294,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 0.9974183750949127,
|
|
"grad_norm": 1.013358124558348,
|
|
"learning_rate": 4.0442686115582665e-10,
|
|
"loss": 0.2607,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 0.9980258162490508,
|
|
"grad_norm": 1.7596074983835073,
|
|
"learning_rate": 2.8085314380976725e-10,
|
|
"loss": 0.2626,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 0.9986332574031891,
|
|
"grad_norm": 1.0764621343908087,
|
|
"learning_rate": 1.797466178327101e-10,
|
|
"loss": 0.2622,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 0.9992406985573272,
|
|
"grad_norm": 1.1105069084316046,
|
|
"learning_rate": 1.011077375662195e-10,
|
|
"loss": 0.2386,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.9998481397114655,
|
|
"grad_norm": 1.0590187008021932,
|
|
"learning_rate": 4.4936856390398465e-11,
|
|
"loss": 0.2989,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 1.0590187008021932,
|
|
"learning_rate": 1.1234226718337405e-11,
|
|
"loss": 0.0578,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 1647,
|
|
"total_flos": 669099333058560.0,
|
|
"train_loss": 0.32129256987551813,
|
|
"train_runtime": 70988.8215,
|
|
"train_samples_per_second": 0.742,
|
|
"train_steps_per_second": 0.023
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1647,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 1000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 669099333058560.0,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|