{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 3475,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
|
|
"epoch": 0.0014388489208633094,
|
|
"grad_norm": 5.7950803842745,
|
|
"learning_rate": 2.2988505747126437e-07,
|
|
"loss": 0.8788,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0028776978417266188,
|
|
"grad_norm": 5.80373372295397,
|
|
"learning_rate": 4.5977011494252875e-07,
|
|
"loss": 0.8729,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.004316546762589928,
|
|
"grad_norm": 5.812259476227604,
|
|
"learning_rate": 6.896551724137931e-07,
|
|
"loss": 0.8864,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0057553956834532375,
|
|
"grad_norm": 5.816790170804618,
|
|
"learning_rate": 9.195402298850575e-07,
|
|
"loss": 0.8852,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007194244604316547,
|
|
"grad_norm": 5.769489702353545,
|
|
"learning_rate": 1.1494252873563219e-06,
|
|
"loss": 0.8788,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.008633093525179856,
|
|
"grad_norm": 5.3965608640996345,
|
|
"learning_rate": 1.3793103448275862e-06,
|
|
"loss": 0.8663,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010071942446043165,
|
|
"grad_norm": 5.376780412972741,
|
|
"learning_rate": 1.6091954022988506e-06,
|
|
"loss": 0.8637,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.011510791366906475,
|
|
"grad_norm": 4.384601039022092,
|
|
"learning_rate": 1.839080459770115e-06,
|
|
"loss": 0.8256,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.012949640287769784,
|
|
"grad_norm": 4.106798347375774,
|
|
"learning_rate": 2.0689655172413796e-06,
|
|
"loss": 0.8204,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.014388489208633094,
|
|
"grad_norm": 2.3344269812202683,
|
|
"learning_rate": 2.2988505747126437e-06,
|
|
"loss": 0.79,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.015827338129496403,
|
|
"grad_norm": 2.2260292332920453,
|
|
"learning_rate": 2.5287356321839083e-06,
|
|
"loss": 0.7912,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.017266187050359712,
|
|
"grad_norm": 1.931036674751876,
|
|
"learning_rate": 2.7586206896551725e-06,
|
|
"loss": 0.7768,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.01870503597122302,
|
|
"grad_norm": 1.626454438692599,
|
|
"learning_rate": 2.988505747126437e-06,
|
|
"loss": 0.7622,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.02014388489208633,
|
|
"grad_norm": 3.408769119104633,
|
|
"learning_rate": 3.2183908045977012e-06,
|
|
"loss": 0.7579,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.02158273381294964,
|
|
"grad_norm": 3.6192220724987276,
|
|
"learning_rate": 3.448275862068966e-06,
|
|
"loss": 0.752,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02302158273381295,
|
|
"grad_norm": 3.577067268801824,
|
|
"learning_rate": 3.67816091954023e-06,
|
|
"loss": 0.7533,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.02446043165467626,
|
|
"grad_norm": 3.397658895263752,
|
|
"learning_rate": 3.908045977011495e-06,
|
|
"loss": 0.7418,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.025899280575539568,
|
|
"grad_norm": 2.3712770828682395,
|
|
"learning_rate": 4.137931034482759e-06,
|
|
"loss": 0.7158,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.027338129496402876,
|
|
"grad_norm": 1.9727879660137988,
|
|
"learning_rate": 4.367816091954023e-06,
|
|
"loss": 0.6893,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.02877697841726619,
|
|
"grad_norm": 1.6338316844636638,
|
|
"learning_rate": 4.5977011494252875e-06,
|
|
"loss": 0.6734,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.030215827338129497,
|
|
"grad_norm": 1.2473765458997113,
|
|
"learning_rate": 4.8275862068965525e-06,
|
|
"loss": 0.6726,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.031654676258992806,
|
|
"grad_norm": 1.1510768753779166,
|
|
"learning_rate": 5.057471264367817e-06,
|
|
"loss": 0.6623,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.033093525179856115,
|
|
"grad_norm": 1.2320182452945079,
|
|
"learning_rate": 5.287356321839081e-06,
|
|
"loss": 0.6687,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.034532374100719423,
|
|
"grad_norm": 1.1607739009051437,
|
|
"learning_rate": 5.517241379310345e-06,
|
|
"loss": 0.651,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03597122302158273,
|
|
"grad_norm": 1.0934751466641257,
|
|
"learning_rate": 5.747126436781609e-06,
|
|
"loss": 0.6433,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.03741007194244604,
|
|
"grad_norm": 0.9241144892614676,
|
|
"learning_rate": 5.977011494252874e-06,
|
|
"loss": 0.6367,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.03884892086330935,
|
|
"grad_norm": 0.8081188746120616,
|
|
"learning_rate": 6.206896551724138e-06,
|
|
"loss": 0.638,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.04028776978417266,
|
|
"grad_norm": 0.8156287080974975,
|
|
"learning_rate": 6.4367816091954025e-06,
|
|
"loss": 0.6334,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.041726618705035974,
|
|
"grad_norm": 0.7672659957849104,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 0.6096,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.04316546762589928,
|
|
"grad_norm": 0.6021490572287345,
|
|
"learning_rate": 6.896551724137932e-06,
|
|
"loss": 0.6072,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04460431654676259,
|
|
"grad_norm": 0.5156027939922508,
|
|
"learning_rate": 7.126436781609196e-06,
|
|
"loss": 0.6121,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.0460431654676259,
|
|
"grad_norm": 0.671173742707715,
|
|
"learning_rate": 7.35632183908046e-06,
|
|
"loss": 0.6058,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.04748201438848921,
|
|
"grad_norm": 0.5733024607502514,
|
|
"learning_rate": 7.586206896551724e-06,
|
|
"loss": 0.6009,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.04892086330935252,
|
|
"grad_norm": 0.4093369794511272,
|
|
"learning_rate": 7.81609195402299e-06,
|
|
"loss": 0.5945,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.050359712230215826,
|
|
"grad_norm": 0.46196540371712275,
|
|
"learning_rate": 8.045977011494253e-06,
|
|
"loss": 0.596,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.051798561151079135,
|
|
"grad_norm": 0.533379539178946,
|
|
"learning_rate": 8.275862068965518e-06,
|
|
"loss": 0.5939,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.053237410071942444,
|
|
"grad_norm": 0.4538973737186048,
|
|
"learning_rate": 8.505747126436782e-06,
|
|
"loss": 0.5801,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.05467625899280575,
|
|
"grad_norm": 0.3679254432652543,
|
|
"learning_rate": 8.735632183908047e-06,
|
|
"loss": 0.5869,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05611510791366906,
|
|
"grad_norm": 0.35379718926683645,
|
|
"learning_rate": 8.965517241379312e-06,
|
|
"loss": 0.5781,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.05755395683453238,
|
|
"grad_norm": 0.4322492694878034,
|
|
"learning_rate": 9.195402298850575e-06,
|
|
"loss": 0.5974,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.058992805755395686,
|
|
"grad_norm": 0.3594634784213488,
|
|
"learning_rate": 9.42528735632184e-06,
|
|
"loss": 0.5667,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.060431654676258995,
|
|
"grad_norm": 0.3123133006554115,
|
|
"learning_rate": 9.655172413793105e-06,
|
|
"loss": 0.5812,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.0618705035971223,
|
|
"grad_norm": 0.40152584977896627,
|
|
"learning_rate": 9.885057471264368e-06,
|
|
"loss": 0.5782,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06330935251798561,
|
|
"grad_norm": 0.3906377415791127,
|
|
"learning_rate": 1.0114942528735633e-05,
|
|
"loss": 0.5689,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06474820143884892,
|
|
"grad_norm": 0.3173230092608099,
|
|
"learning_rate": 1.0344827586206898e-05,
|
|
"loss": 0.5631,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06618705035971223,
|
|
"grad_norm": 0.33367146816918736,
|
|
"learning_rate": 1.0574712643678162e-05,
|
|
"loss": 0.5585,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.06762589928057554,
|
|
"grad_norm": 0.3029294781916356,
|
|
"learning_rate": 1.0804597701149427e-05,
|
|
"loss": 0.5724,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.06906474820143885,
|
|
"grad_norm": 0.27827030515220125,
|
|
"learning_rate": 1.103448275862069e-05,
|
|
"loss": 0.5522,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07050359712230216,
|
|
"grad_norm": 0.3400614358242299,
|
|
"learning_rate": 1.1264367816091955e-05,
|
|
"loss": 0.5534,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07194244604316546,
|
|
"grad_norm": 0.25227357733107497,
|
|
"learning_rate": 1.1494252873563218e-05,
|
|
"loss": 0.5576,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07338129496402877,
|
|
"grad_norm": 0.26782634739711225,
|
|
"learning_rate": 1.1724137931034483e-05,
|
|
"loss": 0.5584,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.07482014388489208,
|
|
"grad_norm": 0.2745792932328221,
|
|
"learning_rate": 1.1954022988505748e-05,
|
|
"loss": 0.5492,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.07625899280575539,
|
|
"grad_norm": 0.2566777585760455,
|
|
"learning_rate": 1.2183908045977013e-05,
|
|
"loss": 0.5577,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.0776978417266187,
|
|
"grad_norm": 0.2474576963521596,
|
|
"learning_rate": 1.2413793103448277e-05,
|
|
"loss": 0.5518,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.07913669064748201,
|
|
"grad_norm": 0.2411483258618288,
|
|
"learning_rate": 1.2643678160919542e-05,
|
|
"loss": 0.5663,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08057553956834532,
|
|
"grad_norm": 0.23304380726651236,
|
|
"learning_rate": 1.2873563218390805e-05,
|
|
"loss": 0.5556,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08201438848920864,
|
|
"grad_norm": 0.22396393818764437,
|
|
"learning_rate": 1.310344827586207e-05,
|
|
"loss": 0.55,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08345323741007195,
|
|
"grad_norm": 0.21519099484472562,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.5473,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08489208633093526,
|
|
"grad_norm": 0.2498902710086418,
|
|
"learning_rate": 1.3563218390804598e-05,
|
|
"loss": 0.5401,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.08633093525179857,
|
|
"grad_norm": 0.22953306490968306,
|
|
"learning_rate": 1.3793103448275863e-05,
|
|
"loss": 0.5504,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.08776978417266187,
|
|
"grad_norm": 0.26401962747750474,
|
|
"learning_rate": 1.4022988505747128e-05,
|
|
"loss": 0.5414,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.08920863309352518,
|
|
"grad_norm": 0.22108805813505764,
|
|
"learning_rate": 1.4252873563218392e-05,
|
|
"loss": 0.5347,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09064748201438849,
|
|
"grad_norm": 0.2244497442390973,
|
|
"learning_rate": 1.4482758620689657e-05,
|
|
"loss": 0.5422,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.0920863309352518,
|
|
"grad_norm": 0.23319967845376763,
|
|
"learning_rate": 1.471264367816092e-05,
|
|
"loss": 0.5391,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.09352517985611511,
|
|
"grad_norm": 0.221575800004754,
|
|
"learning_rate": 1.4942528735632185e-05,
|
|
"loss": 0.5332,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09496402877697842,
|
|
"grad_norm": 0.3128709128318532,
|
|
"learning_rate": 1.5172413793103448e-05,
|
|
"loss": 0.5387,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.09640287769784173,
|
|
"grad_norm": 0.28003699067091614,
|
|
"learning_rate": 1.540229885057471e-05,
|
|
"loss": 0.5347,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.09784172661870504,
|
|
"grad_norm": 0.23521567329546428,
|
|
"learning_rate": 1.563218390804598e-05,
|
|
"loss": 0.5211,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.09928057553956834,
|
|
"grad_norm": 0.2771071200579004,
|
|
"learning_rate": 1.586206896551724e-05,
|
|
"loss": 0.5222,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.10071942446043165,
|
|
"grad_norm": 0.3740027693389457,
|
|
"learning_rate": 1.6091954022988507e-05,
|
|
"loss": 0.5249,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.10215827338129496,
|
|
"grad_norm": 0.37195099076108284,
|
|
"learning_rate": 1.632183908045977e-05,
|
|
"loss": 0.5348,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10359712230215827,
|
|
"grad_norm": 0.47890420865134453,
|
|
"learning_rate": 1.6551724137931037e-05,
|
|
"loss": 0.5292,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.10503597122302158,
|
|
"grad_norm": 0.6348886939012521,
|
|
"learning_rate": 1.6781609195402298e-05,
|
|
"loss": 0.5185,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.10647482014388489,
|
|
"grad_norm": 0.8488091368539316,
|
|
"learning_rate": 1.7011494252873563e-05,
|
|
"loss": 0.5207,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.1079136690647482,
|
|
"grad_norm": 0.7328521981147782,
|
|
"learning_rate": 1.7241379310344828e-05,
|
|
"loss": 0.5186,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.1093525179856115,
|
|
"grad_norm": 0.5863476782648949,
|
|
"learning_rate": 1.7471264367816093e-05,
|
|
"loss": 0.5188,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.11079136690647481,
|
|
"grad_norm": 0.5280199903637086,
|
|
"learning_rate": 1.770114942528736e-05,
|
|
"loss": 0.5265,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.11223021582733812,
|
|
"grad_norm": 0.6054659477704597,
|
|
"learning_rate": 1.7931034482758623e-05,
|
|
"loss": 0.5333,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11366906474820145,
|
|
"grad_norm": 0.5148655461375653,
|
|
"learning_rate": 1.8160919540229885e-05,
|
|
"loss": 0.5222,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.11510791366906475,
|
|
"grad_norm": 0.3885168711664523,
|
|
"learning_rate": 1.839080459770115e-05,
|
|
"loss": 0.5133,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.11654676258992806,
|
|
"grad_norm": 0.36987833647048085,
|
|
"learning_rate": 1.8620689655172415e-05,
|
|
"loss": 0.5174,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.11798561151079137,
|
|
"grad_norm": 0.4719382796323316,
|
|
"learning_rate": 1.885057471264368e-05,
|
|
"loss": 0.5201,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.11942446043165468,
|
|
"grad_norm": 0.43296728844211463,
|
|
"learning_rate": 1.908045977011494e-05,
|
|
"loss": 0.5289,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12086330935251799,
|
|
"grad_norm": 0.3953226540716472,
|
|
"learning_rate": 1.931034482758621e-05,
|
|
"loss": 0.5102,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.1223021582733813,
|
|
"grad_norm": 0.7406959984153508,
|
|
"learning_rate": 1.9540229885057475e-05,
|
|
"loss": 0.5185,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.1237410071942446,
|
|
"grad_norm": 1.058579365481271,
|
|
"learning_rate": 1.9770114942528737e-05,
|
|
"loss": 0.5273,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.1251798561151079,
|
|
"grad_norm": 0.9882094024000753,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.5206,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.12661870503597122,
|
|
"grad_norm": 0.7868228550747289,
|
|
"learning_rate": 2.0229885057471267e-05,
|
|
"loss": 0.5147,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.12805755395683452,
|
|
"grad_norm": 0.5672504597924308,
|
|
"learning_rate": 2.0459770114942528e-05,
|
|
"loss": 0.5121,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.12949640287769784,
|
|
"grad_norm": 0.6070469607994781,
|
|
"learning_rate": 2.0689655172413797e-05,
|
|
"loss": 0.513,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13093525179856116,
|
|
"grad_norm": 0.6659240486044178,
|
|
"learning_rate": 2.0919540229885058e-05,
|
|
"loss": 0.5165,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13237410071942446,
|
|
"grad_norm": 0.5012920366120729,
|
|
"learning_rate": 2.1149425287356323e-05,
|
|
"loss": 0.5114,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.13381294964028778,
|
|
"grad_norm": 0.4776027972081521,
|
|
"learning_rate": 2.1379310344827585e-05,
|
|
"loss": 0.5097,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.13525179856115108,
|
|
"grad_norm": 0.6642823083156734,
|
|
"learning_rate": 2.1609195402298853e-05,
|
|
"loss": 0.5092,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1366906474820144,
|
|
"grad_norm": 0.6705151665810749,
|
|
"learning_rate": 2.183908045977012e-05,
|
|
"loss": 0.5106,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.1381294964028777,
|
|
"grad_norm": 0.6765433286219615,
|
|
"learning_rate": 2.206896551724138e-05,
|
|
"loss": 0.5227,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.13956834532374102,
|
|
"grad_norm": 0.780880626019304,
|
|
"learning_rate": 2.229885057471265e-05,
|
|
"loss": 0.5012,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.1410071942446043,
|
|
"grad_norm": 0.9382143476692354,
|
|
"learning_rate": 2.252873563218391e-05,
|
|
"loss": 0.5167,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14244604316546763,
|
|
"grad_norm": 1.0257784548542523,
|
|
"learning_rate": 2.2758620689655175e-05,
|
|
"loss": 0.5144,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.14388489208633093,
|
|
"grad_norm": 1.1359585727505734,
|
|
"learning_rate": 2.2988505747126437e-05,
|
|
"loss": 0.5048,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.14532374100719425,
|
|
"grad_norm": 0.783280675276794,
|
|
"learning_rate": 2.3218390804597705e-05,
|
|
"loss": 0.5071,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.14676258992805755,
|
|
"grad_norm": 0.7669159184729264,
|
|
"learning_rate": 2.3448275862068967e-05,
|
|
"loss": 0.5178,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.14820143884892087,
|
|
"grad_norm": 0.889435981804711,
|
|
"learning_rate": 2.367816091954023e-05,
|
|
"loss": 0.5089,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.14964028776978416,
|
|
"grad_norm": 0.7586037318294544,
|
|
"learning_rate": 2.3908045977011497e-05,
|
|
"loss": 0.5051,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.1510791366906475,
|
|
"grad_norm": 0.6981028486003357,
|
|
"learning_rate": 2.413793103448276e-05,
|
|
"loss": 0.5087,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.15251798561151078,
|
|
"grad_norm": 1.0360763919208351,
|
|
"learning_rate": 2.4367816091954027e-05,
|
|
"loss": 0.5135,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.1539568345323741,
|
|
"grad_norm": 1.0924581335064758,
|
|
"learning_rate": 2.4597701149425288e-05,
|
|
"loss": 0.5061,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.1553956834532374,
|
|
"grad_norm": 0.8529644669673789,
|
|
"learning_rate": 2.4827586206896553e-05,
|
|
"loss": 0.5138,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.15683453237410072,
|
|
"grad_norm": 0.8086718544086735,
|
|
"learning_rate": 2.5057471264367815e-05,
|
|
"loss": 0.5098,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.15827338129496402,
|
|
"grad_norm": 0.7723203251328893,
|
|
"learning_rate": 2.5287356321839083e-05,
|
|
"loss": 0.5061,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.15971223021582734,
|
|
"grad_norm": 0.7320205387602443,
|
|
"learning_rate": 2.551724137931035e-05,
|
|
"loss": 0.5074,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.16115107913669063,
|
|
"grad_norm": 1.0993332476114543,
|
|
"learning_rate": 2.574712643678161e-05,
|
|
"loss": 0.5134,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.16258992805755396,
|
|
"grad_norm": 1.2061459893746738,
|
|
"learning_rate": 2.597701149425288e-05,
|
|
"loss": 0.5118,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.16402877697841728,
|
|
"grad_norm": 0.7177587656079357,
|
|
"learning_rate": 2.620689655172414e-05,
|
|
"loss": 0.508,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.16546762589928057,
|
|
"grad_norm": 0.7539185562502975,
|
|
"learning_rate": 2.6436781609195405e-05,
|
|
"loss": 0.5052,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.1669064748201439,
|
|
"grad_norm": 0.9476991759917403,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.5021,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.1683453237410072,
|
|
"grad_norm": 1.0990709833436665,
|
|
"learning_rate": 2.6896551724137935e-05,
|
|
"loss": 0.5065,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.1697841726618705,
|
|
"grad_norm": 0.7605876892359127,
|
|
"learning_rate": 2.7126436781609197e-05,
|
|
"loss": 0.4992,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.1712230215827338,
|
|
"grad_norm": 0.9370611583525377,
|
|
"learning_rate": 2.735632183908046e-05,
|
|
"loss": 0.5111,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.17266187050359713,
|
|
"grad_norm": 1.2644050035538514,
|
|
"learning_rate": 2.7586206896551727e-05,
|
|
"loss": 0.5109,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.17410071942446043,
|
|
"grad_norm": 0.6201970345956561,
|
|
"learning_rate": 2.781609195402299e-05,
|
|
"loss": 0.4955,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.17553956834532375,
|
|
"grad_norm": 0.9717857801716515,
|
|
"learning_rate": 2.8045977011494257e-05,
|
|
"loss": 0.5035,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.17697841726618704,
|
|
"grad_norm": 1.0704471630844348,
|
|
"learning_rate": 2.8275862068965518e-05,
|
|
"loss": 0.5125,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.17841726618705037,
|
|
"grad_norm": 0.615612631880327,
|
|
"learning_rate": 2.8505747126436783e-05,
|
|
"loss": 0.4971,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.17985611510791366,
|
|
"grad_norm": 0.8883180677122924,
|
|
"learning_rate": 2.8735632183908045e-05,
|
|
"loss": 0.4942,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.18129496402877698,
|
|
"grad_norm": 0.9602358329430951,
|
|
"learning_rate": 2.8965517241379313e-05,
|
|
"loss": 0.5027,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.18273381294964028,
|
|
"grad_norm": 1.144276122910815,
|
|
"learning_rate": 2.919540229885058e-05,
|
|
"loss": 0.5044,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.1841726618705036,
|
|
"grad_norm": 0.9282520702442505,
|
|
"learning_rate": 2.942528735632184e-05,
|
|
"loss": 0.4962,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.1856115107913669,
|
|
"grad_norm": 0.9652907094757717,
|
|
"learning_rate": 2.965517241379311e-05,
|
|
"loss": 0.5094,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.18705035971223022,
|
|
"grad_norm": 1.2522212064594618,
|
|
"learning_rate": 2.988505747126437e-05,
|
|
"loss": 0.4899,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.1884892086330935,
|
|
"grad_norm": 1.0227684202127514,
|
|
"learning_rate": 3.0114942528735635e-05,
|
|
"loss": 0.4982,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.18992805755395684,
|
|
"grad_norm": 0.8398890856892194,
|
|
"learning_rate": 3.0344827586206897e-05,
|
|
"loss": 0.4888,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.19136690647482013,
|
|
"grad_norm": 0.8691981772305059,
|
|
"learning_rate": 3.057471264367816e-05,
|
|
"loss": 0.4904,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.19280575539568345,
|
|
"grad_norm": 1.139251477242842,
|
|
"learning_rate": 3.080459770114942e-05,
|
|
"loss": 0.4925,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.19424460431654678,
|
|
"grad_norm": 0.7928919984415536,
|
|
"learning_rate": 3.103448275862069e-05,
|
|
"loss": 0.4969,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.19568345323741007,
|
|
"grad_norm": 0.8089278548829392,
|
|
"learning_rate": 3.126436781609196e-05,
|
|
"loss": 0.4981,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.1971223021582734,
|
|
"grad_norm": 0.893979283180644,
|
|
"learning_rate": 3.149425287356322e-05,
|
|
"loss": 0.5054,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.1985611510791367,
|
|
"grad_norm": 1.3117444887252856,
|
|
"learning_rate": 3.172413793103448e-05,
|
|
"loss": 0.4977,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"grad_norm": 0.6431344906886486,
|
|
"learning_rate": 3.195402298850575e-05,
|
|
"loss": 0.4958,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.2014388489208633,
|
|
"grad_norm": 1.1592447820706968,
|
|
"learning_rate": 3.218390804597701e-05,
|
|
"loss": 0.4934,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.20287769784172663,
|
|
"grad_norm": 1.2521752915712352,
|
|
"learning_rate": 3.2413793103448275e-05,
|
|
"loss": 0.4965,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.20431654676258992,
|
|
"grad_norm": 0.6016088386231214,
|
|
"learning_rate": 3.264367816091954e-05,
|
|
"loss": 0.5097,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.20575539568345325,
|
|
"grad_norm": 1.967243062319988,
|
|
"learning_rate": 3.287356321839081e-05,
|
|
"loss": 0.504,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.20719424460431654,
|
|
"grad_norm": 0.6338725362668393,
|
|
"learning_rate": 3.310344827586207e-05,
|
|
"loss": 0.4939,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.20863309352517986,
|
|
"grad_norm": 1.628427488401041,
|
|
"learning_rate": 3.3333333333333335e-05,
|
|
"loss": 0.5107,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.21007194244604316,
|
|
"grad_norm": 1.013344094898763,
|
|
"learning_rate": 3.3563218390804597e-05,
|
|
"loss": 0.4947,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.21151079136690648,
|
|
"grad_norm": 2.334013729226139,
|
|
"learning_rate": 3.3793103448275865e-05,
|
|
"loss": 0.516,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.21294964028776978,
|
|
"grad_norm": 2.3706526856174417,
|
|
"learning_rate": 3.4022988505747127e-05,
|
|
"loss": 0.5083,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.2143884892086331,
|
|
"grad_norm": 0.8172045661670454,
|
|
"learning_rate": 3.4252873563218395e-05,
|
|
"loss": 0.4957,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.2158273381294964,
|
|
"grad_norm": 1.5812629231172273,
|
|
"learning_rate": 3.4482758620689657e-05,
|
|
"loss": 0.5054,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.21726618705035972,
|
|
"grad_norm": 1.225663884188365,
|
|
"learning_rate": 3.4712643678160925e-05,
|
|
"loss": 0.5116,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.218705035971223,
|
|
"grad_norm": 1.1599490704964557,
|
|
"learning_rate": 3.4942528735632187e-05,
|
|
"loss": 0.5015,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.22014388489208633,
|
|
"grad_norm": 1.2940814687714215,
|
|
"learning_rate": 3.517241379310345e-05,
|
|
"loss": 0.5072,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.22158273381294963,
|
|
"grad_norm": 0.9348388917946038,
|
|
"learning_rate": 3.540229885057472e-05,
|
|
"loss": 0.496,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.22302158273381295,
|
|
"grad_norm": 0.8587516637853729,
|
|
"learning_rate": 3.563218390804598e-05,
|
|
"loss": 0.4951,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.22446043165467625,
|
|
"grad_norm": 0.910445319183165,
|
|
"learning_rate": 3.586206896551725e-05,
|
|
"loss": 0.4946,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.22589928057553957,
|
|
"grad_norm": 0.7986547078534473,
|
|
"learning_rate": 3.609195402298851e-05,
|
|
"loss": 0.4891,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.2273381294964029,
|
|
"grad_norm": 0.8815230996411629,
|
|
"learning_rate": 3.632183908045977e-05,
|
|
"loss": 0.4985,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.22877697841726619,
|
|
"grad_norm": 0.7674169563291096,
|
|
"learning_rate": 3.655172413793104e-05,
|
|
"loss": 0.4928,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.2302158273381295,
|
|
"grad_norm": 0.852425221422681,
|
|
"learning_rate": 3.67816091954023e-05,
|
|
"loss": 0.4956,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.2316546762589928,
|
|
"grad_norm": 0.8460593685034887,
|
|
"learning_rate": 3.701149425287357e-05,
|
|
"loss": 0.4859,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.23309352517985613,
|
|
"grad_norm": 1.2098761302153611,
|
|
"learning_rate": 3.724137931034483e-05,
|
|
"loss": 0.4957,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.23453237410071942,
|
|
"grad_norm": 1.4431704624428114,
|
|
"learning_rate": 3.74712643678161e-05,
|
|
"loss": 0.4869,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.23597122302158274,
|
|
"grad_norm": 0.6064326903747188,
|
|
"learning_rate": 3.770114942528736e-05,
|
|
"loss": 0.4894,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.23741007194244604,
|
|
"grad_norm": 1.0345490026881266,
|
|
"learning_rate": 3.793103448275862e-05,
|
|
"loss": 0.4922,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.23884892086330936,
|
|
"grad_norm": 1.8094520973588428,
|
|
"learning_rate": 3.816091954022988e-05,
|
|
"loss": 0.4888,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.24028776978417266,
|
|
"grad_norm": 0.6264600218044611,
|
|
"learning_rate": 3.839080459770115e-05,
|
|
"loss": 0.4932,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.24172661870503598,
|
|
"grad_norm": 2.1226351527839813,
|
|
"learning_rate": 3.862068965517242e-05,
|
|
"loss": 0.4922,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.24316546762589927,
|
|
"grad_norm": 0.8826629535539415,
|
|
"learning_rate": 3.885057471264368e-05,
|
|
"loss": 0.4968,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.2446043165467626,
|
|
"grad_norm": 2.3003832322493505,
|
|
"learning_rate": 3.908045977011495e-05,
|
|
"loss": 0.4994,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2460431654676259,
|
|
"grad_norm": 1.5332174810002905,
|
|
"learning_rate": 3.931034482758621e-05,
|
|
"loss": 0.515,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2474820143884892,
|
|
"grad_norm": 2.1251619640588504,
|
|
"learning_rate": 3.954022988505747e-05,
|
|
"loss": 0.4908,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2489208633093525,
|
|
"grad_norm": 1.9950974265709873,
|
|
"learning_rate": 3.9770114942528735e-05,
|
|
"loss": 0.502,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.2503597122302158,
|
|
"grad_norm": 1.190767912464862,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.4914,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.2517985611510791,
|
|
"grad_norm": 1.4983155711250777,
|
|
"learning_rate": 4.022988505747127e-05,
|
|
"loss": 0.511,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.25323741007194245,
|
|
"grad_norm": 1.243848961587619,
|
|
"learning_rate": 4.045977011494253e-05,
|
|
"loss": 0.5053,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.25467625899280577,
|
|
"grad_norm": 1.1080502990981556,
|
|
"learning_rate": 4.0689655172413795e-05,
|
|
"loss": 0.5005,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.25611510791366904,
|
|
"grad_norm": 1.3982377889584758,
|
|
"learning_rate": 4.0919540229885057e-05,
|
|
"loss": 0.5054,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.25755395683453236,
|
|
"grad_norm": 0.7844495769079776,
|
|
"learning_rate": 4.1149425287356325e-05,
|
|
"loss": 0.4975,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.2589928057553957,
|
|
"grad_norm": 1.2614206644026908,
|
|
"learning_rate": 4.137931034482759e-05,
|
|
"loss": 0.4879,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.260431654676259,
|
|
"grad_norm": 1.0248070520135202,
|
|
"learning_rate": 4.160919540229885e-05,
|
|
"loss": 0.497,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.26187050359712233,
|
|
"grad_norm": 1.141089573212033,
|
|
"learning_rate": 4.1839080459770117e-05,
|
|
"loss": 0.4764,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2633093525179856,
|
|
"grad_norm": 0.822634736776405,
|
|
"learning_rate": 4.2068965517241385e-05,
|
|
"loss": 0.4878,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2647482014388489,
|
|
"grad_norm": 0.8621925875313877,
|
|
"learning_rate": 4.2298850574712647e-05,
|
|
"loss": 0.4872,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.26618705035971224,
|
|
"grad_norm": 1.1200147206788935,
|
|
"learning_rate": 4.2528735632183915e-05,
|
|
"loss": 0.491,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.26762589928057556,
|
|
"grad_norm": 0.8625502184927313,
|
|
"learning_rate": 4.275862068965517e-05,
|
|
"loss": 0.4873,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.26906474820143883,
|
|
"grad_norm": 0.7483226150867532,
|
|
"learning_rate": 4.298850574712644e-05,
|
|
"loss": 0.4955,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.27050359712230215,
|
|
"grad_norm": 0.9109746432640708,
|
|
"learning_rate": 4.321839080459771e-05,
|
|
"loss": 0.484,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2719424460431655,
|
|
"grad_norm": 1.4195131352629455,
|
|
"learning_rate": 4.344827586206897e-05,
|
|
"loss": 0.4877,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2733812949640288,
|
|
"grad_norm": 0.783519612574412,
|
|
"learning_rate": 4.367816091954024e-05,
|
|
"loss": 0.496,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.27482014388489207,
|
|
"grad_norm": 1.1338691135831955,
|
|
"learning_rate": 4.39080459770115e-05,
|
|
"loss": 0.4845,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.2762589928057554,
|
|
"grad_norm": 1.0943668228371997,
|
|
"learning_rate": 4.413793103448276e-05,
|
|
"loss": 0.4792,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.2776978417266187,
|
|
"grad_norm": 1.0775568913415932,
|
|
"learning_rate": 4.436781609195403e-05,
|
|
"loss": 0.4852,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.27913669064748203,
|
|
"grad_norm": 1.142808622233739,
|
|
"learning_rate": 4.45977011494253e-05,
|
|
"loss": 0.4822,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.2805755395683453,
|
|
"grad_norm": 1.0134384262191969,
|
|
"learning_rate": 4.482758620689655e-05,
|
|
"loss": 0.4899,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2820143884892086,
|
|
"grad_norm": 1.1232245335232205,
|
|
"learning_rate": 4.505747126436782e-05,
|
|
"loss": 0.4853,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.28345323741007195,
|
|
"grad_norm": 1.1714046668864437,
|
|
"learning_rate": 4.528735632183908e-05,
|
|
"loss": 0.4858,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.28489208633093527,
|
|
"grad_norm": 1.1060809215522533,
|
|
"learning_rate": 4.551724137931035e-05,
|
|
"loss": 0.4787,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.28633093525179854,
|
|
"grad_norm": 1.0335155182143105,
|
|
"learning_rate": 4.574712643678162e-05,
|
|
"loss": 0.4793,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.28776978417266186,
|
|
"grad_norm": 1.2048774315404076,
|
|
"learning_rate": 4.597701149425287e-05,
|
|
"loss": 0.4896,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2892086330935252,
|
|
"grad_norm": 0.9213048180830371,
|
|
"learning_rate": 4.620689655172414e-05,
|
|
"loss": 0.4756,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.2906474820143885,
|
|
"grad_norm": 1.330841287678041,
|
|
"learning_rate": 4.643678160919541e-05,
|
|
"loss": 0.4711,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.2920863309352518,
|
|
"grad_norm": 1.0145857118389694,
|
|
"learning_rate": 4.666666666666667e-05,
|
|
"loss": 0.4863,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.2935251798561151,
|
|
"grad_norm": 1.0217817914207772,
|
|
"learning_rate": 4.689655172413793e-05,
|
|
"loss": 0.4833,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.2949640287769784,
|
|
"grad_norm": 1.5538734492227886,
|
|
"learning_rate": 4.7126436781609195e-05,
|
|
"loss": 0.4931,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.29640287769784174,
|
|
"grad_norm": 0.7110786754937796,
|
|
"learning_rate": 4.735632183908046e-05,
|
|
"loss": 0.4765,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.29784172661870506,
|
|
"grad_norm": 1.2930099942726418,
|
|
"learning_rate": 4.758620689655173e-05,
|
|
"loss": 0.476,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.2992805755395683,
|
|
"grad_norm": 0.9394282482728971,
|
|
"learning_rate": 4.781609195402299e-05,
|
|
"loss": 0.4805,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.30071942446043165,
|
|
"grad_norm": 1.1927083150462356,
|
|
"learning_rate": 4.8045977011494255e-05,
|
|
"loss": 0.4736,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.302158273381295,
|
|
"grad_norm": 1.185477200504906,
|
|
"learning_rate": 4.827586206896552e-05,
|
|
"loss": 0.4842,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.3035971223021583,
|
|
"grad_norm": 0.7763446499167929,
|
|
"learning_rate": 4.8505747126436785e-05,
|
|
"loss": 0.4656,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.30503597122302156,
|
|
"grad_norm": 0.8915157648522011,
|
|
"learning_rate": 4.873563218390805e-05,
|
|
"loss": 0.4906,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.3064748201438849,
|
|
"grad_norm": 1.360344215498736,
|
|
"learning_rate": 4.896551724137931e-05,
|
|
"loss": 0.4815,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3079136690647482,
|
|
"grad_norm": 0.7378791672976154,
|
|
"learning_rate": 4.9195402298850577e-05,
|
|
"loss": 0.4784,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.30935251798561153,
|
|
"grad_norm": 0.9898871184534456,
|
|
"learning_rate": 4.9425287356321845e-05,
|
|
"loss": 0.4795,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.3107913669064748,
|
|
"grad_norm": 1.2330520090507358,
|
|
"learning_rate": 4.9655172413793107e-05,
|
|
"loss": 0.4785,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.3122302158273381,
|
|
"grad_norm": 0.7971840971953688,
|
|
"learning_rate": 4.9885057471264375e-05,
|
|
"loss": 0.4746,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.31366906474820144,
|
|
"grad_norm": 1.3470126878925328,
|
|
"learning_rate": 5.011494252873563e-05,
|
|
"loss": 0.4804,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.31510791366906477,
|
|
"grad_norm": 0.7218108311024221,
|
|
"learning_rate": 5.03448275862069e-05,
|
|
"loss": 0.4778,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.31654676258992803,
|
|
"grad_norm": 0.9591078746801526,
|
|
"learning_rate": 5.057471264367817e-05,
|
|
"loss": 0.4783,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.31798561151079136,
|
|
"grad_norm": 1.1008171783922347,
|
|
"learning_rate": 5.0804597701149435e-05,
|
|
"loss": 0.4796,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.3194244604316547,
|
|
"grad_norm": 0.9352239850771547,
|
|
"learning_rate": 5.10344827586207e-05,
|
|
"loss": 0.4879,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.320863309352518,
|
|
"grad_norm": 0.9448279532736612,
|
|
"learning_rate": 5.126436781609196e-05,
|
|
"loss": 0.4798,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.32230215827338127,
|
|
"grad_norm": 1.1129787171792922,
|
|
"learning_rate": 5.149425287356322e-05,
|
|
"loss": 0.4864,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3237410071942446,
|
|
"grad_norm": 1.47608934534771,
|
|
"learning_rate": 5.172413793103449e-05,
|
|
"loss": 0.4866,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3251798561151079,
|
|
"grad_norm": 0.8867234603752784,
|
|
"learning_rate": 5.195402298850576e-05,
|
|
"loss": 0.4734,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.32661870503597124,
|
|
"grad_norm": 1.4380595809930212,
|
|
"learning_rate": 5.218390804597701e-05,
|
|
"loss": 0.4854,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.32805755395683456,
|
|
"grad_norm": 1.1655816289360368,
|
|
"learning_rate": 5.241379310344828e-05,
|
|
"loss": 0.4754,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.3294964028776978,
|
|
"grad_norm": 1.4579401414240616,
|
|
"learning_rate": 5.264367816091954e-05,
|
|
"loss": 0.4796,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.33093525179856115,
|
|
"grad_norm": 1.0124256992786442,
|
|
"learning_rate": 5.287356321839081e-05,
|
|
"loss": 0.4779,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.33237410071942447,
|
|
"grad_norm": 1.4515174308826553,
|
|
"learning_rate": 5.310344827586208e-05,
|
|
"loss": 0.4878,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3338129496402878,
|
|
"grad_norm": 1.0457846901458092,
|
|
"learning_rate": 5.333333333333333e-05,
|
|
"loss": 0.4919,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.33525179856115106,
|
|
"grad_norm": 1.2292527735878573,
|
|
"learning_rate": 5.35632183908046e-05,
|
|
"loss": 0.48,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.3366906474820144,
|
|
"grad_norm": 1.1566732382503886,
|
|
"learning_rate": 5.379310344827587e-05,
|
|
"loss": 0.4784,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.3381294964028777,
|
|
"grad_norm": 1.0247819700361531,
|
|
"learning_rate": 5.402298850574713e-05,
|
|
"loss": 0.4788,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.339568345323741,
|
|
"grad_norm": 1.8258192866492602,
|
|
"learning_rate": 5.425287356321839e-05,
|
|
"loss": 0.4859,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.3410071942446043,
|
|
"grad_norm": 0.7981064552373948,
|
|
"learning_rate": 5.4482758620689655e-05,
|
|
"loss": 0.4736,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.3424460431654676,
|
|
"grad_norm": 2.437046114331064,
|
|
"learning_rate": 5.471264367816092e-05,
|
|
"loss": 0.4873,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.34388489208633094,
|
|
"grad_norm": 1.6978724887649783,
|
|
"learning_rate": 5.494252873563219e-05,
|
|
"loss": 0.4917,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.34532374100719426,
|
|
"grad_norm": 1.95814675963988,
|
|
"learning_rate": 5.517241379310345e-05,
|
|
"loss": 0.4884,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.34676258992805753,
|
|
"grad_norm": 1.456311592048421,
|
|
"learning_rate": 5.5402298850574715e-05,
|
|
"loss": 0.478,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.34820143884892085,
|
|
"grad_norm": 1.7571642842965671,
|
|
"learning_rate": 5.563218390804598e-05,
|
|
"loss": 0.4852,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3496402877697842,
|
|
"grad_norm": 1.4077940826002997,
|
|
"learning_rate": 5.5862068965517245e-05,
|
|
"loss": 0.479,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.3510791366906475,
|
|
"grad_norm": 1.4152110915409488,
|
|
"learning_rate": 5.609195402298851e-05,
|
|
"loss": 0.4785,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.35251798561151076,
|
|
"grad_norm": 1.3911716799269604,
|
|
"learning_rate": 5.632183908045977e-05,
|
|
"loss": 0.4793,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.3539568345323741,
|
|
"grad_norm": 1.2727687269305468,
|
|
"learning_rate": 5.6551724137931037e-05,
|
|
"loss": 0.4789,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.3553956834532374,
|
|
"grad_norm": 1.0470903884339842,
|
|
"learning_rate": 5.6781609195402305e-05,
|
|
"loss": 0.475,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.35683453237410073,
|
|
"grad_norm": 1.471969264661154,
|
|
"learning_rate": 5.7011494252873567e-05,
|
|
"loss": 0.4826,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.35827338129496406,
|
|
"grad_norm": 0.8526524711196928,
|
|
"learning_rate": 5.7241379310344835e-05,
|
|
"loss": 0.4779,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3597122302158273,
|
|
"grad_norm": 1.3836455059251258,
|
|
"learning_rate": 5.747126436781609e-05,
|
|
"loss": 0.4863,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.36115107913669064,
|
|
"grad_norm": 0.8944998067942929,
|
|
"learning_rate": 5.770114942528736e-05,
|
|
"loss": 0.4697,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.36258992805755397,
|
|
"grad_norm": 1.4330250304456904,
|
|
"learning_rate": 5.7931034482758627e-05,
|
|
"loss": 0.4787,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.3640287769784173,
|
|
"grad_norm": 1.0755531621439596,
|
|
"learning_rate": 5.8160919540229895e-05,
|
|
"loss": 0.4772,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.36546762589928056,
|
|
"grad_norm": 1.3573618476314684,
|
|
"learning_rate": 5.839080459770116e-05,
|
|
"loss": 0.4835,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3669064748201439,
|
|
"grad_norm": 1.1609739231846,
|
|
"learning_rate": 5.862068965517242e-05,
|
|
"loss": 0.4765,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3683453237410072,
|
|
"grad_norm": 1.2329083137268348,
|
|
"learning_rate": 5.885057471264368e-05,
|
|
"loss": 0.4805,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.3697841726618705,
|
|
"grad_norm": 1.0354436874062634,
|
|
"learning_rate": 5.908045977011495e-05,
|
|
"loss": 0.4743,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3712230215827338,
|
|
"grad_norm": 1.1490260146649398,
|
|
"learning_rate": 5.931034482758622e-05,
|
|
"loss": 0.492,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3726618705035971,
|
|
"grad_norm": 1.1550233955158198,
|
|
"learning_rate": 5.954022988505747e-05,
|
|
"loss": 0.4795,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.37410071942446044,
|
|
"grad_norm": 1.4633704801274168,
|
|
"learning_rate": 5.977011494252874e-05,
|
|
"loss": 0.4849,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.37553956834532376,
|
|
"grad_norm": 1.1034158640688032,
|
|
"learning_rate": 6.000000000000001e-05,
|
|
"loss": 0.481,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.376978417266187,
|
|
"grad_norm": 0.752131880539697,
|
|
"learning_rate": 6.022988505747127e-05,
|
|
"loss": 0.4721,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.37841726618705035,
|
|
"grad_norm": 0.9362719185877103,
|
|
"learning_rate": 6.045977011494254e-05,
|
|
"loss": 0.4752,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.37985611510791367,
|
|
"grad_norm": 1.57716773950629,
|
|
"learning_rate": 6.068965517241379e-05,
|
|
"loss": 0.4814,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.381294964028777,
|
|
"grad_norm": 0.99467977060491,
|
|
"learning_rate": 6.091954022988506e-05,
|
|
"loss": 0.4761,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.38273381294964026,
|
|
"grad_norm": 1.5233280657576584,
|
|
"learning_rate": 6.114942528735632e-05,
|
|
"loss": 0.4744,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.3841726618705036,
|
|
"grad_norm": 0.9957618261915175,
|
|
"learning_rate": 6.137931034482759e-05,
|
|
"loss": 0.4772,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.3856115107913669,
|
|
"grad_norm": 0.9794755530669075,
|
|
"learning_rate": 6.160919540229885e-05,
|
|
"loss": 0.4716,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.38705035971223023,
|
|
"grad_norm": 1.7812089198791918,
|
|
"learning_rate": 6.183908045977011e-05,
|
|
"loss": 0.4833,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.38848920863309355,
|
|
"grad_norm": 0.7433427434830162,
|
|
"learning_rate": 6.206896551724138e-05,
|
|
"loss": 0.4666,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.3899280575539568,
|
|
"grad_norm": 1.7765435960688931,
|
|
"learning_rate": 6.229885057471265e-05,
|
|
"loss": 0.4795,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.39136690647482014,
|
|
"grad_norm": 1.0397303716400306,
|
|
"learning_rate": 6.252873563218392e-05,
|
|
"loss": 0.4716,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.39280575539568346,
|
|
"grad_norm": 1.5836626593590595,
|
|
"learning_rate": 6.275862068965517e-05,
|
|
"loss": 0.4778,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.3942446043165468,
|
|
"grad_norm": 0.9930230489753338,
|
|
"learning_rate": 6.298850574712644e-05,
|
|
"loss": 0.4692,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.39568345323741005,
|
|
"grad_norm": 1.6230739555013953,
|
|
"learning_rate": 6.321839080459771e-05,
|
|
"loss": 0.4689,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.3971223021582734,
|
|
"grad_norm": 1.1623176138620104,
|
|
"learning_rate": 6.344827586206897e-05,
|
|
"loss": 0.477,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.3985611510791367,
|
|
"grad_norm": 1.4647190319569283,
|
|
"learning_rate": 6.367816091954023e-05,
|
|
"loss": 0.4744,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"grad_norm": 1.041723502822214,
|
|
"learning_rate": 6.39080459770115e-05,
|
|
"loss": 0.4748,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.4014388489208633,
|
|
"grad_norm": 1.2943217280710306,
|
|
"learning_rate": 6.413793103448276e-05,
|
|
"loss": 0.4766,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.4028776978417266,
|
|
"grad_norm": 1.2799211391282943,
|
|
"learning_rate": 6.436781609195403e-05,
|
|
"loss": 0.4736,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.40431654676258993,
|
|
"grad_norm": 0.9998756572288829,
|
|
"learning_rate": 6.45977011494253e-05,
|
|
"loss": 0.4752,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.40575539568345326,
|
|
"grad_norm": 0.8197435470582363,
|
|
"learning_rate": 6.482758620689655e-05,
|
|
"loss": 0.4615,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.4071942446043165,
|
|
"grad_norm": 0.9314555479570825,
|
|
"learning_rate": 6.505747126436782e-05,
|
|
"loss": 0.4787,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.40863309352517985,
|
|
"grad_norm": 1.4092073558560732,
|
|
"learning_rate": 6.528735632183909e-05,
|
|
"loss": 0.487,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.41007194244604317,
|
|
"grad_norm": 1.1735842928383202,
|
|
"learning_rate": 6.551724137931035e-05,
|
|
"loss": 0.4702,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4115107913669065,
|
|
"grad_norm": 1.4440630714654656,
|
|
"learning_rate": 6.574712643678162e-05,
|
|
"loss": 0.4847,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.41294964028776976,
|
|
"grad_norm": 0.6374821445101378,
|
|
"learning_rate": 6.597701149425288e-05,
|
|
"loss": 0.4622,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.4143884892086331,
|
|
"grad_norm": 1.3843695467143289,
|
|
"learning_rate": 6.620689655172415e-05,
|
|
"loss": 0.4799,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.4158273381294964,
|
|
"grad_norm": 1.0101432669113624,
|
|
"learning_rate": 6.643678160919542e-05,
|
|
"loss": 0.4716,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.4172661870503597,
|
|
"grad_norm": 1.1923217143464735,
|
|
"learning_rate": 6.666666666666667e-05,
|
|
"loss": 0.4797,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.418705035971223,
|
|
"grad_norm": 1.322447797878433,
|
|
"learning_rate": 6.689655172413794e-05,
|
|
"loss": 0.4647,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4201438848920863,
|
|
"grad_norm": 1.235908499874906,
|
|
"learning_rate": 6.712643678160919e-05,
|
|
"loss": 0.4843,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.42158273381294964,
|
|
"grad_norm": 0.9958230517047106,
|
|
"learning_rate": 6.735632183908046e-05,
|
|
"loss": 0.4789,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.42302158273381296,
|
|
"grad_norm": 1.6221967196100449,
|
|
"learning_rate": 6.758620689655173e-05,
|
|
"loss": 0.4809,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4244604316546763,
|
|
"grad_norm": 1.123970026578974,
|
|
"learning_rate": 6.7816091954023e-05,
|
|
"loss": 0.4728,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.42589928057553955,
|
|
"grad_norm": 1.5683450016017422,
|
|
"learning_rate": 6.804597701149425e-05,
|
|
"loss": 0.4754,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.4273381294964029,
|
|
"grad_norm": 1.1554660855916954,
|
|
"learning_rate": 6.827586206896552e-05,
|
|
"loss": 0.4779,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.4287769784172662,
|
|
"grad_norm": 1.5006199543409728,
|
|
"learning_rate": 6.850574712643679e-05,
|
|
"loss": 0.4699,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4302158273381295,
|
|
"grad_norm": 0.9311000461643123,
|
|
"learning_rate": 6.873563218390806e-05,
|
|
"loss": 0.4749,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.4316546762589928,
|
|
"grad_norm": 1.4337266159407287,
|
|
"learning_rate": 6.896551724137931e-05,
|
|
"loss": 0.477,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.4330935251798561,
|
|
"grad_norm": 1.0125745035929625,
|
|
"learning_rate": 6.919540229885058e-05,
|
|
"loss": 0.4727,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.43453237410071943,
|
|
"grad_norm": 1.0555550707618235,
|
|
"learning_rate": 6.942528735632185e-05,
|
|
"loss": 0.4669,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.43597122302158275,
|
|
"grad_norm": 1.2392319573707697,
|
|
"learning_rate": 6.96551724137931e-05,
|
|
"loss": 0.4762,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.437410071942446,
|
|
"grad_norm": 1.2318134285261437,
|
|
"learning_rate": 6.988505747126437e-05,
|
|
"loss": 0.4747,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.43884892086330934,
|
|
"grad_norm": 1.0045070320980867,
|
|
"learning_rate": 7.011494252873563e-05,
|
|
"loss": 0.4799,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.44028776978417267,
|
|
"grad_norm": 1.0146786574538775,
|
|
"learning_rate": 7.03448275862069e-05,
|
|
"loss": 0.4716,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.441726618705036,
|
|
"grad_norm": 1.2386842639170537,
|
|
"learning_rate": 7.057471264367816e-05,
|
|
"loss": 0.4774,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.44316546762589926,
|
|
"grad_norm": 1.648990466575965,
|
|
"learning_rate": 7.080459770114943e-05,
|
|
"loss": 0.4694,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4446043165467626,
|
|
"grad_norm": 0.6278988696002732,
|
|
"learning_rate": 7.10344827586207e-05,
|
|
"loss": 0.4762,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.4460431654676259,
|
|
"grad_norm": 1.462175354890507,
|
|
"learning_rate": 7.126436781609196e-05,
|
|
"loss": 0.4646,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.4474820143884892,
|
|
"grad_norm": 1.4371787435077172,
|
|
"learning_rate": 7.149425287356322e-05,
|
|
"loss": 0.4717,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.4489208633093525,
|
|
"grad_norm": 0.7427836643418814,
|
|
"learning_rate": 7.17241379310345e-05,
|
|
"loss": 0.4638,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.4503597122302158,
|
|
"grad_norm": 1.3228012449523432,
|
|
"learning_rate": 7.195402298850576e-05,
|
|
"loss": 0.4728,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.45179856115107914,
|
|
"grad_norm": 1.621331723300988,
|
|
"learning_rate": 7.218390804597702e-05,
|
|
"loss": 0.4798,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.45323741007194246,
|
|
"grad_norm": 0.6438140996559021,
|
|
"learning_rate": 7.241379310344828e-05,
|
|
"loss": 0.4722,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.4546762589928058,
|
|
"grad_norm": 1.3613614268410958,
|
|
"learning_rate": 7.264367816091954e-05,
|
|
"loss": 0.4743,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.45611510791366905,
|
|
"grad_norm": 1.1095417357786121,
|
|
"learning_rate": 7.287356321839081e-05,
|
|
"loss": 0.4736,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.45755395683453237,
|
|
"grad_norm": 1.0557047873812653,
|
|
"learning_rate": 7.310344827586208e-05,
|
|
"loss": 0.4762,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.4589928057553957,
|
|
"grad_norm": 0.9958536464873364,
|
|
"learning_rate": 7.333333333333333e-05,
|
|
"loss": 0.4623,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.460431654676259,
|
|
"grad_norm": 1.2008490813463424,
|
|
"learning_rate": 7.35632183908046e-05,
|
|
"loss": 0.4769,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4618705035971223,
|
|
"grad_norm": 0.9055584908001856,
|
|
"learning_rate": 7.379310344827587e-05,
|
|
"loss": 0.4822,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.4633093525179856,
|
|
"grad_norm": 1.8623515490499094,
|
|
"learning_rate": 7.402298850574714e-05,
|
|
"loss": 0.4775,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.46474820143884893,
|
|
"grad_norm": 0.7918120866349179,
|
|
"learning_rate": 7.425287356321839e-05,
|
|
"loss": 0.4768,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.46618705035971225,
|
|
"grad_norm": 1.8547513664823356,
|
|
"learning_rate": 7.448275862068966e-05,
|
|
"loss": 0.4815,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.4676258992805755,
|
|
"grad_norm": 1.0653867949146192,
|
|
"learning_rate": 7.471264367816093e-05,
|
|
"loss": 0.4748,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.46906474820143884,
|
|
"grad_norm": 1.7149203846403827,
|
|
"learning_rate": 7.49425287356322e-05,
|
|
"loss": 0.4759,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.47050359712230216,
|
|
"grad_norm": 1.3504481902098735,
|
|
"learning_rate": 7.517241379310345e-05,
|
|
"loss": 0.4891,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4719424460431655,
|
|
"grad_norm": 1.0947726183558977,
|
|
"learning_rate": 7.540229885057472e-05,
|
|
"loss": 0.4726,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.47338129496402875,
|
|
"grad_norm": 1.4986860479944213,
|
|
"learning_rate": 7.563218390804599e-05,
|
|
"loss": 0.4939,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4748201438848921,
|
|
"grad_norm": 0.9023164920832701,
|
|
"learning_rate": 7.586206896551724e-05,
|
|
"loss": 0.4816,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.4762589928057554,
|
|
"grad_norm": 1.4873196716650958,
|
|
"learning_rate": 7.609195402298851e-05,
|
|
"loss": 0.4822,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.4776978417266187,
|
|
"grad_norm": 0.90612541966311,
|
|
"learning_rate": 7.632183908045977e-05,
|
|
"loss": 0.4763,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.479136690647482,
|
|
"grad_norm": 1.1491013009982607,
|
|
"learning_rate": 7.655172413793103e-05,
|
|
"loss": 0.4652,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.4805755395683453,
|
|
"grad_norm": 0.9860657743440339,
|
|
"learning_rate": 7.67816091954023e-05,
|
|
"loss": 0.4805,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.48201438848920863,
|
|
"grad_norm": 1.3472255868141507,
|
|
"learning_rate": 7.701149425287357e-05,
|
|
"loss": 0.4784,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.48345323741007196,
|
|
"grad_norm": 0.8101899723846976,
|
|
"learning_rate": 7.724137931034484e-05,
|
|
"loss": 0.47,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.4848920863309353,
|
|
"grad_norm": 0.9407473519322342,
|
|
"learning_rate": 7.74712643678161e-05,
|
|
"loss": 0.4688,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.48633093525179855,
|
|
"grad_norm": 0.8597666464109487,
|
|
"learning_rate": 7.770114942528736e-05,
|
|
"loss": 0.4729,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.48776978417266187,
|
|
"grad_norm": 0.8135999269864299,
|
|
"learning_rate": 7.793103448275863e-05,
|
|
"loss": 0.4811,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.4892086330935252,
|
|
"grad_norm": 0.853228997601594,
|
|
"learning_rate": 7.81609195402299e-05,
|
|
"loss": 0.4653,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.4906474820143885,
|
|
"grad_norm": 1.4697798073338293,
|
|
"learning_rate": 7.839080459770115e-05,
|
|
"loss": 0.4678,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.4920863309352518,
|
|
"grad_norm": 0.6526206186866667,
|
|
"learning_rate": 7.862068965517242e-05,
|
|
"loss": 0.4562,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.4935251798561151,
|
|
"grad_norm": 0.6606843649335171,
|
|
"learning_rate": 7.885057471264368e-05,
|
|
"loss": 0.4687,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.4949640287769784,
|
|
"grad_norm": 1.0984241193968503,
|
|
"learning_rate": 7.908045977011495e-05,
|
|
"loss": 0.4664,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.49640287769784175,
|
|
"grad_norm": 1.435670165863932,
|
|
"learning_rate": 7.931034482758621e-05,
|
|
"loss": 0.4684,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.497841726618705,
|
|
"grad_norm": 0.7500270791117851,
|
|
"learning_rate": 7.954022988505747e-05,
|
|
"loss": 0.4701,
|
|
"step": 346
|
|
},
|
|
{
"epoch": 0.49928057553956834,
"grad_norm": 1.1222791952213302,
"learning_rate": 7.977011494252874e-05,
"loss": 0.4715,
"step": 347
},
{
"epoch": 0.5007194244604316,
"grad_norm": 1.1386200461254727,
"learning_rate": 8e-05,
"loss": 0.475,
"step": 348
},
{
"epoch": 0.5021582733812949,
"grad_norm": 0.9195473986269556,
"learning_rate": 7.999997981289966e-05,
"loss": 0.458,
"step": 349
},
{
"epoch": 0.5035971223021583,
"grad_norm": 1.4130885954885013,
"learning_rate": 7.999991925161896e-05,
"loss": 0.4689,
"step": 350
},
{
"epoch": 0.5050359712230216,
"grad_norm": 0.7303907223231949,
"learning_rate": 7.999981831621906e-05,
"loss": 0.4674,
"step": 351
},
{
"epoch": 0.5064748201438849,
"grad_norm": 0.7142980566280339,
"learning_rate": 7.999967700680183e-05,
"loss": 0.4678,
"step": 352
},
{
"epoch": 0.5079136690647482,
"grad_norm": 0.9677638275907295,
"learning_rate": 7.99994953235099e-05,
"loss": 0.4622,
"step": 353
},
{
"epoch": 0.5093525179856115,
"grad_norm": 0.7804039134442318,
"learning_rate": 7.999927326652667e-05,
"loss": 0.4697,
"step": 354
},
{
"epoch": 0.5107913669064749,
"grad_norm": 0.6902865077692293,
"learning_rate": 7.999901083607624e-05,
"loss": 0.464,
"step": 355
},
{
"epoch": 0.5122302158273381,
"grad_norm": 0.7495930797682308,
"learning_rate": 7.99987080324235e-05,
"loss": 0.4647,
"step": 356
},
{
"epoch": 0.5136690647482014,
"grad_norm": 1.1819242808237274,
"learning_rate": 7.999836485587415e-05,
"loss": 0.4728,
"step": 357
},
{
"epoch": 0.5151079136690647,
"grad_norm": 1.5936510259758971,
"learning_rate": 7.99979813067745e-05,
"loss": 0.4719,
"step": 358
},
{
"epoch": 0.516546762589928,
"grad_norm": 0.5583020162500151,
"learning_rate": 7.999755738551171e-05,
"loss": 0.4596,
"step": 359
},
{
"epoch": 0.5179856115107914,
"grad_norm": 1.201884532925642,
"learning_rate": 7.999709309251368e-05,
"loss": 0.4655,
"step": 360
},
{
"epoch": 0.5194244604316547,
"grad_norm": 1.5272301622840552,
"learning_rate": 7.999658842824904e-05,
"loss": 0.4553,
"step": 361
},
{
"epoch": 0.520863309352518,
"grad_norm": 0.6686122084895196,
"learning_rate": 7.999604339322717e-05,
"loss": 0.4652,
"step": 362
},
{
"epoch": 0.5223021582733813,
"grad_norm": 0.990841553676518,
"learning_rate": 7.999545798799823e-05,
"loss": 0.4728,
"step": 363
},
{
"epoch": 0.5237410071942447,
"grad_norm": 1.441615561659725,
"learning_rate": 7.999483221315307e-05,
"loss": 0.4584,
"step": 364
},
{
"epoch": 0.5251798561151079,
"grad_norm": 0.9069766260531882,
"learning_rate": 7.999416606932331e-05,
"loss": 0.4718,
"step": 365
},
{
"epoch": 0.5266187050359712,
"grad_norm": 1.2078365801340563,
"learning_rate": 7.999345955718136e-05,
"loss": 0.462,
"step": 366
},
{
"epoch": 0.5280575539568345,
"grad_norm": 0.9131384688483651,
"learning_rate": 7.999271267744033e-05,
"loss": 0.4644,
"step": 367
},
{
"epoch": 0.5294964028776978,
"grad_norm": 1.4735555333176147,
"learning_rate": 7.999192543085407e-05,
"loss": 0.4706,
"step": 368
},
{
"epoch": 0.5309352517985612,
"grad_norm": 0.7312832314357198,
"learning_rate": 7.999109781821722e-05,
"loss": 0.4707,
"step": 369
},
{
"epoch": 0.5323741007194245,
"grad_norm": 1.4111231085543015,
"learning_rate": 7.999022984036512e-05,
"loss": 0.4802,
"step": 370
},
{
"epoch": 0.5338129496402878,
"grad_norm": 0.8768842135582798,
"learning_rate": 7.998932149817386e-05,
"loss": 0.4635,
"step": 371
},
{
"epoch": 0.5352517985611511,
"grad_norm": 1.160512494150119,
"learning_rate": 7.998837279256028e-05,
"loss": 0.4666,
"step": 372
},
{
"epoch": 0.5366906474820143,
"grad_norm": 0.8576419441967398,
"learning_rate": 7.998738372448196e-05,
"loss": 0.4715,
"step": 373
},
{
"epoch": 0.5381294964028777,
"grad_norm": 0.9764228808874443,
"learning_rate": 7.998635429493726e-05,
"loss": 0.4575,
"step": 374
},
{
"epoch": 0.539568345323741,
"grad_norm": 0.9093833535143514,
"learning_rate": 7.998528450496519e-05,
"loss": 0.4611,
"step": 375
},
{
"epoch": 0.5410071942446043,
"grad_norm": 1.469283783734032,
"learning_rate": 7.998417435564557e-05,
"loss": 0.4727,
"step": 376
},
{
"epoch": 0.5424460431654676,
"grad_norm": 0.8968076955000717,
"learning_rate": 7.998302384809893e-05,
"loss": 0.4692,
"step": 377
},
{
"epoch": 0.543884892086331,
"grad_norm": 0.935464826975667,
"learning_rate": 7.998183298348654e-05,
"loss": 0.4626,
"step": 378
},
{
"epoch": 0.5453237410071943,
"grad_norm": 0.9404272564188892,
"learning_rate": 7.998060176301041e-05,
"loss": 0.4736,
"step": 379
},
{
"epoch": 0.5467625899280576,
"grad_norm": 0.6555997788334617,
"learning_rate": 7.997933018791327e-05,
"loss": 0.4637,
"step": 380
},
{
"epoch": 0.5482014388489208,
"grad_norm": 0.8759447455436626,
"learning_rate": 7.99780182594786e-05,
"loss": 0.4689,
"step": 381
},
{
"epoch": 0.5496402877697841,
"grad_norm": 0.8478996332945888,
"learning_rate": 7.99766659790306e-05,
"loss": 0.462,
"step": 382
},
{
"epoch": 0.5510791366906475,
"grad_norm": 0.97379046627732,
"learning_rate": 7.997527334793419e-05,
"loss": 0.4686,
"step": 383
},
{
"epoch": 0.5525179856115108,
"grad_norm": 1.39483002340797,
"learning_rate": 7.997384036759505e-05,
"loss": 0.4751,
"step": 384
},
{
"epoch": 0.5539568345323741,
"grad_norm": 0.9192915286087388,
"learning_rate": 7.997236703945955e-05,
"loss": 0.4608,
"step": 385
},
{
"epoch": 0.5553956834532374,
"grad_norm": 1.1785712371486836,
"learning_rate": 7.99708533650148e-05,
"loss": 0.4809,
"step": 386
},
{
"epoch": 0.5568345323741007,
"grad_norm": 0.9602615599727863,
"learning_rate": 7.996929934578864e-05,
"loss": 0.4605,
"step": 387
},
{
"epoch": 0.5582733812949641,
"grad_norm": 1.4851353465185586,
"learning_rate": 7.996770498334963e-05,
"loss": 0.4709,
"step": 388
},
{
"epoch": 0.5597122302158274,
"grad_norm": 0.6507454426541847,
"learning_rate": 7.996607027930705e-05,
"loss": 0.4671,
"step": 389
},
{
"epoch": 0.5611510791366906,
"grad_norm": 1.4342523031736476,
"learning_rate": 7.996439523531088e-05,
"loss": 0.4711,
"step": 390
},
{
"epoch": 0.5625899280575539,
"grad_norm": 0.806056538163148,
"learning_rate": 7.996267985305186e-05,
"loss": 0.4698,
"step": 391
},
{
"epoch": 0.5640287769784172,
"grad_norm": 1.2054975787837283,
"learning_rate": 7.99609241342614e-05,
"loss": 0.4663,
"step": 392
},
{
"epoch": 0.5654676258992806,
"grad_norm": 0.6283174635123585,
"learning_rate": 7.995912808071164e-05,
"loss": 0.4585,
"step": 393
},
{
"epoch": 0.5669064748201439,
"grad_norm": 1.0486227042739547,
"learning_rate": 7.995729169421545e-05,
"loss": 0.4691,
"step": 394
},
{
"epoch": 0.5683453237410072,
"grad_norm": 0.8863716543575247,
"learning_rate": 7.99554149766264e-05,
"loss": 0.4662,
"step": 395
},
{
"epoch": 0.5697841726618705,
"grad_norm": 1.0877053664401433,
"learning_rate": 7.995349792983874e-05,
"loss": 0.4554,
"step": 396
},
{
"epoch": 0.5712230215827339,
"grad_norm": 0.7359596530468507,
"learning_rate": 7.995154055578748e-05,
"loss": 0.462,
"step": 397
},
{
"epoch": 0.5726618705035971,
"grad_norm": 0.975106188214437,
"learning_rate": 7.994954285644827e-05,
"loss": 0.4637,
"step": 398
},
{
"epoch": 0.5741007194244604,
"grad_norm": 1.3453407872221257,
"learning_rate": 7.994750483383753e-05,
"loss": 0.4662,
"step": 399
},
{
"epoch": 0.5755395683453237,
"grad_norm": 0.8562983756907204,
"learning_rate": 7.994542649001235e-05,
"loss": 0.4588,
"step": 400
},
{
"epoch": 0.576978417266187,
"grad_norm": 0.5256870311962595,
"learning_rate": 7.994330782707048e-05,
"loss": 0.4668,
"step": 401
},
{
"epoch": 0.5784172661870504,
"grad_norm": 0.6553104222043606,
"learning_rate": 7.994114884715045e-05,
"loss": 0.4533,
"step": 402
},
{
"epoch": 0.5798561151079137,
"grad_norm": 0.7873652166872883,
"learning_rate": 7.99389495524314e-05,
"loss": 0.4656,
"step": 403
},
{
"epoch": 0.581294964028777,
"grad_norm": 1.0080941280257354,
"learning_rate": 7.993670994513321e-05,
"loss": 0.4579,
"step": 404
},
{
"epoch": 0.5827338129496403,
"grad_norm": 1.2774805391881845,
"learning_rate": 7.993443002751646e-05,
"loss": 0.4572,
"step": 405
},
{
"epoch": 0.5841726618705037,
"grad_norm": 0.7438427012923391,
"learning_rate": 7.993210980188236e-05,
"loss": 0.4707,
"step": 406
},
{
"epoch": 0.5856115107913669,
"grad_norm": 0.8090776674223477,
"learning_rate": 7.992974927057287e-05,
"loss": 0.4533,
"step": 407
},
{
"epoch": 0.5870503597122302,
"grad_norm": 1.0280993101932538,
"learning_rate": 7.992734843597058e-05,
"loss": 0.4491,
"step": 408
},
{
"epoch": 0.5884892086330935,
"grad_norm": 0.9015743445613568,
"learning_rate": 7.992490730049881e-05,
"loss": 0.458,
"step": 409
},
{
|
|
"epoch": 0.5899280575539568,
|
|
"grad_norm": 0.7943608906905841,
|
|
"learning_rate": 7.992242586662152e-05,
|
|
"loss": 0.4572,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.5913669064748202,
|
|
"grad_norm": 0.7810122841010604,
|
|
"learning_rate": 7.991990413684336e-05,
|
|
"loss": 0.4641,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.5928057553956835,
|
|
"grad_norm": 1.01234826037888,
|
|
"learning_rate": 7.991734211370965e-05,
|
|
"loss": 0.4656,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.5942446043165468,
|
|
"grad_norm": 1.2813468744793832,
|
|
"learning_rate": 7.991473979980637e-05,
|
|
"loss": 0.4625,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.5956834532374101,
|
|
"grad_norm": 0.7226288723110718,
|
|
"learning_rate": 7.99120971977602e-05,
|
|
"loss": 0.4602,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.5971223021582733,
|
|
"grad_norm": 0.4868939995811055,
|
|
"learning_rate": 7.990941431023844e-05,
|
|
"loss": 0.4624,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.5985611510791367,
|
|
"grad_norm": 0.510906222214295,
|
|
"learning_rate": 7.990669113994911e-05,
|
|
"loss": 0.4488,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"grad_norm": 0.8681152921527936,
|
|
"learning_rate": 7.99039276896408e-05,
|
|
"loss": 0.4522,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6014388489208633,
|
|
"grad_norm": 1.2722714007264124,
|
|
"learning_rate": 7.990112396210288e-05,
|
|
"loss": 0.4641,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6028776978417266,
|
|
"grad_norm": 0.7564553679786442,
|
|
"learning_rate": 7.989827996016525e-05,
|
|
"loss": 0.4531,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.60431654676259,
|
|
"grad_norm": 0.5909206748440362,
|
|
"learning_rate": 7.989539568669856e-05,
|
|
"loss": 0.4488,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.6057553956834533,
|
|
"grad_norm": 0.9627613152682845,
|
|
"learning_rate": 7.989247114461403e-05,
|
|
"loss": 0.4575,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6071942446043166,
|
|
"grad_norm": 1.2832978048246335,
|
|
"learning_rate": 7.988950633686358e-05,
|
|
"loss": 0.4635,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6086330935251798,
|
|
"grad_norm": 0.49389608523622186,
|
|
"learning_rate": 7.988650126643976e-05,
|
|
"loss": 0.4619,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6100719424460431,
|
|
"grad_norm": 0.8251380503980983,
|
|
"learning_rate": 7.988345593637572e-05,
|
|
"loss": 0.4491,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6115107913669064,
|
|
"grad_norm": 1.1868707171037227,
|
|
"learning_rate": 7.988037034974532e-05,
|
|
"loss": 0.4512,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6129496402877698,
|
|
"grad_norm": 0.795601356103857,
|
|
"learning_rate": 7.9877244509663e-05,
|
|
"loss": 0.4547,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6143884892086331,
|
|
"grad_norm": 1.0012017435168794,
|
|
"learning_rate": 7.987407841928384e-05,
|
|
"loss": 0.4591,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6158273381294964,
|
|
"grad_norm": 1.2303921263144655,
|
|
"learning_rate": 7.987087208180355e-05,
|
|
"loss": 0.4619,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6172661870503597,
|
|
"grad_norm": 0.7948676938353377,
|
|
"learning_rate": 7.986762550045844e-05,
|
|
"loss": 0.4435,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.6187050359712231,
|
|
"grad_norm": 1.0192630222580203,
|
|
"learning_rate": 7.98643386785255e-05,
|
|
"loss": 0.4558,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6201438848920864,
|
|
"grad_norm": 0.8479267191154201,
|
|
"learning_rate": 7.986101161932227e-05,
|
|
"loss": 0.4556,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6215827338129496,
|
|
"grad_norm": 0.749235916119396,
|
|
"learning_rate": 7.985764432620695e-05,
|
|
"loss": 0.4524,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.6230215827338129,
|
|
"grad_norm": 0.9969042623037273,
|
|
"learning_rate": 7.985423680257833e-05,
|
|
"loss": 0.463,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.6244604316546762,
|
|
"grad_norm": 0.8901575635801605,
|
|
"learning_rate": 7.985078905187582e-05,
|
|
"loss": 0.4496,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6258992805755396,
|
|
"grad_norm": 1.0396297942131238,
|
|
"learning_rate": 7.984730107757942e-05,
|
|
"loss": 0.4472,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6273381294964029,
|
|
"grad_norm": 1.0875797454112965,
|
|
"learning_rate": 7.984377288320973e-05,
|
|
"loss": 0.4431,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6287769784172662,
|
|
"grad_norm": 0.7133102319403347,
|
|
"learning_rate": 7.984020447232795e-05,
|
|
"loss": 0.4518,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6302158273381295,
|
|
"grad_norm": 0.5661743246060049,
|
|
"learning_rate": 7.983659584853586e-05,
|
|
"loss": 0.459,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.6316546762589929,
|
|
"grad_norm": 0.36316916612260663,
|
|
"learning_rate": 7.983294701547588e-05,
|
|
"loss": 0.4507,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6330935251798561,
|
|
"grad_norm": 0.47607077300265466,
|
|
"learning_rate": 7.982925797683095e-05,
|
|
"loss": 0.454,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6345323741007194,
|
|
"grad_norm": 0.5871942669081124,
|
|
"learning_rate": 7.982552873632461e-05,
|
|
"loss": 0.4623,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6359712230215827,
|
|
"grad_norm": 0.7725751556985518,
|
|
"learning_rate": 7.982175929772102e-05,
|
|
"loss": 0.4418,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.637410071942446,
|
|
"grad_norm": 0.9685969908404306,
|
|
"learning_rate": 7.981794966482486e-05,
|
|
"loss": 0.4428,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.6388489208633094,
|
|
"grad_norm": 1.1228124147573655,
|
|
"learning_rate": 7.98140998414814e-05,
|
|
"loss": 0.4583,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.6402877697841727,
|
|
"grad_norm": 0.9583498742165508,
|
|
"learning_rate": 7.98102098315765e-05,
|
|
"loss": 0.4631,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.641726618705036,
|
|
"grad_norm": 0.9139786799865116,
|
|
"learning_rate": 7.980627963903654e-05,
|
|
"loss": 0.455,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6431654676258993,
|
|
"grad_norm": 0.973655562211781,
|
|
"learning_rate": 7.980230926782848e-05,
|
|
"loss": 0.4608,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.6446043165467625,
|
|
"grad_norm": 1.3766701670954513,
|
|
"learning_rate": 7.979829872195984e-05,
|
|
"loss": 0.4559,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6460431654676259,
|
|
"grad_norm": 0.6783473812531177,
|
|
"learning_rate": 7.979424800547869e-05,
|
|
"loss": 0.4581,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6474820143884892,
|
|
"grad_norm": 1.0801477660526912,
|
|
"learning_rate": 7.979015712247365e-05,
|
|
"loss": 0.4597,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6489208633093525,
|
|
"grad_norm": 1.2323608530128367,
|
|
"learning_rate": 7.978602607707383e-05,
|
|
"loss": 0.4528,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6503597122302158,
|
|
"grad_norm": 0.8443553213750237,
|
|
"learning_rate": 7.978185487344897e-05,
|
|
"loss": 0.4576,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6517985611510791,
|
|
"grad_norm": 0.9937579986423505,
|
|
"learning_rate": 7.977764351580928e-05,
|
|
"loss": 0.4647,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6532374100719425,
|
|
"grad_norm": 1.1216957091501647,
|
|
"learning_rate": 7.97733920084055e-05,
|
|
"loss": 0.4587,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6546762589928058,
|
|
"grad_norm": 0.7022211526266514,
|
|
"learning_rate": 7.976910035552892e-05,
|
|
"loss": 0.4488,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6561151079136691,
|
|
"grad_norm": 0.6303723585246381,
|
|
"learning_rate": 7.976476856151134e-05,
|
|
"loss": 0.4529,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.6575539568345323,
|
|
"grad_norm": 0.6999584608033735,
|
|
"learning_rate": 7.976039663072509e-05,
|
|
"loss": 0.4466,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6589928057553956,
|
|
"grad_norm": 0.675458409003255,
|
|
"learning_rate": 7.975598456758298e-05,
|
|
"loss": 0.465,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.660431654676259,
|
|
"grad_norm": 0.5283662437152983,
|
|
"learning_rate": 7.975153237653836e-05,
|
|
"loss": 0.4563,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6618705035971223,
|
|
"grad_norm": 0.5540932163036377,
|
|
"learning_rate": 7.974704006208509e-05,
|
|
"loss": 0.4553,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6633093525179856,
|
|
"grad_norm": 0.5579097412206523,
|
|
"learning_rate": 7.974250762875747e-05,
|
|
"loss": 0.4592,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6647482014388489,
|
|
"grad_norm": 0.5038880750321836,
|
|
"learning_rate": 7.973793508113035e-05,
|
|
"loss": 0.46,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6661870503597123,
|
|
"grad_norm": 0.4905843650368768,
|
|
"learning_rate": 7.973332242381908e-05,
|
|
"loss": 0.4519,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.6676258992805756,
|
|
"grad_norm": 0.5354123323800415,
|
|
"learning_rate": 7.972866966147942e-05,
|
|
"loss": 0.4584,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.6690647482014388,
|
|
"grad_norm": 0.44950758218219206,
|
|
"learning_rate": 7.972397679880771e-05,
|
|
"loss": 0.4563,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.6705035971223021,
|
|
"grad_norm": 0.4850625570611014,
|
|
"learning_rate": 7.971924384054068e-05,
|
|
"loss": 0.4458,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.6719424460431654,
|
|
"grad_norm": 0.48121170168916394,
|
|
"learning_rate": 7.971447079145557e-05,
|
|
"loss": 0.4486,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.6733812949640288,
|
|
"grad_norm": 0.5471652738196282,
|
|
"learning_rate": 7.970965765637011e-05,
|
|
"loss": 0.4447,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.6748201438848921,
|
|
"grad_norm": 0.7959450865209821,
|
|
"learning_rate": 7.970480444014244e-05,
|
|
"loss": 0.4497,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.6762589928057554,
|
|
"grad_norm": 1.1443975799693216,
|
|
"learning_rate": 7.969991114767114e-05,
|
|
"loss": 0.4621,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.6776978417266187,
|
|
"grad_norm": 1.154696833754496,
|
|
"learning_rate": 7.969497778389534e-05,
|
|
"loss": 0.4498,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.679136690647482,
|
|
"grad_norm": 0.8929202024187745,
|
|
"learning_rate": 7.969000435379454e-05,
|
|
"loss": 0.4505,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.6805755395683454,
|
|
"grad_norm": 0.8386705102769161,
|
|
"learning_rate": 7.968499086238867e-05,
|
|
"loss": 0.4566,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.6820143884892086,
|
|
"grad_norm": 0.7740131386063932,
|
|
"learning_rate": 7.967993731473815e-05,
|
|
"loss": 0.4511,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.6834532374100719,
|
|
"grad_norm": 0.7529160977908556,
|
|
"learning_rate": 7.96748437159438e-05,
|
|
"loss": 0.4549,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.6848920863309352,
|
|
"grad_norm": 0.8705557060523678,
|
|
"learning_rate": 7.966971007114686e-05,
|
|
"loss": 0.4502,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.6863309352517986,
|
|
"grad_norm": 1.0335512201394366,
|
|
"learning_rate": 7.966453638552901e-05,
|
|
"loss": 0.452,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.6877697841726619,
|
|
"grad_norm": 0.9374367671628433,
|
|
"learning_rate": 7.965932266431232e-05,
|
|
"loss": 0.4486,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.6892086330935252,
|
|
"grad_norm": 0.6776996825108257,
|
|
"learning_rate": 7.96540689127593e-05,
|
|
"loss": 0.4484,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.6906474820143885,
|
|
"grad_norm": 0.46849093853233853,
|
|
"learning_rate": 7.964877513617285e-05,
|
|
"loss": 0.4449,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.6920863309352518,
|
|
"grad_norm": 0.5014075250893144,
|
|
"learning_rate": 7.964344133989627e-05,
|
|
"loss": 0.4466,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.6935251798561151,
|
|
"grad_norm": 0.6641974297611191,
|
|
"learning_rate": 7.963806752931324e-05,
|
|
"loss": 0.4547,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.6949640287769784,
|
|
"grad_norm": 0.7211507658327702,
|
|
"learning_rate": 7.963265370984786e-05,
|
|
"loss": 0.454,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.6964028776978417,
|
|
"grad_norm": 0.7280077622434283,
|
|
"learning_rate": 7.962719988696458e-05,
|
|
"loss": 0.4509,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.697841726618705,
|
|
"grad_norm": 0.7149058196071809,
|
|
"learning_rate": 7.962170606616826e-05,
|
|
"loss": 0.4518,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.6992805755395683,
|
|
"grad_norm": 0.6418442445176309,
|
|
"learning_rate": 7.96161722530041e-05,
|
|
"loss": 0.449,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7007194244604317,
|
|
"grad_norm": 0.605700075210804,
|
|
"learning_rate": 7.96105984530577e-05,
|
|
"loss": 0.4283,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.702158273381295,
|
|
"grad_norm": 0.5749443105207445,
|
|
"learning_rate": 7.9604984671955e-05,
|
|
"loss": 0.4382,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7035971223021583,
|
|
"grad_norm": 0.5999308283345489,
|
|
"learning_rate": 7.959933091536227e-05,
|
|
"loss": 0.4495,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7050359712230215,
|
|
"grad_norm": 0.6662269793895412,
|
|
"learning_rate": 7.95936371889862e-05,
|
|
"loss": 0.4485,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7064748201438849,
|
|
"grad_norm": 0.8538589338165337,
|
|
"learning_rate": 7.958790349857375e-05,
|
|
"loss": 0.4435,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7079136690647482,
|
|
"grad_norm": 1.1382876250800844,
|
|
"learning_rate": 7.958212984991226e-05,
|
|
"loss": 0.4525,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.7093525179856115,
|
|
"grad_norm": 0.7968218034899702,
|
|
"learning_rate": 7.957631624882938e-05,
|
|
"loss": 0.4516,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7107913669064748,
|
|
"grad_norm": 0.5700570269618261,
|
|
"learning_rate": 7.957046270119313e-05,
|
|
"loss": 0.4535,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7122302158273381,
|
|
"grad_norm": 0.6944003662435277,
|
|
"learning_rate": 7.956456921291178e-05,
|
|
"loss": 0.4507,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7136690647482015,
|
|
"grad_norm": 1.0095804982644123,
|
|
"learning_rate": 7.955863578993396e-05,
|
|
"loss": 0.4525,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7151079136690648,
|
|
"grad_norm": 1.3548508245138946,
|
|
"learning_rate": 7.955266243824864e-05,
|
|
"loss": 0.4467,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7165467625899281,
|
|
"grad_norm": 0.5336186139995822,
|
|
"learning_rate": 7.954664916388499e-05,
|
|
"loss": 0.4505,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7179856115107913,
|
|
"grad_norm": 1.0601473934148962,
|
|
"learning_rate": 7.954059597291257e-05,
|
|
"loss": 0.4533,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7194244604316546,
|
|
"grad_norm": 1.2635395793026634,
|
|
"learning_rate": 7.953450287144121e-05,
|
|
"loss": 0.4583,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.720863309352518,
|
|
"grad_norm": 0.5752794811048644,
|
|
"learning_rate": 7.952836986562099e-05,
|
|
"loss": 0.4471,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.7223021582733813,
|
|
"grad_norm": 0.9350201467789351,
|
|
"learning_rate": 7.952219696164231e-05,
|
|
"loss": 0.4516,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7237410071942446,
|
|
"grad_norm": 0.9533182744412736,
|
|
"learning_rate": 7.95159841657358e-05,
|
|
"loss": 0.4443,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7251798561151079,
|
|
"grad_norm": 0.8031992625519673,
|
|
"learning_rate": 7.950973148417239e-05,
|
|
"loss": 0.4607,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.7266187050359713,
|
|
"grad_norm": 0.9001672838975818,
|
|
"learning_rate": 7.950343892326327e-05,
|
|
"loss": 0.4557,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.7280575539568346,
|
|
"grad_norm": 1.0832996790081715,
|
|
"learning_rate": 7.949710648935984e-05,
|
|
"loss": 0.4553,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7294964028776978,
|
|
"grad_norm": 0.8705065790314398,
|
|
"learning_rate": 7.949073418885378e-05,
|
|
"loss": 0.4512,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7309352517985611,
|
|
"grad_norm": 0.6764009348155204,
|
|
"learning_rate": 7.948432202817703e-05,
|
|
"loss": 0.4527,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7323741007194244,
|
|
"grad_norm": 0.5468223238668009,
|
|
"learning_rate": 7.94778700138017e-05,
|
|
"loss": 0.4422,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.7338129496402878,
|
|
"grad_norm": 0.7309868037715874,
|
|
"learning_rate": 7.947137815224018e-05,
|
|
"loss": 0.4464,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7352517985611511,
|
|
"grad_norm": 0.7171769802946248,
|
|
"learning_rate": 7.946484645004508e-05,
|
|
"loss": 0.4465,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7366906474820144,
|
|
"grad_norm": 0.5578779500225594,
|
|
"learning_rate": 7.945827491380916e-05,
|
|
"loss": 0.4435,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7381294964028777,
|
|
"grad_norm": 0.6108336958053552,
|
|
"learning_rate": 7.945166355016548e-05,
|
|
"loss": 0.4459,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.739568345323741,
|
|
"grad_norm": 0.49528479139762804,
|
|
"learning_rate": 7.944501236578722e-05,
|
|
"loss": 0.4352,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7410071942446043,
|
|
"grad_norm": 0.46930541822329525,
|
|
"learning_rate": 7.943832136738783e-05,
|
|
"loss": 0.4423,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7424460431654676,
|
|
"grad_norm": 0.6252576742097442,
|
|
"learning_rate": 7.943159056172084e-05,
|
|
"loss": 0.4524,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.7438848920863309,
|
|
"grad_norm": 0.7045435512774456,
|
|
"learning_rate": 7.942481995558007e-05,
|
|
"loss": 0.4402,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.7453237410071942,
|
|
"grad_norm": 0.8197166907854925,
|
|
"learning_rate": 7.941800955579946e-05,
|
|
"loss": 0.4419,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.7467625899280576,
|
|
"grad_norm": 0.9676374236704188,
|
|
"learning_rate": 7.941115936925311e-05,
|
|
"loss": 0.4469,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7482014388489209,
|
|
"grad_norm": 1.135030867509472,
|
|
"learning_rate": 7.940426940285529e-05,
|
|
"loss": 0.4448,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7496402877697842,
|
|
"grad_norm": 0.7848036489889186,
|
|
"learning_rate": 7.939733966356042e-05,
|
|
"loss": 0.4431,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7510791366906475,
|
|
"grad_norm": 0.6470285002519534,
|
|
"learning_rate": 7.939037015836308e-05,
|
|
"loss": 0.4463,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7525179856115108,
|
|
"grad_norm": 0.7933055318531641,
|
|
"learning_rate": 7.938336089429796e-05,
|
|
"loss": 0.4425,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.753956834532374,
|
|
"grad_norm": 0.8639001148447225,
|
|
"learning_rate": 7.937631187843991e-05,
|
|
"loss": 0.444,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7553956834532374,
|
|
"grad_norm": 0.9501597272392723,
|
|
"learning_rate": 7.936922311790388e-05,
|
|
"loss": 0.4523,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7568345323741007,
|
|
"grad_norm": 1.0752808115122383,
|
|
"learning_rate": 7.936209461984495e-05,
|
|
"loss": 0.4493,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.758273381294964,
|
|
"grad_norm": 0.9393022261995804,
|
|
"learning_rate": 7.935492639145831e-05,
|
|
"loss": 0.4491,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.7597122302158273,
|
|
"grad_norm": 0.9254701174088416,
|
|
"learning_rate": 7.934771843997922e-05,
|
|
"loss": 0.4494,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.7611510791366907,
|
|
"grad_norm": 0.9860215644540206,
|
|
"learning_rate": 7.934047077268311e-05,
|
|
"loss": 0.4507,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.762589928057554,
|
|
"grad_norm": 0.9493961490814898,
|
|
"learning_rate": 7.93331833968854e-05,
|
|
"loss": 0.4544,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.7640287769784173,
|
|
"grad_norm": 0.6592673084005449,
|
|
"learning_rate": 7.932585631994168e-05,
|
|
"loss": 0.4559,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.7654676258992805,
|
|
"grad_norm": 0.5728658348991935,
|
|
"learning_rate": 7.931848954924754e-05,
|
|
"loss": 0.4411,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.7669064748201438,
|
|
"grad_norm": 0.7433885615356265,
|
|
"learning_rate": 7.931108309223868e-05,
|
|
"loss": 0.4508,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.7683453237410072,
|
|
"grad_norm": 0.6334833296417265,
|
|
"learning_rate": 7.930363695639085e-05,
|
|
"loss": 0.4586,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.7697841726618705,
|
|
"grad_norm": 0.6185008469663386,
|
|
"learning_rate": 7.929615114921984e-05,
|
|
"loss": 0.4487,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.7712230215827338,
|
|
"grad_norm": 0.6716886194415617,
|
|
"learning_rate": 7.92886256782815e-05,
|
|
"loss": 0.4522,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.7726618705035971,
|
|
"grad_norm": 0.49950746159284265,
|
|
"learning_rate": 7.928106055117168e-05,
|
|
"loss": 0.4459,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.7741007194244605,
|
|
"grad_norm": 0.4553313725701802,
|
|
"learning_rate": 7.927345577552627e-05,
|
|
"loss": 0.4449,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.7755395683453238,
|
|
"grad_norm": 0.6256098249711417,
|
|
"learning_rate": 7.926581135902122e-05,
|
|
"loss": 0.4518,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.7769784172661871,
|
|
"grad_norm": 0.6315617560561654,
|
|
"learning_rate": 7.925812730937245e-05,
|
|
"loss": 0.4516,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.7784172661870503,
|
|
"grad_norm": 0.6241731550176831,
|
|
"learning_rate": 7.92504036343359e-05,
|
|
"loss": 0.4444,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.7798561151079136,
|
|
"grad_norm": 0.6506331312255811,
|
|
"learning_rate": 7.924264034170747e-05,
|
|
"loss": 0.4573,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.781294964028777,
|
|
"grad_norm": 0.7039180202061426,
|
|
"learning_rate": 7.923483743932311e-05,
|
|
"loss": 0.4365,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.7827338129496403,
|
|
"grad_norm": 1.0337455472152288,
|
|
"learning_rate": 7.922699493505871e-05,
|
|
"loss": 0.4526,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.7841726618705036,
|
|
"grad_norm": 1.2089492481892488,
|
|
"learning_rate": 7.921911283683013e-05,
|
|
"loss": 0.4354,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.7856115107913669,
|
|
"grad_norm": 0.7807997329642885,
|
|
"learning_rate": 7.921119115259322e-05,
|
|
"loss": 0.4417,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.7870503597122303,
|
|
"grad_norm": 0.58533815233707,
|
|
"learning_rate": 7.920322989034377e-05,
|
|
"loss": 0.4545,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.7884892086330936,
|
|
"grad_norm": 0.5031302238995897,
|
|
"learning_rate": 7.919522905811752e-05,
|
|
"loss": 0.4473,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.7899280575539568,
|
|
"grad_norm": 0.7014036019165321,
|
|
"learning_rate": 7.918718866399012e-05,
|
|
"loss": 0.443,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.7913669064748201,
|
|
"grad_norm": 0.8000514288225485,
|
|
"learning_rate": 7.917910871607723e-05,
|
|
"loss": 0.4503,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.7928057553956834,
|
|
"grad_norm": 0.924808884301505,
|
|
"learning_rate": 7.917098922253436e-05,
|
|
"loss": 0.4466,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.7942446043165468,
|
|
"grad_norm": 1.0991530657829638,
|
|
"learning_rate": 7.916283019155696e-05,
|
|
"loss": 0.4479,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.7956834532374101,
|
|
"grad_norm": 0.7169539411041372,
|
|
"learning_rate": 7.915463163138041e-05,
|
|
"loss": 0.4315,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.7971223021582734,
|
|
"grad_norm": 0.38158796107939885,
|
|
"learning_rate": 7.914639355027995e-05,
|
|
"loss": 0.4393,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.7985611510791367,
|
|
"grad_norm": 0.49232108851560513,
|
|
"learning_rate": 7.913811595657072e-05,
|
|
"loss": 0.4566,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.7218701882454258,
|
|
"learning_rate": 7.912979885860776e-05,
|
|
"loss": 0.4485,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8014388489208633,
|
|
"grad_norm": 0.7986024640583175,
|
|
"learning_rate": 7.912144226478598e-05,
|
|
"loss": 0.4468,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8028776978417266,
|
|
"grad_norm": 0.8878424822486846,
|
|
"learning_rate": 7.911304618354015e-05,
|
|
"loss": 0.4567,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8043165467625899,
|
|
"grad_norm": 0.9174273403552199,
|
|
"learning_rate": 7.910461062334488e-05,
|
|
"loss": 0.4488,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8057553956834532,
|
|
"grad_norm": 0.891783877563457,
|
|
"learning_rate": 7.909613559271467e-05,
|
|
"loss": 0.4452,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8071942446043165,
|
|
"grad_norm": 0.761160347395357,
|
|
"learning_rate": 7.908762110020382e-05,
|
|
"loss": 0.4545,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8086330935251799,
|
|
"grad_norm": 0.5273976719479472,
|
|
"learning_rate": 7.907906715440649e-05,
|
|
"loss": 0.4392,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8100719424460432,
|
|
"grad_norm": 0.6152187805578019,
|
|
"learning_rate": 7.907047376395661e-05,
|
|
"loss": 0.4551,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8115107913669065,
|
|
"grad_norm": 0.7596694559475021,
|
|
"learning_rate": 7.906184093752801e-05,
|
|
"loss": 0.4362,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.8129496402877698,
|
|
"grad_norm": 0.9119631656962874,
|
|
"learning_rate": 7.905316868383425e-05,
|
|
"loss": 0.4573,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.814388489208633,
|
|
"grad_norm": 0.9202549216116336,
|
|
"learning_rate": 7.904445701162872e-05,
|
|
"loss": 0.4446,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8158273381294964,
|
|
"grad_norm": 0.7212012973297361,
|
|
"learning_rate": 7.903570592970458e-05,
|
|
"loss": 0.4412,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8172661870503597,
|
|
"grad_norm": 0.5048460538460028,
|
|
"learning_rate": 7.902691544689479e-05,
|
|
"loss": 0.4334,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.818705035971223,
|
|
"grad_norm": 0.5797905559470364,
|
|
"learning_rate": 7.901808557207206e-05,
|
|
"loss": 0.4428,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8201438848920863,
|
|
"grad_norm": 0.6826048102448758,
|
|
"learning_rate": 7.900921631414887e-05,
|
|
"loss": 0.4499,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8215827338129497,
|
|
"grad_norm": 0.6163012339630883,
|
|
"learning_rate": 7.900030768207746e-05,
|
|
"loss": 0.437,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.823021582733813,
|
|
"grad_norm": 0.47122542678140017,
|
|
"learning_rate": 7.899135968484979e-05,
|
|
"loss": 0.4288,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8244604316546763,
|
|
"grad_norm": 0.4502520544208999,
|
|
"learning_rate": 7.898237233149758e-05,
|
|
"loss": 0.4491,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8258992805755395,
|
|
"grad_norm": 0.43827333165975796,
|
|
"learning_rate": 7.897334563109225e-05,
|
|
"loss": 0.4441,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8273381294964028,
|
|
"grad_norm": 0.5118963700038387,
|
|
"learning_rate": 7.896427959274494e-05,
|
|
"loss": 0.4531,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8287769784172662,
|
|
"grad_norm": 0.6743432961679259,
|
|
"learning_rate": 7.895517422560651e-05,
|
|
"loss": 0.4392,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.8302158273381295,
|
|
"grad_norm": 0.6820618275094542,
|
|
"learning_rate": 7.89460295388675e-05,
|
|
"loss": 0.4475,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.8316546762589928,
|
|
"grad_norm": 0.5507828286811893,
|
|
"learning_rate": 7.893684554175817e-05,
|
|
"loss": 0.4401,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8330935251798561,
|
|
"grad_norm": 0.5190069058647889,
|
|
"learning_rate": 7.892762224354839e-05,
|
|
"loss": 0.4517,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8345323741007195,
|
|
"grad_norm": 0.46751854179305546,
|
|
"learning_rate": 7.891835965354778e-05,
|
|
"loss": 0.4478,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8359712230215828,
|
|
"grad_norm": 0.5048649343503672,
|
|
"learning_rate": 7.890905778110557e-05,
|
|
"loss": 0.4342,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.837410071942446,
|
|
"grad_norm": 0.7370964661777669,
|
|
"learning_rate": 7.889971663561065e-05,
|
|
"loss": 0.4527,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.8388489208633093,
|
|
"grad_norm": 1.0046879082447995,
|
|
"learning_rate": 7.889033622649155e-05,
|
|
"loss": 0.4355,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8402877697841726,
|
|
"grad_norm": 1.2173404078681964,
|
|
"learning_rate": 7.888091656321644e-05,
|
|
"loss": 0.4502,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.841726618705036,
|
|
"grad_norm": 0.6642423307387302,
|
|
"learning_rate": 7.88714576552931e-05,
|
|
"loss": 0.4435,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8431654676258993,
|
|
"grad_norm": 0.5666567388123632,
|
|
"learning_rate": 7.886195951226892e-05,
|
|
"loss": 0.4357,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8446043165467626,
|
|
"grad_norm": 0.637779909026688,
|
|
"learning_rate": 7.885242214373091e-05,
|
|
"loss": 0.4501,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8460431654676259,
|
|
"grad_norm": 0.7263528050142767,
|
|
"learning_rate": 7.884284555930564e-05,
|
|
"loss": 0.4418,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.8474820143884892,
|
|
"grad_norm": 0.8420977873692586,
|
|
"learning_rate": 7.883322976865932e-05,
|
|
"loss": 0.4463,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.8489208633093526,
|
|
"grad_norm": 0.8259761740732601,
|
|
"learning_rate": 7.882357478149767e-05,
|
|
"loss": 0.4496,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8503597122302158,
|
|
"grad_norm": 0.7465839036047074,
|
|
"learning_rate": 7.8813880607566e-05,
|
|
"loss": 0.4487,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.8517985611510791,
|
|
"grad_norm": 0.8231812069832233,
|
|
"learning_rate": 7.880414725664918e-05,
|
|
"loss": 0.4497,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8532374100719424,
|
|
"grad_norm": 0.8611877285379999,
|
|
"learning_rate": 7.879437473857161e-05,
|
|
"loss": 0.445,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8546762589928057,
|
|
"grad_norm": 0.8530351825139553,
|
|
"learning_rate": 7.878456306319723e-05,
|
|
"loss": 0.4482,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8561151079136691,
|
|
"grad_norm": 0.9213585371396524,
|
|
"learning_rate": 7.877471224042952e-05,
|
|
"loss": 0.4431,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.8575539568345324,
|
|
"grad_norm": 0.6776462396130913,
|
|
"learning_rate": 7.876482228021144e-05,
|
|
"loss": 0.4382,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.8589928057553957,
|
|
"grad_norm": 0.49002843494203835,
|
|
"learning_rate": 7.875489319252549e-05,
|
|
"loss": 0.4412,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.860431654676259,
|
|
"grad_norm": 0.8024400520804449,
|
|
"learning_rate": 7.874492498739362e-05,
|
|
"loss": 0.4465,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.8618705035971223,
|
|
"grad_norm": 0.8727221919435241,
|
|
"learning_rate": 7.87349176748773e-05,
|
|
"loss": 0.4416,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.8633093525179856,
|
|
"grad_norm": 0.5412032633747997,
|
|
"learning_rate": 7.872487126507747e-05,
|
|
"loss": 0.4454,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.8647482014388489,
|
|
"grad_norm": 0.5351987855374966,
|
|
"learning_rate": 7.87147857681345e-05,
|
|
"loss": 0.4601,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.8661870503597122,
|
|
"grad_norm": 0.6007308727092514,
|
|
"learning_rate": 7.870466119422826e-05,
|
|
"loss": 0.4414,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.8676258992805755,
|
|
"grad_norm": 0.5882223407279459,
|
|
"learning_rate": 7.869449755357803e-05,
|
|
"loss": 0.4419,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.8690647482014389,
|
|
"grad_norm": 0.526154872806299,
|
|
"learning_rate": 7.868429485644252e-05,
|
|
"loss": 0.4433,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.8705035971223022,
|
|
"grad_norm": 0.46177571415297813,
|
|
"learning_rate": 7.86740531131199e-05,
|
|
"loss": 0.4492,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.8719424460431655,
|
|
"grad_norm": 0.5916241848896225,
|
|
"learning_rate": 7.866377233394771e-05,
|
|
"loss": 0.446,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.8733812949640288,
|
|
"grad_norm": 0.7085414240402156,
|
|
"learning_rate": 7.865345252930291e-05,
|
|
"loss": 0.4471,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.874820143884892,
|
|
"grad_norm": 0.7856331214031158,
|
|
"learning_rate": 7.864309370960184e-05,
|
|
"loss": 0.4439,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.8762589928057554,
|
|
"grad_norm": 0.794706706433098,
|
|
"learning_rate": 7.863269588530023e-05,
|
|
"loss": 0.434,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.8776978417266187,
|
|
"grad_norm": 0.883405930853215,
|
|
"learning_rate": 7.862225906689319e-05,
|
|
"loss": 0.4357,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.879136690647482,
|
|
"grad_norm": 0.9923659442859196,
|
|
"learning_rate": 7.861178326491514e-05,
|
|
"loss": 0.4547,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.8805755395683453,
|
|
"grad_norm": 0.9294812322736127,
|
|
"learning_rate": 7.860126848993992e-05,
|
|
"loss": 0.447,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.8820143884892087,
|
|
"grad_norm": 0.8013402124261746,
|
|
"learning_rate": 7.859071475258065e-05,
|
|
"loss": 0.4445,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.883453237410072,
|
|
"grad_norm": 0.7059176326274994,
|
|
"learning_rate": 7.85801220634898e-05,
|
|
"loss": 0.4416,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.8848920863309353,
|
|
"grad_norm": 0.6392548247853963,
|
|
"learning_rate": 7.856949043335917e-05,
|
|
"loss": 0.4399,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.8863309352517985,
|
|
"grad_norm": 0.7817550953412244,
|
|
"learning_rate": 7.855881987291983e-05,
|
|
"loss": 0.4354,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.8877697841726618,
|
|
"grad_norm": 0.800230836741356,
|
|
"learning_rate": 7.854811039294216e-05,
|
|
"loss": 0.4524,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.8892086330935252,
|
|
"grad_norm": 0.7770226146869855,
|
|
"learning_rate": 7.853736200423584e-05,
|
|
"loss": 0.4368,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.8906474820143885,
|
|
"grad_norm": 0.7634205820028805,
|
|
"learning_rate": 7.852657471764983e-05,
|
|
"loss": 0.4542,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.8920863309352518,
|
|
"grad_norm": 0.6936675498406389,
|
|
"learning_rate": 7.851574854407228e-05,
|
|
"loss": 0.4469,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.8935251798561151,
|
|
"grad_norm": 0.6119128489266403,
|
|
"learning_rate": 7.85048834944307e-05,
|
|
"loss": 0.4428,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.8949640287769784,
|
|
"grad_norm": 0.5155117731130513,
|
|
"learning_rate": 7.849397957969173e-05,
|
|
"loss": 0.4372,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.8964028776978418,
|
|
"grad_norm": 0.44143447581586986,
|
|
"learning_rate": 7.848303681086134e-05,
|
|
"loss": 0.438,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.897841726618705,
|
|
"grad_norm": 0.43766214656888824,
|
|
"learning_rate": 7.847205519898461e-05,
|
|
"loss": 0.4318,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.8992805755395683,
|
|
"grad_norm": 0.5378630819927727,
|
|
"learning_rate": 7.846103475514595e-05,
|
|
"loss": 0.4408,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9007194244604316,
|
|
"grad_norm": 0.5527795555965724,
|
|
"learning_rate": 7.844997549046886e-05,
|
|
"loss": 0.4403,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.902158273381295,
|
|
"grad_norm": 0.4288550169075325,
|
|
"learning_rate": 7.843887741611608e-05,
|
|
"loss": 0.4498,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.9035971223021583,
|
|
"grad_norm": 0.5173121323053984,
|
|
"learning_rate": 7.842774054328949e-05,
|
|
"loss": 0.4286,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9050359712230216,
|
|
"grad_norm": 0.6641963840346449,
|
|
"learning_rate": 7.841656488323017e-05,
|
|
"loss": 0.4433,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9064748201438849,
|
|
"grad_norm": 0.6737185111721005,
|
|
"learning_rate": 7.840535044721832e-05,
|
|
"loss": 0.4402,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9079136690647482,
|
|
"grad_norm": 0.693885713739698,
|
|
"learning_rate": 7.839409724657327e-05,
|
|
"loss": 0.44,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9093525179856116,
|
|
"grad_norm": 0.7042479660836442,
|
|
"learning_rate": 7.838280529265353e-05,
|
|
"loss": 0.4441,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9107913669064748,
|
|
"grad_norm": 0.7908419204457521,
|
|
"learning_rate": 7.837147459685666e-05,
|
|
"loss": 0.4303,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9122302158273381,
|
|
"grad_norm": 0.9503299444377318,
|
|
"learning_rate": 7.836010517061937e-05,
|
|
"loss": 0.4475,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9136690647482014,
|
|
"grad_norm": 1.0612093733310903,
|
|
"learning_rate": 7.834869702541742e-05,
|
|
"loss": 0.4428,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9151079136690647,
|
|
"grad_norm": 0.9173830986999744,
|
|
"learning_rate": 7.833725017276573e-05,
|
|
"loss": 0.4431,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9165467625899281,
|
|
"grad_norm": 0.9099263587946228,
|
|
"learning_rate": 7.83257646242182e-05,
|
|
"loss": 0.4385,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9179856115107914,
|
|
"grad_norm": 1.0226574865634894,
|
|
"learning_rate": 7.831424039136783e-05,
|
|
"loss": 0.4452,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9194244604316547,
|
|
"grad_norm": 0.9419850429277917,
|
|
"learning_rate": 7.830267748584666e-05,
|
|
"loss": 0.4416,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.920863309352518,
|
|
"grad_norm": 0.7437410924245971,
|
|
"learning_rate": 7.829107591932578e-05,
|
|
"loss": 0.4321,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9223021582733812,
|
|
"grad_norm": 0.6037200239127269,
|
|
"learning_rate": 7.82794357035153e-05,
|
|
"loss": 0.4355,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9237410071942446,
|
|
"grad_norm": 0.5493050148069129,
|
|
"learning_rate": 7.82677568501643e-05,
|
|
"loss": 0.4389,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9251798561151079,
|
|
"grad_norm": 0.485701106871748,
|
|
"learning_rate": 7.82560393710609e-05,
|
|
"loss": 0.4353,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9266187050359712,
|
|
"grad_norm": 0.5436310782889469,
|
|
"learning_rate": 7.824428327803221e-05,
|
|
"loss": 0.4416,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.9280575539568345,
|
|
"grad_norm": 0.6027660593817955,
|
|
"learning_rate": 7.823248858294428e-05,
|
|
"loss": 0.4469,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9294964028776979,
|
|
"grad_norm": 0.5450805610271228,
|
|
"learning_rate": 7.822065529770216e-05,
|
|
"loss": 0.4407,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9309352517985612,
|
|
"grad_norm": 0.5451330638925455,
|
|
"learning_rate": 7.820878343424984e-05,
|
|
"loss": 0.4313,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9323741007194245,
|
|
"grad_norm": 0.4823960055606693,
|
|
"learning_rate": 7.819687300457021e-05,
|
|
"loss": 0.4305,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.9338129496402877,
|
|
"grad_norm": 0.51769113319901,
|
|
"learning_rate": 7.818492402068517e-05,
|
|
"loss": 0.4414,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.935251798561151,
|
|
"grad_norm": 0.5959361501782915,
|
|
"learning_rate": 7.817293649465546e-05,
|
|
"loss": 0.4451,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9366906474820144,
|
|
"grad_norm": 0.6370679041787691,
|
|
"learning_rate": 7.816091043858076e-05,
|
|
"loss": 0.4431,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9381294964028777,
|
|
"grad_norm": 0.6559923259973256,
|
|
"learning_rate": 7.814884586459962e-05,
|
|
"loss": 0.4427,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.939568345323741,
|
|
"grad_norm": 0.5777782137028638,
|
|
"learning_rate": 7.813674278488949e-05,
|
|
"loss": 0.4353,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.9410071942446043,
|
|
"grad_norm": 0.586635104496739,
|
|
"learning_rate": 7.812460121166666e-05,
|
|
"loss": 0.452,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9424460431654677,
|
|
"grad_norm": 0.8097037884054802,
|
|
"learning_rate": 7.81124211571863e-05,
|
|
"loss": 0.4347,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.943884892086331,
|
|
"grad_norm": 0.8223004958869112,
|
|
"learning_rate": 7.810020263374239e-05,
|
|
"loss": 0.4337,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9453237410071943,
|
|
"grad_norm": 0.576628963283304,
|
|
"learning_rate": 7.808794565366778e-05,
|
|
"loss": 0.4325,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9467625899280575,
|
|
"grad_norm": 0.3774931624759356,
|
|
"learning_rate": 7.807565022933412e-05,
|
|
"loss": 0.4282,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9482014388489208,
|
|
"grad_norm": 0.4072181335058804,
|
|
"learning_rate": 7.806331637315183e-05,
|
|
"loss": 0.4343,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9496402877697842,
|
|
"grad_norm": 0.4711928760917451,
|
|
"learning_rate": 7.805094409757017e-05,
|
|
"loss": 0.44,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.9510791366906475,
|
|
"grad_norm": 0.5409719336710797,
|
|
"learning_rate": 7.803853341507715e-05,
|
|
"loss": 0.4457,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.9525179856115108,
|
|
"grad_norm": 0.6216278669095939,
|
|
"learning_rate": 7.802608433819957e-05,
|
|
"loss": 0.4452,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.9539568345323741,
|
|
"grad_norm": 0.7331501884392513,
|
|
"learning_rate": 7.801359687950292e-05,
|
|
"loss": 0.4556,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.9553956834532374,
|
|
"grad_norm": 0.7731029117607034,
|
|
"learning_rate": 7.800107105159155e-05,
|
|
"loss": 0.4363,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.9568345323741008,
|
|
"grad_norm": 0.8029426982022957,
|
|
"learning_rate": 7.798850686710841e-05,
|
|
"loss": 0.4476,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.958273381294964,
|
|
"grad_norm": 0.9403389829507591,
|
|
"learning_rate": 7.797590433873526e-05,
|
|
"loss": 0.4386,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.9597122302158273,
|
|
"grad_norm": 1.0950818374577753,
|
|
"learning_rate": 7.79632634791925e-05,
|
|
"loss": 0.4373,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.9611510791366906,
|
|
"grad_norm": 0.9728625877497045,
|
|
"learning_rate": 7.795058430123925e-05,
|
|
"loss": 0.447,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.962589928057554,
|
|
"grad_norm": 0.8219750260150098,
|
|
"learning_rate": 7.793786681767333e-05,
|
|
"loss": 0.436,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.9640287769784173,
|
|
"grad_norm": 0.615327914665318,
|
|
"learning_rate": 7.792511104133117e-05,
|
|
"loss": 0.4304,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.9654676258992806,
|
|
"grad_norm": 0.5137229518358345,
|
|
"learning_rate": 7.791231698508786e-05,
|
|
"loss": 0.4361,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.9669064748201439,
|
|
"grad_norm": 0.5136013859775398,
|
|
"learning_rate": 7.789948466185718e-05,
|
|
"loss": 0.4192,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.9683453237410072,
|
|
"grad_norm": 0.6114870282067004,
|
|
"learning_rate": 7.788661408459146e-05,
|
|
"loss": 0.4384,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.9697841726618706,
|
|
"grad_norm": 0.710745885026895,
|
|
"learning_rate": 7.787370526628173e-05,
|
|
"loss": 0.4534,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.9712230215827338,
|
|
"grad_norm": 0.7680474164021992,
|
|
"learning_rate": 7.786075821995754e-05,
|
|
"loss": 0.4459,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.9726618705035971,
|
|
"grad_norm": 0.7317715654904084,
|
|
"learning_rate": 7.784777295868706e-05,
|
|
"loss": 0.4397,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.9741007194244604,
|
|
"grad_norm": 0.645119740216132,
|
|
"learning_rate": 7.783474949557704e-05,
|
|
"loss": 0.4342,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.9755395683453237,
|
|
"grad_norm": 0.7910821252478519,
|
|
"learning_rate": 7.782168784377276e-05,
|
|
"loss": 0.4466,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.9769784172661871,
|
|
"grad_norm": 1.0592414979731315,
|
|
"learning_rate": 7.780858801645806e-05,
|
|
"loss": 0.4382,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.9784172661870504,
|
|
"grad_norm": 0.9473110343908139,
|
|
"learning_rate": 7.779545002685535e-05,
|
|
"loss": 0.4427,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.9798561151079137,
|
|
"grad_norm": 0.7485463804004998,
|
|
"learning_rate": 7.778227388822552e-05,
|
|
"loss": 0.4372,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.981294964028777,
|
|
"grad_norm": 0.6130290707594002,
|
|
"learning_rate": 7.776905961386793e-05,
|
|
"loss": 0.4431,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.9827338129496402,
|
|
"grad_norm": 0.6211600633567216,
|
|
"learning_rate": 7.77558072171205e-05,
|
|
"loss": 0.4459,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.9841726618705036,
|
|
"grad_norm": 0.5904274088812954,
|
|
"learning_rate": 7.774251671135961e-05,
|
|
"loss": 0.4289,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.9856115107913669,
|
|
"grad_norm": 0.6101311610488774,
|
|
"learning_rate": 7.77291881100001e-05,
|
|
"loss": 0.4394,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.9870503597122302,
|
|
"grad_norm": 0.6046305929512732,
|
|
"learning_rate": 7.771582142649523e-05,
|
|
"loss": 0.434,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.9884892086330935,
|
|
"grad_norm": 0.5073999626038506,
|
|
"learning_rate": 7.770241667433677e-05,
|
|
"loss": 0.4434,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.9899280575539569,
|
|
"grad_norm": 0.4312185361037146,
|
|
"learning_rate": 7.768897386705488e-05,
|
|
"loss": 0.437,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.9913669064748202,
|
|
"grad_norm": 0.41857275595874277,
|
|
"learning_rate": 7.767549301821807e-05,
|
|
"loss": 0.4376,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.9928057553956835,
|
|
"grad_norm": 0.4306021735646804,
|
|
"learning_rate": 7.766197414143333e-05,
|
|
"loss": 0.4325,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.9942446043165467,
|
|
"grad_norm": 0.4875673700841474,
|
|
"learning_rate": 7.764841725034602e-05,
|
|
"loss": 0.4425,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.99568345323741,
|
|
"grad_norm": 0.5369761054222272,
|
|
"learning_rate": 7.763482235863985e-05,
|
|
"loss": 0.4389,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.9971223021582734,
|
|
"grad_norm": 0.49627000926263076,
|
|
"learning_rate": 7.762118948003688e-05,
|
|
"loss": 0.4413,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.9985611510791367,
|
|
"grad_norm": 0.4764528821576539,
|
|
"learning_rate": 7.760751862829754e-05,
|
|
"loss": 0.4337,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.5200888070683869,
|
|
"learning_rate": 7.759380981722055e-05,
|
|
"loss": 0.4356,
|
|
"step": 695
|
|
},
|
|
{
"epoch": 1.0014388489208632,
"grad_norm": 0.5397119738137616,
"learning_rate": 7.758006306064301e-05,
"loss": 0.4189,
"step": 696
},
{
"epoch": 1.0028776978417266,
"grad_norm": 0.5027624661722697,
"learning_rate": 7.756627837244023e-05,
"loss": 0.4195,
"step": 697
},
{
"epoch": 1.0043165467625899,
"grad_norm": 0.6949367641066488,
"learning_rate": 7.755245576652588e-05,
"loss": 0.4256,
"step": 698
},
{
"epoch": 1.0057553956834533,
"grad_norm": 0.9503923404450467,
"learning_rate": 7.753859525685187e-05,
"loss": 0.4167,
"step": 699
},
{
"epoch": 1.0071942446043165,
"grad_norm": 1.2036536626001393,
"learning_rate": 7.752469685740838e-05,
"loss": 0.4258,
"step": 700
},
{
"epoch": 1.00863309352518,
"grad_norm": 0.711788132311282,
"learning_rate": 7.751076058222381e-05,
"loss": 0.4118,
"step": 701
},
{
"epoch": 1.0100719424460431,
"grad_norm": 0.5085421266577125,
"learning_rate": 7.749678644536485e-05,
"loss": 0.4106,
"step": 702
},
{
"epoch": 1.0115107913669066,
"grad_norm": 0.5401986909928753,
"learning_rate": 7.748277446093631e-05,
"loss": 0.4182,
"step": 703
},
{
"epoch": 1.0129496402877698,
"grad_norm": 0.5216637538023988,
"learning_rate": 7.746872464308131e-05,
"loss": 0.4147,
"step": 704
},
{
"epoch": 1.014388489208633,
"grad_norm": 0.4701268233324493,
"learning_rate": 7.745463700598108e-05,
"loss": 0.4171,
"step": 705
},
{
"epoch": 1.0158273381294964,
"grad_norm": 0.495279175951498,
"learning_rate": 7.744051156385503e-05,
"loss": 0.4165,
"step": 706
},
{
"epoch": 1.0172661870503596,
"grad_norm": 0.5000824033698361,
"learning_rate": 7.742634833096077e-05,
"loss": 0.4217,
"step": 707
},
{
"epoch": 1.018705035971223,
"grad_norm": 0.6040167618925529,
"learning_rate": 7.741214732159403e-05,
"loss": 0.4284,
"step": 708
},
{
"epoch": 1.0201438848920863,
"grad_norm": 0.634921684306922,
"learning_rate": 7.739790855008867e-05,
"loss": 0.4226,
"step": 709
},
{
"epoch": 1.0215827338129497,
"grad_norm": 0.559370532195212,
"learning_rate": 7.738363203081664e-05,
"loss": 0.4231,
"step": 710
},
{
"epoch": 1.023021582733813,
"grad_norm": 0.7125587206563889,
"learning_rate": 7.736931777818805e-05,
"loss": 0.4328,
"step": 711
},
{
"epoch": 1.0244604316546762,
"grad_norm": 0.9900604597963074,
"learning_rate": 7.735496580665105e-05,
"loss": 0.428,
"step": 712
},
{
"epoch": 1.0258992805755396,
"grad_norm": 1.2104370523628105,
"learning_rate": 7.734057613069188e-05,
"loss": 0.4194,
"step": 713
},
{
"epoch": 1.0273381294964028,
"grad_norm": 0.5391715255703227,
"learning_rate": 7.73261487648348e-05,
"loss": 0.4207,
"step": 714
},
{
"epoch": 1.0287769784172662,
"grad_norm": 0.6935532416778775,
"learning_rate": 7.731168372364219e-05,
"loss": 0.4141,
"step": 715
},
{
"epoch": 1.0302158273381294,
"grad_norm": 0.9815547844569567,
"learning_rate": 7.729718102171438e-05,
"loss": 0.4226,
"step": 716
},
{
"epoch": 1.0316546762589929,
"grad_norm": 0.9021516604506093,
"learning_rate": 7.728264067368976e-05,
"loss": 0.4199,
"step": 717
},
{
"epoch": 1.033093525179856,
"grad_norm": 0.8240309956503304,
"learning_rate": 7.726806269424469e-05,
"loss": 0.4136,
"step": 718
},
{
"epoch": 1.0345323741007195,
"grad_norm": 0.723745442257892,
"learning_rate": 7.725344709809355e-05,
"loss": 0.4236,
"step": 719
},
{
"epoch": 1.0359712230215827,
"grad_norm": 0.6575705286032772,
"learning_rate": 7.723879389998864e-05,
"loss": 0.4296,
"step": 720
},
{
"epoch": 1.037410071942446,
"grad_norm": 0.4411840619144157,
"learning_rate": 7.722410311472026e-05,
"loss": 0.4141,
"step": 721
},
{
"epoch": 1.0388489208633094,
"grad_norm": 0.34537034358681457,
"learning_rate": 7.72093747571166e-05,
"loss": 0.4154,
"step": 722
},
{
"epoch": 1.0402877697841726,
"grad_norm": 0.546692315038691,
"learning_rate": 7.719460884204383e-05,
"loss": 0.4149,
"step": 723
},
{
"epoch": 1.041726618705036,
"grad_norm": 0.6200710743046821,
"learning_rate": 7.717980538440599e-05,
"loss": 0.418,
"step": 724
},
{
"epoch": 1.0431654676258992,
"grad_norm": 0.5877100876897944,
"learning_rate": 7.716496439914502e-05,
"loss": 0.4129,
"step": 725
},
{
"epoch": 1.0446043165467627,
"grad_norm": 0.5111935263625161,
"learning_rate": 7.715008590124076e-05,
"loss": 0.4249,
"step": 726
},
{
"epoch": 1.0460431654676259,
"grad_norm": 0.6114793041300146,
"learning_rate": 7.713516990571088e-05,
"loss": 0.4168,
"step": 727
},
{
"epoch": 1.0474820143884893,
"grad_norm": 0.7361463056210263,
"learning_rate": 7.71202164276109e-05,
"loss": 0.4189,
"step": 728
},
{
"epoch": 1.0489208633093525,
"grad_norm": 0.7685251111556513,
"learning_rate": 7.710522548203424e-05,
"loss": 0.4236,
"step": 729
},
{
"epoch": 1.0503597122302157,
"grad_norm": 0.7808466297469931,
"learning_rate": 7.709019708411202e-05,
"loss": 0.4222,
"step": 730
},
{
"epoch": 1.0517985611510792,
"grad_norm": 0.6810703401969539,
"learning_rate": 7.707513124901327e-05,
"loss": 0.4126,
"step": 731
},
{
"epoch": 1.0532374100719424,
"grad_norm": 0.4423870214533422,
"learning_rate": 7.706002799194476e-05,
"loss": 0.4113,
"step": 732
},
{
"epoch": 1.0546762589928058,
"grad_norm": 0.39355806705921925,
"learning_rate": 7.704488732815105e-05,
"loss": 0.4132,
"step": 733
},
{
"epoch": 1.056115107913669,
"grad_norm": 0.4805361188127409,
"learning_rate": 7.702970927291442e-05,
"loss": 0.419,
"step": 734
},
{
"epoch": 1.0575539568345325,
"grad_norm": 0.5931444176641889,
"learning_rate": 7.701449384155492e-05,
"loss": 0.4287,
"step": 735
},
{
"epoch": 1.0589928057553957,
"grad_norm": 0.520132909664095,
"learning_rate": 7.699924104943033e-05,
"loss": 0.4281,
"step": 736
},
{
"epoch": 1.0604316546762589,
"grad_norm": 0.5770625349386262,
"learning_rate": 7.698395091193615e-05,
"loss": 0.4145,
"step": 737
},
{
"epoch": 1.0618705035971223,
"grad_norm": 0.7936763033110583,
"learning_rate": 7.696862344450553e-05,
"loss": 0.4139,
"step": 738
},
{
"epoch": 1.0633093525179855,
"grad_norm": 0.9616831160014035,
"learning_rate": 7.695325866260932e-05,
"loss": 0.4275,
"step": 739
},
{
"epoch": 1.064748201438849,
"grad_norm": 1.0001265652941833,
"learning_rate": 7.693785658175607e-05,
"loss": 0.414,
"step": 740
},
{
"epoch": 1.0661870503597122,
"grad_norm": 0.9016483006574734,
"learning_rate": 7.692241721749194e-05,
"loss": 0.4207,
"step": 741
},
{
"epoch": 1.0676258992805756,
"grad_norm": 0.7680834963253753,
"learning_rate": 7.69069405854007e-05,
"loss": 0.4219,
"step": 742
},
{
"epoch": 1.0690647482014388,
"grad_norm": 0.5764738591998012,
"learning_rate": 7.68914267011038e-05,
"loss": 0.4176,
"step": 743
},
{
"epoch": 1.0705035971223023,
"grad_norm": 0.5086096359154797,
"learning_rate": 7.687587558026024e-05,
"loss": 0.418,
"step": 744
},
{
"epoch": 1.0719424460431655,
"grad_norm": 0.45935784543132885,
"learning_rate": 7.686028723856664e-05,
"loss": 0.4165,
"step": 745
},
{
|
|
"epoch": 1.0733812949640287,
|
|
"grad_norm": 0.42164939743945123,
|
|
"learning_rate": 7.684466169175714e-05,
|
|
"loss": 0.4219,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 1.074820143884892,
|
|
"grad_norm": 0.5573256292055556,
|
|
"learning_rate": 7.68289989556035e-05,
|
|
"loss": 0.4118,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 1.0762589928057553,
|
|
"grad_norm": 0.6024380350836916,
|
|
"learning_rate": 7.681329904591495e-05,
|
|
"loss": 0.4118,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 1.0776978417266188,
|
|
"grad_norm": 0.4728299726333857,
|
|
"learning_rate": 7.67975619785383e-05,
|
|
"loss": 0.4142,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 1.079136690647482,
|
|
"grad_norm": 0.38479638053071774,
|
|
"learning_rate": 7.678178776935781e-05,
|
|
"loss": 0.4193,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 1.0805755395683454,
|
|
"grad_norm": 0.5403359487732518,
|
|
"learning_rate": 7.676597643429528e-05,
|
|
"loss": 0.4211,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 1.0820143884892086,
|
|
"grad_norm": 0.6696343100492275,
|
|
"learning_rate": 7.675012798930994e-05,
|
|
"loss": 0.4227,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 1.083453237410072,
|
|
"grad_norm": 0.5348812700439899,
|
|
"learning_rate": 7.673424245039852e-05,
|
|
"loss": 0.4166,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 1.0848920863309353,
|
|
"grad_norm": 0.33737183967735146,
|
|
"learning_rate": 7.671831983359515e-05,
|
|
"loss": 0.4224,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 1.0863309352517985,
|
|
"grad_norm": 0.2910550193530251,
|
|
"learning_rate": 7.670236015497141e-05,
|
|
"loss": 0.4125,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 1.087769784172662,
|
|
"grad_norm": 0.3218380881168982,
|
|
"learning_rate": 7.668636343063628e-05,
|
|
"loss": 0.4115,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 1.0892086330935251,
|
|
"grad_norm": 0.3885178095611745,
|
|
"learning_rate": 7.667032967673614e-05,
|
|
"loss": 0.4195,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 1.0906474820143885,
|
|
"grad_norm": 0.5208514202102771,
|
|
"learning_rate": 7.665425890945474e-05,
|
|
"loss": 0.4221,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 1.0920863309352518,
|
|
"grad_norm": 0.6876360376062792,
|
|
"learning_rate": 7.663815114501319e-05,
|
|
"loss": 0.4147,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 1.0935251798561152,
|
|
"grad_norm": 0.8966691407405837,
|
|
"learning_rate": 7.662200639966992e-05,
|
|
"loss": 0.4061,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 1.0949640287769784,
|
|
"grad_norm": 1.0153810720593752,
|
|
"learning_rate": 7.660582468972074e-05,
|
|
"loss": 0.4281,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 1.0964028776978418,
|
|
"grad_norm": 1.0476117573773387,
|
|
"learning_rate": 7.658960603149873e-05,
|
|
"loss": 0.412,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 1.097841726618705,
|
|
"grad_norm": 0.7944815310684014,
|
|
"learning_rate": 7.657335044137427e-05,
|
|
"loss": 0.4153,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 1.0992805755395683,
|
|
"grad_norm": 0.4611364553946389,
|
|
"learning_rate": 7.655705793575504e-05,
|
|
"loss": 0.4113,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 1.1007194244604317,
|
|
"grad_norm": 0.40068514354135115,
|
|
"learning_rate": 7.654072853108592e-05,
|
|
"loss": 0.4173,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 1.102158273381295,
|
|
"grad_norm": 0.6077188171329105,
|
|
"learning_rate": 7.652436224384911e-05,
|
|
"loss": 0.416,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 1.1035971223021583,
|
|
"grad_norm": 0.6540762211302478,
|
|
"learning_rate": 7.6507959090564e-05,
|
|
"loss": 0.4188,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 1.1050359712230216,
|
|
"grad_norm": 0.4992850745612786,
|
|
"learning_rate": 7.649151908778721e-05,
|
|
"loss": 0.4188,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 1.106474820143885,
|
|
"grad_norm": 0.4786454642267303,
|
|
"learning_rate": 7.64750422521125e-05,
|
|
"loss": 0.4221,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 1.1079136690647482,
|
|
"grad_norm": 0.4933201075769948,
|
|
"learning_rate": 7.645852860017086e-05,
|
|
"loss": 0.4129,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.1093525179856114,
|
|
"grad_norm": 0.5816668474690185,
|
|
"learning_rate": 7.644197814863045e-05,
|
|
"loss": 0.4139,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.1107913669064748,
|
|
"grad_norm": 0.7325776928381896,
|
|
"learning_rate": 7.642539091419654e-05,
|
|
"loss": 0.4236,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.112230215827338,
|
|
"grad_norm": 0.6948398316745751,
|
|
"learning_rate": 7.640876691361152e-05,
|
|
"loss": 0.4191,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.1136690647482015,
|
|
"grad_norm": 0.6001207046576845,
|
|
"learning_rate": 7.639210616365494e-05,
|
|
"loss": 0.4218,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.1151079136690647,
|
|
"grad_norm": 0.4844482653324157,
|
|
"learning_rate": 7.637540868114338e-05,
|
|
"loss": 0.4236,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.1165467625899281,
|
|
"grad_norm": 0.38682806277611953,
|
|
"learning_rate": 7.635867448293056e-05,
|
|
"loss": 0.4169,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.1179856115107913,
|
|
"grad_norm": 0.37580950950066533,
|
|
"learning_rate": 7.63419035859072e-05,
|
|
"loss": 0.4159,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.1194244604316548,
|
|
"grad_norm": 0.38648061407362977,
|
|
"learning_rate": 7.63250960070011e-05,
|
|
"loss": 0.4202,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.120863309352518,
|
|
"grad_norm": 0.4041556954365239,
|
|
"learning_rate": 7.630825176317707e-05,
|
|
"loss": 0.4155,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.1223021582733812,
|
|
"grad_norm": 0.45544087326800725,
|
|
"learning_rate": 7.629137087143693e-05,
|
|
"loss": 0.4213,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.1237410071942446,
|
|
"grad_norm": 0.5442316827207219,
|
|
"learning_rate": 7.627445334881951e-05,
|
|
"loss": 0.42,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 1.1251798561151078,
|
|
"grad_norm": 0.6556749503314576,
|
|
"learning_rate": 7.625749921240058e-05,
|
|
"loss": 0.4162,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 1.1266187050359713,
|
|
"grad_norm": 0.6576053681387307,
|
|
"learning_rate": 7.62405084792929e-05,
|
|
"loss": 0.4281,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 1.1280575539568345,
|
|
"grad_norm": 0.6292034318735407,
|
|
"learning_rate": 7.622348116664611e-05,
|
|
"loss": 0.4242,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 1.129496402877698,
|
|
"grad_norm": 0.687897587625346,
|
|
"learning_rate": 7.620641729164686e-05,
|
|
"loss": 0.4281,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 1.1309352517985611,
|
|
"grad_norm": 0.6595976646157676,
|
|
"learning_rate": 7.618931687151863e-05,
|
|
"loss": 0.4324,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 1.1323741007194243,
|
|
"grad_norm": 0.5736114629852814,
|
|
"learning_rate": 7.617217992352183e-05,
|
|
"loss": 0.4184,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 1.1338129496402878,
|
|
"grad_norm": 0.5871656908352676,
|
|
"learning_rate": 7.615500646495373e-05,
|
|
"loss": 0.4066,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 1.135251798561151,
|
|
"grad_norm": 0.5858621838037396,
|
|
"learning_rate": 7.613779651314841e-05,
|
|
"loss": 0.4053,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 1.1366906474820144,
|
|
"grad_norm": 0.4466132858224463,
|
|
"learning_rate": 7.612055008547688e-05,
|
|
"loss": 0.4234,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 1.1381294964028776,
|
|
"grad_norm": 0.3477754066341184,
|
|
"learning_rate": 7.610326719934685e-05,
|
|
"loss": 0.4175,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 1.139568345323741,
|
|
"grad_norm": 0.3628840436710598,
|
|
"learning_rate": 7.608594787220292e-05,
|
|
"loss": 0.4142,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 1.1410071942446043,
|
|
"grad_norm": 0.4610527378827971,
|
|
"learning_rate": 7.606859212152644e-05,
|
|
"loss": 0.4196,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 1.1424460431654677,
|
|
"grad_norm": 0.40725442097367937,
|
|
"learning_rate": 7.605119996483551e-05,
|
|
"loss": 0.4125,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 1.143884892086331,
|
|
"grad_norm": 0.2834431971236946,
|
|
"learning_rate": 7.6033771419685e-05,
|
|
"loss": 0.4176,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 1.1453237410071941,
|
|
"grad_norm": 0.3521219960816613,
|
|
"learning_rate": 7.601630650366648e-05,
|
|
"loss": 0.4198,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 1.1467625899280576,
|
|
"grad_norm": 0.43891109430203684,
|
|
"learning_rate": 7.59988052344083e-05,
|
|
"loss": 0.4133,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 1.1482014388489208,
|
|
"grad_norm": 0.4540403545711242,
|
|
"learning_rate": 7.59812676295754e-05,
|
|
"loss": 0.4143,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 1.1496402877697842,
|
|
"grad_norm": 0.4200704169174888,
|
|
"learning_rate": 7.596369370686947e-05,
|
|
"loss": 0.4207,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 1.1510791366906474,
|
|
"grad_norm": 0.4016234354118138,
|
|
"learning_rate": 7.594608348402885e-05,
|
|
"loss": 0.4255,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 1.1525179856115109,
|
|
"grad_norm": 0.3919936283534993,
|
|
"learning_rate": 7.592843697882848e-05,
|
|
"loss": 0.423,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 1.153956834532374,
|
|
"grad_norm": 0.4743878658496564,
|
|
"learning_rate": 7.591075420907997e-05,
|
|
"loss": 0.4178,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 1.1553956834532375,
|
|
"grad_norm": 0.4858027501180704,
|
|
"learning_rate": 7.589303519263151e-05,
|
|
"loss": 0.4136,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 1.1568345323741007,
|
|
"grad_norm": 0.5608050353825124,
|
|
"learning_rate": 7.587527994736787e-05,
|
|
"loss": 0.4135,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 1.158273381294964,
|
|
"grad_norm": 0.6822513937844785,
|
|
"learning_rate": 7.58574884912104e-05,
|
|
"loss": 0.4185,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 1.1597122302158274,
|
|
"grad_norm": 0.6942719585634665,
|
|
"learning_rate": 7.5839660842117e-05,
|
|
"loss": 0.4231,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 1.1611510791366906,
|
|
"grad_norm": 0.6631906624825691,
|
|
"learning_rate": 7.582179701808208e-05,
|
|
"loss": 0.4176,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 1.162589928057554,
|
|
"grad_norm": 0.6400688994338029,
|
|
"learning_rate": 7.580389703713661e-05,
|
|
"loss": 0.4134,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 1.1640287769784172,
|
|
"grad_norm": 0.6199777582992885,
|
|
"learning_rate": 7.5785960917348e-05,
|
|
"loss": 0.4103,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 1.1654676258992807,
|
|
"grad_norm": 0.5892268183734033,
|
|
"learning_rate": 7.576798867682018e-05,
|
|
"loss": 0.4224,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 1.1669064748201439,
|
|
"grad_norm": 0.5775349850537081,
|
|
"learning_rate": 7.574998033369349e-05,
|
|
"loss": 0.4119,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 1.1683453237410073,
|
|
"grad_norm": 1.0546349340175452,
|
|
"learning_rate": 7.573193590614479e-05,
|
|
"loss": 0.4253,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 1.1697841726618705,
|
|
"grad_norm": 0.43795610768884824,
|
|
"learning_rate": 7.571385541238727e-05,
|
|
"loss": 0.4245,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 1.1712230215827337,
|
|
"grad_norm": 0.6329719385877239,
|
|
"learning_rate": 7.569573887067059e-05,
|
|
"loss": 0.4178,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 1.1726618705035972,
|
|
"grad_norm": 0.7093298178959643,
|
|
"learning_rate": 7.567758629928076e-05,
|
|
"loss": 0.4318,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 1.1741007194244604,
|
|
"grad_norm": 0.6609250290769466,
|
|
"learning_rate": 7.565939771654018e-05,
|
|
"loss": 0.4208,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 1.1755395683453238,
|
|
"grad_norm": 0.7637221016087584,
|
|
"learning_rate": 7.564117314080758e-05,
|
|
"loss": 0.4164,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 1.176978417266187,
|
|
"grad_norm": 0.7632217156685271,
|
|
"learning_rate": 7.562291259047804e-05,
|
|
"loss": 0.4188,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 1.1784172661870504,
|
|
"grad_norm": 0.804017807669019,
|
|
"learning_rate": 7.560461608398292e-05,
|
|
"loss": 0.4189,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 1.1798561151079137,
|
|
"grad_norm": 0.8496685804189026,
|
|
"learning_rate": 7.558628363978991e-05,
|
|
"loss": 0.4205,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 1.181294964028777,
|
|
"grad_norm": 0.7061245267061335,
|
|
"learning_rate": 7.556791527640292e-05,
|
|
"loss": 0.4218,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 1.1827338129496403,
|
|
"grad_norm": 0.5020850995203467,
|
|
"learning_rate": 7.554951101236219e-05,
|
|
"loss": 0.4215,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 1.1841726618705035,
|
|
"grad_norm": 0.37119328851378186,
|
|
"learning_rate": 7.553107086624413e-05,
|
|
"loss": 0.4089,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 1.185611510791367,
|
|
"grad_norm": 0.4496321141970347,
|
|
"learning_rate": 7.551259485666141e-05,
|
|
"loss": 0.4136,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 1.1870503597122302,
|
|
"grad_norm": 0.5553021443144005,
|
|
"learning_rate": 7.549408300226287e-05,
|
|
"loss": 0.4213,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 1.1884892086330936,
|
|
"grad_norm": 0.4774970302453056,
|
|
"learning_rate": 7.547553532173356e-05,
|
|
"loss": 0.4189,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 1.1899280575539568,
|
|
"grad_norm": 0.40644678600928075,
|
|
"learning_rate": 7.545695183379465e-05,
|
|
"loss": 0.4138,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 1.19136690647482,
|
|
"grad_norm": 0.3903764532274062,
|
|
"learning_rate": 7.54383325572035e-05,
|
|
"loss": 0.4211,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 1.1928057553956835,
|
|
"grad_norm": 0.44266353128325003,
|
|
"learning_rate": 7.541967751075354e-05,
|
|
"loss": 0.4271,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 1.1942446043165469,
|
|
"grad_norm": 0.4060296126662683,
|
|
"learning_rate": 7.540098671327438e-05,
|
|
"loss": 0.4159,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 1.19568345323741,
|
|
"grad_norm": 0.4284965378717239,
|
|
"learning_rate": 7.538226018363164e-05,
|
|
"loss": 0.4195,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 1.1971223021582733,
|
|
"grad_norm": 0.44891542301387827,
|
|
"learning_rate": 7.536349794072705e-05,
|
|
"loss": 0.4136,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 1.1985611510791367,
|
|
"grad_norm": 0.4702652359610675,
|
|
"learning_rate": 7.534470000349835e-05,
|
|
"loss": 0.4174,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"grad_norm": 0.4144414432211031,
|
|
"learning_rate": 7.532586639091936e-05,
|
|
"loss": 0.4275,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 1.2014388489208634,
|
|
"grad_norm": 0.5100052283373594,
|
|
"learning_rate": 7.530699712199985e-05,
|
|
"loss": 0.4111,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 1.2028776978417266,
|
|
"grad_norm": 0.5517005808710922,
|
|
"learning_rate": 7.528809221578565e-05,
|
|
"loss": 0.4133,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 1.2043165467625898,
|
|
"grad_norm": 0.5030338021440134,
|
|
"learning_rate": 7.52691516913585e-05,
|
|
"loss": 0.4298,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 1.2057553956834532,
|
|
"grad_norm": 0.5059463726911169,
|
|
"learning_rate": 7.525017556783612e-05,
|
|
"loss": 0.4238,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 1.2071942446043165,
|
|
"grad_norm": 0.5502407419595037,
|
|
"learning_rate": 7.523116386437216e-05,
|
|
"loss": 0.4119,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 1.20863309352518,
|
|
"grad_norm": 0.6164672282947923,
|
|
"learning_rate": 7.521211660015615e-05,
|
|
"loss": 0.4193,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 1.210071942446043,
|
|
"grad_norm": 0.7318209274543548,
|
|
"learning_rate": 7.519303379441357e-05,
|
|
"loss": 0.416,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 1.2115107913669065,
|
|
"grad_norm": 0.6894364547775985,
|
|
"learning_rate": 7.517391546640573e-05,
|
|
"loss": 0.422,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 1.2129496402877697,
|
|
"grad_norm": 0.5944071209022024,
|
|
"learning_rate": 7.515476163542982e-05,
|
|
"loss": 0.4153,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 1.2143884892086332,
|
|
"grad_norm": 0.5352976869435,
|
|
"learning_rate": 7.513557232081887e-05,
|
|
"loss": 0.4165,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 1.2158273381294964,
|
|
"grad_norm": 0.4753335444550983,
|
|
"learning_rate": 7.511634754194168e-05,
|
|
"loss": 0.4211,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 1.2172661870503596,
|
|
"grad_norm": 0.4012759932047907,
|
|
"learning_rate": 7.50970873182029e-05,
|
|
"loss": 0.416,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 1.218705035971223,
|
|
"grad_norm": 0.3772629363282154,
|
|
"learning_rate": 7.507779166904292e-05,
|
|
"loss": 0.4184,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 1.2201438848920863,
|
|
"grad_norm": 0.373439227425041,
|
|
"learning_rate": 7.50584606139379e-05,
|
|
"loss": 0.4232,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 1.2215827338129497,
|
|
"grad_norm": 0.3326135021575873,
|
|
"learning_rate": 7.503909417239975e-05,
|
|
"loss": 0.4179,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 1.223021582733813,
|
|
"grad_norm": 0.3080551894839136,
|
|
"learning_rate": 7.501969236397607e-05,
|
|
"loss": 0.4178,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 1.2244604316546763,
|
|
"grad_norm": 0.29953486621859216,
|
|
"learning_rate": 7.500025520825018e-05,
|
|
"loss": 0.4111,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 1.2258992805755395,
|
|
"grad_norm": 0.35287952112213083,
|
|
"learning_rate": 7.498078272484108e-05,
|
|
"loss": 0.4188,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 1.227338129496403,
|
|
"grad_norm": 0.46064473318025706,
|
|
"learning_rate": 7.496127493340341e-05,
|
|
"loss": 0.4284,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 1.2287769784172662,
|
|
"grad_norm": 0.5494519821129565,
|
|
"learning_rate": 7.494173185362745e-05,
|
|
"loss": 0.4251,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 1.2302158273381294,
|
|
"grad_norm": 0.5974430796977439,
|
|
"learning_rate": 7.492215350523913e-05,
|
|
"loss": 0.415,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 1.2316546762589928,
|
|
"grad_norm": 0.6697740402303514,
|
|
"learning_rate": 7.490253990799991e-05,
|
|
"loss": 0.4245,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 1.233093525179856,
|
|
"grad_norm": 0.7541577029013361,
|
|
"learning_rate": 7.488289108170692e-05,
|
|
"loss": 0.417,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 1.2345323741007195,
|
|
"grad_norm": 0.7902751733270713,
|
|
"learning_rate": 7.486320704619276e-05,
|
|
"loss": 0.4094,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 1.2359712230215827,
|
|
"grad_norm": 0.7105019988626443,
|
|
"learning_rate": 7.484348782132565e-05,
|
|
"loss": 0.4195,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 1.2374100719424461,
|
|
"grad_norm": 0.6403613036729681,
|
|
"learning_rate": 7.482373342700927e-05,
|
|
"loss": 0.4175,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 1.2388489208633093,
|
|
"grad_norm": 0.5156147475502347,
|
|
"learning_rate": 7.48039438831828e-05,
|
|
"loss": 0.4139,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 1.2402877697841728,
|
|
"grad_norm": 0.41799084671672465,
|
|
"learning_rate": 7.478411920982095e-05,
|
|
"loss": 0.4222,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 1.241726618705036,
|
|
"grad_norm": 0.4250344038508307,
|
|
"learning_rate": 7.476425942693382e-05,
|
|
"loss": 0.4242,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 1.2431654676258992,
|
|
"grad_norm": 0.4651291731321205,
|
|
"learning_rate": 7.474436455456701e-05,
|
|
"loss": 0.4207,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 1.2446043165467626,
|
|
"grad_norm": 0.4679835909041654,
|
|
"learning_rate": 7.472443461280149e-05,
|
|
"loss": 0.4204,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 1.2460431654676258,
|
|
"grad_norm": 0.44198948513389913,
|
|
"learning_rate": 7.470446962175367e-05,
|
|
"loss": 0.4238,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 1.2474820143884893,
|
|
"grad_norm": 0.5088097004716065,
|
|
"learning_rate": 7.468446960157527e-05,
|
|
"loss": 0.4119,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 1.2489208633093525,
|
|
"grad_norm": 0.6135542172110087,
|
|
"learning_rate": 7.466443457245344e-05,
|
|
"loss": 0.4265,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 1.2503597122302157,
|
|
"grad_norm": 0.6634678173550453,
|
|
"learning_rate": 7.464436455461066e-05,
|
|
"loss": 0.4163,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 1.2517985611510791,
|
|
"grad_norm": 0.7065789496795636,
|
|
"learning_rate": 7.462425956830466e-05,
|
|
"loss": 0.421,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 1.2532374100719426,
|
|
"grad_norm": 0.7405322686003336,
|
|
"learning_rate": 7.460411963382853e-05,
|
|
"loss": 0.4164,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 1.2546762589928058,
|
|
"grad_norm": 0.7677306806360541,
|
|
"learning_rate": 7.45839447715106e-05,
|
|
"loss": 0.4111,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 1.256115107913669,
|
|
"grad_norm": 0.6769546628643688,
|
|
"learning_rate": 7.456373500171449e-05,
|
|
"loss": 0.4094,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 1.2575539568345324,
|
|
"grad_norm": 0.48764791941931157,
|
|
"learning_rate": 7.454349034483903e-05,
|
|
"loss": 0.4144,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 1.2589928057553956,
|
|
"grad_norm": 0.42265526094719313,
|
|
"learning_rate": 7.452321082131824e-05,
|
|
"loss": 0.4239,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 1.260431654676259,
|
|
"grad_norm": 0.3945799098004373,
|
|
"learning_rate": 7.450289645162138e-05,
|
|
"loss": 0.411,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 1.2618705035971223,
|
|
"grad_norm": 0.31517794607417116,
|
|
"learning_rate": 7.448254725625287e-05,
|
|
"loss": 0.4166,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 1.2633093525179855,
|
|
"grad_norm": 0.345400212342425,
|
|
"learning_rate": 7.446216325575225e-05,
|
|
"loss": 0.4226,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 1.264748201438849,
|
|
"grad_norm": 0.4170993950801287,
|
|
"learning_rate": 7.444174447069423e-05,
|
|
"loss": 0.4194,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 1.2661870503597124,
|
|
"grad_norm": 0.40689819545298733,
|
|
"learning_rate": 7.442129092168859e-05,
|
|
"loss": 0.4238,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 1.2676258992805756,
|
|
"grad_norm": 0.42613337823107816,
|
|
"learning_rate": 7.440080262938026e-05,
|
|
"loss": 0.4135,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 1.2690647482014388,
|
|
"grad_norm": 0.46859210219135744,
|
|
"learning_rate": 7.438027961444916e-05,
|
|
"loss": 0.4214,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 1.2705035971223022,
|
|
"grad_norm": 0.5107757579743645,
|
|
"learning_rate": 7.435972189761033e-05,
|
|
"loss": 0.4144,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 1.2719424460431654,
|
|
"grad_norm": 0.5035512902666552,
|
|
"learning_rate": 7.43391294996138e-05,
|
|
"loss": 0.4067,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 1.2733812949640289,
|
|
"grad_norm": 0.511560908122011,
|
|
"learning_rate": 7.431850244124459e-05,
|
|
"loss": 0.4204,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 1.274820143884892,
|
|
"grad_norm": 0.49815444998499475,
|
|
"learning_rate": 7.429784074332274e-05,
|
|
"loss": 0.416,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 1.2762589928057553,
|
|
"grad_norm": 0.42480200160919024,
|
|
"learning_rate": 7.427714442670324e-05,
|
|
"loss": 0.4057,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 1.2776978417266187,
|
|
"grad_norm": 0.42492890785448467,
|
|
"learning_rate": 7.425641351227602e-05,
|
|
"loss": 0.4189,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 1.2791366906474821,
|
|
"grad_norm": 0.3842979511642088,
|
|
"learning_rate": 7.423564802096592e-05,
|
|
"loss": 0.4157,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 1.2805755395683454,
|
|
"grad_norm": 0.35064103980921363,
|
|
"learning_rate": 7.42148479737327e-05,
|
|
"loss": 0.4181,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 1.2820143884892086,
|
|
"grad_norm": 0.3634022826815026,
|
|
"learning_rate": 7.419401339157099e-05,
|
|
"loss": 0.4126,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 1.283453237410072,
|
|
"grad_norm": 0.4087728655950996,
|
|
"learning_rate": 7.41731442955103e-05,
|
|
"loss": 0.4178,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 1.2848920863309352,
|
|
"grad_norm": 0.4914074589065225,
|
|
"learning_rate": 7.415224070661492e-05,
|
|
"loss": 0.4193,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 1.2863309352517986,
|
|
"grad_norm": 0.7529374758396754,
|
|
"learning_rate": 7.413130264598404e-05,
|
|
"loss": 0.42,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 1.2877697841726619,
|
|
"grad_norm": 0.8722571455261084,
|
|
"learning_rate": 7.411033013475156e-05,
|
|
"loss": 0.4122,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 1.289208633093525,
|
|
"grad_norm": 0.8697001242169138,
|
|
"learning_rate": 7.408932319408619e-05,
|
|
"loss": 0.4173,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 1.2906474820143885,
|
|
"grad_norm": 0.8189987603327172,
|
|
"learning_rate": 7.406828184519141e-05,
|
|
"loss": 0.42,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 1.292086330935252,
|
|
"grad_norm": 0.7428122994067121,
|
|
"learning_rate": 7.40472061093054e-05,
|
|
"loss": 0.4188,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 1.2935251798561151,
|
|
"grad_norm": 0.5928766546640613,
|
|
"learning_rate": 7.402609600770104e-05,
|
|
"loss": 0.4148,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 1.2949640287769784,
|
|
"grad_norm": 0.38872884264822516,
|
|
"learning_rate": 7.400495156168596e-05,
|
|
"loss": 0.4182,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 1.2964028776978418,
|
|
"grad_norm": 0.23818720339562638,
|
|
"learning_rate": 7.39837727926024e-05,
|
|
"loss": 0.4089,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 1.297841726618705,
|
|
"grad_norm": 0.35445809064974554,
|
|
"learning_rate": 7.396255972182723e-05,
|
|
"loss": 0.4112,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 1.2992805755395684,
|
|
"grad_norm": 0.4286582618848441,
|
|
"learning_rate": 7.394131237077199e-05,
|
|
"loss": 0.417,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.3007194244604317,
|
|
"grad_norm": 0.44277042133672895,
|
|
"learning_rate": 7.39200307608828e-05,
|
|
"loss": 0.4177,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 1.3021582733812949,
|
|
"grad_norm": 0.4948822640462802,
|
|
"learning_rate": 7.389871491364036e-05,
|
|
"loss": 0.4161,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 1.3035971223021583,
|
|
"grad_norm": 0.5538515630763183,
|
|
"learning_rate": 7.387736485055993e-05,
|
|
"loss": 0.4146,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 1.3050359712230215,
|
|
"grad_norm": 0.5339728445738461,
|
|
"learning_rate": 7.385598059319129e-05,
|
|
"loss": 0.4285,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 1.306474820143885,
|
|
"grad_norm": 0.5086999183753598,
|
|
"learning_rate": 7.383456216311875e-05,
|
|
"loss": 0.4264,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 1.3079136690647482,
|
|
"grad_norm": 0.556027035078394,
|
|
"learning_rate": 7.381310958196112e-05,
|
|
"loss": 0.4223,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 1.3093525179856116,
|
|
"grad_norm": 0.5933495224827502,
|
|
"learning_rate": 7.379162287137167e-05,
|
|
"loss": 0.4107,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 1.3107913669064748,
|
|
"grad_norm": 0.7748770521937965,
|
|
"learning_rate": 7.37701020530381e-05,
|
|
"loss": 0.4141,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 1.3122302158273382,
|
|
"grad_norm": 0.9978205433818295,
|
|
"learning_rate": 7.374854714868259e-05,
|
|
"loss": 0.4092,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 1.3136690647482014,
|
|
"grad_norm": 1.0442931630716008,
|
|
"learning_rate": 7.372695818006167e-05,
|
|
"loss": 0.4218,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 1.3151079136690647,
|
|
"grad_norm": 0.7743622233782931,
|
|
"learning_rate": 7.370533516896627e-05,
|
|
"loss": 0.4211,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 1.316546762589928,
|
|
"grad_norm": 0.6178333942179591,
|
|
"learning_rate": 7.368367813722169e-05,
|
|
"loss": 0.4224,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 1.3179856115107913,
|
|
"grad_norm": 0.6026690498027666,
|
|
"learning_rate": 7.366198710668755e-05,
|
|
"loss": 0.417,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 1.3194244604316547,
|
|
"grad_norm": 0.4410836343050182,
|
|
"learning_rate": 7.364026209925783e-05,
|
|
"loss": 0.4224,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 1.320863309352518,
|
|
"grad_norm": 0.443825565429779,
|
|
"learning_rate": 7.361850313686076e-05,
|
|
"loss": 0.4198,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 1.3223021582733812,
|
|
"grad_norm": 0.5346643696363846,
|
|
"learning_rate": 7.359671024145886e-05,
|
|
"loss": 0.4129,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 1.3237410071942446,
|
|
"grad_norm": 0.3967859710362696,
|
|
"learning_rate": 7.35748834350489e-05,
|
|
"loss": 0.4135,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.325179856115108,
|
|
"grad_norm": 0.35494875520483227,
|
|
"learning_rate": 7.355302273966186e-05,
|
|
"loss": 0.4193,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 1.3266187050359712,
|
|
"grad_norm": 0.4547775927358858,
|
|
"learning_rate": 7.353112817736295e-05,
|
|
"loss": 0.4125,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 1.3280575539568344,
|
|
"grad_norm": 0.3177003152098819,
|
|
"learning_rate": 7.350919977025157e-05,
|
|
"loss": 0.416,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 1.3294964028776979,
|
|
"grad_norm": 0.26225062386993286,
|
|
"learning_rate": 7.348723754046127e-05,
|
|
"loss": 0.4061,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 1.330935251798561,
|
|
"grad_norm": 0.34044093415409726,
|
|
"learning_rate": 7.34652415101597e-05,
|
|
"loss": 0.4137,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 1.3323741007194245,
|
|
"grad_norm": 0.3863855271166891,
|
|
"learning_rate": 7.344321170154871e-05,
|
|
"loss": 0.4174,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 1.3338129496402877,
|
|
"grad_norm": 0.48807986922448976,
|
|
"learning_rate": 7.342114813686419e-05,
|
|
"loss": 0.4196,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 1.335251798561151,
|
|
"grad_norm": 0.5465023331764318,
|
|
"learning_rate": 7.339905083837608e-05,
|
|
"loss": 0.4312,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 1.3366906474820144,
|
|
"grad_norm": 0.480775706941404,
|
|
"learning_rate": 7.337691982838841e-05,
|
|
"loss": 0.4101,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 1.3381294964028778,
|
|
"grad_norm": 0.4407409668433634,
|
|
"learning_rate": 7.335475512923924e-05,
|
|
"loss": 0.4105,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 1.339568345323741,
|
|
"grad_norm": 0.5533246989150952,
|
|
"learning_rate": 7.33325567633006e-05,
|
|
"loss": 0.424,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 1.3410071942446042,
|
|
"grad_norm": 0.612861517030539,
|
|
"learning_rate": 7.331032475297855e-05,
|
|
"loss": 0.4155,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 1.3424460431654677,
|
|
"grad_norm": 0.5647465642123591,
|
|
"learning_rate": 7.328805912071307e-05,
|
|
"loss": 0.4113,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 1.3438848920863309,
|
|
"grad_norm": 0.5418604881043935,
|
|
"learning_rate": 7.326575988897807e-05,
|
|
"loss": 0.4237,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 1.3453237410071943,
|
|
"grad_norm": 0.46826549213518326,
|
|
"learning_rate": 7.324342708028141e-05,
|
|
"loss": 0.4141,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 1.3467625899280575,
|
|
"grad_norm": 0.40848967134198355,
|
|
"learning_rate": 7.322106071716483e-05,
|
|
"loss": 0.4071,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 1.3482014388489207,
|
|
"grad_norm": 0.34720650591460006,
|
|
"learning_rate": 7.319866082220388e-05,
|
|
"loss": 0.4169,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 1.3496402877697842,
|
|
"grad_norm": 0.25172209218303604,
|
|
"learning_rate": 7.317622741800808e-05,
|
|
"loss": 0.4346,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 1.3510791366906476,
|
|
"grad_norm": 0.30311786828791615,
|
|
"learning_rate": 7.315376052722065e-05,
|
|
"loss": 0.4154,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 1.3525179856115108,
|
|
"grad_norm": 0.3564708542898199,
|
|
"learning_rate": 7.313126017251868e-05,
|
|
"loss": 0.4039,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 1.353956834532374,
|
|
"grad_norm": 0.30946919524878164,
|
|
"learning_rate": 7.3108726376613e-05,
|
|
"loss": 0.4199,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 1.3553956834532375,
|
|
"grad_norm": 0.3222727643131815,
|
|
"learning_rate": 7.308615916224823e-05,
|
|
"loss": 0.4091,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 1.3568345323741007,
|
|
"grad_norm": 0.4547286357167272,
|
|
"learning_rate": 7.306355855220267e-05,
|
|
"loss": 0.4212,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 1.358273381294964,
|
|
"grad_norm": 0.5336447981611737,
|
|
"learning_rate": 7.30409245692884e-05,
|
|
"loss": 0.4147,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 1.3597122302158273,
|
|
"grad_norm": 0.45554279091464367,
|
|
"learning_rate": 7.301825723635111e-05,
|
|
"loss": 0.4105,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 1.3611510791366905,
|
|
"grad_norm": 0.35047515650342453,
|
|
"learning_rate": 7.299555657627021e-05,
|
|
"loss": 0.4117,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 1.362589928057554,
|
|
"grad_norm": 0.4656316387932456,
|
|
"learning_rate": 7.29728226119587e-05,
|
|
"loss": 0.4313,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 1.3640287769784174,
|
|
"grad_norm": 0.5809961862625231,
|
|
"learning_rate": 7.295005536636325e-05,
|
|
"loss": 0.415,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 1.3654676258992806,
|
|
"grad_norm": 0.5791863469829993,
|
|
"learning_rate": 7.292725486246407e-05,
|
|
"loss": 0.4151,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 1.3669064748201438,
|
|
"grad_norm": 0.5327725413707503,
|
|
"learning_rate": 7.290442112327498e-05,
|
|
"loss": 0.4188,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.3683453237410073,
|
|
"grad_norm": 0.4999021299627686,
|
|
"learning_rate": 7.288155417184331e-05,
|
|
"loss": 0.4215,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 1.3697841726618705,
|
|
"grad_norm": 0.48661250973276715,
|
|
"learning_rate": 7.285865403124995e-05,
|
|
"loss": 0.4163,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 1.371223021582734,
|
|
"grad_norm": 0.5867526826318913,
|
|
"learning_rate": 7.283572072460927e-05,
|
|
"loss": 0.4092,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 1.3726618705035971,
|
|
"grad_norm": 0.7092684144582881,
|
|
"learning_rate": 7.28127542750691e-05,
|
|
"loss": 0.4128,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 1.3741007194244603,
|
|
"grad_norm": 0.7381533178447467,
|
|
"learning_rate": 7.278975470581076e-05,
|
|
"loss": 0.4215,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 1.3755395683453238,
|
|
"grad_norm": 0.7083289001568649,
|
|
"learning_rate": 7.276672204004898e-05,
|
|
"loss": 0.4189,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 1.376978417266187,
|
|
"grad_norm": 0.6923996413530602,
|
|
"learning_rate": 7.274365630103189e-05,
|
|
"loss": 0.4136,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 1.3784172661870504,
|
|
"grad_norm": 0.6897932823235978,
|
|
"learning_rate": 7.2720557512041e-05,
|
|
"loss": 0.4203,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 1.3798561151079136,
|
|
"grad_norm": 0.6561196790221027,
|
|
"learning_rate": 7.269742569639121e-05,
|
|
"loss": 0.421,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 1.381294964028777,
|
|
"grad_norm": 0.5661688814066334,
|
|
"learning_rate": 7.267426087743073e-05,
|
|
"loss": 0.4233,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 1.3827338129496403,
|
|
"grad_norm": 0.49591568266591085,
|
|
"learning_rate": 7.265106307854107e-05,
|
|
"loss": 0.4199,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 1.3841726618705037,
|
|
"grad_norm": 0.46747653890901786,
|
|
"learning_rate": 7.262783232313706e-05,
|
|
"loss": 0.4102,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 1.385611510791367,
|
|
"grad_norm": 0.5404795058688571,
|
|
"learning_rate": 7.260456863466676e-05,
|
|
"loss": 0.4173,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 1.3870503597122301,
|
|
"grad_norm": 0.6840098698733905,
|
|
"learning_rate": 7.258127203661153e-05,
|
|
"loss": 0.4303,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 1.3884892086330936,
|
|
"grad_norm": 0.8148554777221287,
|
|
"learning_rate": 7.255794255248587e-05,
|
|
"loss": 0.415,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 1.3899280575539568,
|
|
"grad_norm": 0.7820153067112996,
|
|
"learning_rate": 7.253458020583752e-05,
|
|
"loss": 0.4113,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 1.3913669064748202,
|
|
"grad_norm": 0.7267491121483379,
|
|
"learning_rate": 7.25111850202474e-05,
|
|
"loss": 0.4227,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 1.3928057553956834,
|
|
"grad_norm": 0.7385393400461305,
|
|
"learning_rate": 7.248775701932953e-05,
|
|
"loss": 0.4281,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 1.3942446043165468,
|
|
"grad_norm": 0.6442830171507052,
|
|
"learning_rate": 7.246429622673111e-05,
|
|
"loss": 0.4263,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 1.39568345323741,
|
|
"grad_norm": 0.6122859666171925,
|
|
"learning_rate": 7.244080266613238e-05,
|
|
"loss": 0.422,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 1.3971223021582735,
|
|
"grad_norm": 0.5212806797837968,
|
|
"learning_rate": 7.241727636124671e-05,
|
|
"loss": 0.4183,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 1.3985611510791367,
|
|
"grad_norm": 0.4466864683358835,
|
|
"learning_rate": 7.239371733582047e-05,
|
|
"loss": 0.4232,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"grad_norm": 0.4478402053923854,
|
|
"learning_rate": 7.23701256136331e-05,
|
|
"loss": 0.4078,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 1.4014388489208633,
|
|
"grad_norm": 0.6157439243791086,
|
|
"learning_rate": 7.2346501218497e-05,
|
|
"loss": 0.4128,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 1.4028776978417266,
|
|
"grad_norm": 0.5583159237355977,
|
|
"learning_rate": 7.23228441742576e-05,
|
|
"loss": 0.4155,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 1.40431654676259,
|
|
"grad_norm": 0.33310200734909345,
|
|
"learning_rate": 7.229915450479324e-05,
|
|
"loss": 0.4126,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 1.4057553956834532,
|
|
"grad_norm": 0.26590348886054815,
|
|
"learning_rate": 7.227543223401522e-05,
|
|
"loss": 0.4153,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 1.4071942446043164,
|
|
"grad_norm": 0.3297618425279744,
|
|
"learning_rate": 7.225167738586772e-05,
|
|
"loss": 0.4167,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 1.4086330935251798,
|
|
"grad_norm": 0.30024736703762384,
|
|
"learning_rate": 7.22278899843278e-05,
|
|
"loss": 0.4187,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 1.4100719424460433,
|
|
"grad_norm": 0.315708292644023,
|
|
"learning_rate": 7.220407005340542e-05,
|
|
"loss": 0.4154,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 1.4115107913669065,
|
|
"grad_norm": 0.36812354303876516,
|
|
"learning_rate": 7.218021761714336e-05,
|
|
"loss": 0.4219,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 1.4129496402877697,
|
|
"grad_norm": 0.39482302960401716,
|
|
"learning_rate": 7.215633269961714e-05,
|
|
"loss": 0.4116,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 1.4143884892086331,
|
|
"grad_norm": 0.4049072881133782,
|
|
"learning_rate": 7.213241532493516e-05,
|
|
"loss": 0.4102,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 1.4158273381294963,
|
|
"grad_norm": 0.36190043663384364,
|
|
"learning_rate": 7.210846551723855e-05,
|
|
"loss": 0.4118,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 1.4172661870503598,
|
|
"grad_norm": 0.43473639581836004,
|
|
"learning_rate": 7.208448330070116e-05,
|
|
"loss": 0.4225,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 1.418705035971223,
|
|
"grad_norm": 0.5901824181658488,
|
|
"learning_rate": 7.206046869952954e-05,
|
|
"loss": 0.4288,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 1.4201438848920862,
|
|
"grad_norm": 0.7617421193357684,
|
|
"learning_rate": 7.203642173796298e-05,
|
|
"loss": 0.4118,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 1.4215827338129496,
|
|
"grad_norm": 0.7796360209446979,
|
|
"learning_rate": 7.201234244027338e-05,
|
|
"loss": 0.4173,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 1.423021582733813,
|
|
"grad_norm": 0.68344644531172,
|
|
"learning_rate": 7.19882308307653e-05,
|
|
"loss": 0.4082,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 1.4244604316546763,
|
|
"grad_norm": 0.5722059587205249,
|
|
"learning_rate": 7.196408693377594e-05,
|
|
"loss": 0.4141,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 1.4258992805755395,
|
|
"grad_norm": 0.5349648600570341,
|
|
"learning_rate": 7.193991077367501e-05,
|
|
"loss": 0.4237,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 1.427338129496403,
|
|
"grad_norm": 0.47599056223773706,
|
|
"learning_rate": 7.19157023748649e-05,
|
|
"loss": 0.4173,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 1.4287769784172661,
|
|
"grad_norm": 0.4180349178757039,
|
|
"learning_rate": 7.189146176178044e-05,
|
|
"loss": 0.4223,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 1.4302158273381296,
|
|
"grad_norm": 0.3890475551246027,
|
|
"learning_rate": 7.186718895888904e-05,
|
|
"loss": 0.4111,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 1.4316546762589928,
|
|
"grad_norm": 0.3939170324727033,
|
|
"learning_rate": 7.184288399069054e-05,
|
|
"loss": 0.4114,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 1.433093525179856,
|
|
"grad_norm": 0.3832369477280717,
|
|
"learning_rate": 7.181854688171732e-05,
|
|
"loss": 0.4158,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 1.4345323741007194,
|
|
"grad_norm": 0.3941526023870261,
|
|
"learning_rate": 7.179417765653413e-05,
|
|
"loss": 0.4153,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 1.4359712230215829,
|
|
"grad_norm": 0.3831417059270232,
|
|
"learning_rate": 7.17697763397382e-05,
|
|
"loss": 0.4109,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 1.437410071942446,
|
|
"grad_norm": 0.362665185390241,
|
|
"learning_rate": 7.174534295595911e-05,
|
|
"loss": 0.4104,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 1.4388489208633093,
|
|
"grad_norm": 0.3845899749120542,
|
|
"learning_rate": 7.17208775298588e-05,
|
|
"loss": 0.4176,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 1.4402877697841727,
|
|
"grad_norm": 0.4380466218415703,
|
|
"learning_rate": 7.169638008613158e-05,
|
|
"loss": 0.4187,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 1.441726618705036,
|
|
"grad_norm": 0.4287069308687293,
|
|
"learning_rate": 7.16718506495041e-05,
|
|
"loss": 0.4164,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 1.4431654676258994,
|
|
"grad_norm": 0.44954260961151854,
|
|
"learning_rate": 7.164728924473522e-05,
|
|
"loss": 0.412,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 1.4446043165467626,
|
|
"grad_norm": 0.4962520077772707,
|
|
"learning_rate": 7.162269589661614e-05,
|
|
"loss": 0.4197,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 1.4460431654676258,
|
|
"grad_norm": 0.4797791556347553,
|
|
"learning_rate": 7.15980706299703e-05,
|
|
"loss": 0.4019,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 1.4474820143884892,
|
|
"grad_norm": 0.47076825194047095,
|
|
"learning_rate": 7.15734134696533e-05,
|
|
"loss": 0.4227,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 1.4489208633093524,
|
|
"grad_norm": 0.4603791054695319,
|
|
"learning_rate": 7.1548724440553e-05,
|
|
"loss": 0.4022,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 1.4503597122302159,
|
|
"grad_norm": 0.4521512831929274,
|
|
"learning_rate": 7.152400356758937e-05,
|
|
"loss": 0.4183,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 1.451798561151079,
|
|
"grad_norm": 0.4734472261805035,
|
|
"learning_rate": 7.149925087571456e-05,
|
|
"loss": 0.4214,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 1.4532374100719425,
|
|
"grad_norm": 0.5735418518238788,
|
|
"learning_rate": 7.147446638991283e-05,
|
|
"loss": 0.4069,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 1.4546762589928057,
|
|
"grad_norm": 0.6877408685327635,
|
|
"learning_rate": 7.14496501352005e-05,
|
|
"loss": 0.4145,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 1.4561151079136692,
|
|
"grad_norm": 0.6844539005939926,
|
|
"learning_rate": 7.1424802136626e-05,
|
|
"loss": 0.4201,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 1.4575539568345324,
|
|
"grad_norm": 0.5088216724517556,
|
|
"learning_rate": 7.139992241926978e-05,
|
|
"loss": 0.4067,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 1.4589928057553956,
|
|
"grad_norm": 0.39590789494956596,
|
|
"learning_rate": 7.137501100824432e-05,
|
|
"loss": 0.4155,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 1.460431654676259,
|
|
"grad_norm": 0.4998606349258579,
|
|
"learning_rate": 7.135006792869405e-05,
|
|
"loss": 0.4142,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 1.4618705035971222,
|
|
"grad_norm": 0.5634433054354914,
|
|
"learning_rate": 7.132509320579542e-05,
|
|
"loss": 0.4141,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 1.4633093525179857,
|
|
"grad_norm": 0.5033695158899758,
|
|
"learning_rate": 7.130008686475677e-05,
|
|
"loss": 0.4241,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 1.4647482014388489,
|
|
"grad_norm": 0.532376897595603,
|
|
"learning_rate": 7.127504893081839e-05,
|
|
"loss": 0.4073,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 1.4661870503597123,
|
|
"grad_norm": 0.485537497798653,
|
|
"learning_rate": 7.124997942925244e-05,
|
|
"loss": 0.4165,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 1.4676258992805755,
|
|
"grad_norm": 0.3649976241740195,
|
|
"learning_rate": 7.122487838536295e-05,
|
|
"loss": 0.407,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 1.469064748201439,
|
|
"grad_norm": 0.30679761598813227,
|
|
"learning_rate": 7.119974582448577e-05,
|
|
"loss": 0.4123,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 1.4705035971223022,
|
|
"grad_norm": 0.37263103696854233,
|
|
"learning_rate": 7.11745817719886e-05,
|
|
"loss": 0.4119,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 1.4719424460431654,
|
|
"grad_norm": 0.3945608659455072,
|
|
"learning_rate": 7.114938625327088e-05,
|
|
"loss": 0.4087,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 1.4733812949640288,
|
|
"grad_norm": 0.33749277911887265,
|
|
"learning_rate": 7.112415929376385e-05,
|
|
"loss": 0.4232,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 1.474820143884892,
|
|
"grad_norm": 0.3398487615885897,
|
|
"learning_rate": 7.109890091893047e-05,
|
|
"loss": 0.4193,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 1.4762589928057555,
|
|
"grad_norm": 0.436334310571793,
|
|
"learning_rate": 7.107361115426537e-05,
|
|
"loss": 0.4085,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 1.4776978417266187,
|
|
"grad_norm": 0.5396357497182525,
|
|
"learning_rate": 7.104829002529496e-05,
|
|
"loss": 0.4091,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 1.4791366906474819,
|
|
"grad_norm": 0.4796818500371543,
|
|
"learning_rate": 7.102293755757721e-05,
|
|
"loss": 0.4192,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 1.4805755395683453,
|
|
"grad_norm": 0.4155995300811726,
|
|
"learning_rate": 7.099755377670177e-05,
|
|
"loss": 0.4234,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 1.4820143884892087,
|
|
"grad_norm": 0.4586562495343292,
|
|
"learning_rate": 7.097213870828989e-05,
|
|
"loss": 0.4133,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 1.483453237410072,
|
|
"grad_norm": 0.47444227626660973,
|
|
"learning_rate": 7.094669237799437e-05,
|
|
"loss": 0.4092,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 1.4848920863309352,
|
|
"grad_norm": 0.46999750239616417,
|
|
"learning_rate": 7.092121481149964e-05,
|
|
"loss": 0.427,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 1.4863309352517986,
|
|
"grad_norm": 0.4585040742585487,
|
|
"learning_rate": 7.089570603452157e-05,
|
|
"loss": 0.3997,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 1.4877697841726618,
|
|
"grad_norm": 0.4480130753880926,
|
|
"learning_rate": 7.087016607280758e-05,
|
|
"loss": 0.4082,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 1.4892086330935252,
|
|
"grad_norm": 0.408781036888166,
|
|
"learning_rate": 7.084459495213658e-05,
|
|
"loss": 0.4124,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 1.4906474820143885,
|
|
"grad_norm": 0.333471079323863,
|
|
"learning_rate": 7.081899269831888e-05,
|
|
"loss": 0.4165,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 1.4920863309352517,
|
|
"grad_norm": 0.2672755535192444,
|
|
"learning_rate": 7.079335933719625e-05,
|
|
"loss": 0.415,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 1.493525179856115,
|
|
"grad_norm": 0.3043804011063258,
|
|
"learning_rate": 7.076769489464188e-05,
|
|
"loss": 0.4144,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 1.4949640287769785,
|
|
"grad_norm": 0.31003874055281755,
|
|
"learning_rate": 7.074199939656027e-05,
|
|
"loss": 0.4125,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 1.4964028776978417,
|
|
"grad_norm": 0.29636590565419707,
|
|
"learning_rate": 7.071627286888731e-05,
|
|
"loss": 0.4021,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 1.497841726618705,
|
|
"grad_norm": 0.2506700415329472,
|
|
"learning_rate": 7.06905153375902e-05,
|
|
"loss": 0.4088,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 1.4992805755395684,
|
|
"grad_norm": 0.26175557361890217,
|
|
"learning_rate": 7.066472682866744e-05,
|
|
"loss": 0.4081,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 1.5007194244604316,
|
|
"grad_norm": 0.2607016432524402,
|
|
"learning_rate": 7.063890736814878e-05,
|
|
"loss": 0.4131,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 1.502158273381295,
|
|
"grad_norm": 0.2850287540783795,
|
|
"learning_rate": 7.061305698209524e-05,
|
|
"loss": 0.4028,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 1.5035971223021583,
|
|
"grad_norm": 0.30155286471101217,
|
|
"learning_rate": 7.058717569659901e-05,
|
|
"loss": 0.4132,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 1.5050359712230215,
|
|
"grad_norm": 0.2968589098494209,
|
|
"learning_rate": 7.05612635377835e-05,
|
|
"loss": 0.4109,
|
|
"step": 1046
|
|
},
|
|
    {
      "epoch": 1.506474820143885,
      "grad_norm": 0.3257442349219374,
      "learning_rate": 7.053532053180332e-05,
      "loss": 0.4179,
      "step": 1047
    },
    {
      "epoch": 1.5079136690647483,
      "grad_norm": 0.28721684208299936,
      "learning_rate": 7.050934670484413e-05,
      "loss": 0.4082,
      "step": 1048
    },
    {
      "epoch": 1.5093525179856115,
      "grad_norm": 0.2926696474727033,
      "learning_rate": 7.048334208312273e-05,
      "loss": 0.4195,
      "step": 1049
    },
    {
      "epoch": 1.5107913669064748,
      "grad_norm": 0.29169574785248825,
      "learning_rate": 7.045730669288706e-05,
      "loss": 0.4186,
      "step": 1050
    },
    {
      "epoch": 1.512230215827338,
      "grad_norm": 0.3376735034297397,
      "learning_rate": 7.043124056041606e-05,
      "loss": 0.4213,
      "step": 1051
    },
    {
      "epoch": 1.5136690647482014,
      "grad_norm": 0.36234044371661067,
      "learning_rate": 7.040514371201969e-05,
      "loss": 0.4137,
      "step": 1052
    },
    {
      "epoch": 1.5151079136690648,
      "grad_norm": 0.3767828195437152,
      "learning_rate": 7.037901617403894e-05,
      "loss": 0.4174,
      "step": 1053
    },
    {
      "epoch": 1.516546762589928,
      "grad_norm": 0.3511654716835026,
      "learning_rate": 7.035285797284578e-05,
      "loss": 0.406,
      "step": 1054
    },
    {
      "epoch": 1.5179856115107913,
      "grad_norm": 0.27083796199888666,
      "learning_rate": 7.032666913484313e-05,
      "loss": 0.4093,
      "step": 1055
    },
    {
      "epoch": 1.5194244604316547,
      "grad_norm": 0.2891166778164732,
      "learning_rate": 7.030044968646481e-05,
      "loss": 0.4205,
      "step": 1056
    },
    {
      "epoch": 1.5208633093525181,
      "grad_norm": 0.32414774383089967,
      "learning_rate": 7.027419965417556e-05,
      "loss": 0.4054,
      "step": 1057
    },
    {
      "epoch": 1.5223021582733813,
      "grad_norm": 0.31257493699801037,
      "learning_rate": 7.024791906447098e-05,
      "loss": 0.4072,
      "step": 1058
    },
    {
      "epoch": 1.5237410071942445,
      "grad_norm": 0.3650608228412096,
      "learning_rate": 7.022160794387751e-05,
      "loss": 0.4123,
      "step": 1059
    },
    {
      "epoch": 1.5251798561151078,
      "grad_norm": 0.4307570688136797,
      "learning_rate": 7.019526631895242e-05,
      "loss": 0.4101,
      "step": 1060
    },
    {
      "epoch": 1.5266187050359712,
      "grad_norm": 0.39843530815096495,
      "learning_rate": 7.016889421628374e-05,
      "loss": 0.4176,
      "step": 1061
    },
    {
      "epoch": 1.5280575539568346,
      "grad_norm": 0.38691773565093956,
      "learning_rate": 7.014249166249032e-05,
      "loss": 0.4222,
      "step": 1062
    },
    {
      "epoch": 1.5294964028776978,
      "grad_norm": 0.3981930274148255,
      "learning_rate": 7.011605868422168e-05,
      "loss": 0.4168,
      "step": 1063
    },
    {
      "epoch": 1.530935251798561,
      "grad_norm": 0.49440047634389284,
      "learning_rate": 7.00895953081581e-05,
      "loss": 0.408,
      "step": 1064
    },
    {
      "epoch": 1.5323741007194245,
      "grad_norm": 0.651501069312381,
      "learning_rate": 7.00631015610105e-05,
      "loss": 0.4083,
      "step": 1065
    },
    {
      "epoch": 1.533812949640288,
      "grad_norm": 0.8705843434335055,
      "learning_rate": 7.00365774695205e-05,
      "loss": 0.4107,
      "step": 1066
    },
    {
      "epoch": 1.5352517985611511,
      "grad_norm": 1.1226578000566327,
      "learning_rate": 7.001002306046031e-05,
      "loss": 0.4188,
      "step": 1067
    },
    {
      "epoch": 1.5366906474820143,
      "grad_norm": 0.8871024399185847,
      "learning_rate": 6.998343836063276e-05,
      "loss": 0.4091,
      "step": 1068
    },
    {
      "epoch": 1.5381294964028775,
      "grad_norm": 0.7355986291324699,
      "learning_rate": 6.995682339687125e-05,
      "loss": 0.4134,
      "step": 1069
    },
    {
      "epoch": 1.539568345323741,
      "grad_norm": 0.631090515119735,
      "learning_rate": 6.993017819603973e-05,
      "loss": 0.4236,
      "step": 1070
    },
    {
      "epoch": 1.5410071942446044,
      "grad_norm": 0.47026291144024823,
      "learning_rate": 6.990350278503267e-05,
      "loss": 0.4131,
      "step": 1071
    },
    {
      "epoch": 1.5424460431654676,
      "grad_norm": 0.33616758794366475,
      "learning_rate": 6.9876797190775e-05,
      "loss": 0.4058,
      "step": 1072
    },
    {
      "epoch": 1.5438848920863308,
      "grad_norm": 0.44928587726580743,
      "learning_rate": 6.985006144022219e-05,
      "loss": 0.4109,
      "step": 1073
    },
    {
      "epoch": 1.5453237410071943,
      "grad_norm": 0.5347750703829274,
      "learning_rate": 6.982329556036007e-05,
      "loss": 0.4108,
      "step": 1074
    },
    {
      "epoch": 1.5467625899280577,
      "grad_norm": 0.46342735445677957,
      "learning_rate": 6.979649957820494e-05,
      "loss": 0.4191,
      "step": 1075
    },
    {
      "epoch": 1.548201438848921,
      "grad_norm": 0.30871057954071957,
      "learning_rate": 6.976967352080345e-05,
      "loss": 0.4055,
      "step": 1076
    },
    {
      "epoch": 1.5496402877697841,
      "grad_norm": 0.24159752029766884,
      "learning_rate": 6.974281741523259e-05,
      "loss": 0.4072,
      "step": 1077
    },
    {
      "epoch": 1.5510791366906473,
      "grad_norm": 0.4309042771747021,
      "learning_rate": 6.971593128859974e-05,
      "loss": 0.4192,
      "step": 1078
    },
    {
      "epoch": 1.5525179856115108,
      "grad_norm": 0.5468895329801764,
      "learning_rate": 6.968901516804254e-05,
      "loss": 0.4086,
      "step": 1079
    },
    {
      "epoch": 1.5539568345323742,
      "grad_norm": 0.5447320738963137,
      "learning_rate": 6.966206908072891e-05,
      "loss": 0.4066,
      "step": 1080
    },
    {
      "epoch": 1.5553956834532374,
      "grad_norm": 0.5274144761123536,
      "learning_rate": 6.963509305385701e-05,
      "loss": 0.4165,
      "step": 1081
    },
    {
      "epoch": 1.5568345323741006,
      "grad_norm": 0.6306258464670222,
      "learning_rate": 6.960808711465524e-05,
      "loss": 0.4091,
      "step": 1082
    },
    {
      "epoch": 1.558273381294964,
      "grad_norm": 0.8093462016195803,
      "learning_rate": 6.958105129038216e-05,
      "loss": 0.4097,
      "step": 1083
    },
    {
      "epoch": 1.5597122302158275,
      "grad_norm": 1.0979515953115018,
      "learning_rate": 6.955398560832654e-05,
      "loss": 0.4195,
      "step": 1084
    },
    {
      "epoch": 1.5611510791366907,
      "grad_norm": 1.085411622875164,
      "learning_rate": 6.952689009580724e-05,
      "loss": 0.418,
      "step": 1085
    },
    {
      "epoch": 1.562589928057554,
      "grad_norm": 0.6563572322350896,
      "learning_rate": 6.949976478017327e-05,
      "loss": 0.4094,
      "step": 1086
    },
{
|
|
"epoch": 1.5640287769784171,
|
|
"grad_norm": 0.35164750751110374,
|
|
"learning_rate": 6.947260968880369e-05,
|
|
"loss": 0.414,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 1.5654676258992806,
|
|
"grad_norm": 0.8574658641501741,
|
|
"learning_rate": 6.944542484910763e-05,
|
|
"loss": 0.417,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 1.566906474820144,
|
|
"grad_norm": 1.127244343795044,
|
|
"learning_rate": 6.941821028852424e-05,
|
|
"loss": 0.4127,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 1.5683453237410072,
|
|
"grad_norm": 0.40379544021464825,
|
|
"learning_rate": 6.939096603452269e-05,
|
|
"loss": 0.424,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 1.5697841726618704,
|
|
"grad_norm": 0.6818972826325004,
|
|
"learning_rate": 6.93636921146021e-05,
|
|
"loss": 0.4142,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 1.5712230215827339,
|
|
"grad_norm": 1.1717547175095797,
|
|
"learning_rate": 6.933638855629153e-05,
|
|
"loss": 0.418,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 1.572661870503597,
|
|
"grad_norm": 0.3364132595093078,
|
|
"learning_rate": 6.930905538714995e-05,
|
|
"loss": 0.4184,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 1.5741007194244605,
|
|
"grad_norm": 0.9930880412733121,
|
|
"learning_rate": 6.928169263476628e-05,
|
|
"loss": 0.4164,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 1.5755395683453237,
|
|
"grad_norm": 1.2190989571324873,
|
|
"learning_rate": 6.92543003267592e-05,
|
|
"loss": 0.4171,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 1.576978417266187,
|
|
"grad_norm": 0.33424910438861494,
|
|
"learning_rate": 6.922687849077729e-05,
|
|
"loss": 0.4196,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 1.5784172661870504,
|
|
"grad_norm": 1.2852951680487354,
|
|
"learning_rate": 6.919942715449893e-05,
|
|
"loss": 0.4187,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 1.5798561151079138,
|
|
"grad_norm": 0.5462684063967858,
|
|
"learning_rate": 6.917194634563225e-05,
|
|
"loss": 0.416,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 1.581294964028777,
|
|
"grad_norm": 0.5152187916960064,
|
|
"learning_rate": 6.914443609191514e-05,
|
|
"loss": 0.4145,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 1.5827338129496402,
|
|
"grad_norm": 0.6403221319193427,
|
|
"learning_rate": 6.911689642111523e-05,
|
|
"loss": 0.4101,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 1.5841726618705037,
|
|
"grad_norm": 0.41366203821793074,
|
|
"learning_rate": 6.90893273610298e-05,
|
|
"loss": 0.4205,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 1.5856115107913669,
|
|
"grad_norm": 0.36537750744414527,
|
|
"learning_rate": 6.906172893948585e-05,
|
|
"loss": 0.4172,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 1.5870503597122303,
|
|
"grad_norm": 0.37518656714771914,
|
|
"learning_rate": 6.903410118433996e-05,
|
|
"loss": 0.4135,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 1.5884892086330935,
|
|
"grad_norm": 0.689292364139962,
|
|
"learning_rate": 6.900644412347836e-05,
|
|
"loss": 0.4237,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 1.5899280575539567,
|
|
"grad_norm": 0.5552694082947653,
|
|
"learning_rate": 6.897875778481682e-05,
|
|
"loss": 0.4058,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 1.5913669064748202,
|
|
"grad_norm": 0.4213080697718193,
|
|
"learning_rate": 6.89510421963007e-05,
|
|
"loss": 0.4155,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 1.5928057553956836,
|
|
"grad_norm": 0.3962725327991354,
|
|
"learning_rate": 6.892329738590489e-05,
|
|
"loss": 0.4144,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 1.5942446043165468,
|
|
"grad_norm": 0.4097271468220884,
|
|
"learning_rate": 6.889552338163372e-05,
|
|
"loss": 0.417,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 1.59568345323741,
|
|
"grad_norm": 0.43211370804238053,
|
|
"learning_rate": 6.886772021152104e-05,
|
|
"loss": 0.4241,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 1.5971223021582732,
|
|
"grad_norm": 0.41885406678379655,
|
|
"learning_rate": 6.883988790363009e-05,
|
|
"loss": 0.4188,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 1.5985611510791367,
|
|
"grad_norm": 0.41092254624953684,
|
|
"learning_rate": 6.881202648605359e-05,
|
|
"loss": 0.4242,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"grad_norm": 0.4686184656099245,
|
|
"learning_rate": 6.878413598691358e-05,
|
|
"loss": 0.423,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 1.6014388489208633,
|
|
"grad_norm": 0.5525951420679763,
|
|
"learning_rate": 6.875621643436147e-05,
|
|
"loss": 0.4113,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 1.6028776978417265,
|
|
"grad_norm": 0.43380769864315477,
|
|
"learning_rate": 6.872826785657802e-05,
|
|
"loss": 0.4111,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 1.60431654676259,
|
|
"grad_norm": 0.3237848384002195,
|
|
"learning_rate": 6.870029028177324e-05,
|
|
"loss": 0.4147,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 1.6057553956834534,
|
|
"grad_norm": 0.3628739835040663,
|
|
"learning_rate": 6.867228373818648e-05,
|
|
"loss": 0.4125,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 1.6071942446043166,
|
|
"grad_norm": 0.3601060733125625,
|
|
"learning_rate": 6.864424825408624e-05,
|
|
"loss": 0.4034,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 1.6086330935251798,
|
|
"grad_norm": 0.3523344505185421,
|
|
"learning_rate": 6.861618385777028e-05,
|
|
"loss": 0.424,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 1.610071942446043,
|
|
"grad_norm": 0.33513865402363063,
|
|
"learning_rate": 6.858809057756558e-05,
|
|
"loss": 0.4112,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 1.6115107913669064,
|
|
"grad_norm": 0.3493569216719213,
|
|
"learning_rate": 6.855996844182819e-05,
|
|
"loss": 0.4175,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 1.6129496402877699,
|
|
"grad_norm": 0.37091300953277856,
|
|
"learning_rate": 6.853181747894334e-05,
|
|
"loss": 0.4072,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 1.614388489208633,
|
|
"grad_norm": 0.35399907114549634,
|
|
"learning_rate": 6.850363771732536e-05,
|
|
"loss": 0.415,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 1.6158273381294963,
|
|
"grad_norm": 0.32838160199407745,
|
|
"learning_rate": 6.847542918541762e-05,
|
|
"loss": 0.4234,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 1.6172661870503597,
|
|
"grad_norm": 0.33369653335699007,
|
|
"learning_rate": 6.844719191169254e-05,
|
|
"loss": 0.4132,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 1.6187050359712232,
|
|
"grad_norm": 0.32401281820988453,
|
|
"learning_rate": 6.841892592465158e-05,
|
|
"loss": 0.4058,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 1.6201438848920864,
|
|
"grad_norm": 0.29527247919516475,
|
|
"learning_rate": 6.839063125282512e-05,
|
|
"loss": 0.4169,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 1.6215827338129496,
|
|
"grad_norm": 0.3989909678209932,
|
|
"learning_rate": 6.836230792477256e-05,
|
|
"loss": 0.4111,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 1.6230215827338128,
|
|
"grad_norm": 0.28664194189928793,
|
|
"learning_rate": 6.833395596908217e-05,
|
|
"loss": 0.4168,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 1.6244604316546762,
|
|
"grad_norm": 0.28679375719271183,
|
|
"learning_rate": 6.830557541437114e-05,
|
|
"loss": 0.4053,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 1.6258992805755397,
|
|
"grad_norm": 0.3751096932816005,
|
|
"learning_rate": 6.827716628928556e-05,
|
|
"loss": 0.4151,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 1.6273381294964029,
|
|
"grad_norm": 0.5441536616363141,
|
|
"learning_rate": 6.824872862250028e-05,
|
|
"loss": 0.422,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 1.628776978417266,
|
|
"grad_norm": 0.6681085872979128,
|
|
"learning_rate": 6.822026244271903e-05,
|
|
"loss": 0.414,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 1.6302158273381295,
|
|
"grad_norm": 0.7682412572051888,
|
|
"learning_rate": 6.819176777867425e-05,
|
|
"loss": 0.4157,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 1.631654676258993,
|
|
"grad_norm": 0.8039449087436183,
|
|
"learning_rate": 6.816324465912723e-05,
|
|
"loss": 0.4126,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 1.6330935251798562,
|
|
"grad_norm": 0.9180943543059067,
|
|
"learning_rate": 6.813469311286789e-05,
|
|
"loss": 0.4215,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 1.6345323741007194,
|
|
"grad_norm": 1.1341630995558352,
|
|
"learning_rate": 6.810611316871488e-05,
|
|
"loss": 0.4128,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 1.6359712230215826,
|
|
"grad_norm": 0.7040278577335536,
|
|
"learning_rate": 6.80775048555155e-05,
|
|
"loss": 0.4102,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 1.637410071942446,
|
|
"grad_norm": 0.4430411456257879,
|
|
"learning_rate": 6.804886820214572e-05,
|
|
"loss": 0.4156,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 1.6388489208633095,
|
|
"grad_norm": 0.5899482330781229,
|
|
"learning_rate": 6.802020323751008e-05,
|
|
"loss": 0.4141,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 1.6402877697841727,
|
|
"grad_norm": 0.7920844274123987,
|
|
"learning_rate": 6.799150999054169e-05,
|
|
"loss": 0.4233,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 1.641726618705036,
|
|
"grad_norm": 0.8774328378161079,
|
|
"learning_rate": 6.796278849020225e-05,
|
|
"loss": 0.4143,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 1.6431654676258993,
|
|
"grad_norm": 0.6572390139526776,
|
|
"learning_rate": 6.79340387654819e-05,
|
|
"loss": 0.4093,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 1.6446043165467625,
|
|
"grad_norm": 0.4032497646303312,
|
|
"learning_rate": 6.790526084539939e-05,
|
|
"loss": 0.4172,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 1.646043165467626,
|
|
"grad_norm": 0.33944122747485767,
|
|
"learning_rate": 6.787645475900182e-05,
|
|
"loss": 0.4074,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 1.6474820143884892,
|
|
"grad_norm": 0.38874340039439864,
|
|
"learning_rate": 6.784762053536475e-05,
|
|
"loss": 0.4203,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 1.6489208633093524,
|
|
"grad_norm": 0.40733333684337525,
|
|
"learning_rate": 6.781875820359216e-05,
|
|
"loss": 0.4081,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 1.6503597122302158,
|
|
"grad_norm": 0.4146101357225759,
|
|
"learning_rate": 6.778986779281639e-05,
|
|
"loss": 0.4153,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 1.6517985611510793,
|
|
"grad_norm": 0.390635806033591,
|
|
"learning_rate": 6.776094933219811e-05,
|
|
"loss": 0.406,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 1.6532374100719425,
|
|
"grad_norm": 0.34920955028601547,
|
|
"learning_rate": 6.773200285092633e-05,
|
|
"loss": 0.4105,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 1.6546762589928057,
|
|
"grad_norm": 0.3649296416784623,
|
|
"learning_rate": 6.770302837821833e-05,
|
|
"loss": 0.4085,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 1.6561151079136691,
|
|
"grad_norm": 0.33539966785468556,
|
|
"learning_rate": 6.767402594331961e-05,
|
|
"loss": 0.4095,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 1.6575539568345323,
|
|
"grad_norm": 0.35560757287331174,
|
|
"learning_rate": 6.764499557550396e-05,
|
|
"loss": 0.4134,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 1.6589928057553958,
|
|
"grad_norm": 0.4302937340639296,
|
|
"learning_rate": 6.761593730407329e-05,
|
|
"loss": 0.4163,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 1.660431654676259,
|
|
"grad_norm": 0.4820432460327968,
|
|
"learning_rate": 6.758685115835776e-05,
|
|
"loss": 0.413,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 1.6618705035971222,
|
|
"grad_norm": 0.45493650414717424,
|
|
"learning_rate": 6.755773716771555e-05,
|
|
"loss": 0.4132,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 1.6633093525179856,
|
|
"grad_norm": 0.5087478869548814,
|
|
"learning_rate": 6.752859536153306e-05,
|
|
"loss": 0.4072,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 1.664748201438849,
|
|
"grad_norm": 0.4874886227916882,
|
|
"learning_rate": 6.749942576922473e-05,
|
|
"loss": 0.4063,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 1.6661870503597123,
|
|
"grad_norm": 0.484329610042878,
|
|
"learning_rate": 6.7470228420233e-05,
|
|
"loss": 0.4198,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 1.6676258992805755,
|
|
"grad_norm": 0.5404489027931266,
|
|
"learning_rate": 6.744100334402836e-05,
|
|
"loss": 0.4062,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 1.6690647482014387,
|
|
"grad_norm": 0.5447756313427996,
|
|
"learning_rate": 6.741175057010932e-05,
|
|
"loss": 0.4,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 1.6705035971223021,
|
|
"grad_norm": 0.4532765952951404,
|
|
"learning_rate": 6.738247012800228e-05,
|
|
"loss": 0.41,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 1.6719424460431656,
|
|
"grad_norm": 0.39366343626986483,
|
|
"learning_rate": 6.735316204726163e-05,
|
|
"loss": 0.4192,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 1.6733812949640288,
|
|
"grad_norm": 0.3260200942450756,
|
|
"learning_rate": 6.732382635746961e-05,
|
|
"loss": 0.4106,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 1.674820143884892,
|
|
"grad_norm": 0.31721397475961977,
|
|
"learning_rate": 6.729446308823635e-05,
|
|
"loss": 0.4016,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 1.6762589928057554,
|
|
"grad_norm": 0.3394753508124515,
|
|
"learning_rate": 6.72650722691998e-05,
|
|
"loss": 0.4182,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 1.6776978417266188,
|
|
"grad_norm": 0.3725389131813356,
|
|
"learning_rate": 6.723565393002576e-05,
|
|
"loss": 0.4233,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 1.679136690647482,
|
|
"grad_norm": 0.3381248299143651,
|
|
"learning_rate": 6.720620810040776e-05,
|
|
"loss": 0.412,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 1.6805755395683453,
|
|
"grad_norm": 0.39708508477503807,
|
|
"learning_rate": 6.717673481006709e-05,
|
|
"loss": 0.4048,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 1.6820143884892085,
|
|
"grad_norm": 0.458999099755708,
|
|
"learning_rate": 6.714723408875279e-05,
|
|
"loss": 0.4146,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 1.683453237410072,
|
|
"grad_norm": 0.4731616709172583,
|
|
"learning_rate": 6.711770596624153e-05,
|
|
"loss": 0.4192,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 1.6848920863309353,
|
|
"grad_norm": 0.4834892760591032,
|
|
"learning_rate": 6.708815047233768e-05,
|
|
"loss": 0.4152,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 1.6863309352517986,
|
|
"grad_norm": 0.42894607107973964,
|
|
"learning_rate": 6.705856763687324e-05,
|
|
"loss": 0.4147,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 1.6877697841726618,
|
|
"grad_norm": 0.3859036785319959,
|
|
"learning_rate": 6.702895748970776e-05,
|
|
"loss": 0.4067,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 1.6892086330935252,
|
|
"grad_norm": 0.400431781609053,
|
|
"learning_rate": 6.699932006072842e-05,
|
|
"loss": 0.4204,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 1.6906474820143886,
|
|
"grad_norm": 0.34606568668407256,
|
|
"learning_rate": 6.69696553798499e-05,
|
|
"loss": 0.4048,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 1.6920863309352518,
|
|
"grad_norm": 0.34031695449579613,
|
|
"learning_rate": 6.693996347701442e-05,
|
|
"loss": 0.4133,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 1.693525179856115,
|
|
"grad_norm": 0.45062565239386126,
|
|
"learning_rate": 6.691024438219159e-05,
|
|
"loss": 0.4244,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 1.6949640287769783,
|
|
"grad_norm": 0.5900408463531492,
|
|
"learning_rate": 6.688049812537857e-05,
|
|
"loss": 0.4036,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 1.6964028776978417,
|
|
"grad_norm": 0.5852406545392577,
|
|
"learning_rate": 6.685072473659989e-05,
|
|
"loss": 0.4063,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 1.6978417266187051,
|
|
"grad_norm": 0.5446070379865361,
|
|
"learning_rate": 6.682092424590747e-05,
|
|
"loss": 0.4158,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 1.6992805755395683,
|
|
"grad_norm": 0.5260420880538615,
|
|
"learning_rate": 6.679109668338057e-05,
|
|
"loss": 0.4264,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 1.7007194244604316,
|
|
"grad_norm": 0.48331412684807296,
|
|
"learning_rate": 6.676124207912582e-05,
|
|
"loss": 0.4108,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 1.702158273381295,
|
|
"grad_norm": 0.39041660415427576,
|
|
"learning_rate": 6.673136046327707e-05,
|
|
"loss": 0.4058,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 1.7035971223021584,
|
|
"grad_norm": 0.2788692276357607,
|
|
"learning_rate": 6.670145186599552e-05,
|
|
"loss": 0.41,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 1.7050359712230216,
|
|
"grad_norm": 0.2585239756382045,
|
|
"learning_rate": 6.667151631746953e-05,
|
|
"loss": 0.4069,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 1.7064748201438849,
|
|
"grad_norm": 0.38268218337128024,
|
|
"learning_rate": 6.664155384791473e-05,
|
|
"loss": 0.4017,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 1.707913669064748,
|
|
"grad_norm": 0.4382189854421018,
|
|
"learning_rate": 6.661156448757386e-05,
|
|
"loss": 0.4049,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 1.7093525179856115,
|
|
"grad_norm": 0.41508653385772964,
|
|
"learning_rate": 6.658154826671685e-05,
|
|
"loss": 0.4092,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 1.710791366906475,
|
|
"grad_norm": 0.4277551449447479,
|
|
"learning_rate": 6.655150521564072e-05,
|
|
"loss": 0.4071,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 1.7122302158273381,
|
|
"grad_norm": 0.4920332702338059,
|
|
"learning_rate": 6.652143536466955e-05,
|
|
"loss": 0.4246,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 1.7136690647482014,
|
|
"grad_norm": 0.4607027546082493,
|
|
"learning_rate": 6.649133874415454e-05,
|
|
"loss": 0.4099,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 1.7151079136690648,
|
|
"grad_norm": 0.3927913814935246,
|
|
"learning_rate": 6.646121538447382e-05,
|
|
"loss": 0.4046,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 1.7165467625899282,
|
|
"grad_norm": 0.2947427246904165,
|
|
"learning_rate": 6.643106531603259e-05,
|
|
"loss": 0.4026,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 1.7179856115107914,
|
|
"grad_norm": 0.3405518937453307,
|
|
"learning_rate": 6.640088856926294e-05,
|
|
"loss": 0.4068,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 1.7194244604316546,
|
|
"grad_norm": 0.5545298705592184,
|
|
"learning_rate": 6.637068517462395e-05,
|
|
"loss": 0.4021,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 1.7208633093525179,
|
|
"grad_norm": 0.5203170041901992,
|
|
"learning_rate": 6.634045516260156e-05,
|
|
"loss": 0.4123,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 1.7223021582733813,
|
|
"grad_norm": 0.3662193888162871,
|
|
"learning_rate": 6.631019856370856e-05,
|
|
"loss": 0.4129,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 1.7237410071942447,
|
|
"grad_norm": 0.30979173237246843,
|
|
"learning_rate": 6.627991540848464e-05,
|
|
"loss": 0.4018,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 1.725179856115108,
|
|
"grad_norm": 0.3906808259278531,
|
|
"learning_rate": 6.624960572749622e-05,
|
|
"loss": 0.4047,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 1.7266187050359711,
|
|
"grad_norm": 0.4499791693302069,
|
|
"learning_rate": 6.621926955133657e-05,
|
|
"loss": 0.4129,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 1.7280575539568346,
|
|
"grad_norm": 0.5040857351643536,
|
|
"learning_rate": 6.618890691062561e-05,
|
|
"loss": 0.4174,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 1.7294964028776978,
|
|
"grad_norm": 0.4811368872710842,
|
|
"learning_rate": 6.615851783601006e-05,
|
|
"loss": 0.4053,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 1.7309352517985612,
|
|
"grad_norm": 0.3337717007696158,
|
|
"learning_rate": 6.612810235816326e-05,
|
|
"loss": 0.4091,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 1.7323741007194244,
|
|
"grad_norm": 0.32353525074518524,
|
|
"learning_rate": 6.609766050778525e-05,
|
|
"loss": 0.4093,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 1.7338129496402876,
|
|
"grad_norm": 0.34948984870688293,
|
|
"learning_rate": 6.606719231560265e-05,
|
|
"loss": 0.4114,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 1.735251798561151,
|
|
"grad_norm": 0.31459819383157894,
|
|
"learning_rate": 6.60366978123687e-05,
|
|
"loss": 0.4092,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 1.7366906474820145,
|
|
"grad_norm": 0.33434667297456505,
|
|
"learning_rate": 6.600617702886314e-05,
|
|
"loss": 0.406,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 1.7381294964028777,
|
|
"grad_norm": 0.2905445186740311,
|
|
"learning_rate": 6.597562999589233e-05,
|
|
"loss": 0.4094,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 1.739568345323741,
|
|
"grad_norm": 0.31687633845416563,
|
|
"learning_rate": 6.594505674428903e-05,
|
|
"loss": 0.4126,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 1.7410071942446042,
|
|
"grad_norm": 0.4214653846648878,
|
|
"learning_rate": 6.59144573049125e-05,
|
|
"loss": 0.4061,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 1.7424460431654676,
|
|
"grad_norm": 0.4423344471741586,
|
|
"learning_rate": 6.588383170864849e-05,
|
|
"loss": 0.411,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 1.743884892086331,
|
|
"grad_norm": 0.44736929723014374,
|
|
"learning_rate": 6.585317998640903e-05,
|
|
"loss": 0.4157,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 1.7453237410071942,
|
|
"grad_norm": 0.4116457171856172,
|
|
"learning_rate": 6.582250216913265e-05,
|
|
"loss": 0.4025,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 1.7467625899280574,
|
|
"grad_norm": 0.40360938669889546,
|
|
"learning_rate": 6.579179828778414e-05,
|
|
"loss": 0.4075,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 1.7482014388489209,
|
|
"grad_norm": 0.4162770911222631,
|
|
"learning_rate": 6.576106837335458e-05,
|
|
"loss": 0.4078,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 1.7496402877697843,
|
|
"grad_norm": 0.4351024454479562,
|
|
"learning_rate": 6.573031245686142e-05,
|
|
"loss": 0.4144,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 1.7510791366906475,
|
|
"grad_norm": 0.42832866186916807,
|
|
"learning_rate": 6.569953056934826e-05,
|
|
"loss": 0.4115,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 1.7525179856115107,
|
|
"grad_norm": 0.36772599326702243,
|
|
"learning_rate": 6.566872274188496e-05,
|
|
"loss": 0.4227,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 1.753956834532374,
|
|
"grad_norm": 0.30646398234919936,
|
|
"learning_rate": 6.563788900556756e-05,
|
|
"loss": 0.4132,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 1.7553956834532374,
|
|
"grad_norm": 0.2581683402363098,
|
|
"learning_rate": 6.560702939151826e-05,
|
|
"loss": 0.4122,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 1.7568345323741008,
|
|
"grad_norm": 0.2579698872381285,
|
|
"learning_rate": 6.557614393088534e-05,
|
|
"loss": 0.4158,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 1.758273381294964,
|
|
"grad_norm": 0.3129253064703092,
|
|
"learning_rate": 6.554523265484321e-05,
|
|
"loss": 0.4056,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 1.7597122302158272,
|
|
"grad_norm": 0.328565084270504,
|
|
"learning_rate": 6.551429559459231e-05,
|
|
"loss": 0.4091,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 1.7611510791366907,
|
|
"grad_norm": 0.3076306410813236,
|
|
"learning_rate": 6.548333278135915e-05,
|
|
"loss": 0.4036,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 1.762589928057554,
|
|
"grad_norm": 0.31100652912150695,
|
|
"learning_rate": 6.545234424639616e-05,
|
|
"loss": 0.4086,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 1.7640287769784173,
|
|
"grad_norm": 0.4255345969276006,
|
|
"learning_rate": 6.542133002098178e-05,
|
|
"loss": 0.4143,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 1.7654676258992805,
|
|
"grad_norm": 0.5519170841848765,
|
|
"learning_rate": 6.53902901364204e-05,
|
|
"loss": 0.4172,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 1.7669064748201437,
|
|
"grad_norm": 0.6431117319415265,
|
|
"learning_rate": 6.535922462404226e-05,
|
|
"loss": 0.4045,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 1.7683453237410072,
|
|
"grad_norm": 0.7154469244707025,
|
|
"learning_rate": 6.53281335152035e-05,
|
|
"loss": 0.4141,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 1.7697841726618706,
|
|
"grad_norm": 0.7162117482101983,
|
|
"learning_rate": 6.529701684128608e-05,
|
|
"loss": 0.4087,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 1.7712230215827338,
|
|
"grad_norm": 0.6844988584194438,
|
|
"learning_rate": 6.526587463369779e-05,
|
|
"loss": 0.418,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 1.772661870503597,
|
|
"grad_norm": 0.6718739123513164,
|
|
"learning_rate": 6.523470692387215e-05,
|
|
"loss": 0.4216,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 1.7741007194244605,
|
|
"grad_norm": 0.7340334254776659,
|
|
"learning_rate": 6.520351374326846e-05,
|
|
"loss": 0.4091,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 1.775539568345324,
|
|
"grad_norm": 6.837891786402796,
|
|
"learning_rate": 6.51722951233717e-05,
|
|
"loss": 0.4259,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 1.776978417266187,
|
|
"grad_norm": 1.4683042357455764,
|
|
"learning_rate": 6.514105109569254e-05,
|
|
"loss": 0.4153,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 1.7784172661870503,
|
|
"grad_norm": 1.2255401259055407,
|
|
"learning_rate": 6.510978169176731e-05,
|
|
"loss": 0.4201,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 1.7798561151079135,
|
|
"grad_norm": 0.31525693403716415,
|
|
"learning_rate": 6.507848694315794e-05,
|
|
"loss": 0.4041,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 1.781294964028777,
|
|
"grad_norm": 1.075945101830815,
|
|
"learning_rate": 6.504716688145192e-05,
|
|
"loss": 0.4108,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 1.7827338129496404,
|
|
"grad_norm": 1.083974820516606,
|
|
"learning_rate": 6.501582153826235e-05,
|
|
"loss": 0.4189,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 1.7841726618705036,
|
|
"grad_norm": 0.4438069358490529,
|
|
"learning_rate": 6.498445094522776e-05,
|
|
"loss": 0.4138,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 1.7856115107913668,
|
|
"grad_norm": 0.6042071647475051,
|
|
"learning_rate": 6.495305513401226e-05,
|
|
"loss": 0.4099,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 1.7870503597122303,
|
|
"grad_norm": 0.8778214085458328,
|
|
"learning_rate": 6.492163413630534e-05,
|
|
"loss": 0.4249,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 1.7884892086330937,
|
|
"grad_norm": 0.7231214894554593,
|
|
"learning_rate": 6.489018798382195e-05,
|
|
"loss": 0.422,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 1.789928057553957,
|
|
"grad_norm": 0.4250172311505059,
|
|
"learning_rate": 6.485871670830243e-05,
|
|
"loss": 0.4185,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 1.79136690647482,
|
|
"grad_norm": 0.46360213691053914,
|
|
"learning_rate": 6.482722034151247e-05,
|
|
"loss": 0.4108,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 1.7928057553956833,
|
|
"grad_norm": 0.7001712271804472,
|
|
"learning_rate": 6.479569891524307e-05,
|
|
"loss": 0.4085,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 1.7942446043165468,
|
|
"grad_norm": 0.4663685278823222,
|
|
"learning_rate": 6.476415246131056e-05,
|
|
"loss": 0.4026,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 1.7956834532374102,
|
|
"grad_norm": 0.35242007084624677,
|
|
"learning_rate": 6.47325810115565e-05,
|
|
"loss": 0.4146,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 1.7971223021582734,
|
|
"grad_norm": 0.4402882107538915,
|
|
"learning_rate": 6.470098459784768e-05,
|
|
"loss": 0.4061,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 1.7985611510791366,
|
|
"grad_norm": 0.4927824463137918,
|
|
"learning_rate": 6.466936325207612e-05,
|
|
"loss": 0.407,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"grad_norm": 2.444967282861804,
|
|
"learning_rate": 6.463771700615898e-05,
|
|
"loss": 0.4384,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 1.8014388489208633,
|
|
"grad_norm": 1.9224348984194457,
|
|
"learning_rate": 6.460604589203854e-05,
|
|
"loss": 0.4338,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 1.8028776978417267,
|
|
"grad_norm": 0.9561737151700765,
|
|
"learning_rate": 6.457434994168224e-05,
|
|
"loss": 0.446,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 1.80431654676259,
|
|
"grad_norm": 0.9522097389196967,
|
|
"learning_rate": 6.454262918708247e-05,
|
|
"loss": 0.4281,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 1.8057553956834531,
|
|
"grad_norm": 2.7923174727181093,
|
|
"learning_rate": 6.451088366025682e-05,
|
|
"loss": 0.4561,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 1.8071942446043165,
|
|
"grad_norm": 3.1235818492348892,
|
|
"learning_rate": 6.447911339324773e-05,
|
|
"loss": 0.459,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 1.80863309352518,
|
|
"grad_norm": 1.7966329512261376,
|
|
"learning_rate": 6.444731841812274e-05,
|
|
"loss": 0.4604,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 1.8100719424460432,
|
|
"grad_norm": 22.529540289494804,
|
|
"learning_rate": 6.44154987669742e-05,
|
|
"loss": 0.6483,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 1.8115107913669064,
|
|
"grad_norm": 6.771243519731716,
|
|
"learning_rate": 6.438365447191947e-05,
|
|
"loss": 0.5792,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 1.8129496402877698,
|
|
"grad_norm": 3.2081197341119236,
|
|
"learning_rate": 6.435178556510076e-05,
|
|
"loss": 0.5072,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 1.814388489208633,
|
|
"grad_norm": 8.188041031140857,
|
|
"learning_rate": 6.431989207868508e-05,
|
|
"loss": 0.5018,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 1.8158273381294965,
|
|
"grad_norm": 1085.5551002151617,
|
|
"learning_rate": 6.428797404486431e-05,
|
|
"loss": 0.6493,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 1.8172661870503597,
|
|
"grad_norm": 3.2736686015558125,
|
|
"learning_rate": 6.425603149585507e-05,
|
|
"loss": 0.5107,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 1.818705035971223,
|
|
"grad_norm": 2.22717964160841,
|
|
"learning_rate": 6.422406446389872e-05,
|
|
"loss": 0.4845,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 1.8201438848920863,
|
|
"grad_norm": 1.329912948642261,
|
|
"learning_rate": 6.419207298126135e-05,
|
|
"loss": 0.463,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 1.8215827338129498,
|
|
"grad_norm": 265.4958412073951,
|
|
"learning_rate": 6.416005708023372e-05,
|
|
"loss": 1.8774,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 1.823021582733813,
|
|
"grad_norm": 6.929043713443921,
|
|
"learning_rate": 6.412801679313125e-05,
|
|
"loss": 0.5125,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 1.8244604316546762,
|
|
"grad_norm": 2.660059063808743,
|
|
"learning_rate": 6.409595215229397e-05,
|
|
"loss": 0.5042,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 1.8258992805755394,
|
|
"grad_norm": 3.480337051499914,
|
|
"learning_rate": 6.406386319008647e-05,
|
|
"loss": 0.4766,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 1.8273381294964028,
|
|
"grad_norm": 10.220863079756933,
|
|
"learning_rate": 6.403174993889791e-05,
|
|
"loss": 0.4896,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 1.8287769784172663,
|
|
"grad_norm": 122.7534547473039,
|
|
"learning_rate": 6.399961243114197e-05,
|
|
"loss": 0.6035,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 1.8302158273381295,
|
|
"grad_norm": 7.405541411692232,
|
|
"learning_rate": 6.39674506992568e-05,
|
|
"loss": 0.622,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 1.8316546762589927,
|
|
"grad_norm": 2.4763860553537946,
|
|
"learning_rate": 6.393526477570499e-05,
|
|
"loss": 0.5079,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 1.8330935251798561,
|
|
"grad_norm": 1.0561145840013435,
|
|
"learning_rate": 6.390305469297357e-05,
|
|
"loss": 0.4795,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 1.8345323741007196,
|
|
"grad_norm": 1.7463303747728163,
|
|
"learning_rate": 6.387082048357397e-05,
|
|
"loss": 0.4735,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.8359712230215828,
|
|
"grad_norm": 1.5861322348432676,
|
|
"learning_rate": 6.383856218004193e-05,
|
|
"loss": 0.4642,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 1.837410071942446,
|
|
"grad_norm": 2.7458922384875986,
|
|
"learning_rate": 6.380627981493753e-05,
|
|
"loss": 0.5084,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 1.8388489208633092,
|
|
"grad_norm": 1.6866305355688227,
|
|
"learning_rate": 6.377397342084514e-05,
|
|
"loss": 0.4806,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 1.8402877697841726,
|
|
"grad_norm": 7.188370970971249,
|
|
"learning_rate": 6.37416430303734e-05,
|
|
"loss": 0.534,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 1.841726618705036,
|
|
"grad_norm": 6.28622808888569,
|
|
"learning_rate": 6.370928867615513e-05,
|
|
"loss": 0.5456,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 1.8431654676258993,
|
|
"grad_norm": 133.84218887004928,
|
|
"learning_rate": 6.367691039084736e-05,
|
|
"loss": 0.6013,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 1.8446043165467625,
|
|
"grad_norm": 1.7201385015283455,
|
|
"learning_rate": 6.36445082071313e-05,
|
|
"loss": 0.5292,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 1.846043165467626,
|
|
"grad_norm": 1.413366085018994,
|
|
"learning_rate": 6.361208215771222e-05,
|
|
"loss": 0.4956,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 1.8474820143884894,
|
|
"grad_norm": 3.182765557747668,
|
|
"learning_rate": 6.357963227531954e-05,
|
|
"loss": 0.5615,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 1.8489208633093526,
|
|
"grad_norm": 3.432143562781076,
|
|
"learning_rate": 6.35471585927067e-05,
|
|
"loss": 0.5262,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 1.8503597122302158,
|
|
"grad_norm": 8.442515712180505,
|
|
"learning_rate": 6.351466114265118e-05,
|
|
"loss": 0.598,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 1.851798561151079,
|
|
"grad_norm": 2.127067129501325,
|
|
"learning_rate": 6.348213995795445e-05,
|
|
"loss": 0.5195,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 1.8532374100719424,
|
|
"grad_norm": 0.7165663370894075,
|
|
"learning_rate": 6.344959507144192e-05,
|
|
"loss": 0.4827,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 1.8546762589928059,
|
|
"grad_norm": 0.8781812103146103,
|
|
"learning_rate": 6.341702651596293e-05,
|
|
"loss": 0.4587,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 1.856115107913669,
|
|
"grad_norm": 0.8035959609456851,
|
|
"learning_rate": 6.338443432439074e-05,
|
|
"loss": 0.4744,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 1.8575539568345323,
|
|
"grad_norm": 0.9563352931147517,
|
|
"learning_rate": 6.335181852962242e-05,
|
|
"loss": 0.4642,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 1.8589928057553957,
|
|
"grad_norm": 0.7855041679333894,
|
|
"learning_rate": 6.331917916457889e-05,
|
|
"loss": 0.4612,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 1.8604316546762591,
|
|
"grad_norm": 1.5365711204979504,
|
|
"learning_rate": 6.328651626220485e-05,
|
|
"loss": 0.4732,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 1.8618705035971224,
|
|
"grad_norm": 0.7568118470170938,
|
|
"learning_rate": 6.325382985546879e-05,
|
|
"loss": 0.466,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 1.8633093525179856,
|
|
"grad_norm": 0.603144383346574,
|
|
"learning_rate": 6.322111997736288e-05,
|
|
"loss": 0.4617,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 1.8647482014388488,
|
|
"grad_norm": 0.7611452510226641,
|
|
"learning_rate": 6.3188386660903e-05,
|
|
"loss": 0.4529,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 1.8661870503597122,
|
|
"grad_norm": 0.5640204848356418,
|
|
"learning_rate": 6.315562993912869e-05,
|
|
"loss": 0.453,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 1.8676258992805757,
|
|
"grad_norm": 0.7538387217357524,
|
|
"learning_rate": 6.31228498451031e-05,
|
|
"loss": 0.4509,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 1.8690647482014389,
|
|
"grad_norm": 0.5372913533733327,
|
|
"learning_rate": 6.309004641191299e-05,
|
|
"loss": 0.4445,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 1.870503597122302,
|
|
"grad_norm": 0.6991179612148957,
|
|
"learning_rate": 6.305721967266869e-05,
|
|
"loss": 0.448,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.8719424460431655,
|
|
"grad_norm": 0.37367784264425935,
|
|
"learning_rate": 6.302436966050401e-05,
|
|
"loss": 0.4318,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 1.873381294964029,
|
|
"grad_norm": 0.6934540928195164,
|
|
"learning_rate": 6.29914964085763e-05,
|
|
"loss": 0.4326,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 1.8748201438848922,
|
|
"grad_norm": 0.4162013450627137,
|
|
"learning_rate": 6.295859995006629e-05,
|
|
"loss": 0.4347,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 1.8762589928057554,
|
|
"grad_norm": 0.4660483736626902,
|
|
"learning_rate": 6.292568031817823e-05,
|
|
"loss": 0.4351,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 1.8776978417266186,
|
|
"grad_norm": 0.42400777149610414,
|
|
"learning_rate": 6.28927375461397e-05,
|
|
"loss": 0.4279,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 1.879136690647482,
|
|
"grad_norm": 0.5029130866544471,
|
|
"learning_rate": 6.285977166720166e-05,
|
|
"loss": 0.4295,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 1.8805755395683454,
|
|
"grad_norm": 0.42130808755514393,
|
|
"learning_rate": 6.28267827146384e-05,
|
|
"loss": 0.4192,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 1.8820143884892087,
|
|
"grad_norm": 0.48870355111585995,
|
|
"learning_rate": 6.279377072174744e-05,
|
|
"loss": 0.4293,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 1.8834532374100719,
|
|
"grad_norm": 0.4675249076742231,
|
|
"learning_rate": 6.276073572184964e-05,
|
|
"loss": 0.4325,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 1.8848920863309353,
|
|
"grad_norm": 0.3750630801027773,
|
|
"learning_rate": 6.272767774828903e-05,
|
|
"loss": 0.4295,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 1.8863309352517985,
|
|
"grad_norm": 0.38733969293029497,
|
|
"learning_rate": 6.269459683443283e-05,
|
|
"loss": 0.4276,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 1.887769784172662,
|
|
"grad_norm": 0.3245573257832168,
|
|
"learning_rate": 6.266149301367146e-05,
|
|
"loss": 0.4269,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 1.8892086330935252,
|
|
"grad_norm": 0.4354366118875056,
|
|
"learning_rate": 6.262836631941839e-05,
|
|
"loss": 0.4247,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 1.8906474820143884,
|
|
"grad_norm": 0.315199756414718,
|
|
"learning_rate": 6.259521678511023e-05,
|
|
"loss": 0.4248,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 1.8920863309352518,
|
|
"grad_norm": 0.3298924235829545,
|
|
"learning_rate": 6.256204444420663e-05,
|
|
"loss": 0.4102,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 1.8935251798561152,
|
|
"grad_norm": 0.2994811931010017,
|
|
"learning_rate": 6.252884933019028e-05,
|
|
"loss": 0.4207,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 1.8949640287769784,
|
|
"grad_norm": 0.42323884038545273,
|
|
"learning_rate": 6.249563147656679e-05,
|
|
"loss": 0.4198,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 1.8964028776978417,
|
|
"grad_norm": 0.4291148039980726,
|
|
"learning_rate": 6.24623909168648e-05,
|
|
"loss": 0.4182,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 1.8978417266187049,
|
|
"grad_norm": 0.43797602392280516,
|
|
"learning_rate": 6.242912768463581e-05,
|
|
"loss": 0.4251,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 1.8992805755395683,
|
|
"grad_norm": 0.31398809591968035,
|
|
"learning_rate": 6.239584181345426e-05,
|
|
"loss": 0.4203,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 1.9007194244604317,
|
|
"grad_norm": 0.28355404034307835,
|
|
"learning_rate": 6.236253333691739e-05,
|
|
"loss": 0.4187,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 1.902158273381295,
|
|
"grad_norm": 0.341752587218552,
|
|
"learning_rate": 6.23292022886453e-05,
|
|
"loss": 0.415,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 1.9035971223021582,
|
|
"grad_norm": 0.3534481231918439,
|
|
"learning_rate": 6.229584870228083e-05,
|
|
"loss": 0.4187,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 1.9050359712230216,
|
|
"grad_norm": 0.36421304455724157,
|
|
"learning_rate": 6.226247261148958e-05,
|
|
"loss": 0.4189,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 1.906474820143885,
|
|
"grad_norm": 0.332760957750746,
|
|
"learning_rate": 6.22290740499599e-05,
|
|
"loss": 0.4328,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 1.9079136690647482,
|
|
"grad_norm": 0.3106224711926624,
|
|
"learning_rate": 6.21956530514028e-05,
|
|
"loss": 0.4175,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 1.9093525179856115,
|
|
"grad_norm": 0.3832684431882573,
|
|
"learning_rate": 6.216220964955192e-05,
|
|
"loss": 0.4213,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 1.9107913669064747,
|
|
"grad_norm": 0.3182664524110593,
|
|
"learning_rate": 6.21287438781635e-05,
|
|
"loss": 0.4081,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 1.912230215827338,
|
|
"grad_norm": 0.26037988569821807,
|
|
"learning_rate": 6.209525577101642e-05,
|
|
"loss": 0.4166,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 1.9136690647482015,
|
|
"grad_norm": 0.2912278406893621,
|
|
"learning_rate": 6.206174536191207e-05,
|
|
"loss": 0.4168,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 1.9151079136690647,
|
|
"grad_norm": 0.4485614719978345,
|
|
"learning_rate": 6.202821268467433e-05,
|
|
"loss": 0.4263,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 1.916546762589928,
|
|
"grad_norm": 0.4342956652229297,
|
|
"learning_rate": 6.199465777314958e-05,
|
|
"loss": 0.4196,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 1.9179856115107914,
|
|
"grad_norm": 0.3303195946500688,
|
|
"learning_rate": 6.196108066120663e-05,
|
|
"loss": 0.4117,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 1.9194244604316548,
|
|
"grad_norm": 0.2276888502685281,
|
|
"learning_rate": 6.192748138273674e-05,
|
|
"loss": 0.4183,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 1.920863309352518,
|
|
"grad_norm": 0.2995356590089933,
|
|
"learning_rate": 6.189385997165348e-05,
|
|
"loss": 0.4158,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 1.9223021582733812,
|
|
"grad_norm": 0.24745315200081555,
|
|
"learning_rate": 6.186021646189281e-05,
|
|
"loss": 0.4041,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 1.9237410071942445,
|
|
"grad_norm": 0.22176112222569647,
|
|
"learning_rate": 6.182655088741294e-05,
|
|
"loss": 0.4161,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 1.925179856115108,
|
|
"grad_norm": 0.2174370659497639,
|
|
"learning_rate": 6.179286328219442e-05,
|
|
"loss": 0.4085,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 1.9266187050359713,
|
|
"grad_norm": 0.22892432882693567,
|
|
"learning_rate": 6.175915368024e-05,
|
|
"loss": 0.4171,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 1.9280575539568345,
|
|
"grad_norm": 0.26698342075572684,
|
|
"learning_rate": 6.172542211557463e-05,
|
|
"loss": 0.4096,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 1.9294964028776977,
|
|
"grad_norm": 0.21910132425573622,
|
|
"learning_rate": 6.169166862224542e-05,
|
|
"loss": 0.4137,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 1.9309352517985612,
|
|
"grad_norm": 0.22401588811392442,
|
|
"learning_rate": 6.165789323432166e-05,
|
|
"loss": 0.4134,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 1.9323741007194246,
|
|
"grad_norm": 0.2267585330290721,
|
|
"learning_rate": 6.162409598589467e-05,
|
|
"loss": 0.4197,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 1.9338129496402878,
|
|
"grad_norm": 0.23455930094877256,
|
|
"learning_rate": 6.159027691107791e-05,
|
|
"loss": 0.4152,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 1.935251798561151,
|
|
"grad_norm": 0.28061689446619226,
|
|
"learning_rate": 6.15564360440068e-05,
|
|
"loss": 0.4141,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 1.9366906474820142,
|
|
"grad_norm": 0.3062108497086098,
|
|
"learning_rate": 6.15225734188388e-05,
|
|
"loss": 0.4161,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 1.9381294964028777,
|
|
"grad_norm": 0.35490107188287534,
|
|
"learning_rate": 6.148868906975334e-05,
|
|
"loss": 0.4223,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 1.9395683453237411,
|
|
"grad_norm": 0.3442955625247736,
|
|
"learning_rate": 6.145478303095174e-05,
|
|
"loss": 0.4058,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 1.9410071942446043,
|
|
"grad_norm": 0.39261982892114783,
|
|
"learning_rate": 6.142085533665722e-05,
|
|
"loss": 0.4232,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 1.9424460431654675,
|
|
"grad_norm": 0.4632520008286428,
|
|
"learning_rate": 6.138690602111487e-05,
|
|
"loss": 0.4096,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.943884892086331,
|
|
"grad_norm": 0.485295361931828,
|
|
"learning_rate": 6.135293511859164e-05,
|
|
"loss": 0.4087,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 1.9453237410071944,
|
|
"grad_norm": 0.38206456996957383,
|
|
"learning_rate": 6.131894266337618e-05,
|
|
"loss": 0.4077,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 1.9467625899280576,
|
|
"grad_norm": 0.3294171401497278,
|
|
"learning_rate": 6.128492868977897e-05,
|
|
"loss": 0.4223,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 1.9482014388489208,
|
|
"grad_norm": 0.398849357943549,
|
|
"learning_rate": 6.12508932321322e-05,
|
|
"loss": 0.4172,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 1.949640287769784,
|
|
"grad_norm": 0.48982540195031965,
|
|
"learning_rate": 6.12168363247897e-05,
|
|
"loss": 0.4052,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 1.9510791366906475,
|
|
"grad_norm": 0.4916354437053983,
|
|
"learning_rate": 6.1182758002127e-05,
|
|
"loss": 0.4096,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 1.952517985611511,
|
|
"grad_norm": 0.46837855264547495,
|
|
"learning_rate": 6.114865829854123e-05,
|
|
"loss": 0.4241,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 1.9539568345323741,
|
|
"grad_norm": 0.37147124516884855,
|
|
"learning_rate": 6.111453724845106e-05,
|
|
"loss": 0.4173,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 1.9553956834532373,
|
|
"grad_norm": 0.2707796572879834,
|
|
"learning_rate": 6.108039488629679e-05,
|
|
"loss": 0.4175,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 1.9568345323741008,
|
|
"grad_norm": 0.30147421333395263,
|
|
"learning_rate": 6.104623124654016e-05,
|
|
"loss": 0.418,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 1.958273381294964,
|
|
"grad_norm": 0.4499047077061615,
|
|
"learning_rate": 6.101204636366441e-05,
|
|
"loss": 0.4138,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 1.9597122302158274,
|
|
"grad_norm": 0.3808157050392399,
|
|
"learning_rate": 6.0977840272174224e-05,
|
|
"loss": 0.4158,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 1.9611510791366906,
|
|
"grad_norm": 0.3519081586258106,
|
|
"learning_rate": 6.094361300659571e-05,
|
|
"loss": 0.4119,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 1.9625899280575538,
|
|
"grad_norm": 0.3542983008873921,
|
|
"learning_rate": 6.090936460147632e-05,
|
|
"loss": 0.4143,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 1.9640287769784173,
|
|
"grad_norm": 0.34613856155485506,
|
|
"learning_rate": 6.087509509138483e-05,
|
|
"loss": 0.4019,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 1.9654676258992807,
|
|
"grad_norm": 0.3786672408030351,
|
|
"learning_rate": 6.0840804510911374e-05,
|
|
"loss": 0.4127,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 1.966906474820144,
|
|
"grad_norm": 0.3244856101992254,
|
|
"learning_rate": 6.0806492894667315e-05,
|
|
"loss": 0.4008,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 1.9683453237410071,
|
|
"grad_norm": 0.3612158558943704,
|
|
"learning_rate": 6.077216027728524e-05,
|
|
"loss": 0.4076,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 1.9697841726618706,
|
|
"grad_norm": 0.36435116124815087,
|
|
"learning_rate": 6.073780669341896e-05,
|
|
"loss": 0.4066,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 1.9712230215827338,
|
|
"grad_norm": 0.31696690245015396,
|
|
"learning_rate": 6.070343217774343e-05,
|
|
"loss": 0.4088,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 1.9726618705035972,
|
|
"grad_norm": 0.22336772760482543,
|
|
"learning_rate": 6.066903676495477e-05,
|
|
"loss": 0.4168,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 1.9741007194244604,
|
|
"grad_norm": 0.303754876646737,
|
|
"learning_rate": 6.063462048977011e-05,
|
|
"loss": 0.4081,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 1.9755395683453236,
|
|
"grad_norm": 0.29928868797709823,
|
|
"learning_rate": 6.060018338692774e-05,
|
|
"loss": 0.4168,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 1.976978417266187,
|
|
"grad_norm": 0.241889271299397,
|
|
"learning_rate": 6.056572549118688e-05,
|
|
"loss": 0.4183,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 1.9784172661870505,
|
|
"grad_norm": 0.26372563544416205,
|
|
"learning_rate": 6.053124683732781e-05,
|
|
"loss": 0.4301,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.9798561151079137,
|
|
"grad_norm": 0.3580164619799479,
|
|
"learning_rate": 6.049674746015172e-05,
|
|
"loss": 0.4077,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 1.981294964028777,
|
|
"grad_norm": 0.39960899893994123,
|
|
"learning_rate": 6.046222739448075e-05,
|
|
"loss": 0.4191,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 1.9827338129496401,
|
|
"grad_norm": 0.38112635572271364,
|
|
"learning_rate": 6.042768667515786e-05,
|
|
"loss": 0.414,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 1.9841726618705036,
|
|
"grad_norm": 0.4452345821038321,
|
|
"learning_rate": 6.039312533704692e-05,
|
|
"loss": 0.4104,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 1.985611510791367,
|
|
"grad_norm": 0.5710364106364948,
|
|
"learning_rate": 6.0358543415032625e-05,
|
|
"loss": 0.4117,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.9870503597122302,
|
|
"grad_norm": 0.5678039638659944,
|
|
"learning_rate": 6.032394094402035e-05,
|
|
"loss": 0.4083,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 1.9884892086330934,
|
|
"grad_norm": 0.4723631634616504,
|
|
"learning_rate": 6.0289317958936305e-05,
|
|
"loss": 0.4169,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 1.9899280575539569,
|
|
"grad_norm": 0.39218033976040145,
|
|
"learning_rate": 6.0254674494727374e-05,
|
|
"loss": 0.4063,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 1.9913669064748203,
|
|
"grad_norm": 0.3251996021954182,
|
|
"learning_rate": 6.022001058636111e-05,
|
|
"loss": 0.4129,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 1.9928057553956835,
|
|
"grad_norm": 0.34408524022899284,
|
|
"learning_rate": 6.01853262688257e-05,
|
|
"loss": 0.4102,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 1.9942446043165467,
|
|
"grad_norm": 0.3178995021972047,
|
|
"learning_rate": 6.0150621577129934e-05,
|
|
"loss": 0.4137,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 1.99568345323741,
|
|
"grad_norm": 0.31218320513444386,
|
|
"learning_rate": 6.011589654630318e-05,
|
|
"loss": 0.4091,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 1.9971223021582734,
|
|
"grad_norm": 0.3507879025640938,
|
|
"learning_rate": 6.008115121139528e-05,
|
|
"loss": 0.4059,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 1.9985611510791368,
|
|
"grad_norm": 0.3105691121940582,
|
|
"learning_rate": 6.0046385607476655e-05,
|
|
"loss": 0.4169,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.21604535856154838,
|
|
"learning_rate": 6.001159976963814e-05,
|
|
"loss": 0.4246,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 2.001438848920863,
|
|
"grad_norm": 0.2887880416632466,
|
|
"learning_rate": 5.9976793732990965e-05,
|
|
"loss": 0.3811,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 2.0028776978417264,
|
|
"grad_norm": 0.29012694322847493,
|
|
"learning_rate": 5.9941967532666806e-05,
|
|
"loss": 0.3884,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 2.00431654676259,
|
|
"grad_norm": 0.2994422014652841,
|
|
"learning_rate": 5.990712120381766e-05,
|
|
"loss": 0.3984,
|
|
"step": 1393
|
|
},
{
"epoch": 2.0057553956834533,
"grad_norm": 0.30566352380523154,
"learning_rate": 5.987225478161583e-05,
"loss": 0.3818,
"step": 1394
},
{
"epoch": 2.0071942446043165,
"grad_norm": 0.33548574909554396,
"learning_rate": 5.9837368301253905e-05,
"loss": 0.3805,
"step": 1395
},
{
"epoch": 2.0086330935251797,
"grad_norm": 0.3031244364366609,
"learning_rate": 5.980246179794476e-05,
"loss": 0.3931,
"step": 1396
},
{
"epoch": 2.0100719424460434,
"grad_norm": 0.38514721898758386,
"learning_rate": 5.976753530692144e-05,
"loss": 0.3812,
"step": 1397
},
{
"epoch": 2.0115107913669066,
"grad_norm": 0.2978199981766877,
"learning_rate": 5.9732588863437155e-05,
"loss": 0.3812,
"step": 1398
},
{
"epoch": 2.01294964028777,
"grad_norm": 0.2763572880777705,
"learning_rate": 5.96976225027653e-05,
"loss": 0.3939,
"step": 1399
},
{
"epoch": 2.014388489208633,
"grad_norm": 0.4070368269820224,
"learning_rate": 5.966263626019932e-05,
"loss": 0.3865,
"step": 1400
},
{
"epoch": 2.015827338129496,
"grad_norm": 0.37094785344019987,
"learning_rate": 5.9627630171052774e-05,
"loss": 0.4017,
"step": 1401
},
{
"epoch": 2.01726618705036,
"grad_norm": 0.3289829185055233,
"learning_rate": 5.9592604270659234e-05,
"loss": 0.3924,
"step": 1402
},
{
"epoch": 2.018705035971223,
"grad_norm": 0.2920177009620508,
"learning_rate": 5.955755859437225e-05,
"loss": 0.386,
"step": 1403
},
{
"epoch": 2.0201438848920863,
"grad_norm": 0.22216208961656103,
"learning_rate": 5.9522493177565366e-05,
"loss": 0.3862,
"step": 1404
},
{
"epoch": 2.0215827338129495,
"grad_norm": 0.21574804778660417,
"learning_rate": 5.948740805563203e-05,
"loss": 0.3836,
"step": 1405
},
{
"epoch": 2.023021582733813,
"grad_norm": 0.2803499682693122,
"learning_rate": 5.94523032639856e-05,
"loss": 0.3857,
"step": 1406
},
{
"epoch": 2.0244604316546764,
"grad_norm": 0.34345390953920857,
"learning_rate": 5.9417178838059254e-05,
"loss": 0.3772,
"step": 1407
},
{
"epoch": 2.0258992805755396,
"grad_norm": 0.31567088918886027,
"learning_rate": 5.9382034813306014e-05,
"loss": 0.3832,
"step": 1408
},
{
"epoch": 2.027338129496403,
"grad_norm": 0.21744897822531922,
"learning_rate": 5.934687122519868e-05,
"loss": 0.3908,
"step": 1409
},
|
|
{
|
|
"epoch": 2.028776978417266,
|
|
"grad_norm": 0.2292838961412375,
|
|
"learning_rate": 5.93116881092298e-05,
|
|
"loss": 0.3823,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 2.0302158273381297,
|
|
"grad_norm": 0.2190463028801707,
|
|
"learning_rate": 5.927648550091162e-05,
|
|
"loss": 0.3898,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 2.031654676258993,
|
|
"grad_norm": 0.251546955059302,
|
|
"learning_rate": 5.9241263435776087e-05,
|
|
"loss": 0.3843,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 2.033093525179856,
|
|
"grad_norm": 0.28801130994436996,
|
|
"learning_rate": 5.920602194937474e-05,
|
|
"loss": 0.3956,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 2.0345323741007193,
|
|
"grad_norm": 0.2575878648421845,
|
|
"learning_rate": 5.9170761077278766e-05,
|
|
"loss": 0.3885,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 2.0359712230215825,
|
|
"grad_norm": 0.29281510252832754,
|
|
"learning_rate": 5.9135480855078915e-05,
|
|
"loss": 0.3902,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 2.037410071942446,
|
|
"grad_norm": 0.25701430635917655,
|
|
"learning_rate": 5.910018131838544e-05,
|
|
"loss": 0.3938,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 2.0388489208633094,
|
|
"grad_norm": 0.23768401066604516,
|
|
"learning_rate": 5.906486250282811e-05,
|
|
"loss": 0.3887,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 2.0402877697841726,
|
|
"grad_norm": 0.24471636925761628,
|
|
"learning_rate": 5.902952444405615e-05,
|
|
"loss": 0.3909,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 2.041726618705036,
|
|
"grad_norm": 0.24554144304410766,
|
|
"learning_rate": 5.899416717773822e-05,
|
|
"loss": 0.3947,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 2.0431654676258995,
|
|
"grad_norm": 0.266945870506506,
|
|
"learning_rate": 5.8958790739562316e-05,
|
|
"loss": 0.3854,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 2.0446043165467627,
|
|
"grad_norm": 0.25105097054721626,
|
|
"learning_rate": 5.892339516523586e-05,
|
|
"loss": 0.382,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 2.046043165467626,
|
|
"grad_norm": 0.22819421395017353,
|
|
"learning_rate": 5.8887980490485536e-05,
|
|
"loss": 0.381,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 2.047482014388489,
|
|
"grad_norm": 0.26205631016735825,
|
|
"learning_rate": 5.8852546751057337e-05,
|
|
"loss": 0.3887,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 2.0489208633093523,
|
|
"grad_norm": 0.2365304899026524,
|
|
"learning_rate": 5.8817093982716455e-05,
|
|
"loss": 0.4012,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 2.050359712230216,
|
|
"grad_norm": 0.26484604945943363,
|
|
"learning_rate": 5.878162222124735e-05,
|
|
"loss": 0.3807,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 2.051798561151079,
|
|
"grad_norm": 0.24500924418663053,
|
|
"learning_rate": 5.8746131502453623e-05,
|
|
"loss": 0.3853,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 2.0532374100719424,
|
|
"grad_norm": 0.24561115808204895,
|
|
"learning_rate": 5.871062186215799e-05,
|
|
"loss": 0.3882,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 2.0546762589928056,
|
|
"grad_norm": 0.25943946873193013,
|
|
"learning_rate": 5.867509333620231e-05,
|
|
"loss": 0.3798,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 2.0561151079136692,
|
|
"grad_norm": 0.30379474816700924,
|
|
"learning_rate": 5.863954596044744e-05,
|
|
"loss": 0.3822,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 2.0575539568345325,
|
|
"grad_norm": 0.3187969283099091,
|
|
"learning_rate": 5.8603979770773344e-05,
|
|
"loss": 0.3883,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 2.0589928057553957,
|
|
"grad_norm": 0.284129722191442,
|
|
"learning_rate": 5.85683948030789e-05,
|
|
"loss": 0.3876,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 2.060431654676259,
|
|
"grad_norm": 0.23986812689082604,
|
|
"learning_rate": 5.8532791093282e-05,
|
|
"loss": 0.3817,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 2.061870503597122,
|
|
"grad_norm": 0.2297144550771247,
|
|
"learning_rate": 5.849716867731941e-05,
|
|
"loss": 0.3883,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 2.0633093525179858,
|
|
"grad_norm": 0.2484838224718224,
|
|
"learning_rate": 5.84615275911468e-05,
|
|
"loss": 0.3853,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 2.064748201438849,
|
|
"grad_norm": 0.3246453907361612,
|
|
"learning_rate": 5.8425867870738684e-05,
|
|
"loss": 0.3844,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 2.066187050359712,
|
|
"grad_norm": 0.26606388938158,
|
|
"learning_rate": 5.839018955208838e-05,
|
|
"loss": 0.3884,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 2.0676258992805754,
|
|
"grad_norm": 0.26749192456772164,
|
|
"learning_rate": 5.835449267120796e-05,
|
|
"loss": 0.3875,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 2.069064748201439,
|
|
"grad_norm": 0.2832924489315253,
|
|
"learning_rate": 5.831877726412827e-05,
|
|
"loss": 0.385,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 2.0705035971223023,
|
|
"grad_norm": 0.33872413957084624,
|
|
"learning_rate": 5.828304336689883e-05,
|
|
"loss": 0.3882,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 2.0719424460431655,
|
|
"grad_norm": 0.36243945958999113,
|
|
"learning_rate": 5.824729101558781e-05,
|
|
"loss": 0.3896,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 2.0733812949640287,
|
|
"grad_norm": 0.256848493066568,
|
|
"learning_rate": 5.821152024628207e-05,
|
|
"loss": 0.3807,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 2.074820143884892,
|
|
"grad_norm": 0.24749724833016729,
|
|
"learning_rate": 5.8175731095086974e-05,
|
|
"loss": 0.3857,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 2.0762589928057555,
|
|
"grad_norm": 0.24910490955778009,
|
|
"learning_rate": 5.813992359812649e-05,
|
|
"loss": 0.3884,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 2.0776978417266188,
|
|
"grad_norm": 0.30258993101298864,
|
|
"learning_rate": 5.8104097791543104e-05,
|
|
"loss": 0.3832,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 2.079136690647482,
|
|
"grad_norm": 0.3023604065207533,
|
|
"learning_rate": 5.806825371149778e-05,
|
|
"loss": 0.3894,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 2.080575539568345,
|
|
"grad_norm": 0.26044774812213134,
|
|
"learning_rate": 5.803239139416989e-05,
|
|
"loss": 0.3849,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 2.082014388489209,
|
|
"grad_norm": 0.29669315942190305,
|
|
"learning_rate": 5.799651087575728e-05,
|
|
"loss": 0.3789,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 2.083453237410072,
|
|
"grad_norm": 0.3668399045645778,
|
|
"learning_rate": 5.7960612192476096e-05,
|
|
"loss": 0.3817,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 2.0848920863309353,
|
|
"grad_norm": 0.24504573622330686,
|
|
"learning_rate": 5.792469538056089e-05,
|
|
"loss": 0.381,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 2.0863309352517985,
|
|
"grad_norm": 0.24096038919896795,
|
|
"learning_rate": 5.7888760476264445e-05,
|
|
"loss": 0.3777,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 2.0877697841726617,
|
|
"grad_norm": 0.4030663618646048,
|
|
"learning_rate": 5.785280751585785e-05,
|
|
"loss": 0.3921,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 2.0892086330935253,
|
|
"grad_norm": 0.44876460620347847,
|
|
"learning_rate": 5.7816836535630436e-05,
|
|
"loss": 0.3846,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 2.0906474820143885,
|
|
"grad_norm": 0.4081310350393721,
|
|
"learning_rate": 5.7780847571889625e-05,
|
|
"loss": 0.3828,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 2.0920863309352518,
|
|
"grad_norm": 0.28447454918743886,
|
|
"learning_rate": 5.7744840660961126e-05,
|
|
"loss": 0.3873,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 2.093525179856115,
|
|
"grad_norm": 0.30015654831992505,
|
|
"learning_rate": 5.770881583918865e-05,
|
|
"loss": 0.3865,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 2.0949640287769786,
|
|
"grad_norm": 0.2744486566092523,
|
|
"learning_rate": 5.767277314293404e-05,
|
|
"loss": 0.3967,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 2.096402877697842,
|
|
"grad_norm": 0.2922509530409927,
|
|
"learning_rate": 5.76367126085772e-05,
|
|
"loss": 0.394,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 2.097841726618705,
|
|
"grad_norm": 0.3094686934087384,
|
|
"learning_rate": 5.760063427251599e-05,
|
|
"loss": 0.3905,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 2.0992805755395683,
|
|
"grad_norm": 0.29077439384264403,
|
|
"learning_rate": 5.756453817116624e-05,
|
|
"loss": 0.3872,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 2.1007194244604315,
|
|
"grad_norm": 0.3093840284017235,
|
|
"learning_rate": 5.752842434096176e-05,
|
|
"loss": 0.3892,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 2.102158273381295,
|
|
"grad_norm": 0.30695936450222566,
|
|
"learning_rate": 5.7492292818354224e-05,
|
|
"loss": 0.3968,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 2.1035971223021583,
|
|
"grad_norm": 0.32460057017873606,
|
|
"learning_rate": 5.745614363981316e-05,
|
|
"loss": 0.3841,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 2.1050359712230216,
|
|
"grad_norm": 0.3699703204003878,
|
|
"learning_rate": 5.741997684182591e-05,
|
|
"loss": 0.3813,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 2.1064748201438848,
|
|
"grad_norm": 0.3883348209223791,
|
|
"learning_rate": 5.7383792460897626e-05,
|
|
"loss": 0.3827,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 2.1079136690647484,
|
|
"grad_norm": 0.478757567710824,
|
|
"learning_rate": 5.73475905335512e-05,
|
|
"loss": 0.3831,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 2.1093525179856116,
|
|
"grad_norm": 0.46932959830154625,
|
|
"learning_rate": 5.731137109632722e-05,
|
|
"loss": 0.3925,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 2.110791366906475,
|
|
"grad_norm": 0.26416081207584324,
|
|
"learning_rate": 5.727513418578397e-05,
|
|
"loss": 0.3819,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 2.112230215827338,
|
|
"grad_norm": 0.25881787114513277,
|
|
"learning_rate": 5.723887983849732e-05,
|
|
"loss": 0.3917,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 2.1136690647482013,
|
|
"grad_norm": 0.3598133851327527,
|
|
"learning_rate": 5.720260809106083e-05,
|
|
"loss": 0.3892,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 2.115107913669065,
|
|
"grad_norm": 0.39431936276782453,
|
|
"learning_rate": 5.716631898008553e-05,
|
|
"loss": 0.3945,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 2.116546762589928,
|
|
"grad_norm": 0.3862663340992022,
|
|
"learning_rate": 5.713001254220002e-05,
|
|
"loss": 0.3805,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 2.1179856115107913,
|
|
"grad_norm": 0.3555172427359884,
|
|
"learning_rate": 5.7093688814050425e-05,
|
|
"loss": 0.3788,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 2.1194244604316546,
|
|
"grad_norm": 0.27783832091529853,
|
|
"learning_rate": 5.705734783230022e-05,
|
|
"loss": 0.3809,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 2.1208633093525178,
|
|
"grad_norm": 0.23205458036717905,
|
|
"learning_rate": 5.7020989633630414e-05,
|
|
"loss": 0.3898,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 2.1223021582733814,
|
|
"grad_norm": 0.23007009826375002,
|
|
"learning_rate": 5.6984614254739306e-05,
|
|
"loss": 0.3885,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 2.1237410071942446,
|
|
"grad_norm": 0.25953387737328293,
|
|
"learning_rate": 5.694822173234257e-05,
|
|
"loss": 0.3811,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 2.125179856115108,
|
|
"grad_norm": 0.28384406215461166,
|
|
"learning_rate": 5.691181210317319e-05,
|
|
"loss": 0.3759,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 2.126618705035971,
|
|
"grad_norm": 0.3025907541075651,
|
|
"learning_rate": 5.687538540398141e-05,
|
|
"loss": 0.3823,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 2.1280575539568347,
|
|
"grad_norm": 0.2796078741946139,
|
|
"learning_rate": 5.683894167153468e-05,
|
|
"loss": 0.3892,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 2.129496402877698,
|
|
"grad_norm": 0.2434020072542014,
|
|
"learning_rate": 5.680248094261769e-05,
|
|
"loss": 0.3989,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 2.130935251798561,
|
|
"grad_norm": 0.25340620278692505,
|
|
"learning_rate": 5.676600325403224e-05,
|
|
"loss": 0.3802,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 2.1323741007194243,
|
|
"grad_norm": 0.23410221112781054,
|
|
"learning_rate": 5.672950864259729e-05,
|
|
"loss": 0.3826,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 2.133812949640288,
|
|
"grad_norm": 0.2510344444196461,
|
|
"learning_rate": 5.669299714514884e-05,
|
|
"loss": 0.3876,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 2.135251798561151,
|
|
"grad_norm": 0.30582345160348356,
|
|
"learning_rate": 5.665646879853995e-05,
|
|
"loss": 0.3814,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 2.1366906474820144,
|
|
"grad_norm": 0.38190661032899026,
|
|
"learning_rate": 5.661992363964072e-05,
|
|
"loss": 0.3863,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 2.1381294964028776,
|
|
"grad_norm": 0.337587236186603,
|
|
"learning_rate": 5.658336170533814e-05,
|
|
"loss": 0.391,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 2.139568345323741,
|
|
"grad_norm": 0.3440327764267977,
|
|
"learning_rate": 5.654678303253624e-05,
|
|
"loss": 0.3884,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 2.1410071942446045,
|
|
"grad_norm": 0.30538098812170344,
|
|
"learning_rate": 5.6510187658155846e-05,
|
|
"loss": 0.4021,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 2.1424460431654677,
|
|
"grad_norm": 0.23921070516497805,
|
|
"learning_rate": 5.6473575619134686e-05,
|
|
"loss": 0.3843,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 2.143884892086331,
|
|
"grad_norm": 0.24919299553464283,
|
|
"learning_rate": 5.643694695242731e-05,
|
|
"loss": 0.3839,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 2.145323741007194,
|
|
"grad_norm": 0.2629581845876819,
|
|
"learning_rate": 5.640030169500508e-05,
|
|
"loss": 0.377,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 2.1467625899280574,
|
|
"grad_norm": 0.2810087915946878,
|
|
"learning_rate": 5.636363988385601e-05,
|
|
"loss": 0.3904,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 2.148201438848921,
|
|
"grad_norm": 0.3244878408624964,
|
|
"learning_rate": 5.632696155598493e-05,
|
|
"loss": 0.3972,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 2.149640287769784,
|
|
"grad_norm": 0.30867146080940827,
|
|
"learning_rate": 5.6290266748413266e-05,
|
|
"loss": 0.3758,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 2.1510791366906474,
|
|
"grad_norm": 0.27728156713050567,
|
|
"learning_rate": 5.6253555498179124e-05,
|
|
"loss": 0.3834,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 2.1525179856115106,
|
|
"grad_norm": 0.2457253817314213,
|
|
"learning_rate": 5.621682784233718e-05,
|
|
"loss": 0.3802,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 2.1539568345323743,
|
|
"grad_norm": 0.2673230123425325,
|
|
"learning_rate": 5.618008381795868e-05,
|
|
"loss": 0.3871,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 2.1553956834532375,
|
|
"grad_norm": 0.2564978285681744,
|
|
"learning_rate": 5.61433234621314e-05,
|
|
"loss": 0.383,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 2.1568345323741007,
|
|
"grad_norm": 0.29113868276963406,
|
|
"learning_rate": 5.610654681195957e-05,
|
|
"loss": 0.3825,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 2.158273381294964,
|
|
"grad_norm": 0.31363746262583797,
|
|
"learning_rate": 5.606975390456391e-05,
|
|
"loss": 0.383,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 2.159712230215827,
|
|
"grad_norm": 0.3367445439203929,
|
|
"learning_rate": 5.603294477708149e-05,
|
|
"loss": 0.3835,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 2.161151079136691,
|
|
"grad_norm": 0.29494836157270066,
|
|
"learning_rate": 5.599611946666581e-05,
|
|
"loss": 0.3886,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 2.162589928057554,
|
|
"grad_norm": 0.2964206103409831,
|
|
"learning_rate": 5.595927801048669e-05,
|
|
"loss": 0.3847,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 2.1640287769784172,
|
|
"grad_norm": 0.3409833590118832,
|
|
"learning_rate": 5.5922420445730245e-05,
|
|
"loss": 0.3905,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 2.1654676258992804,
|
|
"grad_norm": 0.35262861647078886,
|
|
"learning_rate": 5.5885546809598805e-05,
|
|
"loss": 0.3796,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 2.166906474820144,
|
|
"grad_norm": 0.28486829640449757,
|
|
"learning_rate": 5.584865713931098e-05,
|
|
"loss": 0.3798,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 2.1683453237410073,
|
|
"grad_norm": 0.21267008553075398,
|
|
"learning_rate": 5.5811751472101564e-05,
|
|
"loss": 0.3823,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 2.1697841726618705,
|
|
"grad_norm": 0.26737200635981273,
|
|
"learning_rate": 5.577482984522145e-05,
|
|
"loss": 0.3852,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 2.1712230215827337,
|
|
"grad_norm": 0.2818631791770683,
|
|
"learning_rate": 5.573789229593767e-05,
|
|
"loss": 0.3829,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 2.172661870503597,
|
|
"grad_norm": 0.3695316153874667,
|
|
"learning_rate": 5.570093886153334e-05,
|
|
"loss": 0.3951,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 2.1741007194244606,
|
|
"grad_norm": 0.40014200409825845,
|
|
"learning_rate": 5.5663969579307594e-05,
|
|
"loss": 0.3821,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 2.175539568345324,
|
|
"grad_norm": 0.3039087158633958,
|
|
"learning_rate": 5.562698448657553e-05,
|
|
"loss": 0.3977,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 2.176978417266187,
|
|
"grad_norm": 0.2844413043170997,
|
|
"learning_rate": 5.5589983620668286e-05,
|
|
"loss": 0.3895,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 2.1784172661870502,
|
|
"grad_norm": 0.2581266490051725,
|
|
"learning_rate": 5.555296701893284e-05,
|
|
"loss": 0.3783,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 2.1798561151079134,
|
|
"grad_norm": 0.21320489150420954,
|
|
"learning_rate": 5.551593471873208e-05,
|
|
"loss": 0.3706,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 2.181294964028777,
|
|
"grad_norm": 0.31082703652913163,
|
|
"learning_rate": 5.547888675744476e-05,
|
|
"loss": 0.3945,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 2.1827338129496403,
|
|
"grad_norm": 0.3646404374400559,
|
|
"learning_rate": 5.5441823172465427e-05,
|
|
"loss": 0.3888,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 2.1841726618705035,
|
|
"grad_norm": 0.31162977902536365,
|
|
"learning_rate": 5.540474400120438e-05,
|
|
"loss": 0.3809,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 2.1856115107913667,
|
|
"grad_norm": 0.24709242298967402,
|
|
"learning_rate": 5.536764928108769e-05,
|
|
"loss": 0.3879,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 2.1870503597122304,
|
|
"grad_norm": 0.2524327812996217,
|
|
"learning_rate": 5.533053904955709e-05,
|
|
"loss": 0.3814,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 2.1884892086330936,
|
|
"grad_norm": 0.23476618411827865,
|
|
"learning_rate": 5.5293413344069964e-05,
|
|
"loss": 0.3938,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 2.189928057553957,
|
|
"grad_norm": 0.21261137691723106,
|
|
"learning_rate": 5.525627220209934e-05,
|
|
"loss": 0.383,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 2.19136690647482,
|
|
"grad_norm": 0.2559050899711571,
|
|
"learning_rate": 5.5219115661133815e-05,
|
|
"loss": 0.3886,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 2.1928057553956837,
|
|
"grad_norm": 0.3286796660338887,
|
|
"learning_rate": 5.518194375867754e-05,
|
|
"loss": 0.392,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 2.194244604316547,
|
|
"grad_norm": 0.28229203394117025,
|
|
"learning_rate": 5.514475653225014e-05,
|
|
"loss": 0.3877,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 2.19568345323741,
|
|
"grad_norm": 0.2431677560009044,
|
|
"learning_rate": 5.510755401938676e-05,
|
|
"loss": 0.389,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 2.1971223021582733,
|
|
"grad_norm": 0.958327663255131,
|
|
"learning_rate": 5.5070336257637904e-05,
|
|
"loss": 0.3815,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 2.1985611510791365,
|
|
"grad_norm": 0.2920895494267732,
|
|
"learning_rate": 5.503310328456953e-05,
|
|
"loss": 0.3871,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"grad_norm": 0.24069409864807934,
|
|
"learning_rate": 5.4995855137762926e-05,
|
|
"loss": 0.3971,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 2.2014388489208634,
|
|
"grad_norm": 0.2787763561204119,
|
|
"learning_rate": 5.4958591854814695e-05,
|
|
"loss": 0.3957,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 2.2028776978417266,
|
|
"grad_norm": 0.2997713606316793,
|
|
"learning_rate": 5.492131347333671e-05,
|
|
"loss": 0.3863,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 2.20431654676259,
|
|
"grad_norm": 0.2824505172073425,
|
|
"learning_rate": 5.48840200309561e-05,
|
|
"loss": 0.382,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 2.205755395683453,
|
|
"grad_norm": 0.2927505271000982,
|
|
"learning_rate": 5.484671156531519e-05,
|
|
"loss": 0.3755,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 2.2071942446043167,
|
|
"grad_norm": 0.24239634582389688,
|
|
"learning_rate": 5.480938811407146e-05,
|
|
"loss": 0.3878,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 2.20863309352518,
|
|
"grad_norm": 0.29690328433069985,
|
|
"learning_rate": 5.477204971489753e-05,
|
|
"loss": 0.3881,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 2.210071942446043,
|
|
"grad_norm": 0.3743992032811978,
|
|
"learning_rate": 5.473469640548109e-05,
|
|
"loss": 0.3928,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 2.2115107913669063,
|
|
"grad_norm": 0.33321688391434434,
|
|
"learning_rate": 5.469732822352491e-05,
|
|
"loss": 0.3891,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 2.21294964028777,
|
|
"grad_norm": 0.30454047172144444,
|
|
"learning_rate": 5.465994520674672e-05,
|
|
"loss": 0.3807,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 2.214388489208633,
|
|
"grad_norm": 0.2579915970773604,
|
|
"learning_rate": 5.4622547392879295e-05,
|
|
"loss": 0.3924,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 2.2158273381294964,
|
|
"grad_norm": 0.2803122949297988,
|
|
"learning_rate": 5.458513481967027e-05,
|
|
"loss": 0.3914,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 2.2172661870503596,
|
|
"grad_norm": 0.3174004059579346,
|
|
"learning_rate": 5.454770752488223e-05,
|
|
"loss": 0.3742,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 2.218705035971223,
|
|
"grad_norm": 0.23285021234819764,
|
|
"learning_rate": 5.4510265546292615e-05,
|
|
"loss": 0.3932,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 2.2201438848920865,
|
|
"grad_norm": 0.27453872143396457,
|
|
"learning_rate": 5.4472808921693657e-05,
|
|
"loss": 0.3884,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 2.2215827338129497,
|
|
"grad_norm": 0.3127474980870224,
|
|
"learning_rate": 5.4435337688892396e-05,
|
|
"loss": 0.3818,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 2.223021582733813,
|
|
"grad_norm": 0.35718218406460484,
|
|
"learning_rate": 5.4397851885710595e-05,
|
|
"loss": 0.3948,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 2.224460431654676,
|
|
"grad_norm": 0.27279711098433823,
|
|
"learning_rate": 5.4360351549984755e-05,
|
|
"loss": 0.3858,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 2.2258992805755398,
|
|
"grad_norm": 0.2907638892300066,
|
|
"learning_rate": 5.432283671956601e-05,
|
|
"loss": 0.384,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 2.227338129496403,
|
|
"grad_norm": 0.3199053528052606,
|
|
"learning_rate": 5.428530743232016e-05,
|
|
"loss": 0.3949,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 2.228776978417266,
|
|
"grad_norm": 0.28002856309970187,
|
|
"learning_rate": 5.4247763726127564e-05,
|
|
"loss": 0.3891,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 2.2302158273381294,
|
|
"grad_norm": 0.25446256800404726,
|
|
"learning_rate": 5.421020563888317e-05,
|
|
"loss": 0.386,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 2.2316546762589926,
|
|
"grad_norm": 0.27874489331346564,
|
|
"learning_rate": 5.417263320849641e-05,
|
|
"loss": 0.377,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 2.2330935251798563,
|
|
"grad_norm": 0.23657895483306096,
|
|
"learning_rate": 5.4135046472891205e-05,
|
|
"loss": 0.3864,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 2.2345323741007195,
|
|
"grad_norm": 0.22189615822093495,
|
|
"learning_rate": 5.409744547000591e-05,
|
|
"loss": 0.3873,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 2.2359712230215827,
|
|
"grad_norm": 0.1984186182637193,
|
|
"learning_rate": 5.405983023779328e-05,
|
|
"loss": 0.381,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 2.237410071942446,
|
|
"grad_norm": 0.19747233881477402,
|
|
"learning_rate": 5.402220081422048e-05,
|
|
"loss": 0.3867,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 2.2388489208633096,
|
|
"grad_norm": 0.19697923300719425,
|
|
"learning_rate": 5.3984557237268905e-05,
|
|
"loss": 0.3901,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 2.2402877697841728,
|
|
"grad_norm": 0.21040094118037814,
|
|
"learning_rate": 5.394689954493432e-05,
|
|
"loss": 0.3947,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 2.241726618705036,
|
|
"grad_norm": 0.24906115309635818,
|
|
"learning_rate": 5.390922777522669e-05,
|
|
"loss": 0.3845,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 2.243165467625899,
|
|
"grad_norm": 0.21783238753601744,
|
|
"learning_rate": 5.3871541966170225e-05,
|
|
"loss": 0.3919,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 2.2446043165467624,
|
|
"grad_norm": 0.22577492896311513,
|
|
"learning_rate": 5.383384215580326e-05,
|
|
"loss": 0.3851,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 2.246043165467626,
|
|
"grad_norm": 0.24424333832726794,
|
|
"learning_rate": 5.37961283821783e-05,
|
|
"loss": 0.3932,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 2.2474820143884893,
|
|
"grad_norm": 0.23892622375802364,
|
|
"learning_rate": 5.3758400683361926e-05,
|
|
"loss": 0.3812,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 2.2489208633093525,
|
|
"grad_norm": 0.2695403326065177,
|
|
"learning_rate": 5.372065909743479e-05,
|
|
"loss": 0.3919,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 2.2503597122302157,
|
|
"grad_norm": 0.2692956712474961,
|
|
"learning_rate": 5.368290366249155e-05,
|
|
"loss": 0.3942,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 2.2517985611510793,
|
|
"grad_norm": 0.23902802086422323,
|
|
"learning_rate": 5.364513441664084e-05,
|
|
"loss": 0.39,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 2.2532374100719426,
|
|
"grad_norm": 0.26126162525768504,
|
|
"learning_rate": 5.3607351398005234e-05,
|
|
"loss": 0.3838,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 2.2546762589928058,
|
|
"grad_norm": 0.2550518489039177,
|
|
"learning_rate": 5.356955464472121e-05,
|
|
"loss": 0.38,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 2.256115107913669,
|
|
"grad_norm": 0.34293101606148046,
|
|
"learning_rate": 5.353174419493913e-05,
|
|
"loss": 0.3831,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 2.257553956834532,
|
|
"grad_norm": 0.4501368935378753,
|
|
"learning_rate": 5.349392008682314e-05,
|
|
"loss": 0.3933,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 2.258992805755396,
|
|
"grad_norm": 0.460476970906807,
|
|
"learning_rate": 5.3456082358551204e-05,
|
|
"loss": 0.3808,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 2.260431654676259,
|
|
"grad_norm": 0.43846766271023646,
|
|
"learning_rate": 5.341823104831501e-05,
|
|
"loss": 0.3851,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 2.2618705035971223,
|
|
"grad_norm": 0.4205838992209499,
|
|
"learning_rate": 5.338036619431999e-05,
|
|
"loss": 0.3893,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 2.2633093525179855,
|
|
"grad_norm": 0.30781355419394074,
|
|
"learning_rate": 5.33424878347852e-05,
|
|
"loss": 0.3795,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 2.2647482014388487,
|
|
"grad_norm": 0.23276352973871836,
|
|
"learning_rate": 5.330459600794337e-05,
|
|
"loss": 0.391,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 2.2661870503597124,
|
|
"grad_norm": 0.2622509866067334,
|
|
"learning_rate": 5.32666907520408e-05,
|
|
"loss": 0.3841,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 2.2676258992805756,
|
|
"grad_norm": 0.23009431066145555,
|
|
"learning_rate": 5.322877210533735e-05,
|
|
"loss": 0.3826,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 2.2690647482014388,
|
|
"grad_norm": 0.22157582135692874,
|
|
"learning_rate": 5.319084010610638e-05,
|
|
"loss": 0.3848,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 2.270503597122302,
|
|
"grad_norm": 0.21173951124947143,
|
|
"learning_rate": 5.3152894792634785e-05,
|
|
"loss": 0.3923,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 2.2719424460431656,
|
|
"grad_norm": 0.26895806961249485,
|
|
"learning_rate": 5.311493620322282e-05,
|
|
"loss": 0.3897,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 2.273381294964029,
|
|
"grad_norm": 0.23446011324185626,
|
|
"learning_rate": 5.3076964376184186e-05,
|
|
"loss": 0.3817,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 2.274820143884892,
|
|
"grad_norm": 0.24440189850824454,
|
|
"learning_rate": 5.303897934984595e-05,
|
|
"loss": 0.3879,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 2.2762589928057553,
|
|
"grad_norm": 0.2814922931976218,
|
|
"learning_rate": 5.300098116254848e-05,
|
|
"loss": 0.3811,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 2.277697841726619,
|
|
"grad_norm": 0.2759979588562507,
|
|
"learning_rate": 5.296296985264543e-05,
|
|
"loss": 0.3869,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 2.279136690647482,
|
|
"grad_norm": 0.25053049671384126,
|
|
"learning_rate": 5.2924945458503713e-05,
|
|
"loss": 0.3866,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 2.2805755395683454,
|
|
"grad_norm": 0.2806205660877336,
|
|
"learning_rate": 5.2886908018503454e-05,
|
|
"loss": 0.391,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 2.2820143884892086,
|
|
"grad_norm": 0.32529376958126494,
|
|
"learning_rate": 5.284885757103792e-05,
|
|
"loss": 0.3974,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 2.283453237410072,
|
|
"grad_norm": 0.3064676974765673,
|
|
"learning_rate": 5.2810794154513503e-05,
|
|
"loss": 0.3892,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 2.2848920863309354,
|
|
"grad_norm": 0.19606764424995293,
|
|
"learning_rate": 5.277271780734975e-05,
|
|
"loss": 0.3824,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 2.2863309352517986,
|
|
"grad_norm": 0.288586557188386,
|
|
"learning_rate": 5.273462856797918e-05,
|
|
"loss": 0.39,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 2.287769784172662,
|
|
"grad_norm": 0.23355525268280722,
|
|
"learning_rate": 5.269652647484735e-05,
|
|
"loss": 0.3863,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 2.289208633093525,
|
|
"grad_norm": 0.25242315444086605,
|
|
"learning_rate": 5.2658411566412837e-05,
|
|
"loss": 0.38,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 2.2906474820143883,
|
|
"grad_norm": 0.27283090796983017,
|
|
"learning_rate": 5.262028388114708e-05,
|
|
"loss": 0.3873,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 2.292086330935252,
|
|
"grad_norm": 0.3357716605823331,
|
|
"learning_rate": 5.258214345753446e-05,
|
|
"loss": 0.3907,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 2.293525179856115,
|
|
"grad_norm": 0.23549376940431055,
|
|
"learning_rate": 5.254399033407221e-05,
|
|
"loss": 0.389,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 2.2949640287769784,
|
|
"grad_norm": 0.24827105722781986,
|
|
"learning_rate": 5.250582454927037e-05,
|
|
"loss": 0.3849,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 2.2964028776978416,
|
|
"grad_norm": 0.3239075303799912,
|
|
"learning_rate": 5.2467646141651764e-05,
|
|
"loss": 0.3838,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 2.2978417266187052,
|
|
"grad_norm": 0.29719088761427137,
|
|
"learning_rate": 5.2429455149751976e-05,
|
|
"loss": 0.3897,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 2.2992805755395684,
|
|
"grad_norm": 0.26438529874206507,
|
|
"learning_rate": 5.2391251612119256e-05,
|
|
"loss": 0.3911,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 2.3007194244604317,
|
|
"grad_norm": 0.2514954353475795,
|
|
"learning_rate": 5.235303556731456e-05,
|
|
"loss": 0.3919,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 2.302158273381295,
|
|
"grad_norm": 0.2510672008834341,
|
|
"learning_rate": 5.23148070539114e-05,
|
|
"loss": 0.3856,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 2.3035971223021585,
|
|
"grad_norm": 0.2799560346344144,
|
|
"learning_rate": 5.227656611049598e-05,
|
|
"loss": 0.3866,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 2.3050359712230217,
|
|
"grad_norm": 0.3324478151322014,
|
|
"learning_rate": 5.2238312775666935e-05,
|
|
"loss": 0.3816,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 2.306474820143885,
|
|
"grad_norm": 0.28928396100617226,
|
|
"learning_rate": 5.220004708803548e-05,
|
|
"loss": 0.384,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 2.307913669064748,
|
|
"grad_norm": 0.2573912811948004,
|
|
"learning_rate": 5.216176908622528e-05,
|
|
"loss": 0.3867,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 2.3093525179856114,
|
|
"grad_norm": 0.24794500084546275,
|
|
"learning_rate": 5.2123478808872436e-05,
|
|
"loss": 0.3775,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 2.310791366906475,
|
|
"grad_norm": 0.268798630381141,
|
|
"learning_rate": 5.208517629462541e-05,
|
|
"loss": 0.3831,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 2.3122302158273382,
|
|
"grad_norm": 0.25844464816089263,
|
|
"learning_rate": 5.204686158214507e-05,
|
|
"loss": 0.3791,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 2.3136690647482014,
|
|
"grad_norm": 0.21913264130578688,
|
|
"learning_rate": 5.200853471010453e-05,
|
|
"loss": 0.3856,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 2.3151079136690647,
|
|
"grad_norm": 0.2194218025964406,
|
|
"learning_rate": 5.197019571718921e-05,
|
|
"loss": 0.3857,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 2.316546762589928,
|
|
"grad_norm": 0.2959302848537896,
|
|
"learning_rate": 5.19318446420968e-05,
|
|
"loss": 0.3904,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 2.3179856115107915,
|
|
"grad_norm": 0.25520937421118556,
|
|
"learning_rate": 5.189348152353712e-05,
|
|
"loss": 0.3846,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 2.3194244604316547,
|
|
"grad_norm": 0.25544683088833603,
|
|
"learning_rate": 5.1855106400232196e-05,
|
|
"loss": 0.3908,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 2.320863309352518,
|
|
"grad_norm": 0.2326045360480261,
|
|
"learning_rate": 5.181671931091612e-05,
|
|
"loss": 0.3848,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 2.322302158273381,
|
|
"grad_norm": 0.20627932576219066,
|
|
"learning_rate": 5.1778320294335126e-05,
|
|
"loss": 0.3966,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 2.3237410071942444,
|
|
"grad_norm": 0.2350101675045765,
|
|
"learning_rate": 5.1739909389247445e-05,
|
|
"loss": 0.3731,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 2.325179856115108,
|
|
"grad_norm": 0.20760302327534524,
|
|
"learning_rate": 5.17014866344233e-05,
|
|
"loss": 0.3872,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 2.3266187050359712,
|
|
"grad_norm": 0.22767597977691015,
|
|
"learning_rate": 5.166305206864492e-05,
|
|
"loss": 0.3823,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 2.3280575539568344,
|
|
"grad_norm": 0.20489321540223537,
|
|
"learning_rate": 5.162460573070642e-05,
|
|
"loss": 0.387,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 2.3294964028776977,
|
|
"grad_norm": 0.2192346874970893,
|
|
"learning_rate": 5.158614765941376e-05,
|
|
"loss": 0.3759,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 2.3309352517985613,
|
|
"grad_norm": 0.28543493001032233,
|
|
"learning_rate": 5.1547677893584846e-05,
|
|
"loss": 0.3898,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 2.3323741007194245,
|
|
"grad_norm": 0.35001032752376776,
|
|
"learning_rate": 5.15091964720493e-05,
|
|
"loss": 0.3866,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 2.3338129496402877,
|
|
"grad_norm": 0.4226417487693915,
|
|
"learning_rate": 5.1470703433648556e-05,
|
|
"loss": 0.3787,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 2.335251798561151,
|
|
"grad_norm": 0.43932112181168936,
|
|
"learning_rate": 5.143219881723573e-05,
|
|
"loss": 0.3921,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 2.3366906474820146,
|
|
"grad_norm": 0.3892711297282333,
|
|
"learning_rate": 5.139368266167567e-05,
|
|
"loss": 0.3883,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 2.338129496402878,
|
|
"grad_norm": 0.3360845243954553,
|
|
"learning_rate": 5.135515500584484e-05,
|
|
"loss": 0.3857,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 2.339568345323741,
|
|
"grad_norm": 0.28977891908466297,
|
|
"learning_rate": 5.131661588863132e-05,
|
|
"loss": 0.384,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 2.3410071942446042,
|
|
"grad_norm": 0.23206437453901463,
|
|
"learning_rate": 5.1278065348934786e-05,
|
|
"loss": 0.3803,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 2.3424460431654675,
|
|
"grad_norm": 0.25009289563462755,
|
|
"learning_rate": 5.123950342566639e-05,
|
|
"loss": 0.3956,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 2.343884892086331,
|
|
"grad_norm": 0.26252213588352424,
|
|
"learning_rate": 5.120093015774882e-05,
|
|
"loss": 0.3883,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 2.3453237410071943,
|
|
"grad_norm": 0.2208785518640901,
|
|
"learning_rate": 5.116234558411618e-05,
|
|
"loss": 0.3895,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 2.3467625899280575,
|
|
"grad_norm": 0.2520809492103715,
|
|
"learning_rate": 5.1123749743714024e-05,
|
|
"loss": 0.3801,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 2.3482014388489207,
|
|
"grad_norm": 0.2608965768122394,
|
|
"learning_rate": 5.1085142675499246e-05,
|
|
"loss": 0.3902,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 2.349640287769784,
|
|
"grad_norm": 0.24456017433426602,
|
|
"learning_rate": 5.1046524418440075e-05,
|
|
"loss": 0.385,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 2.3510791366906476,
|
|
"grad_norm": 0.2558853419798534,
|
|
"learning_rate": 5.100789501151607e-05,
|
|
"loss": 0.3814,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 2.352517985611511,
|
|
"grad_norm": 0.2553918252834351,
|
|
"learning_rate": 5.0969254493717996e-05,
|
|
"loss": 0.3997,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 2.353956834532374,
|
|
"grad_norm": 0.22345089951944275,
|
|
"learning_rate": 5.093060290404785e-05,
|
|
"loss": 0.3861,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 2.3553956834532372,
|
|
"grad_norm": 0.20253110194807594,
|
|
"learning_rate": 5.089194028151882e-05,
|
|
"loss": 0.3924,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 2.356834532374101,
|
|
"grad_norm": 0.2970015242347035,
|
|
"learning_rate": 5.085326666515521e-05,
|
|
"loss": 0.3882,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 2.358273381294964,
|
|
"grad_norm": 0.37532857685934257,
|
|
"learning_rate": 5.081458209399243e-05,
|
|
"loss": 0.3863,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 2.3597122302158273,
|
|
"grad_norm": 0.43942720123168294,
|
|
"learning_rate": 5.0775886607076954e-05,
|
|
"loss": 0.3796,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 2.3611510791366905,
|
|
"grad_norm": 0.4291801560558247,
|
|
"learning_rate": 5.073718024346626e-05,
|
|
"loss": 0.3873,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 2.362589928057554,
|
|
"grad_norm": 0.29259892105674734,
|
|
"learning_rate": 5.06984630422288e-05,
|
|
"loss": 0.3849,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 2.3640287769784174,
|
|
"grad_norm": 0.21642316473925502,
|
|
"learning_rate": 5.065973504244399e-05,
|
|
"loss": 0.3862,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 2.3654676258992806,
|
|
"grad_norm": 0.29186570636656023,
|
|
"learning_rate": 5.062099628320213e-05,
|
|
"loss": 0.3884,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 2.366906474820144,
|
|
"grad_norm": 0.2502372238427353,
|
|
"learning_rate": 5.058224680360438e-05,
|
|
"loss": 0.3877,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 2.368345323741007,
|
|
"grad_norm": 0.26671048244743695,
|
|
"learning_rate": 5.054348664276271e-05,
|
|
"loss": 0.39,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 2.3697841726618707,
|
|
"grad_norm": 0.2976952260854514,
|
|
"learning_rate": 5.05047158397999e-05,
|
|
"loss": 0.3921,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 2.371223021582734,
|
|
"grad_norm": 0.3105265956452332,
|
|
"learning_rate": 5.046593443384945e-05,
|
|
"loss": 0.3875,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 2.372661870503597,
|
|
"grad_norm": 0.24798070330496666,
|
|
"learning_rate": 5.042714246405555e-05,
|
|
"loss": 0.3864,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 2.3741007194244603,
|
|
"grad_norm": 0.2287009932234164,
|
|
"learning_rate": 5.038833996957309e-05,
|
|
"loss": 0.3855,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 2.3755395683453235,
|
|
"grad_norm": 0.22471630538730594,
|
|
"learning_rate": 5.0349526989567546e-05,
|
|
"loss": 0.3881,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 2.376978417266187,
|
|
"grad_norm": 0.27387505232907666,
|
|
"learning_rate": 5.0310703563215016e-05,
|
|
"loss": 0.3877,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 2.3784172661870504,
|
|
"grad_norm": 0.3083771112457394,
|
|
"learning_rate": 5.027186972970211e-05,
|
|
"loss": 0.3954,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 2.3798561151079136,
|
|
"grad_norm": 0.34300073482573895,
|
|
"learning_rate": 5.0233025528225934e-05,
|
|
"loss": 0.3891,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 2.381294964028777,
|
|
"grad_norm": 0.281798980342933,
|
|
"learning_rate": 5.01941709979941e-05,
|
|
"loss": 0.3831,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 2.38273381294964,
|
|
"grad_norm": 0.20493954535351877,
|
|
"learning_rate": 5.015530617822462e-05,
|
|
"loss": 0.3913,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 2.3841726618705037,
|
|
"grad_norm": 0.2768549887224621,
|
|
"learning_rate": 5.011643110814589e-05,
|
|
"loss": 0.3862,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 2.385611510791367,
|
|
"grad_norm": 0.3018414098255564,
|
|
"learning_rate": 5.007754582699666e-05,
|
|
"loss": 0.383,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 2.38705035971223,
|
|
"grad_norm": 0.2593047814678623,
|
|
"learning_rate": 5.003865037402598e-05,
|
|
"loss": 0.3865,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 2.3884892086330938,
|
|
"grad_norm": 0.2213246208779518,
|
|
"learning_rate": 4.999974478849319e-05,
|
|
"loss": 0.3749,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 2.389928057553957,
|
|
"grad_norm": 0.2107366092198208,
|
|
"learning_rate": 4.99608291096678e-05,
|
|
"loss": 0.3928,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 2.39136690647482,
|
|
"grad_norm": 0.27771157652246276,
|
|
"learning_rate": 4.9921903376829565e-05,
|
|
"loss": 0.3843,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 2.3928057553956834,
|
|
"grad_norm": 0.3019548903178843,
|
|
"learning_rate": 4.988296762926838e-05,
|
|
"loss": 0.3881,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 2.3942446043165466,
|
|
"grad_norm": 0.20933663805234012,
|
|
"learning_rate": 4.984402190628422e-05,
|
|
"loss": 0.3881,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 2.3956834532374103,
|
|
"grad_norm": 0.23382587937968935,
|
|
"learning_rate": 4.980506624718716e-05,
|
|
"loss": 0.3943,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 2.3971223021582735,
|
|
"grad_norm": 0.3872640176210066,
|
|
"learning_rate": 4.9766100691297284e-05,
|
|
"loss": 0.388,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 2.3985611510791367,
|
|
"grad_norm": 0.3966990171449825,
|
|
"learning_rate": 4.9727125277944675e-05,
|
|
"loss": 0.3796,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"grad_norm": 0.2222757436807374,
|
|
"learning_rate": 4.968814004646934e-05,
|
|
"loss": 0.3786,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 2.401438848920863,
|
|
"grad_norm": 0.22810066209880694,
|
|
"learning_rate": 4.964914503622126e-05,
|
|
"loss": 0.3867,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 2.402877697841727,
|
|
"grad_norm": 0.3398874352715919,
|
|
"learning_rate": 4.961014028656021e-05,
|
|
"loss": 0.3908,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 2.40431654676259,
|
|
"grad_norm": 0.3246050790890519,
|
|
"learning_rate": 4.9571125836855825e-05,
|
|
"loss": 0.393,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 2.405755395683453,
|
|
"grad_norm": 0.25315967326985717,
|
|
"learning_rate": 4.9532101726487564e-05,
|
|
"loss": 0.3762,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 2.4071942446043164,
|
|
"grad_norm": 0.1940357091138065,
|
|
"learning_rate": 4.9493067994844606e-05,
|
|
"loss": 0.3846,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 2.4086330935251796,
|
|
"grad_norm": 0.283572096629059,
|
|
"learning_rate": 4.9454024681325815e-05,
|
|
"loss": 0.3865,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 2.4100719424460433,
|
|
"grad_norm": 0.282266235117897,
|
|
"learning_rate": 4.941497182533978e-05,
|
|
"loss": 0.3883,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 2.4115107913669065,
|
|
"grad_norm": 0.26411764458831766,
|
|
"learning_rate": 4.937590946630469e-05,
|
|
"loss": 0.3912,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 2.4129496402877697,
|
|
"grad_norm": 0.2367587506951602,
|
|
"learning_rate": 4.9336837643648335e-05,
|
|
"loss": 0.3823,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 2.414388489208633,
|
|
"grad_norm": 0.20866587804841538,
|
|
"learning_rate": 4.929775639680805e-05,
|
|
"loss": 0.3789,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 2.4158273381294966,
|
|
"grad_norm": 0.22968193961274097,
|
|
"learning_rate": 4.925866576523069e-05,
|
|
"loss": 0.3905,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 2.41726618705036,
|
|
"grad_norm": 0.2573876684137442,
|
|
"learning_rate": 4.921956578837259e-05,
|
|
"loss": 0.387,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 2.418705035971223,
|
|
"grad_norm": 0.26520641448106225,
|
|
"learning_rate": 4.918045650569949e-05,
|
|
"loss": 0.3863,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 2.420143884892086,
|
|
"grad_norm": 0.17712365142154465,
|
|
"learning_rate": 4.9141337956686564e-05,
|
|
"loss": 0.3843,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 2.42158273381295,
|
|
"grad_norm": 0.44069872035146274,
|
|
"learning_rate": 4.91022101808183e-05,
|
|
"loss": 0.3931,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 2.423021582733813,
|
|
"grad_norm": 0.22611193727329945,
|
|
"learning_rate": 4.90630732175885e-05,
|
|
"loss": 0.3912,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 2.4244604316546763,
|
|
"grad_norm": 0.23034288631270863,
|
|
"learning_rate": 4.902392710650028e-05,
|
|
"loss": 0.3951,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 2.4258992805755395,
|
|
"grad_norm": 0.45539534072797283,
|
|
"learning_rate": 4.898477188706596e-05,
|
|
"loss": 0.3888,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 2.4273381294964027,
|
|
"grad_norm": 0.21444759441613848,
|
|
"learning_rate": 4.894560759880705e-05,
|
|
"loss": 0.3862,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 2.4287769784172664,
|
|
"grad_norm": 0.24448544453761703,
|
|
"learning_rate": 4.8906434281254223e-05,
|
|
"loss": 0.3782,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 2.4302158273381296,
|
|
"grad_norm": 0.2290956032891487,
|
|
"learning_rate": 4.886725197394726e-05,
|
|
"loss": 0.3794,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 2.431654676258993,
|
|
"grad_norm": 0.25790187946393134,
|
|
"learning_rate": 4.882806071643503e-05,
|
|
"loss": 0.396,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 2.433093525179856,
|
|
"grad_norm": 0.21786703720104683,
|
|
"learning_rate": 4.878886054827541e-05,
|
|
"loss": 0.391,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 2.434532374100719,
|
|
"grad_norm": 0.2606971048622634,
|
|
"learning_rate": 4.874965150903529e-05,
|
|
"loss": 0.3932,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 2.435971223021583,
|
|
"grad_norm": 0.28835259843290095,
|
|
"learning_rate": 4.871043363829053e-05,
|
|
"loss": 0.3787,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 2.437410071942446,
|
|
"grad_norm": 1.1106438804913326,
|
|
"learning_rate": 4.8671206975625856e-05,
|
|
"loss": 0.4043,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 2.4388489208633093,
|
|
"grad_norm": 0.25069341318469796,
|
|
"learning_rate": 4.863197156063492e-05,
|
|
"loss": 0.3808,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 2.4402877697841725,
|
|
"grad_norm": 0.32154999046879323,
|
|
"learning_rate": 4.859272743292017e-05,
|
|
"loss": 0.3902,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 2.441726618705036,
|
|
"grad_norm": 0.28588472916625,
|
|
"learning_rate": 4.855347463209287e-05,
|
|
"loss": 0.3954,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 2.4431654676258994,
|
|
"grad_norm": 0.3004971826955718,
|
|
"learning_rate": 4.851421319777304e-05,
|
|
"loss": 0.3868,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 2.4446043165467626,
|
|
"grad_norm": 0.26091801410888577,
|
|
"learning_rate": 4.847494316958939e-05,
|
|
"loss": 0.3832,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 2.446043165467626,
|
|
"grad_norm": 0.2750189577437227,
|
|
"learning_rate": 4.8435664587179315e-05,
|
|
"loss": 0.3972,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 2.4474820143884894,
|
|
"grad_norm": 0.27874822099705315,
|
|
"learning_rate": 4.839637749018887e-05,
|
|
"loss": 0.3841,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 2.4489208633093527,
|
|
"grad_norm": 0.2109393303440418,
|
|
"learning_rate": 4.835708191827268e-05,
|
|
"loss": 0.3914,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 2.450359712230216,
|
|
"grad_norm": 0.21985096325040535,
|
|
"learning_rate": 4.831777791109392e-05,
|
|
"loss": 0.3831,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 2.451798561151079,
|
|
"grad_norm": 0.31716657254358516,
|
|
"learning_rate": 4.827846550832428e-05,
|
|
"loss": 0.3877,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 2.4532374100719423,
|
|
"grad_norm": 0.2799301512277885,
|
|
"learning_rate": 4.8239144749643936e-05,
|
|
"loss": 0.3861,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 2.454676258992806,
|
|
"grad_norm": 0.30721452629504203,
|
|
"learning_rate": 4.819981567474152e-05,
|
|
"loss": 0.3865,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 2.456115107913669,
|
|
"grad_norm": 0.2584637886497313,
|
|
"learning_rate": 4.8160478323313974e-05,
|
|
"loss": 0.3929,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 2.4575539568345324,
|
|
"grad_norm": 0.1961977960275271,
|
|
"learning_rate": 4.812113273506671e-05,
|
|
"loss": 0.3827,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 2.4589928057553956,
|
|
"grad_norm": 0.28329071606110734,
|
|
"learning_rate": 4.808177894971336e-05,
|
|
"loss": 0.3898,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 2.460431654676259,
|
|
"grad_norm": 0.30925948341080894,
|
|
"learning_rate": 4.804241700697588e-05,
|
|
"loss": 0.3866,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 2.4618705035971225,
|
|
"grad_norm": 0.2852094467036699,
|
|
"learning_rate": 4.800304694658443e-05,
|
|
"loss": 0.3869,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 2.4633093525179857,
|
|
"grad_norm": 0.22356900870937116,
|
|
"learning_rate": 4.796366880827739e-05,
|
|
"loss": 0.3794,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 2.464748201438849,
|
|
"grad_norm": 0.21638374941090194,
|
|
"learning_rate": 4.792428263180128e-05,
|
|
"loss": 0.3792,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 2.466187050359712,
|
|
"grad_norm": 0.26591011250145025,
|
|
"learning_rate": 4.7884888456910734e-05,
|
|
"loss": 0.3972,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 2.4676258992805753,
|
|
"grad_norm": 0.21338775000086369,
|
|
"learning_rate": 4.784548632336846e-05,
|
|
"loss": 0.3931,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 2.469064748201439,
|
|
"grad_norm": 0.25160590370357494,
|
|
"learning_rate": 4.7806076270945197e-05,
|
|
"loss": 0.3827,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 2.470503597122302,
|
|
"grad_norm": 0.2571015589072176,
|
|
"learning_rate": 4.776665833941968e-05,
|
|
"loss": 0.3855,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 2.4719424460431654,
|
|
"grad_norm": 0.22724765766770022,
|
|
"learning_rate": 4.772723256857859e-05,
|
|
"loss": 0.3911,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 2.4733812949640286,
|
|
"grad_norm": 0.23172269527902886,
|
|
"learning_rate": 4.768779899821655e-05,
|
|
"loss": 0.383,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 2.4748201438848922,
|
|
"grad_norm": 0.18294914973103676,
|
|
"learning_rate": 4.7648357668135996e-05,
|
|
"loss": 0.3896,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 2.4762589928057555,
|
|
"grad_norm": 0.24573883125496895,
|
|
"learning_rate": 4.760890861814726e-05,
|
|
"loss": 0.3906,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 2.4776978417266187,
|
|
"grad_norm": 0.3195029988487335,
|
|
"learning_rate": 4.756945188806843e-05,
|
|
"loss": 0.3817,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 2.479136690647482,
|
|
"grad_norm": 0.22873228917822144,
|
|
"learning_rate": 4.752998751772536e-05,
|
|
"loss": 0.3916,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 2.4805755395683455,
|
|
"grad_norm": 0.21362155268507563,
|
|
"learning_rate": 4.749051554695159e-05,
|
|
"loss": 0.3823,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 2.4820143884892087,
|
|
"grad_norm": 0.2672207490906109,
|
|
"learning_rate": 4.745103601558838e-05,
|
|
"loss": 0.3855,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 2.483453237410072,
|
|
"grad_norm": 0.2390018241307908,
|
|
"learning_rate": 4.741154896348458e-05,
|
|
"loss": 0.3905,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 2.484892086330935,
|
|
"grad_norm": 0.20252049989136114,
|
|
"learning_rate": 4.7372054430496636e-05,
|
|
"loss": 0.3825,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 2.4863309352517984,
|
|
"grad_norm": 0.23336006173460797,
|
|
"learning_rate": 4.733255245648857e-05,
|
|
"loss": 0.3894,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 2.487769784172662,
|
|
"grad_norm": 0.24488585293977672,
|
|
"learning_rate": 4.729304308133189e-05,
|
|
"loss": 0.3868,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 2.4892086330935252,
|
|
"grad_norm": 0.21698342494739425,
|
|
"learning_rate": 4.725352634490557e-05,
|
|
"loss": 0.3921,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 2.4906474820143885,
|
|
"grad_norm": 0.21648159437081843,
|
|
"learning_rate": 4.7214002287096035e-05,
|
|
"loss": 0.3875,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 2.4920863309352517,
|
|
"grad_norm": 0.22620972898766825,
|
|
"learning_rate": 4.7174470947797117e-05,
|
|
"loss": 0.3827,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 2.493525179856115,
|
|
"grad_norm": 0.21014648153098664,
|
|
"learning_rate": 4.7134932366909915e-05,
|
|
"loss": 0.381,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 2.4949640287769785,
|
|
"grad_norm": 0.20927604952472664,
|
|
"learning_rate": 4.709538658434294e-05,
|
|
"loss": 0.3886,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 2.4964028776978417,
|
|
"grad_norm": 0.1839214221961082,
|
|
"learning_rate": 4.705583364001192e-05,
|
|
"loss": 0.3873,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 2.497841726618705,
|
|
"grad_norm": 0.25505203712304286,
|
|
"learning_rate": 4.701627357383981e-05,
|
|
"loss": 0.3871,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 2.499280575539568,
|
|
"grad_norm": 0.26369518296814703,
|
|
"learning_rate": 4.697670642575675e-05,
|
|
"loss": 0.3883,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 2.5007194244604314,
|
|
"grad_norm": 0.18376660493967642,
|
|
"learning_rate": 4.693713223570006e-05,
|
|
"loss": 0.3842,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 2.502158273381295,
|
|
"grad_norm": 0.2134172431593663,
|
|
"learning_rate": 4.689755104361414e-05,
|
|
"loss": 0.3805,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 2.5035971223021583,
|
|
"grad_norm": 0.2066932300456145,
|
|
"learning_rate": 4.685796288945046e-05,
|
|
"loss": 0.3788,
|
|
"step": 1740
|
|
},
|
|
    {
      "epoch": 2.5050359712230215,
      "grad_norm": 0.234925229829283,
      "learning_rate": 4.6818367813167535e-05,
      "loss": 0.3834,
      "step": 1741
    },
    {
      "epoch": 2.506474820143885,
      "grad_norm": 0.1664357960901228,
      "learning_rate": 4.6778765854730835e-05,
      "loss": 0.381,
      "step": 1742
    },
    {
      "epoch": 2.5079136690647483,
      "grad_norm": 0.21529984678524808,
      "learning_rate": 4.673915705411281e-05,
      "loss": 0.3945,
      "step": 1743
    },
    {
      "epoch": 2.5093525179856115,
      "grad_norm": 0.2510938033473777,
      "learning_rate": 4.6699541451292786e-05,
      "loss": 0.3835,
      "step": 1744
    },
    {
      "epoch": 2.5107913669064748,
      "grad_norm": 0.2304811294334107,
      "learning_rate": 4.665991908625699e-05,
      "loss": 0.389,
      "step": 1745
    },
    {
      "epoch": 2.512230215827338,
      "grad_norm": 0.25768145113733715,
      "learning_rate": 4.6620289998998445e-05,
      "loss": 0.38,
      "step": 1746
    },
    {
      "epoch": 2.5136690647482016,
      "grad_norm": 0.22390565159633255,
      "learning_rate": 4.658065422951697e-05,
      "loss": 0.381,
      "step": 1747
    },
    {
      "epoch": 2.515107913669065,
      "grad_norm": 0.23984543610405287,
      "learning_rate": 4.654101181781913e-05,
      "loss": 0.3852,
      "step": 1748
    },
    {
      "epoch": 2.516546762589928,
      "grad_norm": 0.21800343052535792,
      "learning_rate": 4.650136280391818e-05,
      "loss": 0.3842,
      "step": 1749
    },
    {
      "epoch": 2.5179856115107913,
      "grad_norm": 0.19864786485278876,
      "learning_rate": 4.646170722783408e-05,
      "loss": 0.3725,
      "step": 1750
    },
    {
      "epoch": 2.5194244604316545,
      "grad_norm": 0.2083757293158586,
      "learning_rate": 4.6422045129593344e-05,
      "loss": 0.3812,
      "step": 1751
    },
    {
      "epoch": 2.520863309352518,
      "grad_norm": 0.2542484826451339,
      "learning_rate": 4.6382376549229146e-05,
      "loss": 0.374,
      "step": 1752
    },
    {
      "epoch": 2.5223021582733813,
      "grad_norm": 0.25574229100711776,
      "learning_rate": 4.634270152678115e-05,
      "loss": 0.3796,
      "step": 1753
    },
    {
      "epoch": 2.5237410071942445,
      "grad_norm": 0.23316642478685295,
      "learning_rate": 4.630302010229555e-05,
      "loss": 0.3754,
      "step": 1754
    },
    {
      "epoch": 2.5251798561151078,
      "grad_norm": 0.23313434706707242,
      "learning_rate": 4.6263332315824964e-05,
      "loss": 0.3875,
      "step": 1755
    },
    {
      "epoch": 2.526618705035971,
      "grad_norm": 0.2543304067027591,
      "learning_rate": 4.622363820742848e-05,
      "loss": 0.3885,
      "step": 1756
    },
    {
      "epoch": 2.5280575539568346,
      "grad_norm": 0.21911717176812673,
      "learning_rate": 4.618393781717156e-05,
      "loss": 0.3806,
      "step": 1757
    },
    {
      "epoch": 2.529496402877698,
      "grad_norm": 0.27792372956342243,
      "learning_rate": 4.614423118512595e-05,
      "loss": 0.3936,
      "step": 1758
    },
    {
      "epoch": 2.530935251798561,
      "grad_norm": 0.22873980633162636,
      "learning_rate": 4.610451835136978e-05,
      "loss": 0.3895,
      "step": 1759
    },
    {
      "epoch": 2.5323741007194247,
      "grad_norm": 0.2392360890169555,
      "learning_rate": 4.606479935598738e-05,
      "loss": 0.3953,
      "step": 1760
    },
    {
      "epoch": 2.533812949640288,
      "grad_norm": 0.24354632385773428,
      "learning_rate": 4.602507423906931e-05,
      "loss": 0.3792,
      "step": 1761
    },
    {
      "epoch": 2.535251798561151,
      "grad_norm": 0.19551753454295567,
      "learning_rate": 4.598534304071233e-05,
      "loss": 0.3771,
      "step": 1762
    },
    {
      "epoch": 2.5366906474820143,
      "grad_norm": 0.19946774842998388,
      "learning_rate": 4.5945605801019315e-05,
      "loss": 0.3792,
      "step": 1763
    },
    {
      "epoch": 2.5381294964028775,
      "grad_norm": 0.2712368601186858,
      "learning_rate": 4.5905862560099255e-05,
      "loss": 0.3852,
      "step": 1764
    },
    {
      "epoch": 2.539568345323741,
      "grad_norm": 0.2698823834570867,
      "learning_rate": 4.5866113358067187e-05,
      "loss": 0.3839,
      "step": 1765
    },
    {
      "epoch": 2.5410071942446044,
      "grad_norm": 0.20585124881862535,
      "learning_rate": 4.582635823504416e-05,
      "loss": 0.3813,
      "step": 1766
    },
    {
      "epoch": 2.5424460431654676,
      "grad_norm": 0.2099911356829471,
      "learning_rate": 4.5786597231157214e-05,
      "loss": 0.3922,
      "step": 1767
    },
    {
      "epoch": 2.543884892086331,
      "grad_norm": 0.21338049617605137,
      "learning_rate": 4.574683038653932e-05,
      "loss": 0.3836,
      "step": 1768
    },
    {
      "epoch": 2.545323741007194,
      "grad_norm": 0.23296272737405282,
      "learning_rate": 4.5707057741329324e-05,
      "loss": 0.3832,
      "step": 1769
    },
    {
      "epoch": 2.5467625899280577,
      "grad_norm": 0.25571358221969553,
      "learning_rate": 4.5667279335671986e-05,
      "loss": 0.3891,
      "step": 1770
    },
    {
      "epoch": 2.548201438848921,
      "grad_norm": 0.2826055221936342,
      "learning_rate": 4.56274952097178e-05,
      "loss": 0.3815,
      "step": 1771
    },
    {
      "epoch": 2.549640287769784,
      "grad_norm": 0.24545804864860377,
      "learning_rate": 4.558770540362308e-05,
      "loss": 0.3847,
      "step": 1772
    },
    {
      "epoch": 2.5510791366906473,
      "grad_norm": 0.23159197825580435,
      "learning_rate": 4.554790995754988e-05,
      "loss": 0.3906,
      "step": 1773
    },
    {
      "epoch": 2.5525179856115106,
      "grad_norm": 0.2053727516145863,
      "learning_rate": 4.5508108911665926e-05,
      "loss": 0.3869,
      "step": 1774
    },
    {
      "epoch": 2.553956834532374,
      "grad_norm": 0.23493741154719533,
      "learning_rate": 4.5468302306144594e-05,
      "loss": 0.3837,
      "step": 1775
    },
    {
      "epoch": 2.5553956834532374,
      "grad_norm": 0.3010480215072811,
      "learning_rate": 4.542849018116491e-05,
      "loss": 0.3827,
      "step": 1776
    },
    {
      "epoch": 2.5568345323741006,
      "grad_norm": 0.26371636569380963,
      "learning_rate": 4.538867257691141e-05,
      "loss": 0.3767,
      "step": 1777
    },
    {
      "epoch": 2.5582733812949643,
      "grad_norm": 0.201296867444794,
      "learning_rate": 4.53488495335742e-05,
      "loss": 0.3933,
      "step": 1778
    },
    {
      "epoch": 2.5597122302158275,
      "grad_norm": 0.18863025599440553,
      "learning_rate": 4.5309021091348885e-05,
      "loss": 0.3875,
      "step": 1779
    },
    {
      "epoch": 2.5611510791366907,
      "grad_norm": 0.26787788064667256,
      "learning_rate": 4.5269187290436486e-05,
      "loss": 0.3798,
      "step": 1780
    },
    {
      "epoch": 2.562589928057554,
      "grad_norm": 0.29183279286729297,
      "learning_rate": 4.5229348171043466e-05,
      "loss": 0.3819,
      "step": 1781
    },
    {
      "epoch": 2.564028776978417,
      "grad_norm": 0.33367040546630733,
      "learning_rate": 4.51895037733816e-05,
      "loss": 0.3867,
      "step": 1782
    },
    {
      "epoch": 2.565467625899281,
      "grad_norm": 0.3060653234086106,
      "learning_rate": 4.5149654137668095e-05,
      "loss": 0.385,
      "step": 1783
    },
    {
      "epoch": 2.566906474820144,
      "grad_norm": 0.2344273420574633,
      "learning_rate": 4.5109799304125333e-05,
      "loss": 0.3905,
      "step": 1784
    },
    {
      "epoch": 2.568345323741007,
      "grad_norm": 0.16800242452974798,
      "learning_rate": 4.5069939312981e-05,
      "loss": 0.3841,
      "step": 1785
    },
    {
      "epoch": 2.5697841726618704,
      "grad_norm": 0.22559147577479122,
      "learning_rate": 4.503007420446798e-05,
      "loss": 0.3791,
      "step": 1786
    },
    {
      "epoch": 2.5712230215827336,
      "grad_norm": 0.212005824746216,
      "learning_rate": 4.499020401882433e-05,
      "loss": 0.3825,
      "step": 1787
    },
    {
      "epoch": 2.5726618705035973,
      "grad_norm": 0.25787731478463954,
      "learning_rate": 4.49503287962932e-05,
      "loss": 0.3836,
      "step": 1788
    },
    {
      "epoch": 2.5741007194244605,
      "grad_norm": 0.21712548708077195,
      "learning_rate": 4.491044857712288e-05,
      "loss": 0.3857,
      "step": 1789
    },
    {
      "epoch": 2.5755395683453237,
      "grad_norm": 0.21077651381013476,
      "learning_rate": 4.4870563401566634e-05,
      "loss": 0.3903,
      "step": 1790
    },
    {
      "epoch": 2.576978417266187,
      "grad_norm": 0.2267505117928637,
      "learning_rate": 4.483067330988278e-05,
      "loss": 0.3856,
      "step": 1791
    },
    {
      "epoch": 2.57841726618705,
      "grad_norm": 0.2191258094315309,
      "learning_rate": 4.479077834233458e-05,
      "loss": 0.3871,
      "step": 1792
    },
    {
      "epoch": 2.579856115107914,
      "grad_norm": 0.19419512155841098,
      "learning_rate": 4.475087853919023e-05,
      "loss": 0.3853,
      "step": 1793
    },
    {
      "epoch": 2.581294964028777,
      "grad_norm": 0.24618927392271306,
      "learning_rate": 4.4710973940722786e-05,
      "loss": 0.3907,
      "step": 1794
    },
    {
      "epoch": 2.58273381294964,
      "grad_norm": 0.23845595620137308,
      "learning_rate": 4.4671064587210146e-05,
      "loss": 0.3807,
      "step": 1795
    },
    {
      "epoch": 2.584172661870504,
      "grad_norm": 0.21952496254899284,
      "learning_rate": 4.4631150518935044e-05,
      "loss": 0.3797,
      "step": 1796
    },
    {
      "epoch": 2.5856115107913666,
      "grad_norm": 0.20009093186816157,
      "learning_rate": 4.459123177618491e-05,
      "loss": 0.3824,
      "step": 1797
    },
    {
      "epoch": 2.5870503597122303,
      "grad_norm": 0.2149318451212652,
      "learning_rate": 4.455130839925195e-05,
      "loss": 0.3878,
      "step": 1798
    },
    {
      "epoch": 2.5884892086330935,
      "grad_norm": 0.18814443595613267,
      "learning_rate": 4.451138042843302e-05,
      "loss": 0.3871,
      "step": 1799
    },
    {
      "epoch": 2.5899280575539567,
      "grad_norm": 0.19254491038613214,
      "learning_rate": 4.447144790402963e-05,
      "loss": 0.3847,
      "step": 1800
    },
    {
      "epoch": 2.5913669064748204,
      "grad_norm": 0.22234583540934225,
      "learning_rate": 4.4431510866347837e-05,
      "loss": 0.3817,
      "step": 1801
    },
    {
      "epoch": 2.5928057553956836,
      "grad_norm": 0.1973016488235195,
      "learning_rate": 4.439156935569833e-05,
      "loss": 0.3868,
      "step": 1802
    },
    {
      "epoch": 2.594244604316547,
      "grad_norm": 0.20298616290572372,
      "learning_rate": 4.435162341239625e-05,
      "loss": 0.3868,
      "step": 1803
    },
    {
      "epoch": 2.59568345323741,
      "grad_norm": 0.19542267654221263,
      "learning_rate": 4.4311673076761254e-05,
      "loss": 0.3826,
      "step": 1804
    },
    {
      "epoch": 2.597122302158273,
      "grad_norm": 0.16966634705533268,
      "learning_rate": 4.42717183891174e-05,
      "loss": 0.3761,
      "step": 1805
    },
    {
      "epoch": 2.598561151079137,
      "grad_norm": 0.20764160101405277,
      "learning_rate": 4.4231759389793144e-05,
      "loss": 0.3888,
      "step": 1806
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.18502544926973474,
      "learning_rate": 4.4191796119121335e-05,
      "loss": 0.3861,
      "step": 1807
    },
    {
      "epoch": 2.6014388489208633,
      "grad_norm": 0.17047352288155362,
      "learning_rate": 4.415182861743906e-05,
      "loss": 0.3913,
      "step": 1808
    },
    {
      "epoch": 2.6028776978417265,
      "grad_norm": 0.20830825724082266,
      "learning_rate": 4.411185692508774e-05,
      "loss": 0.3856,
      "step": 1809
    },
    {
      "epoch": 2.6043165467625897,
      "grad_norm": 0.18882208139469067,
      "learning_rate": 4.4071881082413e-05,
      "loss": 0.3828,
      "step": 1810
    },
    {
      "epoch": 2.6057553956834534,
      "grad_norm": 0.18498626302163906,
      "learning_rate": 4.4031901129764665e-05,
      "loss": 0.3767,
      "step": 1811
    },
    {
      "epoch": 2.6071942446043166,
      "grad_norm": 0.17774349521329066,
      "learning_rate": 4.3991917107496695e-05,
      "loss": 0.389,
      "step": 1812
    },
    {
      "epoch": 2.60863309352518,
      "grad_norm": 0.1630858133864924,
      "learning_rate": 4.395192905596716e-05,
      "loss": 0.3843,
      "step": 1813
    },
    {
      "epoch": 2.610071942446043,
      "grad_norm": 0.19403085971963063,
      "learning_rate": 4.3911937015538186e-05,
      "loss": 0.385,
      "step": 1814
    },
    {
      "epoch": 2.6115107913669062,
      "grad_norm": 0.22247682560065388,
      "learning_rate": 4.3871941026575965e-05,
      "loss": 0.3826,
      "step": 1815
    },
    {
      "epoch": 2.61294964028777,
      "grad_norm": 0.2130318366407136,
      "learning_rate": 4.383194112945066e-05,
      "loss": 0.3869,
      "step": 1816
    },
    {
      "epoch": 2.614388489208633,
      "grad_norm": 0.23399197648749875,
      "learning_rate": 4.379193736453633e-05,
      "loss": 0.3833,
      "step": 1817
    },
    {
      "epoch": 2.6158273381294963,
      "grad_norm": 0.29132953508804943,
      "learning_rate": 4.375192977221099e-05,
      "loss": 0.393,
      "step": 1818
    },
    {
      "epoch": 2.61726618705036,
      "grad_norm": 0.29821117341441095,
      "learning_rate": 4.371191839285651e-05,
      "loss": 0.3788,
      "step": 1819
    },
    {
      "epoch": 2.618705035971223,
      "grad_norm": 0.27578353874734546,
      "learning_rate": 4.367190326685858e-05,
      "loss": 0.3879,
      "step": 1820
    },
    {
      "epoch": 2.6201438848920864,
      "grad_norm": 0.34105613060662027,
      "learning_rate": 4.363188443460666e-05,
      "loss": 0.3899,
      "step": 1821
    },
    {
      "epoch": 2.6215827338129496,
      "grad_norm": 0.32947024205852915,
      "learning_rate": 4.3591861936493964e-05,
      "loss": 0.38,
      "step": 1822
    },
    {
      "epoch": 2.623021582733813,
      "grad_norm": 0.30505053544421046,
      "learning_rate": 4.3551835812917395e-05,
      "loss": 0.3844,
      "step": 1823
    },
    {
      "epoch": 2.6244604316546765,
      "grad_norm": 0.22944788798627733,
      "learning_rate": 4.351180610427754e-05,
      "loss": 0.3851,
      "step": 1824
    },
    {
      "epoch": 2.6258992805755397,
      "grad_norm": 0.27179380254037994,
      "learning_rate": 4.347177285097855e-05,
      "loss": 0.3837,
      "step": 1825
    },
    {
      "epoch": 2.627338129496403,
      "grad_norm": 0.2867014104238608,
      "learning_rate": 4.343173609342822e-05,
      "loss": 0.3903,
      "step": 1826
    },
    {
      "epoch": 2.628776978417266,
      "grad_norm": 0.24182278284855807,
      "learning_rate": 4.339169587203785e-05,
      "loss": 0.3832,
      "step": 1827
    },
    {
      "epoch": 2.6302158273381293,
      "grad_norm": 0.20743323660964966,
      "learning_rate": 4.335165222722222e-05,
      "loss": 0.3717,
      "step": 1828
    },
    {
      "epoch": 2.631654676258993,
      "grad_norm": 0.2153772314711594,
      "learning_rate": 4.331160519939962e-05,
      "loss": 0.3768,
      "step": 1829
    },
    {
      "epoch": 2.633093525179856,
      "grad_norm": 0.20181612151241649,
      "learning_rate": 4.327155482899168e-05,
      "loss": 0.3816,
      "step": 1830
    },
    {
      "epoch": 2.6345323741007194,
      "grad_norm": 0.22862664289672663,
      "learning_rate": 4.323150115642346e-05,
      "loss": 0.3897,
      "step": 1831
    },
    {
      "epoch": 2.6359712230215826,
      "grad_norm": 0.21517908165451471,
      "learning_rate": 4.3191444222123326e-05,
      "loss": 0.393,
      "step": 1832
    },
    {
      "epoch": 2.637410071942446,
      "grad_norm": 0.2142479902994153,
      "learning_rate": 4.3151384066522964e-05,
      "loss": 0.3823,
      "step": 1833
    },
    {
      "epoch": 2.6388489208633095,
      "grad_norm": 0.19311218598679847,
      "learning_rate": 4.311132073005727e-05,
      "loss": 0.3877,
      "step": 1834
    },
    {
      "epoch": 2.6402877697841727,
      "grad_norm": 0.20408896956105657,
      "learning_rate": 4.3071254253164395e-05,
      "loss": 0.3831,
      "step": 1835
    },
    {
      "epoch": 2.641726618705036,
      "grad_norm": 0.21829313209142068,
      "learning_rate": 4.3031184676285625e-05,
      "loss": 0.3732,
      "step": 1836
    },
    {
      "epoch": 2.6431654676258995,
      "grad_norm": 0.17795336783545596,
      "learning_rate": 4.299111203986539e-05,
      "loss": 0.3752,
      "step": 1837
    },
    {
      "epoch": 2.6446043165467623,
      "grad_norm": 0.19973947350856514,
      "learning_rate": 4.29510363843512e-05,
      "loss": 0.3806,
      "step": 1838
    },
    {
      "epoch": 2.646043165467626,
      "grad_norm": 0.22147721709773463,
      "learning_rate": 4.291095775019364e-05,
      "loss": 0.3848,
      "step": 1839
    },
    {
      "epoch": 2.647482014388489,
      "grad_norm": 0.19642175021630362,
      "learning_rate": 4.287087617784627e-05,
      "loss": 0.3888,
      "step": 1840
    },
{
|
|
"epoch": 2.6489208633093524,
|
|
"grad_norm": 0.20952438633992274,
|
|
"learning_rate": 4.283079170776561e-05,
|
|
"loss": 0.3833,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 2.650359712230216,
|
|
"grad_norm": 0.27563855390474284,
|
|
"learning_rate": 4.279070438041116e-05,
|
|
"loss": 0.382,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 2.6517985611510793,
|
|
"grad_norm": 0.22285525534841993,
|
|
"learning_rate": 4.275061423624522e-05,
|
|
"loss": 0.3868,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 2.6532374100719425,
|
|
"grad_norm": 0.18612174505138793,
|
|
"learning_rate": 4.2710521315733e-05,
|
|
"loss": 0.3861,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 2.6546762589928057,
|
|
"grad_norm": 0.22062750582327986,
|
|
"learning_rate": 4.26704256593425e-05,
|
|
"loss": 0.3913,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 2.656115107913669,
|
|
"grad_norm": 0.22389157493796577,
|
|
"learning_rate": 4.2630327307544454e-05,
|
|
"loss": 0.377,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 2.6575539568345325,
|
|
"grad_norm": 0.19680879988535324,
|
|
"learning_rate": 4.2590226300812335e-05,
|
|
"loss": 0.3807,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 2.6589928057553958,
|
|
"grad_norm": 0.17670600228984773,
|
|
"learning_rate": 4.255012267962232e-05,
|
|
"loss": 0.3893,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 2.660431654676259,
|
|
"grad_norm": 0.22836042087134162,
|
|
"learning_rate": 4.251001648445317e-05,
|
|
"loss": 0.3856,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 2.661870503597122,
|
|
"grad_norm": 0.22626937943378758,
|
|
"learning_rate": 4.246990775578628e-05,
|
|
"loss": 0.3819,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 2.6633093525179854,
|
|
"grad_norm": 0.23412246151020968,
|
|
"learning_rate": 4.242979653410562e-05,
|
|
"loss": 0.3858,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 2.664748201438849,
|
|
"grad_norm": 0.23815661132247493,
|
|
"learning_rate": 4.238968285989762e-05,
|
|
"loss": 0.3813,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 2.6661870503597123,
|
|
"grad_norm": 0.2534920396788507,
|
|
"learning_rate": 4.2349566773651236e-05,
|
|
"loss": 0.3806,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 2.6676258992805755,
|
|
"grad_norm": 0.32017210986040495,
|
|
"learning_rate": 4.2309448315857844e-05,
|
|
"loss": 0.387,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 2.6690647482014387,
|
|
"grad_norm": 0.26351685079794046,
|
|
"learning_rate": 4.226932752701122e-05,
|
|
"loss": 0.3861,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 2.670503597122302,
|
|
"grad_norm": 0.2464721133391229,
|
|
"learning_rate": 4.2229204447607456e-05,
|
|
"loss": 0.3781,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 2.6719424460431656,
|
|
"grad_norm": 0.2756683845511027,
|
|
"learning_rate": 4.2189079118145e-05,
|
|
"loss": 0.3915,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 2.6733812949640288,
|
|
"grad_norm": 0.24597916531986108,
|
|
"learning_rate": 4.214895157912454e-05,
|
|
"loss": 0.3885,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 2.674820143884892,
|
|
"grad_norm": 0.27632203369280156,
|
|
"learning_rate": 4.210882187104904e-05,
|
|
"loss": 0.38,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 2.6762589928057556,
|
|
"grad_norm": 0.3241704139737846,
|
|
"learning_rate": 4.206869003442358e-05,
|
|
"loss": 0.3846,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 2.677697841726619,
|
|
"grad_norm": 0.21119870260279336,
|
|
"learning_rate": 4.2028556109755465e-05,
|
|
"loss": 0.3813,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 2.679136690647482,
|
|
"grad_norm": 0.3215270964941637,
|
|
"learning_rate": 4.198842013755408e-05,
|
|
"loss": 0.3856,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 2.6805755395683453,
|
|
"grad_norm": 0.26212038561585965,
|
|
"learning_rate": 4.194828215833082e-05,
|
|
"loss": 0.3854,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 2.6820143884892085,
|
|
"grad_norm": 0.2787809479058089,
|
|
"learning_rate": 4.1908142212599206e-05,
|
|
"loss": 0.3732,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 2.683453237410072,
|
|
"grad_norm": 0.2336213083718478,
|
|
"learning_rate": 4.1868000340874674e-05,
|
|
"loss": 0.3745,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 2.6848920863309353,
|
|
"grad_norm": 0.27978823768393496,
|
|
"learning_rate": 4.182785658367462e-05,
|
|
"loss": 0.3864,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 2.6863309352517986,
|
|
"grad_norm": 0.2932721340547552,
|
|
"learning_rate": 4.178771098151835e-05,
|
|
"loss": 0.3872,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 2.6877697841726618,
|
|
"grad_norm": 0.18876516510001815,
|
|
"learning_rate": 4.1747563574927034e-05,
|
|
"loss": 0.3884,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 2.689208633093525,
|
|
"grad_norm": 0.19715809898324685,
|
|
"learning_rate": 4.170741440442366e-05,
|
|
"loss": 0.3831,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 2.6906474820143886,
|
|
"grad_norm": 0.27834656630893656,
|
|
"learning_rate": 4.166726351053299e-05,
|
|
"loss": 0.3863,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 2.692086330935252,
|
|
"grad_norm": 0.2761216332088214,
|
|
"learning_rate": 4.1627110933781515e-05,
|
|
"loss": 0.3912,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 2.693525179856115,
|
|
"grad_norm": 0.28459273855266376,
|
|
"learning_rate": 4.158695671469746e-05,
|
|
"loss": 0.3951,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 2.6949640287769783,
|
|
"grad_norm": 0.2111598445003445,
|
|
"learning_rate": 4.154680089381068e-05,
|
|
"loss": 0.3908,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 2.6964028776978415,
|
|
"grad_norm": 0.20413344196280028,
|
|
"learning_rate": 4.150664351165266e-05,
|
|
"loss": 0.3743,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 2.697841726618705,
|
|
"grad_norm": 0.3258658553516126,
|
|
"learning_rate": 4.146648460875646e-05,
|
|
"loss": 0.3896,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 2.6992805755395683,
|
|
"grad_norm": 0.2278190567541258,
|
|
"learning_rate": 4.1426324225656644e-05,
|
|
"loss": 0.384,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 2.7007194244604316,
|
|
"grad_norm": 0.22736642283183017,
|
|
"learning_rate": 4.138616240288934e-05,
|
|
"loss": 0.3879,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 2.702158273381295,
|
|
"grad_norm": 0.20167907784677327,
|
|
"learning_rate": 4.134599918099204e-05,
|
|
"loss": 0.3879,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 2.7035971223021584,
|
|
"grad_norm": 0.191391312303086,
|
|
"learning_rate": 4.130583460050371e-05,
|
|
"loss": 0.3904,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 2.7050359712230216,
|
|
"grad_norm": 0.25965305818637435,
|
|
"learning_rate": 4.126566870196468e-05,
|
|
"loss": 0.393,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 2.706474820143885,
|
|
"grad_norm": 0.19547246994017597,
|
|
"learning_rate": 4.12255015259166e-05,
|
|
"loss": 0.3837,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 2.707913669064748,
|
|
"grad_norm": 0.16878971045393576,
|
|
"learning_rate": 4.1185333112902394e-05,
|
|
"loss": 0.3831,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 2.7093525179856117,
|
|
"grad_norm": 0.19649719073124605,
|
|
"learning_rate": 4.114516350346626e-05,
|
|
"loss": 0.3787,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 2.710791366906475,
|
|
"grad_norm": 0.18922227905061667,
|
|
"learning_rate": 4.1104992738153616e-05,
|
|
"loss": 0.3887,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 2.712230215827338,
|
|
"grad_norm": 0.1652561735994928,
|
|
"learning_rate": 4.1064820857511e-05,
|
|
"loss": 0.3824,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 2.7136690647482014,
|
|
"grad_norm": 0.17446636504613597,
|
|
"learning_rate": 4.1024647902086107e-05,
|
|
"loss": 0.3928,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 2.7151079136690646,
|
|
"grad_norm": 0.18786814621208797,
|
|
"learning_rate": 4.0984473912427706e-05,
|
|
"loss": 0.3918,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 2.716546762589928,
|
|
"grad_norm": 0.22243217330118314,
|
|
"learning_rate": 4.0944298929085633e-05,
|
|
"loss": 0.3806,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 2.7179856115107914,
|
|
"grad_norm": 0.17173897312816497,
|
|
"learning_rate": 4.090412299261068e-05,
|
|
"loss": 0.3841,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 2.7194244604316546,
|
|
"grad_norm": 0.16864012935566222,
|
|
"learning_rate": 4.086394614355467e-05,
|
|
"loss": 0.3782,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 2.720863309352518,
|
|
"grad_norm": 0.18377591063673382,
|
|
"learning_rate": 4.082376842247027e-05,
|
|
"loss": 0.3866,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 2.722302158273381,
|
|
"grad_norm": 0.18297125808495668,
|
|
"learning_rate": 4.0783589869911074e-05,
|
|
"loss": 0.3898,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 2.7237410071942447,
|
|
"grad_norm": 0.20132802628206997,
|
|
"learning_rate": 4.074341052643152e-05,
|
|
"loss": 0.3837,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 2.725179856115108,
|
|
"grad_norm": 0.1766236380879585,
|
|
"learning_rate": 4.070323043258683e-05,
|
|
"loss": 0.3895,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 2.726618705035971,
|
|
"grad_norm": 0.25045343389363944,
|
|
"learning_rate": 4.066304962893297e-05,
|
|
"loss": 0.3862,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 2.728057553956835,
|
|
"grad_norm": 0.22774701472913508,
|
|
"learning_rate": 4.062286815602661e-05,
|
|
"loss": 0.3897,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 2.7294964028776976,
|
|
"grad_norm": 0.1860965590253563,
|
|
"learning_rate": 4.0582686054425196e-05,
|
|
"loss": 0.3856,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 2.7309352517985612,
|
|
"grad_norm": 0.19327490329061806,
|
|
"learning_rate": 4.054250336468666e-05,
|
|
"loss": 0.3804,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 2.7323741007194244,
|
|
"grad_norm": 0.20056199664665128,
|
|
"learning_rate": 4.050232012736964e-05,
|
|
"loss": 0.3843,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 2.7338129496402876,
|
|
"grad_norm": 0.19598282337402015,
|
|
"learning_rate": 4.0462136383033285e-05,
|
|
"loss": 0.3885,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 2.7352517985611513,
|
|
"grad_norm": 0.23277122589012764,
|
|
"learning_rate": 4.0421952172237254e-05,
|
|
"loss": 0.3817,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 2.7366906474820145,
|
|
"grad_norm": 0.21928039556780154,
|
|
"learning_rate": 4.038176753554166e-05,
|
|
"loss": 0.3848,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 2.7381294964028777,
|
|
"grad_norm": 0.1879895041520755,
|
|
"learning_rate": 4.034158251350711e-05,
|
|
"loss": 0.3775,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 2.739568345323741,
|
|
"grad_norm": 0.262119931611971,
|
|
"learning_rate": 4.030139714669453e-05,
|
|
"loss": 0.3824,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 2.741007194244604,
|
|
"grad_norm": 0.22392279382280658,
|
|
"learning_rate": 4.026121147566522e-05,
|
|
"loss": 0.3921,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 2.742446043165468,
|
|
"grad_norm": 0.20824791648838323,
|
|
"learning_rate": 4.02210255409808e-05,
|
|
"loss": 0.3856,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 2.743884892086331,
|
|
"grad_norm": 0.28193501760965545,
|
|
"learning_rate": 4.018083938320314e-05,
|
|
"loss": 0.3835,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 2.7453237410071942,
|
|
"grad_norm": 0.264281168892852,
|
|
"learning_rate": 4.014065304289435e-05,
|
|
"loss": 0.3795,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 2.7467625899280574,
|
|
"grad_norm": 0.18564889596491396,
|
|
"learning_rate": 4.010046656061669e-05,
|
|
"loss": 0.3894,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 2.7482014388489207,
|
|
"grad_norm": 0.27663097203351705,
|
|
"learning_rate": 4.006027997693262e-05,
|
|
"loss": 0.3798,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 2.7496402877697843,
|
|
"grad_norm": 0.2970264600024271,
|
|
"learning_rate": 4.002009333240465e-05,
|
|
"loss": 0.3766,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 2.7510791366906475,
|
|
"grad_norm": 0.19657670393474266,
|
|
"learning_rate": 3.997990666759536e-05,
|
|
"loss": 0.3904,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 2.7525179856115107,
|
|
"grad_norm": 0.26765675734042443,
|
|
"learning_rate": 3.99397200230674e-05,
|
|
"loss": 0.3735,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 2.753956834532374,
|
|
"grad_norm": 0.2583583961269702,
|
|
"learning_rate": 3.989953343938331e-05,
|
|
"loss": 0.3878,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 2.755395683453237,
|
|
"grad_norm": 0.18295287634087892,
|
|
"learning_rate": 3.985934695710566e-05,
|
|
"loss": 0.385,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 2.756834532374101,
|
|
"grad_norm": 0.2514042532897146,
|
|
"learning_rate": 3.9819160616796873e-05,
|
|
"loss": 0.387,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 2.758273381294964,
|
|
"grad_norm": 0.22523621445412403,
|
|
"learning_rate": 3.977897445901922e-05,
|
|
"loss": 0.3903,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 2.7597122302158272,
|
|
"grad_norm": 0.20920485520912854,
|
|
"learning_rate": 3.9738788524334794e-05,
|
|
"loss": 0.3841,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 2.761151079136691,
|
|
"grad_norm": 0.23331636461587274,
|
|
"learning_rate": 3.969860285330549e-05,
|
|
"loss": 0.3909,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 2.762589928057554,
|
|
"grad_norm": 0.23014941621194834,
|
|
"learning_rate": 3.965841748649291e-05,
|
|
"loss": 0.3859,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 2.7640287769784173,
|
|
"grad_norm": 0.2556422173671989,
|
|
"learning_rate": 3.961823246445834e-05,
|
|
"loss": 0.3809,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 2.7654676258992805,
|
|
"grad_norm": 0.1824380463295436,
|
|
"learning_rate": 3.957804782776276e-05,
|
|
"loss": 0.3874,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 2.7669064748201437,
|
|
"grad_norm": 0.2176392212047248,
|
|
"learning_rate": 3.953786361696673e-05,
|
|
"loss": 0.383,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 2.7683453237410074,
|
|
"grad_norm": 0.2126668652872775,
|
|
"learning_rate": 3.9497679872630366e-05,
|
|
"loss": 0.3869,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 2.7697841726618706,
|
|
"grad_norm": 0.18741354703350005,
|
|
"learning_rate": 3.945749663531334e-05,
|
|
"loss": 0.3881,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 2.771223021582734,
|
|
"grad_norm": 0.24093937314251143,
|
|
"learning_rate": 3.941731394557482e-05,
|
|
"loss": 0.3866,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 2.772661870503597,
|
|
"grad_norm": 0.20911380469057766,
|
|
"learning_rate": 3.9377131843973394e-05,
|
|
"loss": 0.383,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 2.7741007194244602,
|
|
"grad_norm": 0.1917970062433543,
|
|
"learning_rate": 3.933695037106705e-05,
|
|
"loss": 0.3768,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 2.775539568345324,
|
|
"grad_norm": 0.19937845174176258,
|
|
"learning_rate": 3.9296769567413177e-05,
|
|
"loss": 0.3945,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 2.776978417266187,
|
|
"grad_norm": 0.2019688252849309,
|
|
"learning_rate": 3.925658947356849e-05,
|
|
"loss": 0.3783,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 2.7784172661870503,
|
|
"grad_norm": 0.2034943714635055,
|
|
"learning_rate": 3.921641013008893e-05,
|
|
"loss": 0.3809,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 2.7798561151079135,
|
|
"grad_norm": 0.24179145223658585,
|
|
"learning_rate": 3.9176231577529734e-05,
|
|
"loss": 0.391,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 2.7812949640287767,
|
|
"grad_norm": 0.21894313019520437,
|
|
"learning_rate": 3.913605385644535e-05,
|
|
"loss": 0.3833,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 2.7827338129496404,
|
|
"grad_norm": 0.2348853121212761,
|
|
"learning_rate": 3.909587700738933e-05,
|
|
"loss": 0.3781,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 2.7841726618705036,
|
|
"grad_norm": 0.27599018222491367,
|
|
"learning_rate": 3.9055701070914393e-05,
|
|
"loss": 0.3912,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 2.785611510791367,
|
|
"grad_norm": 0.28335757226025454,
|
|
"learning_rate": 3.90155260875723e-05,
|
|
"loss": 0.376,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 2.7870503597122305,
|
|
"grad_norm": 0.2744216840423249,
|
|
"learning_rate": 3.8975352097913914e-05,
|
|
"loss": 0.3879,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 2.7884892086330937,
|
|
"grad_norm": 0.22531992723565977,
|
|
"learning_rate": 3.8935179142489016e-05,
|
|
"loss": 0.3854,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 2.789928057553957,
|
|
"grad_norm": 0.30879882712305806,
|
|
"learning_rate": 3.8895007261846404e-05,
|
|
"loss": 0.3811,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 2.79136690647482,
|
|
"grad_norm": 0.3134740967284474,
|
|
"learning_rate": 3.885483649653374e-05,
|
|
"loss": 0.3838,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 2.7928057553956833,
|
|
"grad_norm": 0.22009210622036252,
|
|
"learning_rate": 3.881466688709761e-05,
|
|
"loss": 0.3841,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 2.794244604316547,
|
|
"grad_norm": 0.3126995156730231,
|
|
"learning_rate": 3.877449847408342e-05,
|
|
"loss": 0.3905,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 2.79568345323741,
|
|
"grad_norm": 0.23084814007127713,
|
|
"learning_rate": 3.873433129803532e-05,
|
|
"loss": 0.3802,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 2.7971223021582734,
|
|
"grad_norm": 0.24280131625429474,
|
|
"learning_rate": 3.86941653994963e-05,
|
|
"loss": 0.3937,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 2.7985611510791366,
|
|
"grad_norm": 0.22495469403678497,
|
|
"learning_rate": 3.8654000819007974e-05,
|
|
"loss": 0.3922,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"grad_norm": 0.2486412718829283,
|
|
"learning_rate": 3.8613837597110686e-05,
|
|
"loss": 0.3878,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 2.8014388489208635,
|
|
"grad_norm": 0.2391662477021012,
|
|
"learning_rate": 3.8573675774343356e-05,
|
|
"loss": 0.3863,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 2.8028776978417267,
|
|
"grad_norm": 0.2169296813419823,
|
|
"learning_rate": 3.853351539124355e-05,
|
|
"loss": 0.3902,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 2.80431654676259,
|
|
"grad_norm": 0.2124835901307931,
|
|
"learning_rate": 3.8493356488347345e-05,
|
|
"loss": 0.3812,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 2.805755395683453,
|
|
"grad_norm": 0.19919689138064986,
|
|
"learning_rate": 3.845319910618933e-05,
|
|
"loss": 0.379,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 2.8071942446043163,
|
|
"grad_norm": 0.19020097125065544,
|
|
"learning_rate": 3.841304328530254e-05,
|
|
"loss": 0.3843,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 2.80863309352518,
|
|
"grad_norm": 0.19539477252193874,
|
|
"learning_rate": 3.83728890662185e-05,
|
|
"loss": 0.3811,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 2.810071942446043,
|
|
"grad_norm": 0.20064137021662662,
|
|
"learning_rate": 3.833273648946704e-05,
|
|
"loss": 0.3809,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 2.8115107913669064,
|
|
"grad_norm": 0.224147099626367,
|
|
"learning_rate": 3.829258559557635e-05,
|
|
"loss": 0.3702,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 2.81294964028777,
|
|
"grad_norm": 0.19782756752848318,
|
|
"learning_rate": 3.825243642507297e-05,
|
|
"loss": 0.3851,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 2.814388489208633,
|
|
"grad_norm": 0.19616974722487354,
|
|
"learning_rate": 3.8212289018481666e-05,
|
|
"loss": 0.3867,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 2.8158273381294965,
|
|
"grad_norm": 0.1711831429634888,
|
|
"learning_rate": 3.817214341632539e-05,
|
|
"loss": 0.3756,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 2.8172661870503597,
|
|
"grad_norm": 0.17497593247628174,
|
|
"learning_rate": 3.813199965912533e-05,
|
|
"loss": 0.3825,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 2.818705035971223,
|
|
"grad_norm": 0.21527376674133467,
|
|
"learning_rate": 3.80918577874008e-05,
|
|
"loss": 0.3857,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 2.8201438848920866,
|
|
"grad_norm": 0.18794783133282614,
|
|
"learning_rate": 3.8051717841669196e-05,
|
|
"loss": 0.3918,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 2.8215827338129498,
|
|
"grad_norm": 0.20439905308489822,
|
|
"learning_rate": 3.801157986244595e-05,
|
|
"loss": 0.381,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 2.823021582733813,
|
|
"grad_norm": 0.27842346987708216,
|
|
"learning_rate": 3.7971443890244534e-05,
|
|
"loss": 0.3768,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 2.824460431654676,
|
|
"grad_norm": 0.20862673236647317,
|
|
"learning_rate": 3.7931309965576426e-05,
|
|
"loss": 0.3808,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 2.8258992805755394,
|
|
"grad_norm": 0.27837904457152257,
|
|
"learning_rate": 3.7891178128950975e-05,
|
|
"loss": 0.3783,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 2.827338129496403,
|
|
"grad_norm": 0.24794233455375222,
|
|
"learning_rate": 3.785104842087546e-05,
|
|
"loss": 0.3758,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 2.8287769784172663,
|
|
"grad_norm": 0.26896083154818823,
|
|
"learning_rate": 3.7810920881855016e-05,
|
|
"loss": 0.3772,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 2.8302158273381295,
|
|
"grad_norm": 0.24210949916977012,
|
|
"learning_rate": 3.777079555239255e-05,
|
|
"loss": 0.3776,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 2.8316546762589927,
|
|
"grad_norm": 0.20235352195949372,
|
|
"learning_rate": 3.77306724729888e-05,
|
|
"loss": 0.3903,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 2.833093525179856,
|
|
"grad_norm": 0.2908992441598886,
|
|
"learning_rate": 3.769055168414215e-05,
|
|
"loss": 0.3868,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 2.8345323741007196,
|
|
"grad_norm": 0.2625251180454279,
|
|
"learning_rate": 3.765043322634877e-05,
|
|
"loss": 0.3854,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 2.8359712230215828,
|
|
"grad_norm": 0.2229334902510211,
|
|
"learning_rate": 3.761031714010239e-05,
|
|
"loss": 0.3808,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 2.837410071942446,
|
|
"grad_norm": 0.23005909137505817,
|
|
"learning_rate": 3.75702034658944e-05,
|
|
"loss": 0.3786,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 2.838848920863309,
|
|
"grad_norm": 0.2305149842375871,
|
|
"learning_rate": 3.753009224421373e-05,
|
|
"loss": 0.3939,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 2.8402877697841724,
|
|
"grad_norm": 0.24480399389020072,
|
|
"learning_rate": 3.748998351554684e-05,
|
|
"loss": 0.3774,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 2.841726618705036,
|
|
"grad_norm": 0.18727462977862316,
|
|
"learning_rate": 3.74498773203777e-05,
|
|
"loss": 0.3793,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 2.8431654676258993,
|
|
"grad_norm": 0.2621441515112876,
|
|
"learning_rate": 3.7409773699187664e-05,
|
|
"loss": 0.3839,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 2.8446043165467625,
|
|
"grad_norm": 0.2656433222589684,
|
|
"learning_rate": 3.736967269245555e-05,
|
|
"loss": 0.3792,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 2.846043165467626,
|
|
"grad_norm": 0.23002971458232002,
|
|
"learning_rate": 3.732957434065751e-05,
|
|
"loss": 0.3851,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 2.8474820143884894,
|
|
"grad_norm": 0.18571076257524893,
|
|
"learning_rate": 3.728947868426701e-05,
|
|
"loss": 0.3766,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 2.8489208633093526,
|
|
"grad_norm": 0.24238508646392748,
|
|
"learning_rate": 3.724938576375479e-05,
|
|
"loss": 0.3697,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 2.850359712230216,
|
|
"grad_norm": 0.17022198003356945,
|
|
"learning_rate": 3.7209295619588856e-05,
|
|
"loss": 0.3889,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 2.851798561151079,
|
|
"grad_norm": 0.21202154551917904,
|
|
"learning_rate": 3.7169208292234395e-05,
|
|
"loss": 0.3797,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 2.8532374100719426,
|
|
"grad_norm": 0.22406809058149044,
|
|
"learning_rate": 3.7129123822153746e-05,
|
|
"loss": 0.3863,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 2.854676258992806,
|
|
"grad_norm": 0.179235843665288,
|
|
"learning_rate": 3.708904224980636e-05,
|
|
"loss": 0.3862,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 2.856115107913669,
|
|
"grad_norm": 0.18942913936130998,
|
|
"learning_rate": 3.704896361564881e-05,
|
|
"loss": 0.3834,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 2.8575539568345323,
|
|
"grad_norm": 0.16981464636407365,
|
|
"learning_rate": 3.700888796013462e-05,
|
|
"loss": 0.3793,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 2.8589928057553955,
|
|
"grad_norm": 0.18869526057105387,
|
|
"learning_rate": 3.696881532371439e-05,
|
|
"loss": 0.3771,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 2.860431654676259,
|
|
"grad_norm": 0.19570915141944986,
|
|
"learning_rate": 3.692874574683562e-05,
|
|
"loss": 0.3862,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 2.8618705035971224,
|
|
"grad_norm": 0.16090360310710594,
|
|
"learning_rate": 3.688867926994274e-05,
|
|
"loss": 0.3772,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 2.8633093525179856,
|
|
"grad_norm": 0.18237054170007996,
|
|
"learning_rate": 3.684861593347705e-05,
|
|
"loss": 0.3819,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 2.864748201438849,
|
|
"grad_norm": 0.20580337324225872,
|
|
"learning_rate": 3.6808555777876673e-05,
|
|
"loss": 0.3909,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 2.866187050359712,
|
|
"grad_norm": 0.17301918822492318,
|
|
"learning_rate": 3.676849884357655e-05,
|
|
"loss": 0.3904,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 2.8676258992805757,
|
|
"grad_norm": 0.16949160306286967,
|
|
"learning_rate": 3.672844517100833e-05,
|
|
"loss": 0.3868,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 2.869064748201439,
|
|
"grad_norm": 0.19403013801639782,
|
|
"learning_rate": 3.66883948006004e-05,
|
|
"loss": 0.3839,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 2.870503597122302,
|
|
"grad_norm": 0.16360370291892293,
|
|
"learning_rate": 3.664834777277777e-05,
|
|
"loss": 0.3895,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 2.8719424460431657,
|
|
"grad_norm": 0.21385354971369736,
|
|
"learning_rate": 3.6608304127962166e-05,
|
|
"loss": 0.3913,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 2.873381294964029,
|
|
"grad_norm": 0.15813375475190072,
|
|
"learning_rate": 3.656826390657179e-05,
|
|
"loss": 0.3868,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 2.874820143884892,
|
|
"grad_norm": 0.18378078936862144,
|
|
"learning_rate": 3.6528227149021455e-05,
|
|
"loss": 0.3775,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 2.8762589928057554,
|
|
"grad_norm": 0.20546102375015077,
|
|
"learning_rate": 3.648819389572248e-05,
|
|
"loss": 0.3781,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 2.8776978417266186,
|
|
"grad_norm": 0.1676510429854442,
|
|
"learning_rate": 3.644816418708261e-05,
|
|
"loss": 0.3832,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 2.8791366906474822,
|
|
"grad_norm": 0.2236787953617335,
|
|
"learning_rate": 3.6408138063506057e-05,
|
|
"loss": 0.385,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 2.8805755395683454,
|
|
"grad_norm": 0.19145192066461608,
|
|
"learning_rate": 3.636811556539335e-05,
|
|
"loss": 0.3812,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 2.8820143884892087,
|
|
"grad_norm": 0.1872206646975845,
|
|
"learning_rate": 3.6328096733141423e-05,
|
|
"loss": 0.38,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 2.883453237410072,
|
|
"grad_norm": 0.2321990692239429,
|
|
"learning_rate": 3.6288081607143496e-05,
|
|
"loss": 0.3813,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 2.884892086330935,
|
|
"grad_norm": 0.17099942650720834,
|
|
"learning_rate": 3.6248070227789034e-05,
|
|
"loss": 0.3764,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 2.8863309352517987,
|
|
"grad_norm": 0.23596791262595684,
|
|
"learning_rate": 3.620806263546369e-05,
|
|
"loss": 0.3804,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 2.887769784172662,
|
|
"grad_norm": 0.24036669487005471,
|
|
"learning_rate": 3.6168058870549355e-05,
|
|
"loss": 0.383,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 2.889208633093525,
|
|
"grad_norm": 0.20243601892813715,
|
|
"learning_rate": 3.612805897342405e-05,
|
|
"loss": 0.3809,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 2.8906474820143884,
|
|
"grad_norm": 0.2239010107795876,
|
|
"learning_rate": 3.608806298446182e-05,
|
|
"loss": 0.3854,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 2.8920863309352516,
|
|
"grad_norm": 0.20023459982280983,
|
|
"learning_rate": 3.604807094403286e-05,
|
|
"loss": 0.3789,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 2.8935251798561152,
|
|
"grad_norm": 0.2063308531165251,
|
|
"learning_rate": 3.6008082892503325e-05,
|
|
"loss": 0.3773,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 2.8949640287769784,
|
|
"grad_norm": 0.23011750084514337,
|
|
"learning_rate": 3.596809887023534e-05,
|
|
"loss": 0.3859,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 2.8964028776978417,
|
|
"grad_norm": 0.16549451793637004,
|
|
"learning_rate": 3.5928118917587e-05,
|
|
"loss": 0.3848,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 2.897841726618705,
|
|
"grad_norm": 0.20369313785516174,
|
|
"learning_rate": 3.588814307491227e-05,
|
|
"loss": 0.3777,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 2.899280575539568,
|
|
"grad_norm": 0.2021886266344476,
|
|
"learning_rate": 3.584817138256096e-05,
|
|
"loss": 0.3796,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 2.9007194244604317,
|
|
"grad_norm": 0.16998478237352915,
|
|
"learning_rate": 3.580820388087869e-05,
|
|
"loss": 0.3829,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 2.902158273381295,
|
|
"grad_norm": 0.18800834166027916,
|
|
"learning_rate": 3.5768240610206855e-05,
|
|
"loss": 0.3959,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 2.903597122302158,
|
|
"grad_norm": 0.1778473439620837,
|
|
"learning_rate": 3.572828161088262e-05,
|
|
"loss": 0.3817,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 2.905035971223022,
|
|
"grad_norm": 0.16721638119908358,
|
|
"learning_rate": 3.568832692323876e-05,
|
|
"loss": 0.3758,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 2.906474820143885,
|
|
"grad_norm": 0.1893160388827097,
|
|
"learning_rate": 3.564837658760376e-05,
|
|
"loss": 0.3837,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 2.9079136690647482,
|
|
"grad_norm": 0.17890313275774383,
|
|
"learning_rate": 3.560843064430168e-05,
|
|
"loss": 0.3854,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 2.9093525179856115,
|
|
"grad_norm": 0.19072896399989747,
|
|
"learning_rate": 3.556848913365218e-05,
|
|
"loss": 0.3827,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 2.9107913669064747,
|
|
"grad_norm": 0.2280625411531843,
|
|
"learning_rate": 3.552855209597039e-05,
|
|
"loss": 0.3919,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 2.9122302158273383,
|
|
"grad_norm": 0.2124682020162911,
|
|
"learning_rate": 3.548861957156698e-05,
|
|
"loss": 0.3746,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 2.9136690647482015,
|
|
"grad_norm": 0.18005438775472654,
|
|
"learning_rate": 3.544869160074806e-05,
|
|
"loss": 0.3861,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 2.9151079136690647,
|
|
"grad_norm": 0.198732308173173,
|
|
"learning_rate": 3.5408768223815105e-05,
|
|
"loss": 0.3845,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 2.916546762589928,
|
|
"grad_norm": 0.18011580966846377,
|
|
"learning_rate": 3.536884948106498e-05,
|
|
"loss": 0.382,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 2.917985611510791,
|
|
"grad_norm": 0.18812759566769532,
|
|
"learning_rate": 3.532893541278986e-05,
|
|
"loss": 0.386,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 2.919424460431655,
|
|
"grad_norm": 0.23548893743761498,
|
|
"learning_rate": 3.528902605927722e-05,
|
|
"loss": 0.3896,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 2.920863309352518,
|
|
"grad_norm": 0.16194759620341975,
|
|
"learning_rate": 3.524912146080978e-05,
|
|
"loss": 0.3779,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 2.9223021582733812,
|
|
"grad_norm": 0.16862283343100698,
|
|
"learning_rate": 3.5209221657665436e-05,
|
|
"loss": 0.3852,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 2.9237410071942445,
|
|
"grad_norm": 0.18165888741805836,
|
|
"learning_rate": 3.516932669011723e-05,
|
|
"loss": 0.387,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 2.9251798561151077,
|
|
"grad_norm": 0.19149373207839285,
|
|
"learning_rate": 3.512943659843337e-05,
|
|
"loss": 0.3859,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 2.9266187050359713,
|
|
"grad_norm": 0.17793991817294222,
|
|
"learning_rate": 3.508955142287714e-05,
|
|
"loss": 0.3863,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 2.9280575539568345,
|
|
"grad_norm": 0.18357088820264988,
|
|
"learning_rate": 3.50496712037068e-05,
|
|
"loss": 0.3798,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 2.9294964028776977,
|
|
"grad_norm": 0.18291335189786462,
|
|
"learning_rate": 3.5009795981175676e-05,
|
|
"loss": 0.387,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 2.9309352517985614,
|
|
"grad_norm": 0.2040841192133014,
|
|
"learning_rate": 3.496992579553203e-05,
|
|
"loss": 0.3829,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 2.9323741007194246,
|
|
"grad_norm": 0.17536656915876747,
|
|
"learning_rate": 3.4930060687019015e-05,
|
|
"loss": 0.381,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 2.933812949640288,
|
|
"grad_norm": 0.14581795922536053,
|
|
"learning_rate": 3.489020069587467e-05,
|
|
"loss": 0.38,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 2.935251798561151,
|
|
"grad_norm": 0.18899513073057425,
|
|
"learning_rate": 3.485034586233192e-05,
|
|
"loss": 0.3735,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 2.9366906474820142,
|
|
"grad_norm": 0.17596429632249244,
|
|
"learning_rate": 3.4810496226618404e-05,
|
|
"loss": 0.3811,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 2.938129496402878,
|
|
"grad_norm": 0.18474103247518409,
|
|
"learning_rate": 3.477065182895656e-05,
|
|
"loss": 0.379,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 2.939568345323741,
|
|
"grad_norm": 0.20050017686673058,
|
|
"learning_rate": 3.473081270956352e-05,
|
|
"loss": 0.379,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 2.9410071942446043,
|
|
"grad_norm": 0.16363877461263454,
|
|
"learning_rate": 3.469097890865113e-05,
|
|
"loss": 0.3812,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 2.9424460431654675,
|
|
"grad_norm": 0.1449511706782976,
|
|
"learning_rate": 3.465115046642581e-05,
|
|
"loss": 0.3821,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 2.9438848920863308,
|
|
"grad_norm": 0.2016092688518925,
|
|
"learning_rate": 3.461132742308859e-05,
|
|
"loss": 0.3812,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 2.9453237410071944,
|
|
"grad_norm": 0.1849281926806324,
|
|
"learning_rate": 3.45715098188351e-05,
|
|
"loss": 0.3783,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 2.9467625899280576,
|
|
"grad_norm": 0.1636492261152567,
|
|
"learning_rate": 3.453169769385541e-05,
|
|
"loss": 0.3755,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 2.948201438848921,
|
|
"grad_norm": 0.21230674314569922,
|
|
"learning_rate": 3.449189108833409e-05,
|
|
"loss": 0.3859,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 2.949640287769784,
|
|
"grad_norm": 0.17355160873539235,
|
|
"learning_rate": 3.445209004245012e-05,
|
|
"loss": 0.3813,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 2.9510791366906473,
|
|
"grad_norm": 0.21249934066959503,
|
|
"learning_rate": 3.441229459637693e-05,
|
|
"loss": 0.3935,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 2.952517985611511,
|
|
"grad_norm": 0.19536039730373267,
|
|
"learning_rate": 3.4372504790282215e-05,
|
|
"loss": 0.3854,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 2.953956834532374,
|
|
"grad_norm": 0.21512738325880446,
|
|
"learning_rate": 3.4332720664328034e-05,
|
|
"loss": 0.3793,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 2.9553956834532373,
|
|
"grad_norm": 0.18153668050088206,
|
|
"learning_rate": 3.4292942258670675e-05,
|
|
"loss": 0.3741,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 2.956834532374101,
|
|
"grad_norm": 0.19541109054587436,
|
|
"learning_rate": 3.425316961346069e-05,
|
|
"loss": 0.3943,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 2.9582733812949638,
|
|
"grad_norm": 0.23017089582725975,
|
|
"learning_rate": 3.42134027688428e-05,
|
|
"loss": 0.3877,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 2.9597122302158274,
|
|
"grad_norm": 0.22136958376711582,
|
|
"learning_rate": 3.417364176495585e-05,
|
|
"loss": 0.3789,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 2.9611510791366906,
|
|
"grad_norm": 0.15454506500789747,
|
|
"learning_rate": 3.4133886641932834e-05,
|
|
"loss": 0.377,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 2.962589928057554,
|
|
"grad_norm": 0.19767544950757399,
|
|
"learning_rate": 3.409413743990076e-05,
|
|
"loss": 0.3777,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 2.9640287769784175,
|
|
"grad_norm": 0.20114233970399212,
|
|
"learning_rate": 3.4054394198980705e-05,
|
|
"loss": 0.3784,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 2.9654676258992807,
|
|
"grad_norm": 0.20937453973906428,
|
|
"learning_rate": 3.401465695928768e-05,
|
|
"loss": 0.3964,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 2.966906474820144,
|
|
"grad_norm": 0.2441041957715334,
|
|
"learning_rate": 3.3974925760930694e-05,
|
|
"loss": 0.377,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 2.968345323741007,
|
|
"grad_norm": 0.2007504355151718,
|
|
"learning_rate": 3.393520064401264e-05,
|
|
"loss": 0.3872,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 2.9697841726618703,
|
|
"grad_norm": 0.19647329118372192,
|
|
"learning_rate": 3.3895481648630234e-05,
|
|
"loss": 0.3831,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 2.971223021582734,
|
|
"grad_norm": 0.18122081326202852,
|
|
"learning_rate": 3.385576881487405e-05,
|
|
"loss": 0.3888,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 2.972661870503597,
|
|
"grad_norm": 0.25555287682710004,
|
|
"learning_rate": 3.381606218282846e-05,
|
|
"loss": 0.3916,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 2.9741007194244604,
|
|
"grad_norm": 0.23255831887686393,
|
|
"learning_rate": 3.377636179257153e-05,
|
|
"loss": 0.3823,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 2.9755395683453236,
|
|
"grad_norm": 0.14364406848385716,
|
|
"learning_rate": 3.373666768417505e-05,
|
|
"loss": 0.3818,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 2.976978417266187,
|
|
"grad_norm": 0.2618212972538752,
|
|
"learning_rate": 3.3696979897704466e-05,
|
|
"loss": 0.3795,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 2.9784172661870505,
|
|
"grad_norm": 0.2530419402530481,
|
|
"learning_rate": 3.3657298473218864e-05,
|
|
"loss": 0.3838,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 2.9798561151079137,
|
|
"grad_norm": 0.25782215341443526,
|
|
"learning_rate": 3.361762345077087e-05,
|
|
"loss": 0.3776,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 2.981294964028777,
|
|
"grad_norm": 0.16161716170460844,
|
|
"learning_rate": 3.3577954870406656e-05,
|
|
"loss": 0.3754,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 2.98273381294964,
|
|
"grad_norm": 0.22681483693059457,
|
|
"learning_rate": 3.3538292772165936e-05,
|
|
"loss": 0.3881,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 2.9841726618705033,
|
|
"grad_norm": 0.2530069917852246,
|
|
"learning_rate": 3.3498637196081825e-05,
|
|
"loss": 0.389,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 2.985611510791367,
|
|
"grad_norm": 0.19632426599184888,
|
|
"learning_rate": 3.345898818218089e-05,
|
|
"loss": 0.3774,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 2.98705035971223,
|
|
"grad_norm": 0.23862954905472558,
|
|
"learning_rate": 3.341934577048304e-05,
|
|
"loss": 0.3739,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 2.9884892086330934,
|
|
"grad_norm": 0.18179678789242132,
|
|
"learning_rate": 3.337971000100157e-05,
|
|
"loss": 0.3812,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 2.989928057553957,
|
|
"grad_norm": 0.19456753284664338,
|
|
"learning_rate": 3.334008091374303e-05,
|
|
"loss": 0.3853,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 2.9913669064748203,
|
|
"grad_norm": 0.20535186183704324,
|
|
"learning_rate": 3.3300458548707214e-05,
|
|
"loss": 0.3853,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 2.9928057553956835,
|
|
"grad_norm": 0.1618595694578767,
|
|
"learning_rate": 3.326084294588721e-05,
|
|
"loss": 0.382,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 2.9942446043165467,
|
|
"grad_norm": 0.16428242621568054,
|
|
"learning_rate": 3.322123414526917e-05,
|
|
"loss": 0.3797,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 2.99568345323741,
|
|
"grad_norm": 0.18039994978818347,
|
|
"learning_rate": 3.3181632186832485e-05,
|
|
"loss": 0.3864,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 2.9971223021582736,
|
|
"grad_norm": 0.19291373445577667,
|
|
"learning_rate": 3.3142037110549546e-05,
|
|
"loss": 0.3807,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 2.998561151079137,
|
|
"grad_norm": 0.14060435573310243,
|
|
"learning_rate": 3.310244895638587e-05,
|
|
"loss": 0.3833,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.20962392724853285,
|
|
"learning_rate": 3.306286776429995e-05,
|
|
"loss": 0.3662,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 3.001438848920863,
|
|
"grad_norm": 0.21619889856359806,
|
|
"learning_rate": 3.302329357424326e-05,
|
|
"loss": 0.3587,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 3.0028776978417264,
|
|
"grad_norm": 0.16528095084457858,
|
|
"learning_rate": 3.2983726426160204e-05,
|
|
"loss": 0.3682,
|
|
"step": 2087
|
|
},
|
|
{
"epoch": 3.00431654676259,
"grad_norm": 0.22390906521786053,
"learning_rate": 3.2944166359988083e-05,
"loss": 0.3636,
"step": 2088
},
{
"epoch": 3.0057553956834533,
"grad_norm": 0.2065680724849588,
"learning_rate": 3.290461341565707e-05,
"loss": 0.3619,
"step": 2089
},
{
"epoch": 3.0071942446043165,
"grad_norm": 0.2081271520773317,
"learning_rate": 3.286506763309009e-05,
"loss": 0.3649,
"step": 2090
},
{
"epoch": 3.0086330935251797,
"grad_norm": 0.22042237214404262,
"learning_rate": 3.2825529052202904e-05,
"loss": 0.3679,
"step": 2091
},
{
"epoch": 3.0100719424460434,
"grad_norm": 0.2219849468694502,
"learning_rate": 3.278599771290397e-05,
"loss": 0.3627,
"step": 2092
},
{
"epoch": 3.0115107913669066,
"grad_norm": 0.20449507922114485,
"learning_rate": 3.274647365509445e-05,
"loss": 0.3618,
"step": 2093
},
{
"epoch": 3.01294964028777,
"grad_norm": 0.19516773345146313,
"learning_rate": 3.2706956918668126e-05,
"loss": 0.3555,
"step": 2094
},
{
"epoch": 3.014388489208633,
"grad_norm": 0.21468549130026254,
"learning_rate": 3.266744754351144e-05,
"loss": 0.3557,
"step": 2095
},
{
"epoch": 3.015827338129496,
"grad_norm": 0.16744208356106816,
"learning_rate": 3.262794556950338e-05,
"loss": 0.3554,
"step": 2096
},
{
"epoch": 3.01726618705036,
"grad_norm": 0.16109833458786763,
"learning_rate": 3.2588451036515435e-05,
"loss": 0.3479,
"step": 2097
},
{
"epoch": 3.018705035971223,
"grad_norm": 0.1654559438250083,
"learning_rate": 3.2548963984411623e-05,
"loss": 0.3525,
"step": 2098
},
{
"epoch": 3.0201438848920863,
"grad_norm": 0.23346365778974035,
"learning_rate": 3.2509484453048413e-05,
"loss": 0.3538,
"step": 2099
},
{
"epoch": 3.0215827338129495,
"grad_norm": 0.2016023591604391,
"learning_rate": 3.247001248227465e-05,
"loss": 0.371,
"step": 2100
},
{
"epoch": 3.023021582733813,
"grad_norm": 0.19309219962287688,
"learning_rate": 3.2430548111931574e-05,
"loss": 0.3581,
"step": 2101
},
{
"epoch": 3.0244604316546764,
"grad_norm": 0.22893289524912117,
"learning_rate": 3.239109138185275e-05,
"loss": 0.352,
"step": 2102
},
{
"epoch": 3.0258992805755396,
"grad_norm": 0.1823579350695761,
"learning_rate": 3.2351642331864024e-05,
"loss": 0.3461,
"step": 2103
},
{
"epoch": 3.027338129496403,
"grad_norm": 0.23057632583515714,
"learning_rate": 3.2312201001783473e-05,
"loss": 0.3632,
"step": 2104
},
{
"epoch": 3.028776978417266,
"grad_norm": 0.19407913247317765,
"learning_rate": 3.2272767431421416e-05,
"loss": 0.3568,
"step": 2105
},
{
"epoch": 3.0302158273381297,
"grad_norm": 0.21499094081792575,
"learning_rate": 3.2233341660580335e-05,
"loss": 0.3561,
"step": 2106
},
{
"epoch": 3.031654676258993,
"grad_norm": 0.18429910400193175,
"learning_rate": 3.219392372905482e-05,
"loss": 0.3569,
"step": 2107
},
{
"epoch": 3.033093525179856,
"grad_norm": 0.19670709356593538,
"learning_rate": 3.215451367663156e-05,
"loss": 0.3554,
"step": 2108
},
{
"epoch": 3.0345323741007193,
"grad_norm": 0.15302691580328823,
"learning_rate": 3.211511154308927e-05,
"loss": 0.3576,
"step": 2109
},
{
"epoch": 3.0359712230215825,
"grad_norm": 0.2198168802390004,
"learning_rate": 3.207571736819873e-05,
"loss": 0.3604,
"step": 2110
},
{
"epoch": 3.037410071942446,
"grad_norm": 0.19695175083534847,
"learning_rate": 3.203633119172262e-05,
"loss": 0.3607,
"step": 2111
},
{
"epoch": 3.0388489208633094,
"grad_norm": 0.17183911872453927,
"learning_rate": 3.1996953053415575e-05,
"loss": 0.3531,
"step": 2112
},
{
"epoch": 3.0402877697841726,
"grad_norm": 0.15950445760316337,
"learning_rate": 3.1957582993024135e-05,
"loss": 0.3505,
"step": 2113
},
{
"epoch": 3.041726618705036,
"grad_norm": 0.16651024348404972,
"learning_rate": 3.191822105028665e-05,
"loss": 0.3572,
"step": 2114
},
{
"epoch": 3.0431654676258995,
"grad_norm": 0.18357069222324512,
"learning_rate": 3.1878867264933305e-05,
"loss": 0.3705,
"step": 2115
},
{
"epoch": 3.0446043165467627,
"grad_norm": 0.1655722157556818,
"learning_rate": 3.1839521676686026e-05,
"loss": 0.3606,
"step": 2116
},
{
"epoch": 3.046043165467626,
"grad_norm": 0.15533569118435317,
"learning_rate": 3.1800184325258494e-05,
"loss": 0.3587,
"step": 2117
},
{
"epoch": 3.047482014388489,
"grad_norm": 0.1687168913745226,
"learning_rate": 3.176085525035607e-05,
"loss": 0.3569,
"step": 2118
},
{
"epoch": 3.0489208633093523,
"grad_norm": 0.16557517890379822,
"learning_rate": 3.172153449167574e-05,
"loss": 0.3685,
"step": 2119
},
{
"epoch": 3.050359712230216,
"grad_norm": 0.1488864035704192,
"learning_rate": 3.1682222088906096e-05,
"loss": 0.3598,
"step": 2120
},
|
|
{
|
|
"epoch": 3.051798561151079,
|
|
"grad_norm": 0.1568998577905944,
|
|
"learning_rate": 3.1642918081727327e-05,
|
|
"loss": 0.3675,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 3.0532374100719424,
|
|
"grad_norm": 0.16988616928641276,
|
|
"learning_rate": 3.1603622509811144e-05,
|
|
"loss": 0.3559,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 3.0546762589928056,
|
|
"grad_norm": 0.14438896311486202,
|
|
"learning_rate": 3.156433541282069e-05,
|
|
"loss": 0.3547,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 3.0561151079136692,
|
|
"grad_norm": 0.2006520256627741,
|
|
"learning_rate": 3.152505683041062e-05,
|
|
"loss": 0.3617,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 3.0575539568345325,
|
|
"grad_norm": 0.15484064636149827,
|
|
"learning_rate": 3.1485786802226976e-05,
|
|
"loss": 0.3582,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 3.0589928057553957,
|
|
"grad_norm": 0.17080563201151322,
|
|
"learning_rate": 3.1446525367907134e-05,
|
|
"loss": 0.3622,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 3.060431654676259,
|
|
"grad_norm": 0.17380609629341132,
|
|
"learning_rate": 3.1407272567079834e-05,
|
|
"loss": 0.3584,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 3.061870503597122,
|
|
"grad_norm": 0.16282376148024774,
|
|
"learning_rate": 3.136802843936509e-05,
|
|
"loss": 0.3577,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 3.0633093525179858,
|
|
"grad_norm": 0.14662575053519,
|
|
"learning_rate": 3.132879302437416e-05,
|
|
"loss": 0.3558,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 3.064748201438849,
|
|
"grad_norm": 0.17026769515325033,
|
|
"learning_rate": 3.128956636170949e-05,
|
|
"loss": 0.3581,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 3.066187050359712,
|
|
"grad_norm": 0.15946481231758242,
|
|
"learning_rate": 3.125034849096471e-05,
|
|
"loss": 0.3525,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 3.0676258992805754,
|
|
"grad_norm": 0.17697372484259227,
|
|
"learning_rate": 3.1211139451724605e-05,
|
|
"loss": 0.3596,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 3.069064748201439,
|
|
"grad_norm": 0.15305642408227726,
|
|
"learning_rate": 3.1171939283564986e-05,
|
|
"loss": 0.3559,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 3.0705035971223023,
|
|
"grad_norm": 0.15696537699311786,
|
|
"learning_rate": 3.113274802605276e-05,
|
|
"loss": 0.3445,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 3.0719424460431655,
|
|
"grad_norm": 0.14849813980515722,
|
|
"learning_rate": 3.109356571874579e-05,
|
|
"loss": 0.3555,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 3.0733812949640287,
|
|
"grad_norm": 0.16017208473120856,
|
|
"learning_rate": 3.105439240119296e-05,
|
|
"loss": 0.3635,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 3.074820143884892,
|
|
"grad_norm": 0.14835229388200896,
|
|
"learning_rate": 3.101522811293405e-05,
|
|
"loss": 0.3664,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 3.0762589928057555,
|
|
"grad_norm": 0.15700473213427069,
|
|
"learning_rate": 3.0976072893499724e-05,
|
|
"loss": 0.3661,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 3.0776978417266188,
|
|
"grad_norm": 0.14757851206651312,
|
|
"learning_rate": 3.093692678241151e-05,
|
|
"loss": 0.3544,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 3.079136690647482,
|
|
"grad_norm": 0.16265247324400633,
|
|
"learning_rate": 3.0897789819181715e-05,
|
|
"loss": 0.3565,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 3.080575539568345,
|
|
"grad_norm": 0.15856324610208725,
|
|
"learning_rate": 3.0858662043313456e-05,
|
|
"loss": 0.3526,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 3.082014388489209,
|
|
"grad_norm": 0.18882952537764694,
|
|
"learning_rate": 3.081954349430051e-05,
|
|
"loss": 0.3544,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 3.083453237410072,
|
|
"grad_norm": 0.17735256938040708,
|
|
"learning_rate": 3.0780434211627415e-05,
|
|
"loss": 0.3594,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 3.0848920863309353,
|
|
"grad_norm": 0.2095682087082436,
|
|
"learning_rate": 3.074133423476932e-05,
|
|
"loss": 0.3619,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 3.0863309352517985,
|
|
"grad_norm": 0.23080295866770068,
|
|
"learning_rate": 3.070224360319197e-05,
|
|
"loss": 0.3704,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 3.0877697841726617,
|
|
"grad_norm": 0.2172824516455839,
|
|
"learning_rate": 3.066316235635168e-05,
|
|
"loss": 0.3489,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 3.0892086330935253,
|
|
"grad_norm": 0.17519786457840006,
|
|
"learning_rate": 3.0624090533695324e-05,
|
|
"loss": 0.3594,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 3.0906474820143885,
|
|
"grad_norm": 0.19943922375475642,
|
|
"learning_rate": 3.0585028174660236e-05,
|
|
"loss": 0.3561,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 3.0920863309352518,
|
|
"grad_norm": 0.16959968381893364,
|
|
"learning_rate": 3.054597531867419e-05,
|
|
"loss": 0.3527,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 3.093525179856115,
|
|
"grad_norm": 0.18635838999318552,
|
|
"learning_rate": 3.0506932005155407e-05,
|
|
"loss": 0.3545,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 3.0949640287769786,
|
|
"grad_norm": 0.18123809302767452,
|
|
"learning_rate": 3.0467898273512446e-05,
|
|
"loss": 0.3672,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 3.096402877697842,
|
|
"grad_norm": 0.16577631906873233,
|
|
"learning_rate": 3.042887416314418e-05,
|
|
"loss": 0.3617,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 3.097841726618705,
|
|
"grad_norm": 0.18178233676652397,
|
|
"learning_rate": 3.03898597134398e-05,
|
|
"loss": 0.3562,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 3.0992805755395683,
|
|
"grad_norm": 0.13203131371888968,
|
|
"learning_rate": 3.0350854963778755e-05,
|
|
"loss": 0.3606,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 3.1007194244604315,
|
|
"grad_norm": 0.17389978928109825,
|
|
"learning_rate": 3.0311859953530672e-05,
|
|
"loss": 0.354,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 3.102158273381295,
|
|
"grad_norm": 0.13848256797484568,
|
|
"learning_rate": 3.027287472205535e-05,
|
|
"loss": 0.3534,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 3.1035971223021583,
|
|
"grad_norm": 0.1427812330933784,
|
|
"learning_rate": 3.0233899308702722e-05,
|
|
"loss": 0.3601,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 3.1050359712230216,
|
|
"grad_norm": 0.1767226919733937,
|
|
"learning_rate": 3.0194933752812853e-05,
|
|
"loss": 0.3579,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 3.1064748201438848,
|
|
"grad_norm": 0.1458653293170556,
|
|
"learning_rate": 3.0155978093715787e-05,
|
|
"loss": 0.3667,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 3.1079136690647484,
|
|
"grad_norm": 0.18231251272438925,
|
|
"learning_rate": 3.011703237073162e-05,
|
|
"loss": 0.3552,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 3.1093525179856116,
|
|
"grad_norm": 0.1454439098431874,
|
|
"learning_rate": 3.0078096623170442e-05,
|
|
"loss": 0.3518,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 3.110791366906475,
|
|
"grad_norm": 0.15499114895666516,
|
|
"learning_rate": 3.0039170890332214e-05,
|
|
"loss": 0.3622,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 3.112230215827338,
|
|
"grad_norm": 0.16113668741732023,
|
|
"learning_rate": 3.0000255211506836e-05,
|
|
"loss": 0.3534,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 3.1136690647482013,
|
|
"grad_norm": 0.13820184555157555,
|
|
"learning_rate": 2.9961349625974022e-05,
|
|
"loss": 0.3596,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 3.115107913669065,
|
|
"grad_norm": 0.1454512781333098,
|
|
"learning_rate": 2.992245417300335e-05,
|
|
"loss": 0.3499,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 3.116546762589928,
|
|
"grad_norm": 0.1609290958314594,
|
|
"learning_rate": 2.9883568891854118e-05,
|
|
"loss": 0.3608,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 3.1179856115107913,
|
|
"grad_norm": 0.1558444581398874,
|
|
"learning_rate": 2.9844693821775394e-05,
|
|
"loss": 0.3628,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 3.1194244604316546,
|
|
"grad_norm": 0.18996332001830474,
|
|
"learning_rate": 2.9805829002005907e-05,
|
|
"loss": 0.3636,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 3.1208633093525178,
|
|
"grad_norm": 0.18681862933572344,
|
|
"learning_rate": 2.9766974471774072e-05,
|
|
"loss": 0.3559,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 3.1223021582733814,
|
|
"grad_norm": 0.40458429991323674,
|
|
"learning_rate": 2.9728130270297913e-05,
|
|
"loss": 0.3529,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 3.1237410071942446,
|
|
"grad_norm": 0.2009835950937076,
|
|
"learning_rate": 2.968929643678499e-05,
|
|
"loss": 0.3669,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 3.125179856115108,
|
|
"grad_norm": 0.1519825409827222,
|
|
"learning_rate": 2.965047301043246e-05,
|
|
"loss": 0.3606,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 3.126618705035971,
|
|
"grad_norm": 0.20851808280182513,
|
|
"learning_rate": 2.961166003042692e-05,
|
|
"loss": 0.3643,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 3.1280575539568347,
|
|
"grad_norm": 0.38362336773904415,
|
|
"learning_rate": 2.9572857535944473e-05,
|
|
"loss": 0.366,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 3.129496402877698,
|
|
"grad_norm": 0.18246350592432198,
|
|
"learning_rate": 2.9534065566150567e-05,
|
|
"loss": 0.3526,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 3.130935251798561,
|
|
"grad_norm": 0.16192331992495992,
|
|
"learning_rate": 2.9495284160200105e-05,
|
|
"loss": 0.3626,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 3.1323741007194243,
|
|
"grad_norm": 0.1609986155841853,
|
|
"learning_rate": 2.9456513357237305e-05,
|
|
"loss": 0.3581,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 3.133812949640288,
|
|
"grad_norm": 0.17686318929039027,
|
|
"learning_rate": 2.9417753196395637e-05,
|
|
"loss": 0.3665,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 3.135251798561151,
|
|
"grad_norm": 0.16851649032856184,
|
|
"learning_rate": 2.9379003716797877e-05,
|
|
"loss": 0.3711,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 3.1366906474820144,
|
|
"grad_norm": 0.17286215935183727,
|
|
"learning_rate": 2.9340264957556018e-05,
|
|
"loss": 0.3597,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 3.1381294964028776,
|
|
"grad_norm": 0.1526652947832057,
|
|
"learning_rate": 2.9301536957771218e-05,
|
|
"loss": 0.3619,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 3.139568345323741,
|
|
"grad_norm": 0.17140378918432134,
|
|
"learning_rate": 2.9262819756533754e-05,
|
|
"loss": 0.35,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 3.1410071942446045,
|
|
"grad_norm": 0.15847265023950896,
|
|
"learning_rate": 2.922411339292306e-05,
|
|
"loss": 0.3664,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 3.1424460431654677,
|
|
"grad_norm": 0.1811184384471024,
|
|
"learning_rate": 2.9185417906007586e-05,
|
|
"loss": 0.3564,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 3.143884892086331,
|
|
"grad_norm": 0.14846907918685054,
|
|
"learning_rate": 2.914673333484481e-05,
|
|
"loss": 0.3643,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 3.145323741007194,
|
|
"grad_norm": 0.1552778685516918,
|
|
"learning_rate": 2.9108059718481184e-05,
|
|
"loss": 0.3622,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 3.1467625899280574,
|
|
"grad_norm": 0.1329837326760857,
|
|
"learning_rate": 2.906939709595216e-05,
|
|
"loss": 0.3564,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 3.148201438848921,
|
|
"grad_norm": 0.16564191102546028,
|
|
"learning_rate": 2.9030745506282017e-05,
|
|
"loss": 0.3559,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 3.149640287769784,
|
|
"grad_norm": 0.14185703037757968,
|
|
"learning_rate": 2.8992104988483943e-05,
|
|
"loss": 0.3533,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 3.1510791366906474,
|
|
"grad_norm": 0.18316037907405922,
|
|
"learning_rate": 2.895347558155992e-05,
|
|
"loss": 0.3586,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 3.1525179856115106,
|
|
"grad_norm": 0.13124942273831314,
|
|
"learning_rate": 2.8914857324500767e-05,
|
|
"loss": 0.3631,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 3.1539568345323743,
|
|
"grad_norm": 0.1400941668310528,
|
|
"learning_rate": 2.887625025628599e-05,
|
|
"loss": 0.3549,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 3.1553956834532375,
|
|
"grad_norm": 0.151108362713932,
|
|
"learning_rate": 2.8837654415883817e-05,
|
|
"loss": 0.3609,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 3.1568345323741007,
|
|
"grad_norm": 0.16152994319613845,
|
|
"learning_rate": 2.879906984225119e-05,
|
|
"loss": 0.366,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 3.158273381294964,
|
|
"grad_norm": 0.14889481634896684,
|
|
"learning_rate": 2.8760496574333613e-05,
|
|
"loss": 0.3673,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 3.159712230215827,
|
|
"grad_norm": 0.15840840261709813,
|
|
"learning_rate": 2.8721934651065227e-05,
|
|
"loss": 0.3631,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 3.161151079136691,
|
|
"grad_norm": 0.13297913513938822,
|
|
"learning_rate": 2.8683384111368675e-05,
|
|
"loss": 0.3507,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 3.162589928057554,
|
|
"grad_norm": 0.15330226120846352,
|
|
"learning_rate": 2.864484499415517e-05,
|
|
"loss": 0.3554,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 3.1640287769784172,
|
|
"grad_norm": 0.14835817294795542,
|
|
"learning_rate": 2.8606317338324347e-05,
|
|
"loss": 0.3578,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 3.1654676258992804,
|
|
"grad_norm": 0.14776839676726475,
|
|
"learning_rate": 2.856780118276429e-05,
|
|
"loss": 0.3696,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 3.166906474820144,
|
|
"grad_norm": 0.13836434670229775,
|
|
"learning_rate": 2.852929656635146e-05,
|
|
"loss": 0.3579,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 3.1683453237410073,
|
|
"grad_norm": 0.1597831002988147,
|
|
"learning_rate": 2.8490803527950706e-05,
|
|
"loss": 0.3571,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 3.1697841726618705,
|
|
"grad_norm": 0.14586007986347574,
|
|
"learning_rate": 2.845232210641517e-05,
|
|
"loss": 0.3617,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 3.1712230215827337,
|
|
"grad_norm": 0.14769325230225838,
|
|
"learning_rate": 2.841385234058624e-05,
|
|
"loss": 0.3578,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 3.172661870503597,
|
|
"grad_norm": 0.14751625696146559,
|
|
"learning_rate": 2.83753942692936e-05,
|
|
"loss": 0.3651,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 3.1741007194244606,
|
|
"grad_norm": 0.15487013983260006,
|
|
"learning_rate": 2.8336947931355096e-05,
|
|
"loss": 0.3598,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 3.175539568345324,
|
|
"grad_norm": 0.14141578868652568,
|
|
"learning_rate": 2.8298513365576715e-05,
|
|
"loss": 0.3584,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 3.176978417266187,
|
|
"grad_norm": 0.18765406245819877,
|
|
"learning_rate": 2.826009061075257e-05,
|
|
"loss": 0.3578,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 3.1784172661870502,
|
|
"grad_norm": 0.13736677026106558,
|
|
"learning_rate": 2.822167970566488e-05,
|
|
"loss": 0.3469,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 3.1798561151079134,
|
|
"grad_norm": 0.16510463366560285,
|
|
"learning_rate": 2.8183280689083895e-05,
|
|
"loss": 0.3569,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 3.181294964028777,
|
|
"grad_norm": 0.14971654632581352,
|
|
"learning_rate": 2.8144893599767828e-05,
|
|
"loss": 0.3549,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 3.1827338129496403,
|
|
"grad_norm": 0.143577257012092,
|
|
"learning_rate": 2.8106518476462886e-05,
|
|
"loss": 0.3533,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 3.1841726618705035,
|
|
"grad_norm": 0.15126433224148872,
|
|
"learning_rate": 2.806815535790321e-05,
|
|
"loss": 0.3604,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 3.1856115107913667,
|
|
"grad_norm": 0.1404826037556668,
|
|
"learning_rate": 2.8029804282810794e-05,
|
|
"loss": 0.3526,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 3.1870503597122304,
|
|
"grad_norm": 0.13722372918194886,
|
|
"learning_rate": 2.7991465289895497e-05,
|
|
"loss": 0.3642,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 3.1884892086330936,
|
|
"grad_norm": 0.17306256527116945,
|
|
"learning_rate": 2.7953138417854952e-05,
|
|
"loss": 0.3596,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 3.189928057553957,
|
|
"grad_norm": 0.17492121799632276,
|
|
"learning_rate": 2.79148237053746e-05,
|
|
"loss": 0.362,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 3.19136690647482,
|
|
"grad_norm": 0.15364917608966572,
|
|
"learning_rate": 2.787652119112758e-05,
|
|
"loss": 0.3581,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 3.1928057553956837,
|
|
"grad_norm": 0.16759147828120005,
|
|
"learning_rate": 2.783823091377472e-05,
|
|
"loss": 0.3605,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 3.194244604316547,
|
|
"grad_norm": 0.1422712418251711,
|
|
"learning_rate": 2.7799952911964535e-05,
|
|
"loss": 0.3555,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 3.19568345323741,
|
|
"grad_norm": 0.1657225339074256,
|
|
"learning_rate": 2.776168722433308e-05,
|
|
"loss": 0.366,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 3.1971223021582733,
|
|
"grad_norm": 0.1678486095534346,
|
|
"learning_rate": 2.7723433889504046e-05,
|
|
"loss": 0.3522,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 3.1985611510791365,
|
|
"grad_norm": 0.1212953779120693,
|
|
"learning_rate": 2.7685192946088597e-05,
|
|
"loss": 0.3563,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"grad_norm": 0.15928502973260728,
|
|
"learning_rate": 2.7646964432685456e-05,
|
|
"loss": 0.3565,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 3.2014388489208634,
|
|
"grad_norm": 0.15905484756442395,
|
|
"learning_rate": 2.7608748387880754e-05,
|
|
"loss": 0.3608,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 3.2028776978417266,
|
|
"grad_norm": 0.14273128301009436,
|
|
"learning_rate": 2.7570544850248047e-05,
|
|
"loss": 0.3574,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 3.20431654676259,
|
|
"grad_norm": 0.1809504936287176,
|
|
"learning_rate": 2.753235385834824e-05,
|
|
"loss": 0.3652,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 3.205755395683453,
|
|
"grad_norm": 0.17464637087694193,
|
|
"learning_rate": 2.749417545072964e-05,
|
|
"loss": 0.3643,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 3.2071942446043167,
|
|
"grad_norm": 0.1798547259536615,
|
|
"learning_rate": 2.7456009665927807e-05,
|
|
"loss": 0.3601,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 3.20863309352518,
|
|
"grad_norm": 0.1504329725715847,
|
|
"learning_rate": 2.741785654246555e-05,
|
|
"loss": 0.3599,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 3.210071942446043,
|
|
"grad_norm": 0.16106180659050923,
|
|
"learning_rate": 2.7379716118852927e-05,
|
|
"loss": 0.3685,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 3.2115107913669063,
|
|
"grad_norm": 0.16272467307799718,
|
|
"learning_rate": 2.734158843358718e-05,
|
|
"loss": 0.3508,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 3.21294964028777,
|
|
"grad_norm": 0.14105728825169508,
|
|
"learning_rate": 2.730347352515266e-05,
|
|
"loss": 0.3657,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 3.214388489208633,
|
|
"grad_norm": 0.18161044470519083,
|
|
"learning_rate": 2.7265371432020836e-05,
|
|
"loss": 0.3605,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 3.2158273381294964,
|
|
"grad_norm": 0.3468701669170712,
|
|
"learning_rate": 2.7227282192650258e-05,
|
|
"loss": 0.3636,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 3.2172661870503596,
|
|
"grad_norm": 0.16576349570536256,
|
|
"learning_rate": 2.7189205845486503e-05,
|
|
"loss": 0.3655,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 3.218705035971223,
|
|
"grad_norm": 0.22069371131896498,
|
|
"learning_rate": 2.7151142428962103e-05,
|
|
"loss": 0.3632,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 3.2201438848920865,
|
|
"grad_norm": 0.15046740408729908,
|
|
"learning_rate": 2.711309198149655e-05,
|
|
"loss": 0.3565,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 3.2215827338129497,
|
|
"grad_norm": 0.2103313930960915,
|
|
"learning_rate": 2.7075054541496296e-05,
|
|
"loss": 0.358,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 3.223021582733813,
|
|
"grad_norm": 0.1717013577464213,
|
|
"learning_rate": 2.7037030147354582e-05,
|
|
"loss": 0.3545,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 3.224460431654676,
|
|
"grad_norm": 0.15153230266381415,
|
|
"learning_rate": 2.6999018837451523e-05,
|
|
"loss": 0.3572,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 3.2258992805755398,
|
|
"grad_norm": 0.15989903694813515,
|
|
"learning_rate": 2.6961020650154057e-05,
|
|
"loss": 0.3603,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 3.227338129496403,
|
|
"grad_norm": 0.13555752458055306,
|
|
"learning_rate": 2.6923035623815824e-05,
|
|
"loss": 0.3637,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 3.228776978417266,
|
|
"grad_norm": 0.18182116541132493,
|
|
"learning_rate": 2.6885063796777195e-05,
|
|
"loss": 0.3612,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 3.2302158273381294,
|
|
"grad_norm": 0.1495539257612094,
|
|
"learning_rate": 2.6847105207365225e-05,
|
|
"loss": 0.3607,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 3.2316546762589926,
|
|
"grad_norm": 0.1561386390982124,
|
|
"learning_rate": 2.6809159893893624e-05,
|
|
"loss": 0.3565,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 3.2330935251798563,
|
|
"grad_norm": 0.1486865053919953,
|
|
"learning_rate": 2.6771227894662666e-05,
|
|
"loss": 0.3564,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 3.2345323741007195,
|
|
"grad_norm": 0.14680720407831932,
|
|
"learning_rate": 2.6733309247959217e-05,
|
|
"loss": 0.3559,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 3.2359712230215827,
|
|
"grad_norm": 0.12788781019606907,
|
|
"learning_rate": 2.669540399205664e-05,
|
|
"loss": 0.3604,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 3.237410071942446,
|
|
"grad_norm": 0.14332587176818692,
|
|
"learning_rate": 2.6657512165214806e-05,
|
|
"loss": 0.3571,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 3.2388489208633096,
|
|
"grad_norm": 0.13770654743710328,
|
|
"learning_rate": 2.6619633805680028e-05,
|
|
"loss": 0.3631,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 3.2402877697841728,
|
|
"grad_norm": 0.1524526800989179,
|
|
"learning_rate": 2.6581768951684992e-05,
|
|
"loss": 0.3617,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 3.241726618705036,
|
|
"grad_norm": 0.16450729565922312,
|
|
"learning_rate": 2.6543917641448813e-05,
|
|
"loss": 0.3598,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 3.243165467625899,
|
|
"grad_norm": 0.143712409749315,
|
|
"learning_rate": 2.650607991317687e-05,
|
|
"loss": 0.3565,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 3.2446043165467624,
|
|
"grad_norm": 0.16136538887196314,
|
|
"learning_rate": 2.6468255805060885e-05,
|
|
"loss": 0.3677,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 3.246043165467626,
|
|
"grad_norm": 0.15121109613915254,
|
|
"learning_rate": 2.6430445355278788e-05,
|
|
"loss": 0.3656,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 3.2474820143884893,
|
|
"grad_norm": 0.13522391851895807,
|
|
"learning_rate": 2.639264860199477e-05,
|
|
"loss": 0.3597,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 3.2489208633093525,
|
|
"grad_norm": 0.13935757474333615,
|
|
"learning_rate": 2.6354865583359175e-05,
|
|
"loss": 0.3598,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 3.2503597122302157,
|
|
"grad_norm": 0.16227623052925946,
|
|
"learning_rate": 2.631709633750847e-05,
|
|
"loss": 0.3518,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 3.2517985611510793,
|
|
"grad_norm": 0.15938750392073342,
|
|
"learning_rate": 2.6279340902565217e-05,
|
|
"loss": 0.3596,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 3.2532374100719426,
|
|
"grad_norm": 0.1571640019797342,
|
|
"learning_rate": 2.6241599316638084e-05,
|
|
"loss": 0.3507,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 3.2546762589928058,
|
|
"grad_norm": 0.18156430842098326,
|
|
"learning_rate": 2.6203871617821717e-05,
|
|
"loss": 0.3681,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 3.256115107913669,
|
|
"grad_norm": 0.15861081349221523,
|
|
"learning_rate": 2.6166157844196755e-05,
|
|
"loss": 0.3583,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 3.257553956834532,
|
|
"grad_norm": 0.1754939034664979,
|
|
"learning_rate": 2.6128458033829792e-05,
|
|
"loss": 0.3628,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 3.258992805755396,
|
|
"grad_norm": 0.16250727921041547,
|
|
"learning_rate": 2.609077222477332e-05,
|
|
"loss": 0.3636,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 3.260431654676259,
|
|
"grad_norm": 0.13976758337721248,
|
|
"learning_rate": 2.6053100455065693e-05,
|
|
"loss": 0.3577,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 3.2618705035971223,
|
|
"grad_norm": 0.15272755107229993,
|
|
"learning_rate": 2.6015442762731095e-05,
|
|
"loss": 0.3643,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 3.2633093525179855,
|
|
"grad_norm": 0.15038693676990583,
|
|
"learning_rate": 2.5977799185779534e-05,
|
|
"loss": 0.3608,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 3.2647482014388487,
|
|
"grad_norm": 0.1632962339901094,
|
|
"learning_rate": 2.5940169762206722e-05,
|
|
"loss": 0.3505,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 3.2661870503597124,
|
|
"grad_norm": 0.15624086561364903,
|
|
"learning_rate": 2.5902554529994105e-05,
|
|
"loss": 0.3633,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 3.2676258992805756,
|
|
"grad_norm": 0.1527163631998461,
|
|
"learning_rate": 2.5864953527108805e-05,
|
|
"loss": 0.3573,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 3.2690647482014388,
|
|
"grad_norm": 0.1793300035684387,
|
|
"learning_rate": 2.58273667915036e-05,
|
|
"loss": 0.3646,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 3.270503597122302,
|
|
"grad_norm": 0.1305822763521924,
|
|
"learning_rate": 2.578979436111684e-05,
|
|
"loss": 0.3547,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 3.2719424460431656,
|
|
"grad_norm": 0.15354817288483616,
|
|
"learning_rate": 2.5752236273872432e-05,
|
|
"loss": 0.3572,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 3.273381294964029,
|
|
"grad_norm": 0.14215684473449045,
|
|
"learning_rate": 2.5714692567679853e-05,
|
|
"loss": 0.36,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 3.274820143884892,
|
|
"grad_norm": 0.1472891987907641,
|
|
"learning_rate": 2.5677163280433995e-05,
|
|
"loss": 0.3549,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 3.2762589928057553,
|
|
"grad_norm": 0.15823957876492176,
|
|
"learning_rate": 2.5639648450015268e-05,
|
|
"loss": 0.3586,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 3.277697841726619,
|
|
"grad_norm": 0.1411313098343854,
|
|
"learning_rate": 2.5602148114289415e-05,
|
|
"loss": 0.3585,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 3.279136690647482,
|
|
"grad_norm": 0.15001804374767283,
|
|
"learning_rate": 2.556466231110762e-05,
|
|
"loss": 0.3538,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 3.2805755395683454,
|
|
"grad_norm": 0.14540172740675816,
|
|
"learning_rate": 2.552719107830635e-05,
|
|
"loss": 0.3548,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 3.2820143884892086,
|
|
"grad_norm": 0.15021653131969173,
|
|
"learning_rate": 2.54897344537074e-05,
|
|
"loss": 0.3568,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 3.283453237410072,
|
|
"grad_norm": 0.1435323600914942,
|
|
"learning_rate": 2.5452292475117767e-05,
|
|
"loss": 0.3606,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 3.2848920863309354,
|
|
"grad_norm": 0.12766231684290757,
|
|
"learning_rate": 2.541486518032973e-05,
|
|
"loss": 0.3566,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 3.2863309352517986,
|
|
"grad_norm": 0.14974787727683006,
|
|
"learning_rate": 2.5377452607120722e-05,
|
|
"loss": 0.3704,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 3.287769784172662,
|
|
"grad_norm": 0.1213297804279398,
|
|
"learning_rate": 2.5340054793253276e-05,
|
|
"loss": 0.3646,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 3.289208633093525,
|
|
"grad_norm": 0.13332629641887717,
|
|
"learning_rate": 2.5302671776475098e-05,
|
|
"loss": 0.3555,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 3.2906474820143883,
|
|
"grad_norm": 0.13369505877369847,
|
|
"learning_rate": 2.526530359451892e-05,
|
|
"loss": 0.3646,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 3.292086330935252,
|
|
"grad_norm": 0.12489160418672422,
|
|
"learning_rate": 2.522795028510249e-05,
|
|
"loss": 0.3657,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 3.293525179856115,
|
|
"grad_norm": 0.1553858291324706,
|
|
"learning_rate": 2.5190611885928547e-05,
|
|
"loss": 0.3621,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 3.2949640287769784,
|
|
"grad_norm": 0.12359568704528343,
|
|
"learning_rate": 2.5153288434684816e-05,
|
|
"loss": 0.3671,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 3.2964028776978416,
|
|
"grad_norm": 0.17855700014936876,
|
|
"learning_rate": 2.5115979969043914e-05,
|
|
"loss": 0.366,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 3.2978417266187052,
|
|
"grad_norm": 0.11336485761390447,
|
|
"learning_rate": 2.5078686526663304e-05,
|
|
"loss": 0.3629,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 3.2992805755395684,
|
|
"grad_norm": 0.15624923854775533,
|
|
"learning_rate": 2.5041408145185312e-05,
|
|
"loss": 0.3538,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 3.3007194244604317,
|
|
"grad_norm": 0.13692819822572633,
|
|
"learning_rate": 2.5004144862237084e-05,
|
|
"loss": 0.3617,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 3.302158273381295,
|
|
"grad_norm": 0.13039864259204392,
|
|
"learning_rate": 2.4966896715430484e-05,
|
|
"loss": 0.3551,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 3.3035971223021585,
|
|
"grad_norm": 0.13051609893273122,
|
|
"learning_rate": 2.4929663742362103e-05,
|
|
"loss": 0.3638,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 3.3050359712230217,
|
|
"grad_norm": 0.13438198468863471,
|
|
"learning_rate": 2.4892445980613254e-05,
|
|
"loss": 0.3535,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 3.306474820143885,
|
|
"grad_norm": 0.15271796197412282,
|
|
"learning_rate": 2.4855243467749865e-05,
|
|
"loss": 0.3645,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 3.307913669064748,
|
|
"grad_norm": 0.1342832986230481,
|
|
"learning_rate": 2.481805624132247e-05,
|
|
"loss": 0.354,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 3.3093525179856114,
|
|
"grad_norm": 0.143975397931549,
|
|
"learning_rate": 2.478088433886618e-05,
|
|
"loss": 0.3527,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 3.310791366906475,
|
|
"grad_norm": 0.13866793733766922,
|
|
"learning_rate": 2.4743727797900668e-05,
|
|
"loss": 0.358,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 3.3122302158273382,
|
|
"grad_norm": 0.12846799366140196,
|
|
"learning_rate": 2.4706586655930042e-05,
|
|
"loss": 0.3577,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 3.3136690647482014,
|
|
"grad_norm": 0.13717711802025714,
|
|
"learning_rate": 2.4669460950442926e-05,
|
|
"loss": 0.3631,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 3.3151079136690647,
|
|
"grad_norm": 0.1363132195422716,
|
|
"learning_rate": 2.463235071891231e-05,
|
|
"loss": 0.3608,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 3.316546762589928,
|
|
"grad_norm": 0.1331741331637199,
|
|
"learning_rate": 2.4595255998795625e-05,
|
|
"loss": 0.3501,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 3.3179856115107915,
|
|
"grad_norm": 0.13459070412849602,
|
|
"learning_rate": 2.4558176827534587e-05,
|
|
"loss": 0.3552,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 3.3194244604316547,
|
|
"grad_norm": 0.13005135826849681,
|
|
"learning_rate": 2.452111324255524e-05,
|
|
"loss": 0.3544,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 3.320863309352518,
|
|
"grad_norm": 0.12491435853238587,
|
|
"learning_rate": 2.448406528126793e-05,
|
|
"loss": 0.3651,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 3.322302158273381,
|
|
"grad_norm": 0.1359713427944666,
|
|
"learning_rate": 2.444703298106718e-05,
|
|
"loss": 0.3539,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 3.3237410071942444,
|
|
"grad_norm": 0.12266181214822873,
|
|
"learning_rate": 2.441001637933173e-05,
|
|
"loss": 0.3619,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 3.325179856115108,
|
|
"grad_norm": 0.1361459682259731,
|
|
"learning_rate": 2.437301551342447e-05,
|
|
"loss": 0.3643,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 3.3266187050359712,
|
|
"grad_norm": 0.11697691298703675,
|
|
"learning_rate": 2.433603042069242e-05,
|
|
"loss": 0.3579,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 3.3280575539568344,
|
|
"grad_norm": 0.14278665657465608,
|
|
"learning_rate": 2.4299061138466667e-05,
|
|
"loss": 0.3586,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 3.3294964028776977,
|
|
"grad_norm": 0.14194617777031146,
|
|
"learning_rate": 2.4262107704062343e-05,
|
|
"loss": 0.3604,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 3.3309352517985613,
|
|
"grad_norm": 0.12804588715397874,
|
|
"learning_rate": 2.4225170154778562e-05,
|
|
"loss": 0.3467,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 3.3323741007194245,
|
|
"grad_norm": 0.1411025517895258,
|
|
"learning_rate": 2.4188248527898446e-05,
|
|
"loss": 0.3637,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 3.3338129496402877,
|
|
"grad_norm": 0.13573605633113175,
|
|
"learning_rate": 2.415134286068903e-05,
|
|
"loss": 0.3582,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 3.335251798561151,
|
|
"grad_norm": 0.15526197289807425,
|
|
"learning_rate": 2.411445319040121e-05,
|
|
"loss": 0.3547,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 3.3366906474820146,
|
|
"grad_norm": 0.13681017548192137,
|
|
"learning_rate": 2.407757955426977e-05,
|
|
"loss": 0.3583,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 3.338129496402878,
|
|
"grad_norm": 0.13413798078700748,
|
|
"learning_rate": 2.4040721989513314e-05,
|
|
"loss": 0.3599,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 3.339568345323741,
|
|
"grad_norm": 0.14492026904511848,
|
|
"learning_rate": 2.40038805333342e-05,
|
|
"loss": 0.3579,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 3.3410071942446042,
|
|
"grad_norm": 0.1460176387676694,
|
|
"learning_rate": 2.396705522291852e-05,
|
|
"loss": 0.3635,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 3.3424460431654675,
|
|
"grad_norm": 0.1427558669895796,
|
|
"learning_rate": 2.393024609543611e-05,
|
|
"loss": 0.3576,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 3.343884892086331,
|
|
"grad_norm": 0.1503174663862373,
|
|
"learning_rate": 2.3893453188040442e-05,
|
|
"loss": 0.3525,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 3.3453237410071943,
|
|
"grad_norm": 0.12550541372571222,
|
|
"learning_rate": 2.3856676537868614e-05,
|
|
"loss": 0.3626,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 3.3467625899280575,
|
|
"grad_norm": 0.1548590123262535,
|
|
"learning_rate": 2.3819916182041318e-05,
|
|
"loss": 0.3533,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 3.3482014388489207,
|
|
"grad_norm": 0.1263417379611214,
|
|
"learning_rate": 2.378317215766283e-05,
|
|
"loss": 0.353,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 3.349640287769784,
|
|
"grad_norm": 0.1423916513032497,
|
|
"learning_rate": 2.3746444501820886e-05,
|
|
"loss": 0.3659,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 3.3510791366906476,
|
|
"grad_norm": 0.146018171990259,
|
|
"learning_rate": 2.370973325158675e-05,
|
|
"loss": 0.3528,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 3.352517985611511,
|
|
"grad_norm": 0.12917348110304563,
|
|
"learning_rate": 2.3673038444015087e-05,
|
|
"loss": 0.3644,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 3.353956834532374,
|
|
"grad_norm": 0.162388382347929,
|
|
"learning_rate": 2.363636011614401e-05,
|
|
"loss": 0.3606,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 3.3553956834532372,
|
|
"grad_norm": 0.1263358690887275,
|
|
"learning_rate": 2.3599698304994946e-05,
|
|
"loss": 0.3651,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 3.356834532374101,
|
|
"grad_norm": 0.1345888012757111,
|
|
"learning_rate": 2.3563053047572683e-05,
|
|
"loss": 0.3639,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 3.358273381294964,
|
|
"grad_norm": 0.12108077947101649,
|
|
"learning_rate": 2.352642438086533e-05,
|
|
"loss": 0.3457,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 3.3597122302158273,
|
|
"grad_norm": 0.12872902434431419,
|
|
"learning_rate": 2.348981234184417e-05,
|
|
"loss": 0.3539,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 3.3611510791366905,
|
|
"grad_norm": 0.12448642000947208,
|
|
"learning_rate": 2.3453216967463785e-05,
|
|
"loss": 0.3633,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 3.362589928057554,
|
|
"grad_norm": 0.12227705541103388,
|
|
"learning_rate": 2.3416638294661864e-05,
|
|
"loss": 0.3564,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 3.3640287769784174,
|
|
"grad_norm": 0.1289725605013229,
|
|
"learning_rate": 2.3380076360359293e-05,
|
|
"loss": 0.3631,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 3.3654676258992806,
|
|
"grad_norm": 0.13325272783206263,
|
|
"learning_rate": 2.3343531201460067e-05,
|
|
"loss": 0.3525,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 3.366906474820144,
|
|
"grad_norm": 0.12442859906941255,
|
|
"learning_rate": 2.3307002854851188e-05,
|
|
"loss": 0.3609,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 3.368345323741007,
|
|
"grad_norm": 0.11626147568596167,
|
|
"learning_rate": 2.3270491357402715e-05,
|
|
"loss": 0.3545,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 3.3697841726618707,
|
|
"grad_norm": 0.1370012894591926,
|
|
"learning_rate": 2.3233996745967772e-05,
|
|
"loss": 0.3636,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 3.371223021582734,
|
|
"grad_norm": 0.12347199223282525,
|
|
"learning_rate": 2.3197519057382326e-05,
|
|
"loss": 0.3472,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 3.372661870503597,
|
|
"grad_norm": 0.11907853878194635,
|
|
"learning_rate": 2.316105832846532e-05,
|
|
"loss": 0.3503,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 3.3741007194244603,
|
|
"grad_norm": 0.132556253690504,
|
|
"learning_rate": 2.3124614596018606e-05,
|
|
"loss": 0.3516,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 3.3755395683453235,
|
|
"grad_norm": 0.1314947677583825,
|
|
"learning_rate": 2.308818789682682e-05,
|
|
"loss": 0.346,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 3.376978417266187,
|
|
"grad_norm": 0.13249467054552308,
|
|
"learning_rate": 2.3051778267657436e-05,
|
|
"loss": 0.3591,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 3.3784172661870504,
|
|
"grad_norm": 0.6554229547344406,
|
|
"learning_rate": 2.3015385745260704e-05,
|
|
"loss": 0.3651,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 3.3798561151079136,
|
|
"grad_norm": 0.13833165570422243,
|
|
"learning_rate": 2.2979010366369595e-05,
|
|
"loss": 0.3619,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 3.381294964028777,
|
|
"grad_norm": 0.16271306949076872,
|
|
"learning_rate": 2.294265216769978e-05,
|
|
"loss": 0.3528,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 3.38273381294964,
|
|
"grad_norm": 0.17397619358336086,
|
|
"learning_rate": 2.2906311185949605e-05,
|
|
"loss": 0.3666,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 3.3841726618705037,
|
|
"grad_norm": 0.1359464733503164,
|
|
"learning_rate": 2.2869987457799977e-05,
|
|
"loss": 0.3493,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 3.385611510791367,
|
|
"grad_norm": 0.15687078095255172,
|
|
"learning_rate": 2.283368101991448e-05,
|
|
"loss": 0.3553,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 3.38705035971223,
|
|
"grad_norm": 0.13510603073989289,
|
|
"learning_rate": 2.2797391908939196e-05,
|
|
"loss": 0.3639,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 3.3884892086330938,
|
|
"grad_norm": 0.2511263659161115,
|
|
"learning_rate": 2.2761120161502674e-05,
|
|
"loss": 0.3549,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 3.389928057553957,
|
|
"grad_norm": 0.1330162893115787,
|
|
"learning_rate": 2.2724865814216042e-05,
|
|
"loss": 0.3485,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 3.39136690647482,
|
|
"grad_norm": 0.15645979825320158,
|
|
"learning_rate": 2.2688628903672792e-05,
|
|
"loss": 0.3582,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 3.3928057553956834,
|
|
"grad_norm": 0.14495146705210177,
|
|
"learning_rate": 2.265240946644881e-05,
|
|
"loss": 0.3666,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 3.3942446043165466,
|
|
"grad_norm": 0.17089599602701658,
|
|
"learning_rate": 2.261620753910238e-05,
|
|
"loss": 0.3684,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 3.3956834532374103,
|
|
"grad_norm": 0.1320292665901007,
|
|
"learning_rate": 2.25800231581741e-05,
|
|
"loss": 0.3569,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 3.3971223021582735,
|
|
"grad_norm": 0.15269296516702346,
|
|
"learning_rate": 2.254385636018686e-05,
|
|
"loss": 0.3582,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 3.3985611510791367,
|
|
"grad_norm": 0.1453534513775358,
|
|
"learning_rate": 2.250770718164579e-05,
|
|
"loss": 0.3633,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"grad_norm": 0.13731500081739823,
|
|
"learning_rate": 2.247157565903825e-05,
|
|
"loss": 0.3618,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 3.401438848920863,
|
|
"grad_norm": 0.13094744744861844,
|
|
"learning_rate": 2.243546182883377e-05,
|
|
"loss": 0.36,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 3.402877697841727,
|
|
"grad_norm": 0.13232752026909125,
|
|
"learning_rate": 2.2399365727484047e-05,
|
|
"loss": 0.3561,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 3.40431654676259,
|
|
"grad_norm": 0.1363781758346583,
|
|
"learning_rate": 2.2363287391422806e-05,
|
|
"loss": 0.3592,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 3.405755395683453,
|
|
"grad_norm": 0.1371966852526462,
|
|
"learning_rate": 2.2327226857065954e-05,
|
|
"loss": 0.3633,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 3.4071942446043164,
|
|
"grad_norm": 0.13897728150727473,
|
|
"learning_rate": 2.2291184160811374e-05,
|
|
"loss": 0.365,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 3.4086330935251796,
|
|
"grad_norm": 0.1289567357284446,
|
|
"learning_rate": 2.22551593390389e-05,
|
|
"loss": 0.3613,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 3.4100719424460433,
|
|
"grad_norm": 0.1229096347586057,
|
|
"learning_rate": 2.2219152428110368e-05,
|
|
"loss": 0.3546,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 3.4115107913669065,
|
|
"grad_norm": 0.12827203508637464,
|
|
"learning_rate": 2.218316346436959e-05,
|
|
"loss": 0.3545,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 3.4129496402877697,
|
|
"grad_norm": 0.12090145990055598,
|
|
"learning_rate": 2.2147192484142154e-05,
|
|
"loss": 0.3551,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 3.414388489208633,
|
|
"grad_norm": 0.13088616424917873,
|
|
"learning_rate": 2.2111239523735568e-05,
|
|
"loss": 0.3537,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 3.4158273381294966,
|
|
"grad_norm": 0.11964851374911992,
|
|
"learning_rate": 2.2075304619439127e-05,
|
|
"loss": 0.3614,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 3.41726618705036,
|
|
"grad_norm": 0.3236581971429123,
|
|
"learning_rate": 2.2039387807523914e-05,
|
|
"loss": 0.3609,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 3.418705035971223,
|
|
"grad_norm": 0.11347408778704647,
|
|
"learning_rate": 2.2003489124242742e-05,
|
|
"loss": 0.3608,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 3.420143884892086,
|
|
"grad_norm": 0.13629563465066122,
|
|
"learning_rate": 2.1967608605830115e-05,
|
|
"loss": 0.3592,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 3.42158273381295,
|
|
"grad_norm": 0.12235466951643538,
|
|
"learning_rate": 2.1931746288502235e-05,
|
|
"loss": 0.3626,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 3.423021582733813,
|
|
"grad_norm": 0.14053693777605458,
|
|
"learning_rate": 2.1895902208456903e-05,
|
|
"loss": 0.363,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 3.4244604316546763,
|
|
"grad_norm": 0.11528967694817244,
|
|
"learning_rate": 2.186007640187353e-05,
|
|
"loss": 0.3633,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 3.4258992805755395,
|
|
"grad_norm": 0.1336451508220521,
|
|
"learning_rate": 2.1824268904913036e-05,
|
|
"loss": 0.3567,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 3.4273381294964027,
|
|
"grad_norm": 0.1464517350902614,
|
|
"learning_rate": 2.1788479753717935e-05,
|
|
"loss": 0.3586,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 3.4287769784172664,
|
|
"grad_norm": 0.13920911239418232,
|
|
"learning_rate": 2.1752708984412196e-05,
|
|
"loss": 0.3538,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 3.4302158273381296,
|
|
"grad_norm": 0.13214343377532833,
|
|
"learning_rate": 2.171695663310119e-05,
|
|
"loss": 0.3529,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 3.431654676258993,
|
|
"grad_norm": 0.12564349441961334,
|
|
"learning_rate": 2.1681222735871747e-05,
|
|
"loss": 0.3595,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 3.433093525179856,
|
|
"grad_norm": 0.14069795131144316,
|
|
"learning_rate": 2.1645507328792058e-05,
|
|
"loss": 0.3639,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 3.434532374100719,
|
|
"grad_norm": 0.13822519325261556,
|
|
"learning_rate": 2.1609810447911637e-05,
|
|
"loss": 0.3634,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 3.435971223021583,
|
|
"grad_norm": 0.12351982346168865,
|
|
"learning_rate": 2.157413212926133e-05,
|
|
"loss": 0.3535,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 3.437410071942446,
|
|
"grad_norm": 0.1272338422052163,
|
|
"learning_rate": 2.1538472408853206e-05,
|
|
"loss": 0.3607,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 3.4388489208633093,
|
|
"grad_norm": 0.12301547403151704,
|
|
"learning_rate": 2.1502831322680598e-05,
|
|
"loss": 0.3628,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 3.4402877697841725,
|
|
"grad_norm": 0.14899159797368783,
|
|
"learning_rate": 2.1467208906718008e-05,
|
|
"loss": 0.3518,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 3.441726618705036,
|
|
"grad_norm": 0.13355084036256545,
|
|
"learning_rate": 2.1431605196921103e-05,
|
|
"loss": 0.3678,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 3.4431654676258994,
|
|
"grad_norm": 0.14470477198286488,
|
|
"learning_rate": 2.1396020229226666e-05,
|
|
"loss": 0.3604,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 3.4446043165467626,
|
|
"grad_norm": 0.1327329601850092,
|
|
"learning_rate": 2.1360454039552577e-05,
|
|
"loss": 0.3481,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 3.446043165467626,
|
|
"grad_norm": 0.14070059083331754,
|
|
"learning_rate": 2.1324906663797718e-05,
|
|
"loss": 0.3623,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 3.4474820143884894,
|
|
"grad_norm": 0.1380167970732036,
|
|
"learning_rate": 2.1289378137842008e-05,
|
|
"loss": 0.3633,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 3.4489208633093527,
|
|
"grad_norm": 0.12576448812769042,
|
|
"learning_rate": 2.125386849754639e-05,
|
|
"loss": 0.3588,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 3.450359712230216,
|
|
"grad_norm": 0.13961975563472587,
|
|
"learning_rate": 2.121837777875266e-05,
|
|
"loss": 0.363,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 3.451798561151079,
|
|
"grad_norm": 0.12572193036278917,
|
|
"learning_rate": 2.118290601728354e-05,
|
|
"loss": 0.3547,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 3.4532374100719423,
|
|
"grad_norm": 0.1359858682701226,
|
|
"learning_rate": 2.1147453248942687e-05,
|
|
"loss": 0.3545,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 3.454676258992806,
|
|
"grad_norm": 0.1371534042068942,
|
|
"learning_rate": 2.1112019509514478e-05,
|
|
"loss": 0.3627,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 3.456115107913669,
|
|
"grad_norm": 0.13451286715691832,
|
|
"learning_rate": 2.1076604834764154e-05,
|
|
"loss": 0.3578,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 3.4575539568345324,
|
|
"grad_norm": 0.13398541315378715,
|
|
"learning_rate": 2.1041209260437694e-05,
|
|
"loss": 0.3629,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 3.4589928057553956,
|
|
"grad_norm": 0.14556980997688831,
|
|
"learning_rate": 2.10058328222618e-05,
|
|
"loss": 0.3594,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 3.460431654676259,
|
|
"grad_norm": 0.15336314990662656,
|
|
"learning_rate": 2.097047555594385e-05,
|
|
"loss": 0.3576,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 3.4618705035971225,
|
|
"grad_norm": 0.14742938260450789,
|
|
"learning_rate": 2.0935137497171904e-05,
|
|
"loss": 0.3617,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 3.4633093525179857,
|
|
"grad_norm": 0.18791162733485917,
|
|
"learning_rate": 2.0899818681614557e-05,
|
|
"loss": 0.3634,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 3.464748201438849,
|
|
"grad_norm": 0.14357175741680217,
|
|
"learning_rate": 2.086451914492108e-05,
|
|
"loss": 0.3594,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 3.466187050359712,
|
|
"grad_norm": 0.1622532647292526,
|
|
"learning_rate": 2.082923892272124e-05,
|
|
"loss": 0.3591,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 3.4676258992805753,
|
|
"grad_norm": 0.15153511500254388,
|
|
"learning_rate": 2.079397805062526e-05,
|
|
"loss": 0.3617,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 3.469064748201439,
|
|
"grad_norm": 0.14675102048393093,
|
|
"learning_rate": 2.0758736564223937e-05,
|
|
"loss": 0.3552,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 3.470503597122302,
|
|
"grad_norm": 0.13196392146758773,
|
|
"learning_rate": 2.0723514499088388e-05,
|
|
"loss": 0.3641,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 3.4719424460431654,
|
|
"grad_norm": 0.1459218944890939,
|
|
"learning_rate": 2.068831189077021e-05,
|
|
"loss": 0.365,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 3.4733812949640286,
|
|
"grad_norm": 0.1212800868161018,
|
|
"learning_rate": 2.065312877480133e-05,
|
|
"loss": 0.3458,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 3.4748201438848922,
|
|
"grad_norm": 0.1421206335254316,
|
|
"learning_rate": 2.0617965186694e-05,
|
|
"loss": 0.3559,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 3.4762589928057555,
|
|
"grad_norm": 0.123253730656715,
|
|
"learning_rate": 2.058282116194076e-05,
|
|
"loss": 0.3676,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 3.4776978417266187,
|
|
"grad_norm": 0.14997814265777137,
|
|
"learning_rate": 2.0547696736014415e-05,
|
|
"loss": 0.3642,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 3.479136690647482,
|
|
"grad_norm": 0.12833942780692223,
|
|
"learning_rate": 2.0512591944367976e-05,
|
|
"loss": 0.3639,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 3.4805755395683455,
|
|
"grad_norm": 0.15445796749271754,
|
|
"learning_rate": 2.0477506822434644e-05,
|
|
"loss": 0.3569,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 3.4820143884892087,
|
|
"grad_norm": 0.12936671642461817,
|
|
"learning_rate": 2.0442441405627776e-05,
|
|
"loss": 0.3593,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 3.483453237410072,
|
|
"grad_norm": 0.14406509559610453,
|
|
"learning_rate": 2.0407395729340792e-05,
|
|
"loss": 0.3575,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 3.484892086330935,
|
|
"grad_norm": 0.12487672626712636,
|
|
"learning_rate": 2.037236982894723e-05,
|
|
"loss": 0.3553,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 3.4863309352517984,
|
|
"grad_norm": 0.15651615626601662,
|
|
"learning_rate": 2.0337363739800695e-05,
|
|
"loss": 0.3557,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 3.487769784172662,
|
|
"grad_norm": 0.12649653032654964,
|
|
"learning_rate": 2.030237749723472e-05,
|
|
"loss": 0.3583,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 3.4892086330935252,
|
|
"grad_norm": 0.13865375979925104,
|
|
"learning_rate": 2.026741113656284e-05,
|
|
"loss": 0.3633,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 3.4906474820143885,
|
|
"grad_norm": 0.14502997414446508,
|
|
"learning_rate": 2.0232464693078578e-05,
|
|
"loss": 0.3675,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 3.4920863309352517,
|
|
"grad_norm": 0.12384178660046358,
|
|
"learning_rate": 2.0197538202055246e-05,
|
|
"loss": 0.3602,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 3.493525179856115,
|
|
"grad_norm": 0.13535612704113442,
|
|
"learning_rate": 2.01626316987461e-05,
|
|
"loss": 0.3585,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 3.4949640287769785,
|
|
"grad_norm": 0.114994173462321,
|
|
"learning_rate": 2.0127745218384193e-05,
|
|
"loss": 0.3597,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 3.4964028776978417,
|
|
"grad_norm": 0.1290186003183694,
|
|
"learning_rate": 2.009287879618236e-05,
|
|
"loss": 0.3626,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 3.497841726618705,
|
|
"grad_norm": 0.13063559435441932,
|
|
"learning_rate": 2.0058032467333204e-05,
|
|
"loss": 0.367,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 3.499280575539568,
|
|
"grad_norm": 0.1241254363479751,
|
|
"learning_rate": 2.0023206267009056e-05,
|
|
"loss": 0.3661,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 3.5007194244604314,
|
|
"grad_norm": 0.1118891419274359,
|
|
"learning_rate": 1.9988400230361872e-05,
|
|
"loss": 0.3638,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 3.502158273381295,
|
|
"grad_norm": 0.12734305282310668,
|
|
"learning_rate": 1.9953614392523345e-05,
|
|
"loss": 0.348,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 3.5035971223021583,
|
|
"grad_norm": 0.13566982086480212,
|
|
"learning_rate": 1.9918848788604738e-05,
|
|
"loss": 0.3674,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 3.5050359712230215,
|
|
"grad_norm": 0.12337652686373772,
|
|
"learning_rate": 1.9884103453696837e-05,
|
|
"loss": 0.3671,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 3.506474820143885,
|
|
"grad_norm": 0.12428232715056108,
|
|
"learning_rate": 1.9849378422870082e-05,
|
|
"loss": 0.3647,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 3.5079136690647483,
|
|
"grad_norm": 0.1159695325461507,
|
|
"learning_rate": 1.9814673731174315e-05,
|
|
"loss": 0.3659,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 3.5093525179856115,
|
|
"grad_norm": 0.13440488580517748,
|
|
"learning_rate": 1.97799894136389e-05,
|
|
"loss": 0.3597,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 3.5107913669064748,
|
|
"grad_norm": 0.13808442733323134,
|
|
"learning_rate": 1.9745325505272633e-05,
|
|
"loss": 0.3532,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 3.512230215827338,
|
|
"grad_norm": 0.13663788892710846,
|
|
"learning_rate": 1.9710682041063705e-05,
|
|
"loss": 0.3659,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 3.5136690647482016,
|
|
"grad_norm": 0.15263079589013945,
|
|
"learning_rate": 1.9676059055979663e-05,
|
|
"loss": 0.3561,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 3.515107913669065,
|
|
"grad_norm": 0.1257017231925036,
|
|
"learning_rate": 1.9641456584967392e-05,
|
|
"loss": 0.353,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 3.516546762589928,
|
|
"grad_norm": 0.13066763491627248,
|
|
"learning_rate": 1.9606874662953076e-05,
|
|
"loss": 0.3601,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 3.5179856115107913,
|
|
"grad_norm": 0.13179118778254603,
|
|
"learning_rate": 1.9572313324842148e-05,
|
|
"loss": 0.3532,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 3.5194244604316545,
|
|
"grad_norm": 0.13023874132230948,
|
|
"learning_rate": 1.9537772605519285e-05,
|
|
"loss": 0.3559,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 3.520863309352518,
|
|
"grad_norm": 0.1264535046360738,
|
|
"learning_rate": 1.950325253984828e-05,
|
|
"loss": 0.365,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 3.5223021582733813,
|
|
"grad_norm": 0.12411410063119681,
|
|
"learning_rate": 1.946875316267219e-05,
|
|
"loss": 0.3565,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 3.5237410071942445,
|
|
"grad_norm": 0.1626384481169853,
|
|
"learning_rate": 1.9434274508813135e-05,
|
|
"loss": 0.3631,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 3.5251798561151078,
|
|
"grad_norm": 0.1250128505871958,
|
|
"learning_rate": 1.9399816613072287e-05,
|
|
"loss": 0.3558,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 3.526618705035971,
|
|
"grad_norm": 0.14509609755684688,
|
|
"learning_rate": 1.9365379510229888e-05,
|
|
"loss": 0.3633,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 3.5280575539568346,
|
|
"grad_norm": 0.17146305577392454,
|
|
"learning_rate": 1.9330963235045253e-05,
|
|
"loss": 0.3634,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 3.529496402877698,
|
|
"grad_norm": 0.13491748503688653,
|
|
"learning_rate": 1.9296567822256577e-05,
|
|
"loss": 0.3667,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 3.530935251798561,
|
|
"grad_norm": 0.1119851205907144,
|
|
"learning_rate": 1.9262193306581052e-05,
|
|
"loss": 0.3543,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 3.5323741007194247,
|
|
"grad_norm": 0.11870023944569377,
|
|
"learning_rate": 1.922783972271477e-05,
|
|
"loss": 0.3565,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 3.533812949640288,
|
|
"grad_norm": 0.12364761563760958,
|
|
"learning_rate": 1.9193507105332702e-05,
|
|
"loss": 0.3475,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 3.535251798561151,
|
|
"grad_norm": 0.14139004889475257,
|
|
"learning_rate": 1.9159195489088636e-05,
|
|
"loss": 0.3524,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 3.5366906474820143,
|
|
"grad_norm": 0.1347364124407686,
|
|
"learning_rate": 1.9124904908615178e-05,
|
|
"loss": 0.3573,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 3.5381294964028775,
|
|
"grad_norm": 0.1318931175247576,
|
|
"learning_rate": 1.9090635398523698e-05,
|
|
"loss": 0.3524,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 3.539568345323741,
|
|
"grad_norm": 0.13056163493884235,
|
|
"learning_rate": 1.9056386993404294e-05,
|
|
"loss": 0.3621,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 3.5410071942446044,
|
|
"grad_norm": 0.11994265037369371,
|
|
"learning_rate": 1.902215972782579e-05,
|
|
"loss": 0.3584,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 3.5424460431654676,
|
|
"grad_norm": 0.1450439221196064,
|
|
"learning_rate": 1.8987953636335595e-05,
|
|
"loss": 0.3534,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 3.543884892086331,
|
|
"grad_norm": 0.1600994116274118,
|
|
"learning_rate": 1.8953768753459863e-05,
|
|
"loss": 0.3613,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 3.545323741007194,
|
|
"grad_norm": 0.14351787826300197,
|
|
"learning_rate": 1.8919605113703227e-05,
|
|
"loss": 0.3618,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 3.5467625899280577,
|
|
"grad_norm": 0.1240170497214509,
|
|
"learning_rate": 1.888546275154895e-05,
|
|
"loss": 0.3616,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 3.548201438848921,
|
|
"grad_norm": 0.15006729294889773,
|
|
"learning_rate": 1.885134170145879e-05,
|
|
"loss": 0.3559,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 3.549640287769784,
|
|
"grad_norm": 0.1139450029845211,
|
|
"learning_rate": 1.8817241997873007e-05,
|
|
"loss": 0.345,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 3.5510791366906473,
|
|
"grad_norm": 0.12540092627631935,
|
|
"learning_rate": 1.8783163675210307e-05,
|
|
"loss": 0.3633,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 3.5525179856115106,
|
|
"grad_norm": 0.11996787143771767,
|
|
"learning_rate": 1.8749106767867808e-05,
|
|
"loss": 0.3597,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 3.553956834532374,
|
|
"grad_norm": 0.1589719769900905,
|
|
"learning_rate": 1.871507131022103e-05,
|
|
"loss": 0.363,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 3.5553956834532374,
|
|
"grad_norm": 0.15099661251200622,
|
|
"learning_rate": 1.8681057336623825e-05,
|
|
"loss": 0.3619,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 3.5568345323741006,
|
|
"grad_norm": 0.13269953732243062,
|
|
"learning_rate": 1.864706488140839e-05,
|
|
"loss": 0.3572,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 3.5582733812949643,
|
|
"grad_norm": 0.1776513991142787,
|
|
"learning_rate": 1.861309397888513e-05,
|
|
"loss": 0.3625,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 3.5597122302158275,
|
|
"grad_norm": 0.12329366964743796,
|
|
"learning_rate": 1.857914466334279e-05,
|
|
"loss": 0.3483,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 3.5611510791366907,
|
|
"grad_norm": 0.13533349975063866,
|
|
"learning_rate": 1.8545216969048288e-05,
|
|
"loss": 0.3551,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 3.562589928057554,
|
|
"grad_norm": 0.15591724084094533,
|
|
"learning_rate": 1.851131093024668e-05,
|
|
"loss": 0.3642,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 3.564028776978417,
|
|
"grad_norm": 0.12966019739866835,
|
|
"learning_rate": 1.8477426581161192e-05,
|
|
"loss": 0.3561,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 3.565467625899281,
|
|
"grad_norm": 0.14209671782546404,
|
|
"learning_rate": 1.844356395599322e-05,
|
|
"loss": 0.3664,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 3.566906474820144,
|
|
"grad_norm": 0.12091525965275862,
|
|
"learning_rate": 1.840972308892211e-05,
|
|
"loss": 0.3561,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 3.568345323741007,
|
|
"grad_norm": 0.17197872584184457,
|
|
"learning_rate": 1.837590401410532e-05,
|
|
"loss": 0.356,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 3.5697841726618704,
|
|
"grad_norm": 0.13084384960521728,
|
|
"learning_rate": 1.8342106765678358e-05,
|
|
"loss": 0.3596,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 3.5712230215827336,
|
|
"grad_norm": 0.15225685341882964,
|
|
"learning_rate": 1.8308331377754584e-05,
|
|
"loss": 0.3611,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 3.5726618705035973,
|
|
"grad_norm": 0.17802182946652378,
|
|
"learning_rate": 1.8274577884425383e-05,
|
|
"loss": 0.3581,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 3.5741007194244605,
|
|
"grad_norm": 0.13325763067130736,
|
|
"learning_rate": 1.8240846319760012e-05,
|
|
"loss": 0.356,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 3.5755395683453237,
|
|
"grad_norm": 0.15596219798111047,
|
|
"learning_rate": 1.8207136717805585e-05,
|
|
"loss": 0.3436,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 3.576978417266187,
|
|
"grad_norm": 0.1519071790702426,
|
|
"learning_rate": 1.8173449112587062e-05,
|
|
"loss": 0.3544,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 3.57841726618705,
|
|
"grad_norm": 0.13853154120596817,
|
|
"learning_rate": 1.813978353810722e-05,
|
|
"loss": 0.3475,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 3.579856115107914,
|
|
"grad_norm": 0.14429316793510719,
|
|
"learning_rate": 1.8106140028346526e-05,
|
|
"loss": 0.3545,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 3.581294964028777,
|
|
"grad_norm": 0.14569953892821894,
|
|
"learning_rate": 1.8072518617263276e-05,
|
|
"loss": 0.3615,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 3.58273381294964,
|
|
"grad_norm": 0.12093956405951821,
|
|
"learning_rate": 1.803891933879338e-05,
|
|
"loss": 0.3564,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 3.584172661870504,
|
|
"grad_norm": 0.14326630774687052,
|
|
"learning_rate": 1.8005342226850423e-05,
|
|
"loss": 0.3643,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 3.5856115107913666,
|
|
"grad_norm": 0.12940780696692908,
|
|
"learning_rate": 1.7971787315325684e-05,
|
|
"loss": 0.3646,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 3.5870503597122303,
|
|
"grad_norm": 0.12921905741347023,
|
|
"learning_rate": 1.7938254638087946e-05,
|
|
"loss": 0.3553,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 3.5884892086330935,
|
|
"grad_norm": 0.13776123398267903,
|
|
"learning_rate": 1.7904744228983585e-05,
|
|
"loss": 0.3553,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 3.5899280575539567,
|
|
"grad_norm": 0.1491174324470864,
|
|
"learning_rate": 1.7871256121836507e-05,
|
|
"loss": 0.3567,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 3.5913669064748204,
|
|
"grad_norm": 0.14337613514228842,
|
|
"learning_rate": 1.7837790350448098e-05,
|
|
"loss": 0.3558,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 3.5928057553956836,
|
|
"grad_norm": 0.1255228126491884,
|
|
"learning_rate": 1.7804346948597206e-05,
|
|
"loss": 0.3522,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 3.594244604316547,
|
|
"grad_norm": 0.15563753743587475,
|
|
"learning_rate": 1.7770925950040114e-05,
|
|
"loss": 0.35,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 3.59568345323741,
|
|
"grad_norm": 0.13485702928294702,
|
|
"learning_rate": 1.773752738851042e-05,
|
|
"loss": 0.3543,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 3.597122302158273,
|
|
"grad_norm": 0.14256133164076262,
|
|
"learning_rate": 1.770415129771918e-05,
|
|
"loss": 0.361,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 3.598561151079137,
|
|
"grad_norm": 0.12677386416486275,
|
|
"learning_rate": 1.7670797711354724e-05,
|
|
"loss": 0.3606,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 3.6,
|
|
"grad_norm": 0.17430855788293317,
|
|
"learning_rate": 1.763746666308261e-05,
|
|
"loss": 0.3603,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 3.6014388489208633,
|
|
"grad_norm": 0.1743900845154039,
|
|
"learning_rate": 1.760415818654574e-05,
|
|
"loss": 0.3708,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 3.6028776978417265,
|
|
"grad_norm": 0.13137316154321138,
|
|
"learning_rate": 1.75708723153642e-05,
|
|
"loss": 0.3598,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 3.6043165467625897,
|
|
"grad_norm": 0.15871088095931402,
|
|
"learning_rate": 1.7537609083135224e-05,
|
|
"loss": 0.3558,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 3.6057553956834534,
|
|
"grad_norm": 0.13806880183091022,
|
|
"learning_rate": 1.7504368523433216e-05,
|
|
"loss": 0.3502,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 3.6071942446043166,
|
|
"grad_norm": 0.16323580252845718,
|
|
"learning_rate": 1.747115066980974e-05,
|
|
"loss": 0.3481,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 3.60863309352518,
|
|
"grad_norm": 0.12810345739192744,
|
|
"learning_rate": 1.7437955555793372e-05,
|
|
"loss": 0.36,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 3.610071942446043,
|
|
"grad_norm": 0.13057737548588205,
|
|
"learning_rate": 1.740478321488978e-05,
|
|
"loss": 0.3587,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 3.6115107913669062,
|
|
"grad_norm": 0.12543990631111623,
|
|
"learning_rate": 1.737163368058162e-05,
|
|
"loss": 0.3579,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 3.61294964028777,
|
|
"grad_norm": 0.14943734334581701,
|
|
"learning_rate": 1.7338506986328552e-05,
|
|
"loss": 0.3531,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 3.614388489208633,
|
|
"grad_norm": 0.15664866507572717,
|
|
"learning_rate": 1.730540316556717e-05,
|
|
"loss": 0.3552,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 3.6158273381294963,
|
|
"grad_norm": 0.13301861330299317,
|
|
"learning_rate": 1.727232225171098e-05,
|
|
"loss": 0.3544,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 3.61726618705036,
|
|
"grad_norm": 0.12941594011383822,
|
|
"learning_rate": 1.7239264278150364e-05,
|
|
"loss": 0.3642,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 3.618705035971223,
|
|
"grad_norm": 0.5476902737057576,
|
|
"learning_rate": 1.7206229278252577e-05,
|
|
"loss": 0.3753,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 3.6201438848920864,
|
|
"grad_norm": 0.1137244825283613,
|
|
"learning_rate": 1.717321728536163e-05,
|
|
"loss": 0.3593,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 3.6215827338129496,
|
|
"grad_norm": 0.14240502893185195,
|
|
"learning_rate": 1.7140228332798336e-05,
|
|
"loss": 0.3562,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 3.623021582733813,
|
|
"grad_norm": 0.12648094244090205,
|
|
"learning_rate": 1.7107262453860308e-05,
|
|
"loss": 0.3643,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 3.6244604316546765,
|
|
"grad_norm": 0.12836560538642358,
|
|
"learning_rate": 1.707431968182179e-05,
|
|
"loss": 0.354,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 3.6258992805755397,
|
|
"grad_norm": 0.11559715485021059,
|
|
"learning_rate": 1.7041400049933726e-05,
|
|
"loss": 0.3568,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 3.627338129496403,
|
|
"grad_norm": 0.1296136397113548,
|
|
"learning_rate": 1.700850359142373e-05,
|
|
"loss": 0.3542,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 3.628776978417266,
|
|
"grad_norm": 0.13048139475271783,
|
|
"learning_rate": 1.6975630339496e-05,
|
|
"loss": 0.3605,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 3.6302158273381293,
|
|
"grad_norm": 0.11412371410417778,
|
|
"learning_rate": 1.6942780327331317e-05,
|
|
"loss": 0.3629,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 3.631654676258993,
|
|
"grad_norm": 0.7573756953403586,
|
|
"learning_rate": 1.6909953588087024e-05,
|
|
"loss": 0.3614,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 3.633093525179856,
|
|
"grad_norm": 0.1302436552442138,
|
|
"learning_rate": 1.687715015489691e-05,
|
|
"loss": 0.3494,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 3.6345323741007194,
|
|
"grad_norm": 0.1676359189472315,
|
|
"learning_rate": 1.6844370060871324e-05,
|
|
"loss": 0.362,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 3.6359712230215826,
|
|
"grad_norm": 0.15621851026130218,
|
|
"learning_rate": 1.6811613339097022e-05,
|
|
"loss": 0.3529,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 3.637410071942446,
|
|
"grad_norm": 0.146513754162598,
|
|
"learning_rate": 1.6778880022637123e-05,
|
|
"loss": 0.3605,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 3.6388489208633095,
|
|
"grad_norm": 0.1314598452906993,
|
|
"learning_rate": 1.674617014453121e-05,
|
|
"loss": 0.3517,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 3.6402877697841727,
|
|
"grad_norm": 0.13851614423151853,
|
|
"learning_rate": 1.6713483737795155e-05,
|
|
"loss": 0.3574,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 3.641726618705036,
|
|
"grad_norm": 0.1315194030128455,
|
|
"learning_rate": 1.6680820835421124e-05,
|
|
"loss": 0.3638,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 3.6431654676258995,
|
|
"grad_norm": 0.14398548550533488,
|
|
"learning_rate": 1.664818147037758e-05,
|
|
"loss": 0.3633,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 3.6446043165467623,
|
|
"grad_norm": 0.127385917966144,
|
|
"learning_rate": 1.6615565675609272e-05,
|
|
"loss": 0.355,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 3.646043165467626,
|
|
"grad_norm": 0.12560142077885314,
|
|
"learning_rate": 1.6582973484037076e-05,
|
|
"loss": 0.3527,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 3.647482014388489,
|
|
"grad_norm": 0.14065761887460176,
|
|
"learning_rate": 1.6550404928558094e-05,
|
|
"loss": 0.3598,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 3.6489208633093524,
|
|
"grad_norm": 0.11868386851289228,
|
|
"learning_rate": 1.6517860042045564e-05,
|
|
"loss": 0.3611,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 3.650359712230216,
|
|
"grad_norm": 0.13769286928563224,
|
|
"learning_rate": 1.6485338857348826e-05,
|
|
"loss": 0.3587,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 3.6517985611510793,
|
|
"grad_norm": 0.1215357439652093,
|
|
"learning_rate": 1.6452841407293307e-05,
|
|
"loss": 0.3523,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 3.6532374100719425,
|
|
"grad_norm": 0.12540317674732537,
|
|
"learning_rate": 1.642036772468047e-05,
|
|
"loss": 0.3592,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 3.6546762589928057,
|
|
"grad_norm": 0.11901406795168544,
|
|
"learning_rate": 1.6387917842287783e-05,
|
|
"loss": 0.3608,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 3.656115107913669,
|
|
"grad_norm": 0.13000228164985114,
|
|
"learning_rate": 1.635549179286871e-05,
|
|
"loss": 0.3569,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 3.6575539568345325,
|
|
"grad_norm": 0.11338543646172514,
|
|
"learning_rate": 1.6323089609152648e-05,
|
|
"loss": 0.361,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 3.6589928057553958,
|
|
"grad_norm": 0.12801333971391984,
|
|
"learning_rate": 1.6290711323844866e-05,
|
|
"loss": 0.3593,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 3.660431654676259,
|
|
"grad_norm": 0.1128731226781219,
|
|
"learning_rate": 1.6258356969626614e-05,
|
|
"loss": 0.3603,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 3.661870503597122,
|
|
"grad_norm": 0.11889357871856436,
|
|
"learning_rate": 1.622602657915487e-05,
|
|
"loss": 0.3578,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 3.6633093525179854,
|
|
"grad_norm": 0.11528073260224325,
|
|
"learning_rate": 1.6193720185062484e-05,
|
|
"loss": 0.3679,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 3.664748201438849,
|
|
"grad_norm": 0.12308788543331071,
|
|
"learning_rate": 1.6161437819958087e-05,
|
|
"loss": 0.3662,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 3.6661870503597123,
|
|
"grad_norm": 0.11874326453214566,
|
|
"learning_rate": 1.6129179516426048e-05,
|
|
"loss": 0.3596,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 3.6676258992805755,
|
|
"grad_norm": 0.1341208815297137,
|
|
"learning_rate": 1.609694530702644e-05,
|
|
"loss": 0.3533,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 3.6690647482014387,
|
|
"grad_norm": 0.11595502532150927,
|
|
"learning_rate": 1.6064735224295027e-05,
|
|
"loss": 0.3566,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 3.670503597122302,
|
|
"grad_norm": 0.12664971384594076,
|
|
"learning_rate": 1.603254930074322e-05,
|
|
"loss": 0.3553,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 3.6719424460431656,
|
|
"grad_norm": 0.10790904796534909,
|
|
"learning_rate": 1.6000387568858042e-05,
|
|
"loss": 0.3632,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 3.6733812949640288,
|
|
"grad_norm": 0.11018895215840635,
|
|
"learning_rate": 1.5968250061102105e-05,
|
|
"loss": 0.3586,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 3.674820143884892,
|
|
"grad_norm": 0.11151993057932329,
|
|
"learning_rate": 1.593613680991353e-05,
|
|
"loss": 0.3581,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 3.6762589928057556,
|
|
"grad_norm": 0.10603546279456053,
|
|
"learning_rate": 1.590404784770603e-05,
|
|
"loss": 0.3595,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 3.677697841726619,
|
|
"grad_norm": 0.10532594627475655,
|
|
"learning_rate": 1.5871983206868756e-05,
|
|
"loss": 0.3717,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 3.679136690647482,
|
|
"grad_norm": 0.12876654466036538,
|
|
"learning_rate": 1.583994291976629e-05,
|
|
"loss": 0.354,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 3.6805755395683453,
|
|
"grad_norm": 0.10272028352483324,
|
|
"learning_rate": 1.580792701873865e-05,
|
|
"loss": 0.3613,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 3.6820143884892085,
|
|
"grad_norm": 0.12447031796919335,
|
|
"learning_rate": 1.5775935536101296e-05,
|
|
"loss": 0.3566,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 3.683453237410072,
|
|
"grad_norm": 0.11662128855035621,
|
|
"learning_rate": 1.5743968504144946e-05,
|
|
"loss": 0.3555,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 3.6848920863309353,
|
|
"grad_norm": 0.11219474206770183,
|
|
"learning_rate": 1.57120259551357e-05,
|
|
"loss": 0.3537,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 3.6863309352517986,
|
|
"grad_norm": 0.12271993986326493,
|
|
"learning_rate": 1.5680107921314926e-05,
|
|
"loss": 0.3519,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 3.6877697841726618,
|
|
"grad_norm": 0.11758813263441525,
|
|
"learning_rate": 1.5648214434899257e-05,
|
|
"loss": 0.3583,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 3.689208633093525,
|
|
"grad_norm": 0.10969603490507965,
|
|
"learning_rate": 1.5616345528080537e-05,
|
|
"loss": 0.3501,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 3.6906474820143886,
|
|
"grad_norm": 0.09726216395298677,
|
|
"learning_rate": 1.5584501233025813e-05,
|
|
"loss": 0.36,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 3.692086330935252,
|
|
"grad_norm": 0.10963071310064547,
|
|
"learning_rate": 1.555268158187728e-05,
|
|
"loss": 0.3545,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 3.693525179856115,
|
|
"grad_norm": 0.10313640285284159,
|
|
"learning_rate": 1.552088660675227e-05,
|
|
"loss": 0.3543,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 3.6949640287769783,
|
|
"grad_norm": 0.11264116707122993,
|
|
"learning_rate": 1.54891163397432e-05,
|
|
"loss": 0.3632,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 3.6964028776978415,
|
|
"grad_norm": 0.11662856859127053,
|
|
"learning_rate": 1.5457370812917526e-05,
|
|
"loss": 0.3545,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 3.697841726618705,
|
|
"grad_norm": 0.11104526603738171,
|
|
"learning_rate": 1.5425650058317795e-05,
|
|
"loss": 0.3602,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 3.6992805755395683,
|
|
"grad_norm": 0.10837909674841804,
|
|
"learning_rate": 1.5393954107961467e-05,
|
|
"loss": 0.357,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 3.7007194244604316,
|
|
"grad_norm": 0.1223151402155435,
|
|
"learning_rate": 1.536228299384102e-05,
|
|
"loss": 0.3614,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 3.702158273381295,
|
|
"grad_norm": 0.0978936100518978,
|
|
"learning_rate": 1.533063674792389e-05,
|
|
"loss": 0.3588,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 3.7035971223021584,
|
|
"grad_norm": 0.12570338100228484,
|
|
"learning_rate": 1.529901540215233e-05,
|
|
"loss": 0.3514,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 3.7050359712230216,
|
|
"grad_norm": 0.1202957345776564,
|
|
"learning_rate": 1.5267418988443517e-05,
|
|
"loss": 0.359,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 3.706474820143885,
|
|
"grad_norm": 0.1362700054748285,
|
|
"learning_rate": 1.5235847538689452e-05,
|
|
"loss": 0.3572,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 3.707913669064748,
|
|
"grad_norm": 0.11331667587689345,
|
|
"learning_rate": 1.5204301084756936e-05,
|
|
"loss": 0.3626,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 3.7093525179856117,
|
|
"grad_norm": 0.12800236900616,
|
|
"learning_rate": 1.5172779658487539e-05,
|
|
"loss": 0.3609,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 3.710791366906475,
|
|
"grad_norm": 0.11848626804604828,
|
|
"learning_rate": 1.5141283291697587e-05,
|
|
"loss": 0.3676,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 3.712230215827338,
|
|
"grad_norm": 0.11752769024207647,
|
|
"learning_rate": 1.5109812016178053e-05,
|
|
"loss": 0.3605,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 3.7136690647482014,
|
|
"grad_norm": 0.11811200137536339,
|
|
"learning_rate": 1.5078365863694667e-05,
|
|
"loss": 0.3451,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 3.7151079136690646,
|
|
"grad_norm": 0.1177090883568179,
|
|
"learning_rate": 1.5046944865987763e-05,
|
|
"loss": 0.3527,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 3.716546762589928,
|
|
"grad_norm": 0.13251101602237106,
|
|
"learning_rate": 1.501554905477224e-05,
|
|
"loss": 0.3656,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 3.7179856115107914,
|
|
"grad_norm": 0.11989688640698341,
|
|
"learning_rate": 1.4984178461737663e-05,
|
|
"loss": 0.363,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 3.7194244604316546,
|
|
"grad_norm": 0.1026784692318372,
|
|
"learning_rate": 1.4952833118548094e-05,
|
|
"loss": 0.3609,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 3.720863309352518,
|
|
"grad_norm": 0.10451808471519809,
|
|
"learning_rate": 1.492151305684208e-05,
|
|
"loss": 0.3553,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 3.722302158273381,
|
|
"grad_norm": 0.13454611265177935,
|
|
"learning_rate": 1.4890218308232704e-05,
|
|
"loss": 0.3586,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 3.7237410071942447,
|
|
"grad_norm": 0.1035669768180215,
|
|
"learning_rate": 1.4858948904307476e-05,
|
|
"loss": 0.3602,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 3.725179856115108,
|
|
"grad_norm": 0.1197346828955967,
|
|
"learning_rate": 1.4827704876628319e-05,
|
|
"loss": 0.3688,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 3.726618705035971,
|
|
"grad_norm": 0.11395012248409299,
|
|
"learning_rate": 1.4796486256731561e-05,
|
|
"loss": 0.3581,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 3.728057553956835,
|
|
"grad_norm": 0.10644552652396097,
|
|
"learning_rate": 1.4765293076127862e-05,
|
|
"loss": 0.3637,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 3.7294964028776976,
|
|
"grad_norm": 0.12427602594080052,
|
|
"learning_rate": 1.4734125366302224e-05,
|
|
"loss": 0.3584,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 3.7309352517985612,
|
|
"grad_norm": 0.11833744284262779,
|
|
"learning_rate": 1.470298315871392e-05,
|
|
"loss": 0.3507,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 3.7323741007194244,
|
|
"grad_norm": 0.11110463738141138,
|
|
"learning_rate": 1.4671866484796505e-05,
|
|
"loss": 0.3604,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 3.7338129496402876,
|
|
"grad_norm": 0.11989401785135248,
|
|
"learning_rate": 1.4640775375957742e-05,
|
|
"loss": 0.3579,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 3.7352517985611513,
|
|
"grad_norm": 0.10898344084854887,
|
|
"learning_rate": 1.4609709863579622e-05,
|
|
"loss": 0.3589,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 3.7366906474820145,
|
|
"grad_norm": 0.11376066586004366,
|
|
"learning_rate": 1.4578669979018231e-05,
|
|
"loss": 0.3667,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 3.7381294964028777,
|
|
"grad_norm": 0.12674430420028582,
|
|
"learning_rate": 1.454765575360385e-05,
|
|
"loss": 0.3628,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 3.739568345323741,
|
|
"grad_norm": 0.11188650041470716,
|
|
"learning_rate": 1.4516667218640877e-05,
|
|
"loss": 0.3592,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 3.741007194244604,
|
|
"grad_norm": 0.11756966731429112,
|
|
"learning_rate": 1.4485704405407699e-05,
|
|
"loss": 0.3634,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 3.742446043165468,
|
|
"grad_norm": 0.1307529105136656,
|
|
"learning_rate": 1.4454767345156806e-05,
|
|
"loss": 0.3548,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 3.743884892086331,
|
|
"grad_norm": 0.10626212816144823,
|
|
"learning_rate": 1.4423856069114677e-05,
|
|
"loss": 0.358,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 3.7453237410071942,
|
|
"grad_norm": 0.11277400243828548,
|
|
"learning_rate": 1.4392970608481758e-05,
|
|
"loss": 0.3562,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 3.7467625899280574,
|
|
"grad_norm": 0.1158089837480085,
|
|
"learning_rate": 1.4362110994432445e-05,
|
|
"loss": 0.3602,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 3.7482014388489207,
|
|
"grad_norm": 0.10974760310595462,
|
|
"learning_rate": 1.433127725811505e-05,
|
|
"loss": 0.356,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 3.7496402877697843,
|
|
"grad_norm": 0.10737176632294589,
|
|
"learning_rate": 1.4300469430651754e-05,
|
|
"loss": 0.3511,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 3.7510791366906475,
|
|
"grad_norm": 0.10501924501629609,
|
|
"learning_rate": 1.4269687543138594e-05,
|
|
"loss": 0.3618,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 3.7525179856115107,
|
|
"grad_norm": 0.11931109815286713,
|
|
"learning_rate": 1.4238931626645434e-05,
|
|
"loss": 0.3587,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 3.753956834532374,
|
|
"grad_norm": 0.13334531947744938,
|
|
"learning_rate": 1.4208201712215871e-05,
|
|
"loss": 0.3581,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 3.755395683453237,
|
|
"grad_norm": 0.10233870991699558,
|
|
"learning_rate": 1.4177497830867348e-05,
|
|
"loss": 0.3586,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 3.756834532374101,
|
|
"grad_norm": 0.11627191086972338,
|
|
"learning_rate": 1.4146820013590973e-05,
|
|
"loss": 0.3548,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 3.758273381294964,
|
|
"grad_norm": 0.12774094830333566,
|
|
"learning_rate": 1.411616829135153e-05,
|
|
"loss": 0.3446,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 3.7597122302158272,
|
|
"grad_norm": 0.11383805210317897,
|
|
"learning_rate": 1.4085542695087502e-05,
|
|
"loss": 0.3632,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 3.761151079136691,
|
|
"grad_norm": 0.3490873668419385,
|
|
"learning_rate": 1.4054943255710987e-05,
|
|
"loss": 0.3697,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 3.762589928057554,
|
|
"grad_norm": 0.11336341418744804,
|
|
"learning_rate": 1.4024370004107683e-05,
|
|
"loss": 0.3525,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 3.7640287769784173,
|
|
"grad_norm": 0.14174429593645113,
|
|
"learning_rate": 1.3993822971136859e-05,
|
|
"loss": 0.3537,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 3.7654676258992805,
|
|
"grad_norm": 0.12115907628392039,
|
|
"learning_rate": 1.3963302187631316e-05,
|
|
"loss": 0.3676,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 3.7669064748201437,
|
|
"grad_norm": 0.10889457598085418,
|
|
"learning_rate": 1.3932807684397348e-05,
|
|
"loss": 0.3663,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 3.7683453237410074,
|
|
"grad_norm": 0.1354744707348399,
|
|
"learning_rate": 1.3902339492214751e-05,
|
|
"loss": 0.3648,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 3.7697841726618706,
|
|
"grad_norm": 0.10616783880769398,
|
|
"learning_rate": 1.387189764183674e-05,
|
|
"loss": 0.3541,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 3.771223021582734,
|
|
"grad_norm": 0.14617339127289408,
|
|
"learning_rate": 1.384148216398995e-05,
|
|
"loss": 0.3541,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 3.772661870503597,
|
|
"grad_norm": 0.12649799078529905,
|
|
"learning_rate": 1.381109308937441e-05,
|
|
"loss": 0.362,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 3.7741007194244602,
|
|
"grad_norm": 0.15004845796136357,
|
|
"learning_rate": 1.3780730448663456e-05,
|
|
"loss": 0.3521,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 3.775539568345324,
|
|
"grad_norm": 0.12294581185530035,
|
|
"learning_rate": 1.3750394272503775e-05,
|
|
"loss": 0.3505,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 3.776978417266187,
|
|
"grad_norm": 0.11881559288082162,
|
|
"learning_rate": 1.3720084591515374e-05,
|
|
"loss": 0.3639,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 3.7784172661870503,
|
|
"grad_norm": 0.11488628145302997,
|
|
"learning_rate": 1.3689801436291448e-05,
|
|
"loss": 0.3569,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 3.7798561151079135,
|
|
"grad_norm": 0.11242412925423802,
|
|
"learning_rate": 1.365954483739846e-05,
|
|
"loss": 0.3573,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 3.7812949640287767,
|
|
"grad_norm": 0.1309501733347288,
|
|
"learning_rate": 1.3629314825376061e-05,
|
|
"loss": 0.3504,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 3.7827338129496404,
|
|
"grad_norm": 0.11650454717431889,
|
|
"learning_rate": 1.359911143073707e-05,
|
|
"loss": 0.3612,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 3.7841726618705036,
|
|
"grad_norm": 0.11280195740081997,
|
|
"learning_rate": 1.3568934683967427e-05,
|
|
"loss": 0.3614,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 3.785611510791367,
|
|
"grad_norm": 0.12777252083039053,
|
|
"learning_rate": 1.3538784615526188e-05,
|
|
"loss": 0.3574,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 3.7870503597122305,
|
|
"grad_norm": 0.11813874358356513,
|
|
"learning_rate": 1.3508661255845477e-05,
|
|
"loss": 0.3652,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 3.7884892086330937,
|
|
"grad_norm": 0.10331260519598369,
|
|
"learning_rate": 1.3478564635330455e-05,
|
|
"loss": 0.3526,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 3.789928057553957,
|
|
"grad_norm": 0.11869416041355761,
|
|
"learning_rate": 1.344849478435931e-05,
|
|
"loss": 0.3658,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 3.79136690647482,
|
|
"grad_norm": 0.11032829412242892,
|
|
"learning_rate": 1.3418451733283156e-05,
|
|
"loss": 0.3583,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 3.7928057553956833,
|
|
"grad_norm": 0.11583359662606506,
|
|
"learning_rate": 1.3388435512426142e-05,
|
|
"loss": 0.358,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 3.794244604316547,
|
|
"grad_norm": 0.1026743721807789,
|
|
"learning_rate": 1.3358446152085289e-05,
|
|
"loss": 0.3704,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 3.79568345323741,
|
|
"grad_norm": 0.11842345763691707,
|
|
"learning_rate": 1.332848368253048e-05,
|
|
"loss": 0.3682,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 3.7971223021582734,
|
|
"grad_norm": 0.10440475732333304,
|
|
"learning_rate": 1.3298548134004498e-05,
|
|
"loss": 0.3592,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 3.7985611510791366,
|
|
"grad_norm": 0.1050015618768611,
|
|
"learning_rate": 1.326863953672294e-05,
|
|
"loss": 0.3558,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"grad_norm": 0.10235644062579993,
|
|
"learning_rate": 1.3238757920874203e-05,
|
|
"loss": 0.3598,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 3.8014388489208635,
|
|
"grad_norm": 0.10464215000630468,
|
|
"learning_rate": 1.3208903316619436e-05,
|
|
"loss": 0.3611,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 3.8028776978417267,
|
|
"grad_norm": 0.10074734004953428,
|
|
"learning_rate": 1.317907575409254e-05,
|
|
"loss": 0.3581,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 3.80431654676259,
|
|
"grad_norm": 0.10927948957002165,
|
|
"learning_rate": 1.3149275263400116e-05,
|
|
"loss": 0.3552,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 3.805755395683453,
|
|
"grad_norm": 0.10095400949095648,
|
|
"learning_rate": 1.3119501874621437e-05,
|
|
"loss": 0.3515,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 3.8071942446043163,
|
|
"grad_norm": 0.1037942588742392,
|
|
"learning_rate": 1.3089755617808417e-05,
|
|
"loss": 0.3553,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 3.80863309352518,
|
|
"grad_norm": 0.12286708411983675,
|
|
"learning_rate": 1.3060036522985598e-05,
|
|
"loss": 0.3656,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 3.810071942446043,
|
|
"grad_norm": 0.10489733005076696,
|
|
"learning_rate": 1.3030344620150105e-05,
|
|
"loss": 0.3529,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 3.8115107913669064,
|
|
"grad_norm": 0.1241163256324747,
|
|
"learning_rate": 1.3000679939271588e-05,
|
|
"loss": 0.3591,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 3.81294964028777,
|
|
"grad_norm": 0.11428160887154262,
|
|
"learning_rate": 1.2971042510292238e-05,
|
|
"loss": 0.3506,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 3.814388489208633,
|
|
"grad_norm": 0.1186954341479797,
|
|
"learning_rate": 1.2941432363126784e-05,
|
|
"loss": 0.3628,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 3.8158273381294965,
|
|
"grad_norm": 0.10756992056945552,
|
|
"learning_rate": 1.2911849527662335e-05,
|
|
"loss": 0.3643,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 3.8172661870503597,
|
|
"grad_norm": 0.10791438084583001,
|
|
"learning_rate": 1.2882294033758473e-05,
|
|
"loss": 0.364,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 3.818705035971223,
|
|
"grad_norm": 0.12054702187975937,
|
|
"learning_rate": 1.2852765911247227e-05,
|
|
"loss": 0.3528,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 3.8201438848920866,
|
|
"grad_norm": 0.11519627712646355,
|
|
"learning_rate": 1.2823265189932914e-05,
|
|
"loss": 0.3633,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 3.8215827338129498,
|
|
"grad_norm": 0.11243908356900202,
|
|
"learning_rate": 1.2793791899592254e-05,
|
|
"loss": 0.3682,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 3.823021582733813,
|
|
"grad_norm": 0.11731029809957938,
|
|
"learning_rate": 1.2764346069974249e-05,
|
|
"loss": 0.3526,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 3.824460431654676,
|
|
"grad_norm": 0.11080355369777115,
|
|
"learning_rate": 1.2734927730800206e-05,
|
|
"loss": 0.3545,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 3.8258992805755394,
|
|
"grad_norm": 0.12865242012466924,
|
|
"learning_rate": 1.2705536911763665e-05,
|
|
"loss": 0.3615,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 3.827338129496403,
|
|
"grad_norm": 0.10306121671873968,
|
|
"learning_rate": 1.2676173642530417e-05,
|
|
"loss": 0.35,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 3.8287769784172663,
|
|
"grad_norm": 0.10717883464518228,
|
|
"learning_rate": 1.2646837952738382e-05,
|
|
"loss": 0.3529,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 3.8302158273381295,
|
|
"grad_norm": 0.14018587074603728,
|
|
"learning_rate": 1.2617529871997727e-05,
|
|
"loss": 0.3578,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 3.8316546762589927,
|
|
"grad_norm": 0.10347517288119708,
|
|
"learning_rate": 1.2588249429890706e-05,
|
|
"loss": 0.3563,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 3.833093525179856,
|
|
"grad_norm": 0.1191073088776568,
|
|
"learning_rate": 1.2558996655971644e-05,
|
|
"loss": 0.3614,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 3.8345323741007196,
|
|
"grad_norm": 0.09770567865947667,
|
|
"learning_rate": 1.2529771579767024e-05,
|
|
"loss": 0.3622,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 3.8359712230215828,
|
|
"grad_norm": 0.10862119268822773,
|
|
"learning_rate": 1.2500574230775294e-05,
|
|
"loss": 0.3609,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 3.837410071942446,
|
|
"grad_norm": 0.10245004605966078,
|
|
"learning_rate": 1.2471404638466949e-05,
|
|
"loss": 0.3543,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 3.838848920863309,
|
|
"grad_norm": 0.11266943730783477,
|
|
"learning_rate": 1.2442262832284464e-05,
|
|
"loss": 0.362,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 3.8402877697841724,
|
|
"grad_norm": 0.09723345972340638,
|
|
"learning_rate": 1.2413148841642268e-05,
|
|
"loss": 0.35,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 3.841726618705036,
|
|
"grad_norm": 0.10932277746963713,
|
|
"learning_rate": 1.2384062695926713e-05,
|
|
"loss": 0.3549,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 3.8431654676258993,
|
|
"grad_norm": 0.09433828000061141,
|
|
"learning_rate": 1.235500442449605e-05,
|
|
"loss": 0.3457,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 3.8446043165467625,
|
|
"grad_norm": 0.11500350384067237,
|
|
"learning_rate": 1.232597405668039e-05,
|
|
"loss": 0.3542,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 3.846043165467626,
|
|
"grad_norm": 0.11179416212917084,
|
|
"learning_rate": 1.2296971621781677e-05,
|
|
"loss": 0.3497,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 3.8474820143884894,
|
|
"grad_norm": 0.13151272981483575,
|
|
"learning_rate": 1.2267997149073679e-05,
|
|
"loss": 0.3586,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 3.8489208633093526,
|
|
"grad_norm": 0.11677862419145454,
|
|
"learning_rate": 1.2239050667801885e-05,
|
|
"loss": 0.3635,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 3.850359712230216,
|
|
"grad_norm": 0.10933062334990651,
|
|
"learning_rate": 1.2210132207183611e-05,
|
|
"loss": 0.3661,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 3.851798561151079,
|
|
"grad_norm": 0.1216128663162942,
|
|
"learning_rate": 1.2181241796407855e-05,
|
|
"loss": 0.3611,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 3.8532374100719426,
|
|
"grad_norm": 0.10342353436314398,
|
|
"learning_rate": 1.2152379464635264e-05,
|
|
"loss": 0.3595,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 3.854676258992806,
|
|
"grad_norm": 0.10864881699245128,
|
|
"learning_rate": 1.2123545240998182e-05,
|
|
"loss": 0.3568,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 3.856115107913669,
|
|
"grad_norm": 0.10476101319822075,
|
|
"learning_rate": 1.2094739154600616e-05,
|
|
"loss": 0.355,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 3.8575539568345323,
|
|
"grad_norm": 0.13086796881827456,
|
|
"learning_rate": 1.2065961234518096e-05,
|
|
"loss": 0.3525,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 3.8589928057553955,
|
|
"grad_norm": 0.09350912269514637,
|
|
"learning_rate": 1.2037211509797771e-05,
|
|
"loss": 0.3584,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 3.860431654676259,
|
|
"grad_norm": 0.11375920661241448,
|
|
"learning_rate": 1.2008490009458322e-05,
|
|
"loss": 0.3522,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 3.8618705035971224,
|
|
"grad_norm": 0.09855581353227875,
|
|
"learning_rate": 1.1979796762489934e-05,
|
|
"loss": 0.3578,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 3.8633093525179856,
|
|
"grad_norm": 0.09822909604254701,
|
|
"learning_rate": 1.195113179785429e-05,
|
|
"loss": 0.3601,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 3.864748201438849,
|
|
"grad_norm": 0.10060037584726066,
|
|
"learning_rate": 1.1922495144484504e-05,
|
|
"loss": 0.3499,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 3.866187050359712,
|
|
"grad_norm": 0.1156349578996354,
|
|
"learning_rate": 1.1893886831285136e-05,
|
|
"loss": 0.3569,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 3.8676258992805757,
|
|
"grad_norm": 0.10283778432123902,
|
|
"learning_rate": 1.1865306887132122e-05,
|
|
"loss": 0.3496,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 3.869064748201439,
|
|
"grad_norm": 0.09807849964738495,
|
|
"learning_rate": 1.183675534087279e-05,
|
|
"loss": 0.3533,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 3.870503597122302,
|
|
"grad_norm": 0.09971537988093189,
|
|
"learning_rate": 1.1808232221325749e-05,
|
|
"loss": 0.3494,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 3.8719424460431657,
|
|
"grad_norm": 0.09755839112080743,
|
|
"learning_rate": 1.1779737557280985e-05,
|
|
"loss": 0.3563,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 3.873381294964029,
|
|
"grad_norm": 0.10504962071290429,
|
|
"learning_rate": 1.1751271377499736e-05,
|
|
"loss": 0.3552,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 3.874820143884892,
|
|
"grad_norm": 0.09986952917329708,
|
|
"learning_rate": 1.1722833710714454e-05,
|
|
"loss": 0.3604,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 3.8762589928057554,
|
|
"grad_norm": 0.5264225803811349,
|
|
"learning_rate": 1.1694424585628861e-05,
|
|
"loss": 0.3665,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 3.8776978417266186,
|
|
"grad_norm": 0.1082100877371194,
|
|
"learning_rate": 1.166604403091784e-05,
|
|
"loss": 0.3492,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 3.8791366906474822,
|
|
"grad_norm": 0.0958220183370604,
|
|
"learning_rate": 1.1637692075227451e-05,
|
|
"loss": 0.3573,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 3.8805755395683454,
|
|
"grad_norm": 0.11386652658291223,
|
|
"learning_rate": 1.1609368747174883e-05,
|
|
"loss": 0.3627,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 3.8820143884892087,
|
|
"grad_norm": 0.10512839303925268,
|
|
"learning_rate": 1.1581074075348431e-05,
|
|
"loss": 0.3656,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 3.883453237410072,
|
|
"grad_norm": 0.11027057847861182,
|
|
"learning_rate": 1.155280808830746e-05,
|
|
"loss": 0.356,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 3.884892086330935,
|
|
"grad_norm": 0.10262976807413786,
|
|
"learning_rate": 1.15245708145824e-05,
|
|
"loss": 0.3551,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 3.8863309352517987,
|
|
"grad_norm": 0.11198339555215743,
|
|
"learning_rate": 1.1496362282674647e-05,
|
|
"loss": 0.3541,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 3.887769784172662,
|
|
"grad_norm": 0.09885385152955428,
|
|
"learning_rate": 1.1468182521056663e-05,
|
|
"loss": 0.3544,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 3.889208633093525,
|
|
"grad_norm": 0.11589175374952031,
|
|
"learning_rate": 1.1440031558171834e-05,
|
|
"loss": 0.3593,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 3.8906474820143884,
|
|
"grad_norm": 0.10517889280061239,
|
|
"learning_rate": 1.1411909422434441e-05,
|
|
"loss": 0.3533,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 3.8920863309352516,
|
|
"grad_norm": 0.09937887472262578,
|
|
"learning_rate": 1.1383816142229715e-05,
|
|
"loss": 0.3462,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 3.8935251798561152,
|
|
"grad_norm": 0.11530708777635593,
|
|
"learning_rate": 1.1355751745913781e-05,
|
|
"loss": 0.357,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 3.8949640287769784,
|
|
"grad_norm": 0.09649376255140736,
|
|
"learning_rate": 1.1327716261813539e-05,
|
|
"loss": 0.3547,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 3.8964028776978417,
|
|
"grad_norm": 0.1293818147266087,
|
|
"learning_rate": 1.1299709718226745e-05,
|
|
"loss": 0.3682,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 3.897841726618705,
|
|
"grad_norm": 0.11668264038080124,
|
|
"learning_rate": 1.1271732143421992e-05,
|
|
"loss": 0.3629,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 3.899280575539568,
|
|
"grad_norm": 0.11266857712907898,
|
|
"learning_rate": 1.1243783565638533e-05,
|
|
"loss": 0.3586,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 3.9007194244604317,
|
|
"grad_norm": 0.14031280786863934,
|
|
"learning_rate": 1.121586401308643e-05,
|
|
"loss": 0.3624,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 3.902158273381295,
|
|
"grad_norm": 0.09597333095419308,
|
|
"learning_rate": 1.1187973513946417e-05,
|
|
"loss": 0.3561,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 3.903597122302158,
|
|
"grad_norm": 0.09878906761340493,
|
|
"learning_rate": 1.1160112096369913e-05,
|
|
"loss": 0.3667,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 3.905035971223022,
|
|
"grad_norm": 0.09775817204348314,
|
|
"learning_rate": 1.1132279788478977e-05,
|
|
"loss": 0.355,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 3.906474820143885,
|
|
"grad_norm": 0.10797884110402228,
|
|
"learning_rate": 1.1104476618366298e-05,
|
|
"loss": 0.3666,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 3.9079136690647482,
|
|
"grad_norm": 0.10464518470203751,
|
|
"learning_rate": 1.1076702614095116e-05,
|
|
"loss": 0.3477,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 3.9093525179856115,
|
|
"grad_norm": 0.10118356821149127,
|
|
"learning_rate": 1.1048957803699292e-05,
|
|
"loss": 0.367,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 3.9107913669064747,
|
|
"grad_norm": 0.09569305088055104,
|
|
"learning_rate": 1.1021242215183193e-05,
|
|
"loss": 0.3564,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 3.9122302158273383,
|
|
"grad_norm": 0.10455358784915099,
|
|
"learning_rate": 1.0993555876521658e-05,
|
|
"loss": 0.3493,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 3.9136690647482015,
|
|
"grad_norm": 0.10399549848143307,
|
|
"learning_rate": 1.096589881566005e-05,
|
|
"loss": 0.3546,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 3.9151079136690647,
|
|
"grad_norm": 0.1003835075912956,
|
|
"learning_rate": 1.0938271060514162e-05,
|
|
"loss": 0.3541,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 3.916546762589928,
|
|
"grad_norm": 0.11023027609612408,
|
|
"learning_rate": 1.0910672638970206e-05,
|
|
"loss": 0.3569,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 3.917985611510791,
|
|
"grad_norm": 0.09971295331480436,
|
|
"learning_rate": 1.0883103578884784e-05,
|
|
"loss": 0.3534,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 3.919424460431655,
|
|
"grad_norm": 0.10054026540597664,
|
|
"learning_rate": 1.085556390808487e-05,
|
|
"loss": 0.3674,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 3.920863309352518,
|
|
"grad_norm": 0.1018220827569752,
|
|
"learning_rate": 1.082805365436776e-05,
|
|
"loss": 0.3575,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 3.9223021582733812,
|
|
"grad_norm": 0.09009141371922277,
|
|
"learning_rate": 1.0800572845501095e-05,
|
|
"loss": 0.3598,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 3.9237410071942445,
|
|
"grad_norm": 0.12146479165164342,
|
|
"learning_rate": 1.0773121509222712e-05,
|
|
"loss": 0.3665,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 3.9251798561151077,
|
|
"grad_norm": 0.09290392292964182,
|
|
"learning_rate": 1.0745699673240808e-05,
|
|
"loss": 0.3626,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 3.9266187050359713,
|
|
"grad_norm": 0.10532368055585792,
|
|
"learning_rate": 1.0718307365233737e-05,
|
|
"loss": 0.3568,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 3.9280575539568345,
|
|
"grad_norm": 0.11815214970843398,
|
|
"learning_rate": 1.0690944612850052e-05,
|
|
"loss": 0.3612,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 3.9294964028776977,
|
|
"grad_norm": 0.10280329871021558,
|
|
"learning_rate": 1.0663611443708471e-05,
|
|
"loss": 0.3536,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 3.9309352517985614,
|
|
"grad_norm": 0.10407769114156153,
|
|
"learning_rate": 1.0636307885397911e-05,
|
|
"loss": 0.3608,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 3.9323741007194246,
|
|
"grad_norm": 0.10476045377491583,
|
|
"learning_rate": 1.0609033965477318e-05,
|
|
"loss": 0.3584,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 3.933812949640288,
|
|
"grad_norm": 0.1075199208232273,
|
|
"learning_rate": 1.0581789711475752e-05,
|
|
"loss": 0.3568,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 3.935251798561151,
|
|
"grad_norm": 0.09564908267328476,
|
|
"learning_rate": 1.0554575150892386e-05,
|
|
"loss": 0.3631,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 3.9366906474820142,
|
|
"grad_norm": 0.09411574513258825,
|
|
"learning_rate": 1.0527390311196326e-05,
|
|
"loss": 0.3641,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 3.938129496402878,
|
|
"grad_norm": 0.12126363003013729,
|
|
"learning_rate": 1.0500235219826748e-05,
|
|
"loss": 0.3625,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 3.939568345323741,
|
|
"grad_norm": 0.09945556382737729,
|
|
"learning_rate": 1.0473109904192773e-05,
|
|
"loss": 0.3501,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 3.9410071942446043,
|
|
"grad_norm": 0.11413158860654109,
|
|
"learning_rate": 1.0446014391673476e-05,
|
|
"loss": 0.3607,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 3.9424460431654675,
|
|
"grad_norm": 0.09655669712953781,
|
|
"learning_rate": 1.0418948709617846e-05,
|
|
"loss": 0.3536,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 3.9438848920863308,
|
|
"grad_norm": 0.11852095009011772,
|
|
"learning_rate": 1.0391912885344784e-05,
|
|
"loss": 0.3643,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 3.9453237410071944,
|
|
"grad_norm": 0.11556721823902316,
|
|
"learning_rate": 1.0364906946142996e-05,
|
|
"loss": 0.3532,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 3.9467625899280576,
|
|
"grad_norm": 0.10561197829420542,
|
|
"learning_rate": 1.0337930919271094e-05,
|
|
"loss": 0.3573,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 3.948201438848921,
|
|
"grad_norm": 0.12670980952559088,
|
|
"learning_rate": 1.0310984831957471e-05,
|
|
"loss": 0.3628,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 3.949640287769784,
|
|
"grad_norm": 0.09827441688841664,
|
|
"learning_rate": 1.0284068711400254e-05,
|
|
"loss": 0.3517,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 3.9510791366906473,
|
|
"grad_norm": 0.11864119057990981,
|
|
"learning_rate": 1.0257182584767423e-05,
|
|
"loss": 0.3556,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 3.952517985611511,
|
|
"grad_norm": 0.1097874560194306,
|
|
"learning_rate": 1.0230326479196573e-05,
|
|
"loss": 0.3537,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 3.953956834532374,
|
|
"grad_norm": 0.10892763559806799,
|
|
"learning_rate": 1.0203500421795075e-05,
|
|
"loss": 0.3642,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 3.9553956834532373,
|
|
"grad_norm": 0.09406895867825495,
|
|
"learning_rate": 1.017670443963994e-05,
|
|
"loss": 0.3568,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 3.956834532374101,
|
|
"grad_norm": 0.09991161856171779,
|
|
"learning_rate": 1.0149938559777825e-05,
|
|
"loss": 0.3566,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 3.9582733812949638,
|
|
"grad_norm": 0.11133442387626255,
|
|
"learning_rate": 1.0123202809225009e-05,
|
|
"loss": 0.3602,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 3.9597122302158274,
|
|
"grad_norm": 0.36582264941443743,
|
|
"learning_rate": 1.0096497214967349e-05,
|
|
"loss": 0.3631,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 3.9611510791366906,
|
|
"grad_norm": 0.10060646919838295,
|
|
"learning_rate": 1.0069821803960277e-05,
|
|
"loss": 0.3607,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 3.962589928057554,
|
|
"grad_norm": 0.09826432694266087,
|
|
"learning_rate": 1.0043176603128755e-05,
|
|
"loss": 0.3604,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 3.9640287769784175,
|
|
"grad_norm": 0.09180959908885702,
|
|
"learning_rate": 1.0016561639367253e-05,
|
|
"loss": 0.357,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 3.9654676258992807,
|
|
"grad_norm": 0.09520060578992777,
|
|
"learning_rate": 9.989976939539687e-06,
|
|
"loss": 0.3614,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 3.966906474820144,
|
|
"grad_norm": 0.09456249690603385,
|
|
"learning_rate": 9.963422530479496e-06,
|
|
"loss": 0.3541,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 3.968345323741007,
|
|
"grad_norm": 0.09316295392554305,
|
|
"learning_rate": 9.936898438989507e-06,
|
|
"loss": 0.3493,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 3.9697841726618703,
|
|
"grad_norm": 0.09266724959468829,
|
|
"learning_rate": 9.910404691841915e-06,
|
|
"loss": 0.3615,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 3.971223021582734,
|
|
"grad_norm": 0.09922663825579149,
|
|
"learning_rate": 9.883941315778319e-06,
|
|
"loss": 0.3546,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 3.972661870503597,
|
|
"grad_norm": 0.11978379802765968,
|
|
"learning_rate": 9.857508337509692e-06,
|
|
"loss": 0.3631,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 3.9741007194244604,
|
|
"grad_norm": 0.0925039608395731,
|
|
"learning_rate": 9.831105783716266e-06,
|
|
"loss": 0.3562,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 3.9755395683453236,
|
|
"grad_norm": 0.10921882584033765,
|
|
"learning_rate": 9.8047336810476e-06,
|
|
"loss": 0.3579,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 3.976978417266187,
|
|
"grad_norm": 0.09324814965442098,
|
|
"learning_rate": 9.778392056122503e-06,
|
|
"loss": 0.3607,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 3.9784172661870505,
|
|
"grad_norm": 0.09798685924200788,
|
|
"learning_rate": 9.752080935529037e-06,
|
|
"loss": 0.3592,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 3.9798561151079137,
|
|
"grad_norm": 0.10211817737295668,
|
|
"learning_rate": 9.725800345824453e-06,
|
|
"loss": 0.3528,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 3.981294964028777,
|
|
"grad_norm": 0.10672671054358651,
|
|
"learning_rate": 9.699550313535196e-06,
|
|
"loss": 0.3583,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 3.98273381294964,
|
|
"grad_norm": 0.09839746377820723,
|
|
"learning_rate": 9.673330865156875e-06,
|
|
"loss": 0.3503,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 3.9841726618705033,
|
|
"grad_norm": 0.0896695676539772,
|
|
"learning_rate": 9.647142027154222e-06,
|
|
"loss": 0.3595,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 3.985611510791367,
|
|
"grad_norm": 0.11119432610269267,
|
|
"learning_rate": 9.620983825961078e-06,
|
|
"loss": 0.356,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 3.98705035971223,
|
|
"grad_norm": 0.09837614580808235,
|
|
"learning_rate": 9.594856287980323e-06,
|
|
"loss": 0.3528,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 3.9884892086330934,
|
|
"grad_norm": 0.10646557609432261,
|
|
"learning_rate": 9.56875943958396e-06,
|
|
"loss": 0.3527,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 3.989928057553957,
|
|
"grad_norm": 0.09746116964157704,
|
|
"learning_rate": 9.542693307112949e-06,
|
|
"loss": 0.3549,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 3.9913669064748203,
|
|
"grad_norm": 0.09531911049282502,
|
|
"learning_rate": 9.516657916877272e-06,
|
|
"loss": 0.3577,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 3.9928057553956835,
|
|
"grad_norm": 0.08881507626374872,
|
|
"learning_rate": 9.490653295155891e-06,
|
|
"loss": 0.3519,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 3.9942446043165467,
|
|
"grad_norm": 0.09785326754749585,
|
|
"learning_rate": 9.464679468196696e-06,
|
|
"loss": 0.361,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 3.99568345323741,
|
|
"grad_norm": 0.10014891408525683,
|
|
"learning_rate": 9.438736462216496e-06,
|
|
"loss": 0.3556,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 3.9971223021582736,
|
|
"grad_norm": 0.09375527040810888,
|
|
"learning_rate": 9.412824303401003e-06,
|
|
"loss": 0.3499,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 3.998561151079137,
|
|
"grad_norm": 0.094577051908538,
|
|
"learning_rate": 9.38694301790478e-06,
|
|
"loss": 0.3609,
|
|
"step": 2779
},
{
"epoch": 4.0,
"grad_norm": 0.0917167244048322,
"learning_rate": 9.361092631851228e-06,
"loss": 0.3504,
"step": 2780
},
{
"epoch": 4.001438848920864,
"grad_norm": 0.13713041483231528,
"learning_rate": 9.335273171332581e-06,
"loss": 0.3382,
"step": 2781
},
{
"epoch": 4.002877697841726,
"grad_norm": 0.10825650380421963,
"learning_rate": 9.30948466240981e-06,
"loss": 0.3367,
"step": 2782
},
{
"epoch": 4.00431654676259,
"grad_norm": 0.10143207257120908,
"learning_rate": 9.2837271311127e-06,
"loss": 0.3428,
"step": 2783
},
{
"epoch": 4.005755395683453,
"grad_norm": 0.11379848064530074,
"learning_rate": 9.25800060343975e-06,
"loss": 0.3382,
"step": 2784
},
{
"epoch": 4.0071942446043165,
"grad_norm": 0.12002285171386044,
"learning_rate": 9.232305105358139e-06,
"loss": 0.3287,
"step": 2785
},
{
"epoch": 4.00863309352518,
"grad_norm": 0.11895028248151479,
"learning_rate": 9.206640662803746e-06,
"loss": 0.3379,
"step": 2786
},
{
"epoch": 4.010071942446043,
"grad_norm": 0.11282514290583004,
"learning_rate": 9.181007301681135e-06,
"loss": 0.3361,
"step": 2787
},
{
"epoch": 4.011510791366907,
"grad_norm": 0.11139261191751898,
"learning_rate": 9.155405047863439e-06,
"loss": 0.3445,
"step": 2788
},
{
"epoch": 4.012949640287769,
"grad_norm": 0.11129194803145648,
"learning_rate": 9.12983392719243e-06,
"loss": 0.3393,
"step": 2789
},
{
"epoch": 4.014388489208633,
"grad_norm": 0.1101183549401519,
"learning_rate": 9.104293965478446e-06,
"loss": 0.3528,
"step": 2790
},
{
"epoch": 4.015827338129497,
"grad_norm": 0.10703508365752723,
"learning_rate": 9.078785188500378e-06,
"loss": 0.333,
"step": 2791
},
{
"epoch": 4.017266187050359,
"grad_norm": 0.11082001184961557,
"learning_rate": 9.053307622005639e-06,
"loss": 0.3431,
"step": 2792
},
{
"epoch": 4.018705035971223,
"grad_norm": 0.1123439247075706,
"learning_rate": 9.02786129171013e-06,
"loss": 0.34,
"step": 2793
},
{
"epoch": 4.020143884892087,
"grad_norm": 0.10612997383192646,
"learning_rate": 9.002446223298244e-06,
"loss": 0.3439,
"step": 2794
},
{
"epoch": 4.0215827338129495,
"grad_norm": 0.11561413264571645,
"learning_rate": 8.977062442422796e-06,
"loss": 0.3358,
"step": 2795
},
{
"epoch": 4.023021582733813,
"grad_norm": 0.10461507351475334,
"learning_rate": 8.951709974705057e-06,
"loss": 0.3383,
"step": 2796
},
{
"epoch": 4.024460431654676,
"grad_norm": 0.10235116374079214,
"learning_rate": 8.926388845734624e-06,
"loss": 0.3413,
"step": 2797
},
{
"epoch": 4.02589928057554,
"grad_norm": 0.10725138234199226,
"learning_rate": 8.901099081069553e-06,
"loss": 0.3365,
"step": 2798
},
{
"epoch": 4.027338129496403,
"grad_norm": 0.10522063584483221,
"learning_rate": 8.875840706236163e-06,
"loss": 0.3332,
"step": 2799
},
{
"epoch": 4.028776978417266,
"grad_norm": 0.1014543004483598,
"learning_rate": 8.850613746729117e-06,
"loss": 0.3353,
"step": 2800
},
{
"epoch": 4.03021582733813,
|
|
"grad_norm": 0.10491053114697506,
|
|
"learning_rate": 8.825418228011413e-06,
|
|
"loss": 0.3386,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 4.031654676258992,
|
|
"grad_norm": 0.09886245220552668,
|
|
"learning_rate": 8.80025417551424e-06,
|
|
"loss": 0.3386,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 4.033093525179856,
|
|
"grad_norm": 0.10139979892300416,
|
|
"learning_rate": 8.775121614637064e-06,
|
|
"loss": 0.3451,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 4.03453237410072,
|
|
"grad_norm": 0.10575529776846063,
|
|
"learning_rate": 8.750020570747568e-06,
|
|
"loss": 0.3462,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 4.0359712230215825,
|
|
"grad_norm": 0.10671851976333528,
|
|
"learning_rate": 8.724951069181617e-06,
|
|
"loss": 0.3355,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 4.037410071942446,
|
|
"grad_norm": 0.09313958404938776,
|
|
"learning_rate": 8.699913135243237e-06,
|
|
"loss": 0.3397,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 4.038848920863309,
|
|
"grad_norm": 0.10662551188582356,
|
|
"learning_rate": 8.6749067942046e-06,
|
|
"loss": 0.3445,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 4.040287769784173,
|
|
"grad_norm": 0.10490330202404324,
|
|
"learning_rate": 8.649932071305952e-06,
|
|
"loss": 0.3357,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 4.041726618705036,
|
|
"grad_norm": 0.09719854271901346,
|
|
"learning_rate": 8.624988991755687e-06,
|
|
"loss": 0.3446,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 4.043165467625899,
|
|
"grad_norm": 0.10741399795771371,
|
|
"learning_rate": 8.60007758073023e-06,
|
|
"loss": 0.3452,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 4.044604316546763,
|
|
"grad_norm": 0.10364055341241285,
|
|
"learning_rate": 8.575197863374006e-06,
|
|
"loss": 0.342,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 4.046043165467626,
|
|
"grad_norm": 0.10454493746599082,
|
|
"learning_rate": 8.550349864799505e-06,
|
|
"loss": 0.3412,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 4.047482014388489,
|
|
"grad_norm": 0.09107120133027494,
|
|
"learning_rate": 8.525533610087193e-06,
|
|
"loss": 0.331,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 4.048920863309353,
|
|
"grad_norm": 0.10721792701209804,
|
|
"learning_rate": 8.500749124285455e-06,
|
|
"loss": 0.34,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 4.0503597122302155,
|
|
"grad_norm": 0.0998301304216625,
|
|
"learning_rate": 8.475996432410642e-06,
|
|
"loss": 0.341,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 4.051798561151079,
|
|
"grad_norm": 0.08959148021714519,
|
|
"learning_rate": 8.451275559447011e-06,
|
|
"loss": 0.3357,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 4.053237410071943,
|
|
"grad_norm": 0.10160609707382978,
|
|
"learning_rate": 8.426586530346705e-06,
|
|
"loss": 0.3421,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 4.054676258992806,
|
|
"grad_norm": 0.09150415948373967,
|
|
"learning_rate": 8.401929370029708e-06,
|
|
"loss": 0.3446,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 4.056115107913669,
|
|
"grad_norm": 0.10811368980362772,
|
|
"learning_rate": 8.377304103383857e-06,
|
|
"loss": 0.336,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 4.057553956834532,
|
|
"grad_norm": 0.0949710197224721,
|
|
"learning_rate": 8.352710755264786e-06,
|
|
"loss": 0.3394,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 4.058992805755396,
|
|
"grad_norm": 0.09748655592280163,
|
|
"learning_rate": 8.328149350495916e-06,
|
|
"loss": 0.3388,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 4.060431654676259,
|
|
"grad_norm": 0.1075528947458018,
|
|
"learning_rate": 8.303619913868427e-06,
|
|
"loss": 0.3419,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 4.061870503597122,
|
|
"grad_norm": 0.09072048247762113,
|
|
"learning_rate": 8.279122470141208e-06,
|
|
"loss": 0.3291,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 4.063309352517986,
|
|
"grad_norm": 0.10981114370291654,
|
|
"learning_rate": 8.254657044040914e-06,
|
|
"loss": 0.3446,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 4.0647482014388485,
|
|
"grad_norm": 0.0887300625708499,
|
|
"learning_rate": 8.230223660261814e-06,
|
|
"loss": 0.3414,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 4.066187050359712,
|
|
"grad_norm": 0.09762360018897448,
|
|
"learning_rate": 8.205822343465865e-06,
|
|
"loss": 0.3448,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 4.067625899280576,
|
|
"grad_norm": 0.1025088180830334,
|
|
"learning_rate": 8.181453118282694e-06,
|
|
"loss": 0.3362,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 4.069064748201439,
|
|
"grad_norm": 0.09367985061197814,
|
|
"learning_rate": 8.157116009309467e-06,
|
|
"loss": 0.3399,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 4.070503597122302,
|
|
"grad_norm": 0.09846512345845994,
|
|
"learning_rate": 8.132811041110976e-06,
|
|
"loss": 0.348,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 4.071942446043165,
|
|
"grad_norm": 0.10185815945305376,
|
|
"learning_rate": 8.108538238219564e-06,
|
|
"loss": 0.3378,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 4.073381294964029,
|
|
"grad_norm": 0.08781018707536696,
|
|
"learning_rate": 8.084297625135104e-06,
|
|
"loss": 0.3429,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 4.074820143884892,
|
|
"grad_norm": 0.09381709718591656,
|
|
"learning_rate": 8.060089226324987e-06,
|
|
"loss": 0.3392,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 4.076258992805755,
|
|
"grad_norm": 0.0908491375640224,
|
|
"learning_rate": 8.035913066224088e-06,
|
|
"loss": 0.3349,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 4.077697841726619,
|
|
"grad_norm": 0.0990564119163002,
|
|
"learning_rate": 8.0117691692347e-06,
|
|
"loss": 0.3384,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 4.079136690647482,
|
|
"grad_norm": 0.09353821742539495,
|
|
"learning_rate": 7.987657559726628e-06,
|
|
"loss": 0.3369,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 4.080575539568345,
|
|
"grad_norm": 0.09611582901892067,
|
|
"learning_rate": 7.963578262037038e-06,
|
|
"loss": 0.3401,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 4.082014388489209,
|
|
"grad_norm": 0.09036617635445916,
|
|
"learning_rate": 7.939531300470458e-06,
|
|
"loss": 0.3435,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 4.083453237410072,
|
|
"grad_norm": 0.0946922051846469,
|
|
"learning_rate": 7.915516699298847e-06,
|
|
"loss": 0.3503,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 4.084892086330935,
|
|
"grad_norm": 0.09649145129735148,
|
|
"learning_rate": 7.891534482761463e-06,
|
|
"loss": 0.3385,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 4.086330935251799,
|
|
"grad_norm": 0.0871982799173451,
|
|
"learning_rate": 7.867584675064846e-06,
|
|
"loss": 0.3406,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 4.087769784172662,
|
|
"grad_norm": 0.08813555242584366,
|
|
"learning_rate": 7.843667300382863e-06,
|
|
"loss": 0.3354,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 4.089208633093525,
|
|
"grad_norm": 0.0946943585869182,
|
|
"learning_rate": 7.81978238285667e-06,
|
|
"loss": 0.3387,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 4.090647482014388,
|
|
"grad_norm": 0.09124076873160737,
|
|
"learning_rate": 7.795929946594584e-06,
|
|
"loss": 0.3323,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 4.092086330935252,
|
|
"grad_norm": 0.08993659999080425,
|
|
"learning_rate": 7.772110015672209e-06,
|
|
"loss": 0.3322,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 4.093525179856115,
|
|
"grad_norm": 0.1051729512013219,
|
|
"learning_rate": 7.748322614132297e-06,
|
|
"loss": 0.3438,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 4.094964028776978,
|
|
"grad_norm": 0.09275464970186449,
|
|
"learning_rate": 7.72456776598479e-06,
|
|
"loss": 0.3367,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 4.096402877697842,
|
|
"grad_norm": 0.09314292669466309,
|
|
"learning_rate": 7.70084549520676e-06,
|
|
"loss": 0.3371,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 4.097841726618705,
|
|
"grad_norm": 0.08895350700868503,
|
|
"learning_rate": 7.6771558257424e-06,
|
|
"loss": 0.3382,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 4.099280575539568,
|
|
"grad_norm": 0.09138718109977773,
|
|
"learning_rate": 7.653498781502997e-06,
|
|
"loss": 0.3357,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 4.100719424460432,
|
|
"grad_norm": 0.0935380260827374,
|
|
"learning_rate": 7.629874386366918e-06,
|
|
"loss": 0.3319,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 4.102158273381295,
|
|
"grad_norm": 0.09559528855841865,
|
|
"learning_rate": 7.606282664179545e-06,
|
|
"loss": 0.3417,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 4.103597122302158,
|
|
"grad_norm": 0.09418016870565976,
|
|
"learning_rate": 7.5827236387532976e-06,
|
|
"loss": 0.3399,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 4.105035971223022,
|
|
"grad_norm": 0.09420353857545724,
|
|
"learning_rate": 7.559197333867629e-06,
|
|
"loss": 0.3267,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 4.106474820143885,
|
|
"grad_norm": 0.10169377700564375,
|
|
"learning_rate": 7.53570377326891e-06,
|
|
"loss": 0.3417,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 4.107913669064748,
|
|
"grad_norm": 0.09501684854134128,
|
|
"learning_rate": 7.512242980670481e-06,
|
|
"loss": 0.3392,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 4.109352517985611,
|
|
"grad_norm": 0.0953029196449315,
|
|
"learning_rate": 7.488814979752615e-06,
|
|
"loss": 0.338,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 4.110791366906475,
|
|
"grad_norm": 0.09341778047833724,
|
|
"learning_rate": 7.465419794162487e-06,
|
|
"loss": 0.3413,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 4.1122302158273385,
|
|
"grad_norm": 0.08484454492745429,
|
|
"learning_rate": 7.442057447514144e-06,
|
|
"loss": 0.3334,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 4.113669064748201,
|
|
"grad_norm": 0.09491049060272971,
|
|
"learning_rate": 7.418727963388481e-06,
|
|
"loss": 0.3491,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 4.115107913669065,
|
|
"grad_norm": 0.09349999826803333,
|
|
"learning_rate": 7.395431365333241e-06,
|
|
"loss": 0.3435,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 4.116546762589928,
|
|
"grad_norm": 0.08379134201803941,
|
|
"learning_rate": 7.372167676862952e-06,
|
|
"loss": 0.3287,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 4.117985611510791,
|
|
"grad_norm": 0.09231662565659948,
|
|
"learning_rate": 7.348936921458949e-06,
|
|
"loss": 0.3382,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 4.119424460431655,
|
|
"grad_norm": 0.08749389113184515,
|
|
"learning_rate": 7.325739122569282e-06,
|
|
"loss": 0.3418,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 4.120863309352518,
|
|
"grad_norm": 0.08609571283903228,
|
|
"learning_rate": 7.302574303608794e-06,
|
|
"loss": 0.3374,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 4.122302158273381,
|
|
"grad_norm": 0.08947204841869578,
|
|
"learning_rate": 7.279442487959012e-06,
|
|
"loss": 0.3334,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 4.123741007194244,
|
|
"grad_norm": 0.08662526206401704,
|
|
"learning_rate": 7.256343698968131e-06,
|
|
"loss": 0.35,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 4.125179856115108,
|
|
"grad_norm": 0.08776342758635262,
|
|
"learning_rate": 7.233277959951026e-06,
|
|
"loss": 0.3429,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 4.1266187050359715,
|
|
"grad_norm": 0.0885720314053477,
|
|
"learning_rate": 7.210245294189251e-06,
|
|
"loss": 0.336,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 4.128057553956834,
|
|
"grad_norm": 0.0846412177696435,
|
|
"learning_rate": 7.187245724930911e-06,
|
|
"loss": 0.3472,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 4.129496402877698,
|
|
"grad_norm": 0.08676222017879676,
|
|
"learning_rate": 7.164279275390749e-06,
|
|
"loss": 0.3355,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 4.130935251798562,
|
|
"grad_norm": 0.0935179087565233,
|
|
"learning_rate": 7.14134596875006e-06,
|
|
"loss": 0.3393,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 4.132374100719424,
|
|
"grad_norm": 0.08937900061629589,
|
|
"learning_rate": 7.118445828156697e-06,
|
|
"loss": 0.3359,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 4.133812949640288,
|
|
"grad_norm": 0.08682583986929543,
|
|
"learning_rate": 7.0955788767250334e-06,
|
|
"loss": 0.3403,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 4.135251798561151,
|
|
"grad_norm": 0.08791111872352998,
|
|
"learning_rate": 7.0727451375359345e-06,
|
|
"loss": 0.3425,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 4.136690647482014,
|
|
"grad_norm": 0.10425898955521935,
|
|
"learning_rate": 7.049944633636756e-06,
|
|
"loss": 0.3429,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 4.138129496402878,
|
|
"grad_norm": 0.09408970451769752,
|
|
"learning_rate": 7.027177388041311e-06,
|
|
"loss": 0.3393,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 4.139568345323741,
|
|
"grad_norm": 0.10293837548173992,
|
|
"learning_rate": 7.004443423729808e-06,
|
|
"loss": 0.3434,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 4.1410071942446045,
|
|
"grad_norm": 0.09027354419162233,
|
|
"learning_rate": 6.981742763648891e-06,
|
|
"loss": 0.3438,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 4.142446043165467,
|
|
"grad_norm": 0.15024629224054306,
|
|
"learning_rate": 6.959075430711614e-06,
|
|
"loss": 0.3454,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 4.143884892086331,
|
|
"grad_norm": 0.0883437167664882,
|
|
"learning_rate": 6.936441447797335e-06,
|
|
"loss": 0.341,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 4.145323741007195,
|
|
"grad_norm": 0.09266713187580487,
|
|
"learning_rate": 6.913840837751778e-06,
|
|
"loss": 0.3374,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 4.146762589928057,
|
|
"grad_norm": 0.09639907624755323,
|
|
"learning_rate": 6.8912736233870095e-06,
|
|
"loss": 0.3424,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 4.148201438848921,
|
|
"grad_norm": 0.08682283614124682,
|
|
"learning_rate": 6.868739827481335e-06,
|
|
"loss": 0.3327,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 4.149640287769784,
|
|
"grad_norm": 0.08343524321291787,
|
|
"learning_rate": 6.846239472779359e-06,
|
|
"loss": 0.3385,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 4.151079136690647,
|
|
"grad_norm": 0.08691086801769372,
|
|
"learning_rate": 6.82377258199193e-06,
|
|
"loss": 0.336,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 4.152517985611511,
|
|
"grad_norm": 0.08436617180672995,
|
|
"learning_rate": 6.80133917779612e-06,
|
|
"loss": 0.3385,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 4.153956834532374,
|
|
"grad_norm": 0.09489538925457837,
|
|
"learning_rate": 6.778939282835195e-06,
|
|
"loss": 0.341,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 4.1553956834532375,
|
|
"grad_norm": 0.10243162001760892,
|
|
"learning_rate": 6.756572919718611e-06,
|
|
"loss": 0.3427,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 4.1568345323741,
|
|
"grad_norm": 0.08469861790393396,
|
|
"learning_rate": 6.734240111021937e-06,
|
|
"loss": 0.3435,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 4.158273381294964,
|
|
"grad_norm": 0.09120117696792877,
|
|
"learning_rate": 6.711940879286944e-06,
|
|
"loss": 0.3358,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 4.159712230215828,
|
|
"grad_norm": 0.09280202290659313,
|
|
"learning_rate": 6.689675247021461e-06,
|
|
"loss": 0.3449,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 4.16115107913669,
|
|
"grad_norm": 0.08818958513949897,
|
|
"learning_rate": 6.667443236699398e-06,
|
|
"loss": 0.3387,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 4.162589928057554,
|
|
"grad_norm": 0.09317886151055729,
|
|
"learning_rate": 6.64524487076077e-06,
|
|
"loss": 0.347,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 4.164028776978418,
|
|
"grad_norm": 0.09688555437917103,
|
|
"learning_rate": 6.623080171611605e-06,
|
|
"loss": 0.3321,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 4.16546762589928,
|
|
"grad_norm": 0.08885842193368952,
|
|
"learning_rate": 6.600949161623939e-06,
|
|
"loss": 0.3308,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 4.166906474820144,
|
|
"grad_norm": 0.092905678460002,
|
|
"learning_rate": 6.578851863135831e-06,
|
|
"loss": 0.337,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 4.168345323741007,
|
|
"grad_norm": 0.08983236438661389,
|
|
"learning_rate": 6.556788298451291e-06,
|
|
"loss": 0.3376,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 4.1697841726618705,
|
|
"grad_norm": 0.0946212450118569,
|
|
"learning_rate": 6.534758489840296e-06,
|
|
"loss": 0.3396,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 4.171223021582734,
|
|
"grad_norm": 0.09044452937013017,
|
|
"learning_rate": 6.512762459538744e-06,
|
|
"loss": 0.3457,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 4.172661870503597,
|
|
"grad_norm": 0.09136369404616203,
|
|
"learning_rate": 6.49080022974843e-06,
|
|
"loss": 0.3361,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 4.174100719424461,
|
|
"grad_norm": 0.09248931444136521,
|
|
"learning_rate": 6.468871822637051e-06,
|
|
"loss": 0.3349,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 4.175539568345323,
|
|
"grad_norm": 0.09149693760748749,
|
|
"learning_rate": 6.446977260338152e-06,
|
|
"loss": 0.3417,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 4.176978417266187,
|
|
"grad_norm": 0.0917115108583082,
|
|
"learning_rate": 6.425116564951115e-06,
|
|
"loss": 0.342,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 4.178417266187051,
|
|
"grad_norm": 0.08542206903325246,
|
|
"learning_rate": 6.403289758541143e-06,
|
|
"loss": 0.3428,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 4.179856115107913,
|
|
"grad_norm": 0.08628879214933147,
|
|
"learning_rate": 6.381496863139247e-06,
|
|
"loss": 0.3383,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 4.181294964028777,
|
|
"grad_norm": 0.08957513619514115,
|
|
"learning_rate": 6.3597379007421755e-06,
|
|
"loss": 0.3413,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 4.18273381294964,
|
|
"grad_norm": 0.13700097462266245,
|
|
"learning_rate": 6.338012893312444e-06,
|
|
"loss": 0.347,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 4.1841726618705035,
|
|
"grad_norm": 0.08584032522348313,
|
|
"learning_rate": 6.31632186277833e-06,
|
|
"loss": 0.3383,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 4.185611510791367,
|
|
"grad_norm": 0.12333451165583399,
|
|
"learning_rate": 6.294664831033746e-06,
|
|
"loss": 0.3533,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 4.18705035971223,
|
|
"grad_norm": 0.08472017387730195,
|
|
"learning_rate": 6.273041819938343e-06,
|
|
"loss": 0.3424,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 4.188489208633094,
|
|
"grad_norm": 0.08771039626817008,
|
|
"learning_rate": 6.251452851317421e-06,
|
|
"loss": 0.3503,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 4.189928057553957,
|
|
"grad_norm": 0.08982871217898847,
|
|
"learning_rate": 6.229897946961903e-06,
|
|
"loss": 0.3447,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 4.19136690647482,
|
|
"grad_norm": 0.0893331917093145,
|
|
"learning_rate": 6.20837712862834e-06,
|
|
"loss": 0.337,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 4.192805755395684,
|
|
"grad_norm": 0.09008920205309279,
|
|
"learning_rate": 6.186890418038887e-06,
|
|
"loss": 0.3486,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 4.194244604316546,
|
|
"grad_norm": 0.0873452984974155,
|
|
"learning_rate": 6.165437836881256e-06,
|
|
"loss": 0.3388,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 4.19568345323741,
|
|
"grad_norm": 0.08653973212791738,
|
|
"learning_rate": 6.144019406808724e-06,
|
|
"loss": 0.3422,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 4.197122302158274,
|
|
"grad_norm": 0.08777157856151459,
|
|
"learning_rate": 6.122635149440093e-06,
|
|
"loss": 0.3356,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 4.1985611510791365,
|
|
"grad_norm": 0.08742347228978128,
|
|
"learning_rate": 6.101285086359645e-06,
|
|
"loss": 0.3544,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 4.2,
|
|
"grad_norm": 0.09328191718140964,
|
|
"learning_rate": 6.079969239117201e-06,
|
|
"loss": 0.3407,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 4.201438848920863,
|
|
"grad_norm": 0.08698423357007497,
|
|
"learning_rate": 6.05868762922802e-06,
|
|
"loss": 0.3431,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 4.202877697841727,
|
|
"grad_norm": 0.08633140573952361,
|
|
"learning_rate": 6.037440278172782e-06,
|
|
"loss": 0.3438,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 4.20431654676259,
|
|
"grad_norm": 0.09017326966372885,
|
|
"learning_rate": 6.016227207397616e-06,
|
|
"loss": 0.3348,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 4.205755395683453,
|
|
"grad_norm": 0.09147055624219488,
|
|
"learning_rate": 5.995048438314044e-06,
|
|
"loss": 0.3325,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 4.207194244604317,
|
|
"grad_norm": 0.08854855680648924,
|
|
"learning_rate": 5.973903992298962e-06,
|
|
"loss": 0.3416,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 4.2086330935251794,
|
|
"grad_norm": 0.10116816355651234,
|
|
"learning_rate": 5.952793890694617e-06,
|
|
"loss": 0.3414,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 4.210071942446043,
|
|
"grad_norm": 0.09423393260026765,
|
|
"learning_rate": 5.9317181548086055e-06,
|
|
"loss": 0.3352,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 4.211510791366907,
|
|
"grad_norm": 0.08415850306369263,
|
|
"learning_rate": 5.910676805913822e-06,
|
|
"loss": 0.3485,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 4.2129496402877695,
|
|
"grad_norm": 0.10183092899103621,
|
|
"learning_rate": 5.889669865248455e-06,
|
|
"loss": 0.3423,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 4.214388489208633,
|
|
"grad_norm": 0.09043959659686408,
|
|
"learning_rate": 5.8686973540159706e-06,
|
|
"loss": 0.344,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 4.215827338129497,
|
|
"grad_norm": 0.0898633407977311,
|
|
"learning_rate": 5.847759293385075e-06,
|
|
"loss": 0.3388,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 4.21726618705036,
|
|
"grad_norm": 0.08920608984521348,
|
|
"learning_rate": 5.8268557044897175e-06,
|
|
"loss": 0.3468,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 4.218705035971223,
|
|
"grad_norm": 0.09126869349723989,
|
|
"learning_rate": 5.805986608429019e-06,
|
|
"loss": 0.3423,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 4.220143884892086,
|
|
"grad_norm": 0.08270298762424408,
|
|
"learning_rate": 5.785152026267309e-06,
|
|
"loss": 0.3346,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 4.22158273381295,
|
|
"grad_norm": 0.08796078944911688,
|
|
"learning_rate": 5.764351979034102e-06,
|
|
"loss": 0.3353,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 4.223021582733813,
|
|
"grad_norm": 0.0850295236957985,
|
|
"learning_rate": 5.743586487724e-06,
|
|
"loss": 0.3367,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 4.224460431654676,
|
|
"grad_norm": 0.09071534712774458,
|
|
"learning_rate": 5.722855573296775e-06,
|
|
"loss": 0.342,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 4.22589928057554,
|
|
"grad_norm": 0.08817948712514039,
|
|
"learning_rate": 5.702159256677266e-06,
|
|
"loss": 0.3356,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 4.2273381294964025,
|
|
"grad_norm": 0.0905410641726735,
|
|
"learning_rate": 5.681497558755417e-06,
|
|
"loss": 0.3473,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 4.228776978417266,
|
|
"grad_norm": 0.08158967715880118,
|
|
"learning_rate": 5.6608705003862085e-06,
|
|
"loss": 0.3326,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 4.23021582733813,
|
|
"grad_norm": 0.2954280485654759,
|
|
"learning_rate": 5.6402781023896695e-06,
|
|
"loss": 0.3422,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 4.231654676258993,
|
|
"grad_norm": 0.09628113302042636,
|
|
"learning_rate": 5.619720385550835e-06,
|
|
"loss": 0.3401,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 4.233093525179856,
|
|
"grad_norm": 0.09306757530745068,
|
|
"learning_rate": 5.5991973706197445e-06,
|
|
"loss": 0.3356,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 4.234532374100719,
|
|
"grad_norm": 0.08581309979625035,
|
|
"learning_rate": 5.578709078311417e-06,
|
|
"loss": 0.3402,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 4.235971223021583,
|
|
"grad_norm": 0.09106753857909423,
|
|
"learning_rate": 5.558255529305779e-06,
|
|
"loss": 0.3383,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 4.237410071942446,
|
|
"grad_norm": 0.09717330388567412,
|
|
"learning_rate": 5.537836744247753e-06,
|
|
"loss": 0.3402,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 4.238848920863309,
|
|
"grad_norm": 0.08892386649402609,
|
|
"learning_rate": 5.517452743747145e-06,
|
|
"loss": 0.3402,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 4.240287769784173,
|
|
"grad_norm": 0.09556595375942487,
|
|
"learning_rate": 5.497103548378628e-06,
|
|
"loss": 0.341,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 4.2417266187050355,
|
|
"grad_norm": 0.08558442768728346,
|
|
"learning_rate": 5.476789178681769e-06,
|
|
"loss": 0.3336,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 4.243165467625899,
|
|
"grad_norm": 0.0887764992263677,
|
|
"learning_rate": 5.456509655160989e-06,
|
|
"loss": 0.3393,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 4.244604316546763,
|
|
"grad_norm": 0.08739488209434544,
|
|
"learning_rate": 5.436264998285516e-06,
|
|
"loss": 0.3448,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 4.246043165467626,
|
|
"grad_norm": 0.0923635593302988,
|
|
"learning_rate": 5.4160552284894075e-06,
|
|
"loss": 0.3503,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 4.247482014388489,
|
|
"grad_norm": 0.09096814834978915,
|
|
"learning_rate": 5.3958803661714865e-06,
|
|
"loss": 0.3329,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 4.248920863309353,
|
|
"grad_norm": 0.09563463578590477,
|
|
"learning_rate": 5.375740431695353e-06,
|
|
"loss": 0.3424,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 4.250359712230216,
|
|
"grad_norm": 0.6981400502298232,
|
|
"learning_rate": 5.355635445389355e-06,
|
|
"loss": 0.3484,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 4.251798561151079,
|
|
"grad_norm": 0.09191908750713068,
|
|
"learning_rate": 5.3355654275465584e-06,
|
|
"loss": 0.3463,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 4.253237410071942,
|
|
"grad_norm": 0.09563720341466324,
|
|
"learning_rate": 5.315530398424735e-06,
|
|
"loss": 0.3375,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 4.254676258992806,
|
|
"grad_norm": 0.09532016302878546,
|
|
"learning_rate": 5.295530378246354e-06,
|
|
"loss": 0.3453,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 4.256115107913669,
|
|
"grad_norm": 0.08735459338701515,
|
|
"learning_rate": 5.27556538719852e-06,
|
|
"loss": 0.3502,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 4.257553956834532,
|
|
"grad_norm": 0.09067000740336473,
|
|
"learning_rate": 5.2556354454329895e-06,
|
|
"loss": 0.3318,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 4.258992805755396,
|
|
"grad_norm": 0.09181855585969392,
|
|
"learning_rate": 5.235740573066186e-06,
|
|
"loss": 0.3319,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 4.260431654676259,
|
|
"grad_norm": 0.08794907539727558,
|
|
"learning_rate": 5.21588079017906e-06,
|
|
"loss": 0.3397,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 4.261870503597122,
|
|
"grad_norm": 0.088177500764635,
|
|
"learning_rate": 5.196056116817194e-06,
|
|
"loss": 0.3369,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 4.263309352517986,
|
|
"grad_norm": 0.09769645773946038,
|
|
"learning_rate": 5.1762665729907424e-06,
|
|
"loss": 0.34,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 4.264748201438849,
|
|
"grad_norm": 0.08503017773460393,
|
|
"learning_rate": 5.156512178674358e-06,
|
|
"loss": 0.3436,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 4.266187050359712,
|
|
"grad_norm": 0.09539193028819509,
|
|
"learning_rate": 5.136792953807242e-06,
|
|
"loss": 0.3315,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 4.267625899280576,
|
|
"grad_norm": 0.09924080952539983,
|
|
"learning_rate": 5.117108918293095e-06,
|
|
"loss": 0.3362,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 4.269064748201439,
|
|
"grad_norm": 0.08087901612680283,
|
|
"learning_rate": 5.097460092000095e-06,
|
|
"loss": 0.3386,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 4.270503597122302,
|
|
"grad_norm": 0.10167716497154271,
|
|
"learning_rate": 5.07784649476089e-06,
|
|
"loss": 0.3353,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 4.271942446043165,
|
|
"grad_norm": 0.09150376207315622,
|
|
"learning_rate": 5.058268146372562e-06,
|
|
"loss": 0.3471,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 4.273381294964029,
|
|
"grad_norm": 0.08418899919105094,
|
|
"learning_rate": 5.038725066596595e-06,
|
|
"loss": 0.3397,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 4.274820143884892,
|
|
"grad_norm": 0.0800735959924383,
|
|
"learning_rate": 5.019217275158923e-06,
|
|
"loss": 0.3452,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 4.276258992805755,
|
|
"grad_norm": 0.08983262564929755,
|
|
"learning_rate": 4.9997447917498276e-06,
|
|
"loss": 0.3405,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 4.277697841726619,
|
|
"grad_norm": 0.08642274808719454,
|
|
"learning_rate": 4.9803076360239335e-06,
|
|
"loss": 0.3352,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 4.279136690647482,
|
|
"grad_norm": 0.08044266022586027,
|
|
"learning_rate": 4.960905827600266e-06,
|
|
"loss": 0.3408,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 4.280575539568345,
|
|
"grad_norm": 0.08473498739286515,
|
|
"learning_rate": 4.941539386062113e-06,
|
|
"loss": 0.344,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 4.282014388489209,
|
|
"grad_norm": 0.08515420033810775,
|
|
"learning_rate": 4.922208330957094e-06,
|
|
"loss": 0.3402,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 4.283453237410072,
|
|
"grad_norm": 0.0839405529220656,
|
|
"learning_rate": 4.902912681797114e-06,
|
|
"loss": 0.3417,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 4.284892086330935,
|
|
"grad_norm": 0.08315460400153792,
|
|
"learning_rate": 4.88365245805833e-06,
|
|
"loss": 0.3435,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 4.286330935251798,
|
|
"grad_norm": 0.08674623811833201,
|
|
"learning_rate": 4.864427679181143e-06,
|
|
"loss": 0.3304,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 4.287769784172662,
|
|
"grad_norm": 0.09078637286363689,
|
|
"learning_rate": 4.8452383645701815e-06,
|
|
"loss": 0.3371,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 4.2892086330935255,
|
|
"grad_norm": 0.08635534112838321,
|
|
"learning_rate": 4.826084533594277e-06,
|
|
"loss": 0.3375,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 4.290647482014388,
|
|
"grad_norm": 0.08760729145208422,
|
|
"learning_rate": 4.806966205586441e-06,
|
|
"loss": 0.3372,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 4.292086330935252,
|
|
"grad_norm": 0.08574673213028934,
|
|
"learning_rate": 4.787883399843871e-06,
|
|
"loss": 0.3465,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 4.293525179856115,
|
|
"grad_norm": 0.08672626164083187,
|
|
"learning_rate": 4.768836135627859e-06,
|
|
"loss": 0.3424,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 4.294964028776978,
|
|
"grad_norm": 0.08180599921354789,
|
|
"learning_rate": 4.749824432163888e-06,
|
|
"loss": 0.3386,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 4.296402877697842,
|
|
"grad_norm": 0.08238692676033868,
|
|
"learning_rate": 4.730848308641509e-06,
|
|
"loss": 0.3327,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 4.297841726618705,
|
|
"grad_norm": 0.08370292192097512,
|
|
"learning_rate": 4.711907784214358e-06,
|
|
"loss": 0.3413,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 4.299280575539568,
|
|
"grad_norm": 0.07828027615902021,
|
|
"learning_rate": 4.693002878000146e-06,
|
|
"loss": 0.3361,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 4.300719424460432,
|
|
"grad_norm": 0.08036524968541889,
|
|
"learning_rate": 4.674133609080658e-06,
|
|
"loss": 0.3463,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 4.302158273381295,
|
|
"grad_norm": 0.0814107213111417,
|
|
"learning_rate": 4.6552999965016634e-06,
|
|
"loss": 0.3394,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 4.3035971223021585,
|
|
"grad_norm": 0.08372769567501073,
|
|
"learning_rate": 4.6365020592729694e-06,
|
|
"loss": 0.3451,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 4.305035971223021,
|
|
"grad_norm": 0.08068421091497034,
|
|
"learning_rate": 4.617739816368367e-06,
|
|
"loss": 0.3395,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 4.306474820143885,
|
|
"grad_norm": 0.08222448302128223,
|
|
"learning_rate": 4.599013286725624e-06,
|
|
"loss": 0.3417,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 4.307913669064749,
|
|
"grad_norm": 0.07995473955463997,
|
|
"learning_rate": 4.580322489246456e-06,
|
|
"loss": 0.3348,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 4.309352517985611,
|
|
"grad_norm": 0.07696338491769454,
|
|
"learning_rate": 4.5616674427965135e-06,
|
|
"loss": 0.3392,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 4.310791366906475,
|
|
"grad_norm": 0.08277317828807447,
|
|
"learning_rate": 4.543048166205357e-06,
|
|
"loss": 0.3421,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 4.312230215827338,
|
|
"grad_norm": 0.08368969917180806,
|
|
"learning_rate": 4.524464678266452e-06,
|
|
"loss": 0.3391,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 4.313669064748201,
|
|
"grad_norm": 0.08079515503339124,
|
|
"learning_rate": 4.505916997737143e-06,
|
|
"loss": 0.3383,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 4.315107913669065,
|
|
"grad_norm": 0.08013977018188613,
|
|
"learning_rate": 4.487405143338599e-06,
|
|
"loss": 0.3416,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 4.316546762589928,
|
|
"grad_norm": 0.08229305074809554,
|
|
"learning_rate": 4.468929133755881e-06,
|
|
"loss": 0.3343,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 4.3179856115107915,
|
|
"grad_norm": 0.0811550856027192,
|
|
"learning_rate": 4.450488987637824e-06,
|
|
"loss": 0.3431,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 4.319424460431654,
|
|
"grad_norm": 0.08267234198643046,
|
|
"learning_rate": 4.43208472359709e-06,
|
|
"loss": 0.3459,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 4.320863309352518,
|
|
"grad_norm": 0.08228726652965723,
|
|
"learning_rate": 4.4137163602101114e-06,
|
|
"loss": 0.3488,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 4.322302158273382,
|
|
"grad_norm": 0.08091470784514326,
|
|
"learning_rate": 4.3953839160170906e-06,
|
|
"loss": 0.3403,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 4.323741007194244,
|
|
"grad_norm": 0.08210864333416333,
|
|
"learning_rate": 4.377087409521972e-06,
|
|
"loss": 0.3411,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 4.325179856115108,
|
|
"grad_norm": 0.08726373394699848,
|
|
"learning_rate": 4.358826859192422e-06,
|
|
"loss": 0.3322,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 4.326618705035971,
|
|
"grad_norm": 0.0798526982801952,
|
|
"learning_rate": 4.340602283459827e-06,
|
|
"loss": 0.3439,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 4.3280575539568344,
|
|
"grad_norm": 0.08298086490028514,
|
|
"learning_rate": 4.322413700719246e-06,
|
|
"loss": 0.3418,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 4.329496402877698,
|
|
"grad_norm": 0.08763630191050892,
|
|
"learning_rate": 4.3042611293294276e-06,
|
|
"loss": 0.3379,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 4.330935251798561,
|
|
"grad_norm": 0.08297996125525092,
|
|
"learning_rate": 4.28614458761274e-06,
|
|
"loss": 0.3422,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 4.3323741007194245,
|
|
"grad_norm": 0.08299526083382419,
|
|
"learning_rate": 4.2680640938552245e-06,
|
|
"loss": 0.3335,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 4.333812949640288,
|
|
"grad_norm": 0.08261205844770146,
|
|
"learning_rate": 4.250019666306515e-06,
|
|
"loss": 0.3338,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 4.335251798561151,
|
|
"grad_norm": 0.08223953277133327,
|
|
"learning_rate": 4.232011323179839e-06,
|
|
"loss": 0.3409,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 4.336690647482015,
|
|
"grad_norm": 0.07873257694366213,
|
|
"learning_rate": 4.214039082652002e-06,
|
|
"loss": 0.3428,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 4.338129496402877,
|
|
"grad_norm": 0.07839354439067495,
|
|
"learning_rate": 4.1961029628634e-06,
|
|
"loss": 0.3386,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 4.339568345323741,
|
|
"grad_norm": 0.08281022294273022,
|
|
"learning_rate": 4.17820298191792e-06,
|
|
"loss": 0.3362,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 4.341007194244605,
|
|
"grad_norm": 0.08196257544243071,
|
|
"learning_rate": 4.160339157883e-06,
|
|
"loss": 0.3392,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 4.3424460431654675,
|
|
"grad_norm": 0.08296488195374803,
|
|
"learning_rate": 4.142511508789606e-06,
|
|
"loss": 0.3419,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 4.343884892086331,
|
|
"grad_norm": 0.08182522238953462,
|
|
"learning_rate": 4.1247200526321364e-06,
|
|
"loss": 0.3461,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 4.345323741007194,
|
|
"grad_norm": 0.07963338138448188,
|
|
"learning_rate": 4.106964807368496e-06,
|
|
"loss": 0.3444,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 4.3467625899280575,
|
|
"grad_norm": 0.08041943070742388,
|
|
"learning_rate": 4.089245790920031e-06,
|
|
"loss": 0.3392,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 4.348201438848921,
|
|
"grad_norm": 0.0850595920778549,
|
|
"learning_rate": 4.071563021171523e-06,
|
|
"loss": 0.3382,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 4.349640287769784,
|
|
"grad_norm": 0.08531559140313064,
|
|
"learning_rate": 4.0539165159711615e-06,
|
|
"loss": 0.3432,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 4.351079136690648,
|
|
"grad_norm": 0.08336228857660057,
|
|
"learning_rate": 4.036306293130543e-06,
|
|
"loss": 0.3439,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 4.35251798561151,
|
|
"grad_norm": 0.08142790058774997,
|
|
"learning_rate": 4.01873237042461e-06,
|
|
"loss": 0.3356,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 4.353956834532374,
|
|
"grad_norm": 0.08408258055547096,
|
|
"learning_rate": 4.001194765591723e-06,
|
|
"loss": 0.3349,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 4.355395683453238,
|
|
"grad_norm": 0.0807156443354227,
|
|
"learning_rate": 3.983693496333522e-06,
|
|
"loss": 0.3387,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 4.3568345323741005,
|
|
"grad_norm": 0.07893284763050724,
|
|
"learning_rate": 3.966228580315017e-06,
|
|
"loss": 0.3373,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 4.358273381294964,
|
|
"grad_norm": 0.07980566343625019,
|
|
"learning_rate": 3.9488000351645036e-06,
|
|
"loss": 0.337,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 4.359712230215827,
|
|
"grad_norm": 0.07994854701841371,
|
|
"learning_rate": 3.931407878473575e-06,
|
|
"loss": 0.3422,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 4.3611510791366905,
|
|
"grad_norm": 0.07913447986843827,
|
|
"learning_rate": 3.914052127797088e-06,
|
|
"loss": 0.3365,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 4.362589928057554,
|
|
"grad_norm": 0.07690809644896524,
|
|
"learning_rate": 3.8967328006531605e-06,
|
|
"loss": 0.3418,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 4.364028776978417,
|
|
"grad_norm": 0.07942698720013788,
|
|
"learning_rate": 3.879449914523137e-06,
|
|
"loss": 0.3396,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 4.365467625899281,
|
|
"grad_norm": 0.08030872424811798,
|
|
"learning_rate": 3.862203486851588e-06,
|
|
"loss": 0.3373,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 4.366906474820144,
|
|
"grad_norm": 0.07791635455976675,
|
|
"learning_rate": 3.844993535046291e-06,
|
|
"loss": 0.3399,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 4.368345323741007,
|
|
"grad_norm": 0.08819293142672449,
|
|
"learning_rate": 3.8278200764781725e-06,
|
|
"loss": 0.3407,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 4.369784172661871,
|
|
"grad_norm": 0.07821625030673124,
|
|
"learning_rate": 3.8106831284813718e-06,
|
|
"loss": 0.331,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 4.3712230215827335,
|
|
"grad_norm": 0.08071876819805424,
|
|
"learning_rate": 3.7935827083531585e-06,
|
|
"loss": 0.3495,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 4.372661870503597,
|
|
"grad_norm": 0.08015179652048537,
|
|
"learning_rate": 3.7765188333539037e-06,
|
|
"loss": 0.3455,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 4.374100719424461,
|
|
"grad_norm": 0.07778228507768749,
|
|
"learning_rate": 3.759491520707119e-06,
|
|
"loss": 0.3406,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 4.3755395683453235,
|
|
"grad_norm": 0.07979272352009509,
|
|
"learning_rate": 3.74250078759943e-06,
|
|
"loss": 0.3373,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 4.376978417266187,
|
|
"grad_norm": 0.08151237958220892,
|
|
"learning_rate": 3.7255466511805007e-06,
|
|
"loss": 0.3467,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 4.37841726618705,
|
|
"grad_norm": 0.08556890075523273,
|
|
"learning_rate": 3.7086291285630683e-06,
|
|
"loss": 0.3349,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 4.379856115107914,
|
|
"grad_norm": 0.07796801097061706,
|
|
"learning_rate": 3.6917482368229406e-06,
|
|
"loss": 0.34,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 4.381294964028777,
|
|
"grad_norm": 0.07702496532459792,
|
|
"learning_rate": 3.674903992998915e-06,
|
|
"loss": 0.3292,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 4.38273381294964,
|
|
"grad_norm": 0.0800837304466674,
|
|
"learning_rate": 3.6580964140928133e-06,
|
|
"loss": 0.3403,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 4.384172661870504,
|
|
"grad_norm": 0.07635302944953785,
|
|
"learning_rate": 3.6413255170694515e-06,
|
|
"loss": 0.3428,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 4.385611510791367,
|
|
"grad_norm": 0.08873349062176596,
|
|
"learning_rate": 3.6245913188566227e-06,
|
|
"loss": 0.3331,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 4.38705035971223,
|
|
"grad_norm": 0.08036913149247746,
|
|
"learning_rate": 3.607893836345069e-06,
|
|
"loss": 0.3337,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 4.388489208633094,
|
|
"grad_norm": 0.08004969414734284,
|
|
"learning_rate": 3.5912330863884904e-06,
|
|
"loss": 0.3435,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 4.3899280575539565,
|
|
"grad_norm": 0.08316414449666425,
|
|
"learning_rate": 3.574609085803471e-06,
|
|
"loss": 0.3371,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 4.39136690647482,
|
|
"grad_norm": 0.08198315469521096,
|
|
"learning_rate": 3.5580218513695573e-06,
|
|
"loss": 0.3446,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 4.392805755395684,
|
|
"grad_norm": 0.08342839586192755,
|
|
"learning_rate": 3.5414713998291483e-06,
|
|
"loss": 0.3491,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 4.394244604316547,
|
|
"grad_norm": 0.08264581520427611,
|
|
"learning_rate": 3.524957747887512e-06,
|
|
"loss": 0.3357,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 4.39568345323741,
|
|
"grad_norm": 0.08650667581494435,
|
|
"learning_rate": 3.5084809122128125e-06,
|
|
"loss": 0.3366,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 4.397122302158273,
|
|
"grad_norm": 0.07884841898802619,
|
|
"learning_rate": 3.4920409094360054e-06,
|
|
"loss": 0.3327,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 4.398561151079137,
|
|
"grad_norm": 0.08310187268604498,
|
|
"learning_rate": 3.475637756150896e-06,
|
|
"loss": 0.3457,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"grad_norm": 0.07919313263466449,
|
|
"learning_rate": 3.4592714689140895e-06,
|
|
"loss": 0.3361,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 4.401438848920863,
|
|
"grad_norm": 0.08518013919100485,
|
|
"learning_rate": 3.442942064244981e-06,
|
|
"loss": 0.3459,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 4.402877697841727,
|
|
"grad_norm": 0.0774766805097951,
|
|
"learning_rate": 3.426649558625732e-06,
|
|
"loss": 0.3444,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 4.4043165467625895,
|
|
"grad_norm": 0.07923242700690634,
|
|
"learning_rate": 3.4103939685012823e-06,
|
|
"loss": 0.3387,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 4.405755395683453,
|
|
"grad_norm": 0.0827970503455779,
|
|
"learning_rate": 3.3941753102792617e-06,
|
|
"loss": 0.3372,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 4.407194244604317,
|
|
"grad_norm": 0.08243192847071701,
|
|
"learning_rate": 3.377993600330083e-06,
|
|
"loss": 0.3364,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 4.40863309352518,
|
|
"grad_norm": 0.0813750312475761,
|
|
"learning_rate": 3.361848854986831e-06,
|
|
"loss": 0.3447,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 4.410071942446043,
|
|
"grad_norm": 0.08119183981371995,
|
|
"learning_rate": 3.3457410905452624e-06,
|
|
"loss": 0.3333,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 4.411510791366906,
|
|
"grad_norm": 0.07921764063952673,
|
|
"learning_rate": 3.3296703232638606e-06,
|
|
"loss": 0.3431,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 4.41294964028777,
|
|
"grad_norm": 0.08004108156832544,
|
|
"learning_rate": 3.3136365693637294e-06,
|
|
"loss": 0.337,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 4.414388489208633,
|
|
"grad_norm": 0.07902768400573136,
|
|
"learning_rate": 3.297639845028604e-06,
|
|
"loss": 0.3484,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 4.415827338129496,
|
|
"grad_norm": 0.07986747532330556,
|
|
"learning_rate": 3.281680166404857e-06,
|
|
"loss": 0.3428,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 4.41726618705036,
|
|
"grad_norm": 0.0835404492763592,
|
|
"learning_rate": 3.265757549601496e-06,
|
|
"loss": 0.3352,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 4.418705035971223,
|
|
"grad_norm": 0.08147736837521294,
|
|
"learning_rate": 3.249872010690074e-06,
|
|
"loss": 0.3406,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 4.420143884892086,
|
|
"grad_norm": 0.07703189963871299,
|
|
"learning_rate": 3.234023565704738e-06,
|
|
"loss": 0.3394,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 4.42158273381295,
|
|
"grad_norm": 0.08402054419343993,
|
|
"learning_rate": 3.2182122306422035e-06,
|
|
"loss": 0.3371,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 4.423021582733813,
|
|
"grad_norm": 0.08425104631462281,
|
|
"learning_rate": 3.2024380214617136e-06,
|
|
"loss": 0.3341,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 4.424460431654676,
|
|
"grad_norm": 0.08263657553658552,
|
|
"learning_rate": 3.186700954085056e-06,
|
|
"loss": 0.3477,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 4.42589928057554,
|
|
"grad_norm": 0.07705110160468519,
|
|
"learning_rate": 3.1710010443965065e-06,
|
|
"loss": 0.3353,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 4.427338129496403,
|
|
"grad_norm": 0.07680773903432846,
|
|
"learning_rate": 3.1553383082428568e-06,
|
|
"loss": 0.335,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 4.428776978417266,
|
|
"grad_norm": 0.08180455394906352,
|
|
"learning_rate": 3.139712761433367e-06,
|
|
"loss": 0.3359,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 4.430215827338129,
|
|
"grad_norm": 0.08462652095687563,
|
|
"learning_rate": 3.1241244197397626e-06,
|
|
"loss": 0.3398,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 4.431654676258993,
|
|
"grad_norm": 0.08601972764148784,
|
|
"learning_rate": 3.1085732988962003e-06,
|
|
"loss": 0.3349,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 4.433093525179856,
|
|
"grad_norm": 0.08006739834461737,
|
|
"learning_rate": 3.0930594145993063e-06,
|
|
"loss": 0.339,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 4.434532374100719,
|
|
"grad_norm": 0.08144715895142003,
|
|
"learning_rate": 3.077582782508075e-06,
|
|
"loss": 0.3417,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 4.435971223021583,
|
|
"grad_norm": 0.0925231717171499,
|
|
"learning_rate": 3.0621434182439345e-06,
|
|
"loss": 0.3459,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 4.437410071942446,
|
|
"grad_norm": 0.08886946372359183,
|
|
"learning_rate": 3.0467413373906773e-06,
|
|
"loss": 0.3505,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 4.438848920863309,
|
|
"grad_norm": 0.10772731064214743,
|
|
"learning_rate": 3.0313765554944806e-06,
|
|
"loss": 0.3487,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 4.440287769784173,
|
|
"grad_norm": 0.08162841499865223,
|
|
"learning_rate": 3.0160490880638593e-06,
|
|
"loss": 0.3456,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 4.441726618705036,
|
|
"grad_norm": 0.07940361071577196,
|
|
"learning_rate": 3.0007589505696645e-06,
|
|
"loss": 0.344,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 4.443165467625899,
|
|
"grad_norm": 0.08652091112221592,
|
|
"learning_rate": 2.9855061584450795e-06,
|
|
"loss": 0.3407,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 4.444604316546762,
|
|
"grad_norm": 0.08009178167176363,
|
|
"learning_rate": 2.97029072708559e-06,
|
|
"loss": 0.3408,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 4.446043165467626,
|
|
"grad_norm": 0.08338790762048429,
|
|
"learning_rate": 2.955112671848963e-06,
|
|
"loss": 0.3312,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 4.4474820143884894,
|
|
"grad_norm": 0.07643552439314327,
|
|
"learning_rate": 2.9399720080552383e-06,
|
|
"loss": 0.3376,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 4.448920863309352,
|
|
"grad_norm": 0.08145338642453284,
|
|
"learning_rate": 2.924868750986729e-06,
|
|
"loss": 0.3394,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 4.450359712230216,
|
|
"grad_norm": 0.08331752026438048,
|
|
"learning_rate": 2.9098029158879914e-06,
|
|
"loss": 0.3442,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 4.4517985611510795,
|
|
"grad_norm": 0.08788492992032383,
|
|
"learning_rate": 2.8947745179657815e-06,
|
|
"loss": 0.3371,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 4.453237410071942,
|
|
"grad_norm": 0.07930784187531895,
|
|
"learning_rate": 2.8797835723890944e-06,
|
|
"loss": 0.3337,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 4.454676258992806,
|
|
"grad_norm": 0.12006256221162455,
|
|
"learning_rate": 2.864830094289137e-06,
|
|
"loss": 0.3502,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 4.456115107913669,
|
|
"grad_norm": 0.07962340750340086,
|
|
"learning_rate": 2.84991409875925e-06,
|
|
"loss": 0.3424,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 4.457553956834532,
|
|
"grad_norm": 0.08101182360169899,
|
|
"learning_rate": 2.8350356008549806e-06,
|
|
"loss": 0.3388,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 4.458992805755396,
|
|
"grad_norm": 0.07723481623142371,
|
|
"learning_rate": 2.8201946155940142e-06,
|
|
"loss": 0.3353,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 4.460431654676259,
|
|
"grad_norm": 0.08962994589792168,
|
|
"learning_rate": 2.8053911579561764e-06,
|
|
"loss": 0.3483,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 4.4618705035971225,
|
|
"grad_norm": 0.08469748465641859,
|
|
"learning_rate": 2.7906252428834044e-06,
|
|
"loss": 0.3393,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 4.463309352517985,
|
|
"grad_norm": 0.07578960150566837,
|
|
"learning_rate": 2.7758968852797542e-06,
|
|
"loss": 0.3377,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 4.464748201438849,
|
|
"grad_norm": 0.07779776282064302,
|
|
"learning_rate": 2.761206100011369e-06,
|
|
"loss": 0.3387,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 4.4661870503597125,
|
|
"grad_norm": 0.08566669322016698,
|
|
"learning_rate": 2.746552901906463e-06,
|
|
"loss": 0.3399,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 4.467625899280575,
|
|
"grad_norm": 0.08213074178412459,
|
|
"learning_rate": 2.731937305755321e-06,
|
|
"loss": 0.3428,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 4.469064748201439,
|
|
"grad_norm": 0.0806167541835678,
|
|
"learning_rate": 2.717359326310249e-06,
|
|
"loss": 0.3345,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 4.470503597122303,
|
|
"grad_norm": 0.07430145741689023,
|
|
"learning_rate": 2.702818978285633e-06,
|
|
"loss": 0.3342,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 4.471942446043165,
|
|
"grad_norm": 0.07631953476746617,
|
|
"learning_rate": 2.688316276357825e-06,
|
|
"loss": 0.3387,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 4.473381294964029,
|
|
"grad_norm": 0.08522084001774857,
|
|
"learning_rate": 2.6738512351652012e-06,
|
|
"loss": 0.3379,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 4.474820143884892,
|
|
"grad_norm": 0.08431792606462506,
|
|
"learning_rate": 2.65942386930814e-06,
|
|
"loss": 0.3432,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 4.4762589928057555,
|
|
"grad_norm": 0.080558657223605,
|
|
"learning_rate": 2.645034193348961e-06,
|
|
"loss": 0.3474,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 4.477697841726619,
|
|
"grad_norm": 0.07607729398398339,
|
|
"learning_rate": 2.6306822218119533e-06,
|
|
"loss": 0.3366,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 4.479136690647482,
|
|
"grad_norm": 0.08437013026296378,
|
|
"learning_rate": 2.61636796918336e-06,
|
|
"loss": 0.345,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 4.4805755395683455,
|
|
"grad_norm": 0.08561711675412192,
|
|
"learning_rate": 2.6020914499113438e-06,
|
|
"loss": 0.3456,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 4.482014388489208,
|
|
"grad_norm": 0.07956342304595213,
|
|
"learning_rate": 2.587852678405973e-06,
|
|
"loss": 0.3406,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 4.483453237410072,
|
|
"grad_norm": 0.08134459959483603,
|
|
"learning_rate": 2.5736516690392366e-06,
|
|
"loss": 0.3369,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 4.484892086330936,
|
|
"grad_norm": 0.0864412304161587,
|
|
"learning_rate": 2.5594884361449746e-06,
|
|
"loss": 0.3399,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 4.486330935251798,
|
|
"grad_norm": 0.08059530436852871,
|
|
"learning_rate": 2.5453629940189338e-06,
|
|
"loss": 0.3446,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 4.487769784172662,
|
|
"grad_norm": 0.07633772485352214,
|
|
"learning_rate": 2.531275356918701e-06,
|
|
"loss": 0.3328,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 4.489208633093525,
|
|
"grad_norm": 0.07825083057422907,
|
|
"learning_rate": 2.5172255390636878e-06,
|
|
"loss": 0.3356,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 4.4906474820143885,
|
|
"grad_norm": 0.07853405488557741,
|
|
"learning_rate": 2.5032135546351644e-06,
|
|
"loss": 0.3387,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 4.492086330935252,
|
|
"grad_norm": 0.08179052949679409,
|
|
"learning_rate": 2.4892394177761947e-06,
|
|
"loss": 0.3539,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 4.493525179856115,
|
|
"grad_norm": 0.0745566591695839,
|
|
"learning_rate": 2.475303142591634e-06,
|
|
"loss": 0.3412,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 4.4949640287769785,
|
|
"grad_norm": 0.0759304283253562,
|
|
"learning_rate": 2.461404743148141e-06,
|
|
"loss": 0.3395,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 4.496402877697841,
|
|
"grad_norm": 0.08132598699344705,
|
|
"learning_rate": 2.4475442334741306e-06,
|
|
"loss": 0.3433,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 4.497841726618705,
|
|
"grad_norm": 0.0827135794593225,
|
|
"learning_rate": 2.43372162755978e-06,
|
|
"loss": 0.3416,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 4.499280575539569,
|
|
"grad_norm": 0.07755632558475646,
|
|
"learning_rate": 2.419936939357004e-06,
|
|
"loss": 0.3352,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 4.500719424460431,
|
|
"grad_norm": 0.07551850013554301,
|
|
"learning_rate": 2.4061901827794466e-06,
|
|
"loss": 0.331,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 4.502158273381295,
|
|
"grad_norm": 0.08752456471283097,
|
|
"learning_rate": 2.3924813717024663e-06,
|
|
"loss": 0.3346,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 4.503597122302159,
|
|
"grad_norm": 0.08041101552706037,
|
|
"learning_rate": 2.378810519963124e-06,
|
|
"loss": 0.3435,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 4.5050359712230215,
|
|
"grad_norm": 0.08208133026768205,
|
|
"learning_rate": 2.3651776413601634e-06,
|
|
"loss": 0.3414,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 4.506474820143885,
|
|
"grad_norm": 0.08175858558950128,
|
|
"learning_rate": 2.3515827496539823e-06,
|
|
"loss": 0.3434,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 4.507913669064748,
|
|
"grad_norm": 0.07739929862013953,
|
|
"learning_rate": 2.3380258585666793e-06,
|
|
"loss": 0.3381,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 4.5093525179856115,
|
|
"grad_norm": 0.07518688692051177,
|
|
"learning_rate": 2.324506981781949e-06,
|
|
"loss": 0.3317,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 4.510791366906475,
|
|
"grad_norm": 0.07598130570896115,
|
|
"learning_rate": 2.311026132945138e-06,
|
|
"loss": 0.3415,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 4.512230215827338,
|
|
"grad_norm": 0.07647551764392961,
|
|
"learning_rate": 2.297583325663233e-06,
|
|
"loss": 0.3358,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 4.513669064748202,
|
|
"grad_norm": 0.07888399769612829,
|
|
"learning_rate": 2.2841785735047717e-06,
|
|
"loss": 0.3368,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 4.515107913669064,
|
|
"grad_norm": 0.08005429483457568,
|
|
"learning_rate": 2.2708118899999175e-06,
|
|
"loss": 0.3413,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 4.516546762589928,
|
|
"grad_norm": 0.07935650422223063,
|
|
"learning_rate": 2.2574832886403988e-06,
|
|
"loss": 0.3422,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 4.517985611510792,
|
|
"grad_norm": 0.0809845408502723,
|
|
"learning_rate": 2.2441927828795106e-06,
|
|
"loss": 0.3376,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 4.5194244604316545,
|
|
"grad_norm": 0.08178247235152346,
|
|
"learning_rate": 2.230940386132088e-06,
|
|
"loss": 0.3399,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 4.520863309352518,
|
|
"grad_norm": 0.07580755500412786,
|
|
"learning_rate": 2.21772611177451e-06,
|
|
"loss": 0.3425,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 4.522302158273382,
|
|
"grad_norm": 0.07700169983717937,
|
|
"learning_rate": 2.204549973144654e-06,
|
|
"loss": 0.3457,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 4.5237410071942445,
|
|
"grad_norm": 0.08185307025988758,
|
|
"learning_rate": 2.1914119835419358e-06,
|
|
"loss": 0.3443,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 4.525179856115108,
|
|
"grad_norm": 0.07591200217623965,
|
|
"learning_rate": 2.178312156227258e-06,
|
|
"loss": 0.3438,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 4.526618705035971,
|
|
"grad_norm": 0.07719929349244754,
|
|
"learning_rate": 2.1652505044229734e-06,
|
|
"loss": 0.3397,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 4.528057553956835,
|
|
"grad_norm": 0.0816124793734048,
|
|
"learning_rate": 2.1522270413129444e-06,
|
|
"loss": 0.3486,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 4.529496402877697,
|
|
"grad_norm": 0.08446617568048903,
|
|
"learning_rate": 2.1392417800424738e-06,
|
|
"loss": 0.342,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 4.530935251798561,
|
|
"grad_norm": 0.07742795546303136,
|
|
"learning_rate": 2.1262947337182815e-06,
|
|
"loss": 0.3424,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 4.532374100719425,
|
|
"grad_norm": 0.08013949928276871,
|
|
"learning_rate": 2.113385915408546e-06,
|
|
"loss": 0.3394,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 4.5338129496402875,
|
|
"grad_norm": 0.07950227198102502,
|
|
"learning_rate": 2.100515338142839e-06,
|
|
"loss": 0.3354,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 4.535251798561151,
|
|
"grad_norm": 0.07724691661729202,
|
|
"learning_rate": 2.087683014912152e-06,
|
|
"loss": 0.3337,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 4.536690647482015,
|
|
"grad_norm": 0.07792630229385845,
|
|
"learning_rate": 2.0748889586688526e-06,
|
|
"loss": 0.3403,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 4.5381294964028775,
|
|
"grad_norm": 0.07795961651742396,
|
|
"learning_rate": 2.0621331823266777e-06,
|
|
"loss": 0.3338,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 4.539568345323741,
|
|
"grad_norm": 0.08053135224983389,
|
|
"learning_rate": 2.049415698760746e-06,
|
|
"loss": 0.3419,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 4.541007194244604,
|
|
"grad_norm": 0.07766178884622095,
|
|
"learning_rate": 2.036736520807505e-06,
|
|
"loss": 0.3423,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 4.542446043165468,
|
|
"grad_norm": 0.07641908220066557,
|
|
"learning_rate": 2.0240956612647487e-06,
|
|
"loss": 0.3413,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 4.543884892086331,
|
|
"grad_norm": 0.07361499773569952,
|
|
"learning_rate": 2.011493132891591e-06,
|
|
"loss": 0.3322,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 4.545323741007194,
|
|
"grad_norm": 0.08239083777318651,
|
|
"learning_rate": 1.998928948408465e-06,
|
|
"loss": 0.3439,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 4.546762589928058,
|
|
"grad_norm": 0.07859111136130291,
|
|
"learning_rate": 1.9864031204970847e-06,
|
|
"loss": 0.3379,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 4.5482014388489205,
|
|
"grad_norm": 0.0842101824564384,
|
|
"learning_rate": 1.973915661800452e-06,
|
|
"loss": 0.3382,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 4.549640287769784,
|
|
"grad_norm": 0.07524641608552496,
|
|
"learning_rate": 1.9614665849228666e-06,
|
|
"loss": 0.3442,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 4.551079136690648,
|
|
"grad_norm": 0.07641778328753646,
|
|
"learning_rate": 1.949055902429846e-06,
|
|
"loss": 0.3525,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 4.5525179856115106,
|
|
"grad_norm": 0.0839915400791954,
|
|
"learning_rate": 1.936683626848179e-06,
|
|
"loss": 0.3502,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 4.553956834532374,
|
|
"grad_norm": 0.08075484285666798,
|
|
"learning_rate": 1.9243497706658944e-06,
|
|
"loss": 0.3375,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 4.555395683453238,
|
|
"grad_norm": 0.07807078367068927,
|
|
"learning_rate": 1.9120543463322238e-06,
|
|
"loss": 0.3427,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 4.556834532374101,
|
|
"grad_norm": 0.3173383811149354,
|
|
"learning_rate": 1.899797366257614e-06,
|
|
"loss": 0.3454,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 4.558273381294964,
|
|
"grad_norm": 0.07461543433135709,
|
|
"learning_rate": 1.887578842813711e-06,
|
|
"loss": 0.3289,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 4.559712230215827,
|
|
"grad_norm": 0.075642925812048,
|
|
"learning_rate": 1.875398788333347e-06,
|
|
"loss": 0.3471,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 4.561151079136691,
|
|
"grad_norm": 0.07349040600731549,
|
|
"learning_rate": 1.8632572151105189e-06,
|
|
"loss": 0.3321,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 4.5625899280575535,
|
|
"grad_norm": 0.07207173670558871,
|
|
"learning_rate": 1.8511541354003882e-06,
|
|
"loss": 0.3284,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 4.564028776978417,
|
|
"grad_norm": 0.07554191405690153,
|
|
"learning_rate": 1.8390895614192405e-06,
|
|
"loss": 0.3438,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 4.565467625899281,
|
|
"grad_norm": 0.07359507546541226,
|
|
"learning_rate": 1.8270635053445352e-06,
|
|
"loss": 0.3387,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 4.566906474820144,
|
|
"grad_norm": 0.0747236053121895,
|
|
"learning_rate": 1.8150759793148332e-06,
|
|
"loss": 0.3434,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 4.568345323741007,
|
|
"grad_norm": 0.07482777908412824,
|
|
"learning_rate": 1.803126995429789e-06,
|
|
"loss": 0.3446,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 4.569784172661871,
|
|
"grad_norm": 0.07523667576140679,
|
|
"learning_rate": 1.7912165657501779e-06,
|
|
"loss": 0.3454,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 4.571223021582734,
|
|
"grad_norm": 0.07444677781279199,
|
|
"learning_rate": 1.779344702297845e-06,
|
|
"loss": 0.3433,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 4.572661870503597,
|
|
"grad_norm": 0.07217935602989244,
|
|
"learning_rate": 1.767511417055725e-06,
|
|
"loss": 0.3411,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 4.57410071942446,
|
|
"grad_norm": 0.07528132196013555,
|
|
"learning_rate": 1.7557167219678018e-06,
|
|
"loss": 0.3318,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 4.575539568345324,
|
|
"grad_norm": 0.0736760369407776,
|
|
"learning_rate": 1.7439606289391032e-06,
|
|
"loss": 0.3379,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 4.576978417266187,
|
|
"grad_norm": 0.07190879778513005,
|
|
"learning_rate": 1.7322431498357063e-06,
|
|
"loss": 0.3345,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 4.57841726618705,
|
|
"grad_norm": 0.07780013022125608,
|
|
"learning_rate": 1.7205642964847103e-06,
|
|
"loss": 0.344,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 4.579856115107914,
|
|
"grad_norm": 0.07435339381238124,
|
|
"learning_rate": 1.7089240806742147e-06,
|
|
"loss": 0.3331,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 4.581294964028777,
|
|
"grad_norm": 0.07739412376541555,
|
|
"learning_rate": 1.697322514153341e-06,
|
|
"loss": 0.3452,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 4.58273381294964,
|
|
"grad_norm": 0.07733329977370151,
|
|
"learning_rate": 1.6857596086321848e-06,
|
|
"loss": 0.3372,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 4.584172661870504,
|
|
"grad_norm": 0.0784942498266723,
|
|
"learning_rate": 1.6742353757818187e-06,
|
|
"loss": 0.3413,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 4.585611510791367,
|
|
"grad_norm": 0.0798223229311642,
|
|
"learning_rate": 1.6627498272342802e-06,
|
|
"loss": 0.3371,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 4.58705035971223,
|
|
"grad_norm": 0.07648322664300722,
|
|
"learning_rate": 1.6513029745825803e-06,
|
|
"loss": 0.3411,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 4.588489208633094,
|
|
"grad_norm": 0.07476418357376681,
|
|
"learning_rate": 1.6398948293806504e-06,
|
|
"loss": 0.3385,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 4.589928057553957,
|
|
"grad_norm": 0.07562523839402076,
|
|
"learning_rate": 1.6285254031433462e-06,
|
|
"loss": 0.3368,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 4.59136690647482,
|
|
"grad_norm": 0.07340817695448247,
|
|
"learning_rate": 1.6171947073464834e-06,
|
|
"loss": 0.3309,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 4.592805755395683,
|
|
"grad_norm": 0.07618984154382705,
|
|
"learning_rate": 1.6059027534267313e-06,
|
|
"loss": 0.3382,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 4.594244604316547,
|
|
"grad_norm": 0.07339749655492743,
|
|
"learning_rate": 1.594649552781693e-06,
|
|
"loss": 0.3384,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 4.5956834532374105,
|
|
"grad_norm": 0.07672770122165963,
|
|
"learning_rate": 1.5834351167698336e-06,
|
|
"loss": 0.3446,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 4.597122302158273,
|
|
"grad_norm": 0.07626500606386213,
|
|
"learning_rate": 1.572259456710512e-06,
|
|
"loss": 0.3451,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 4.598561151079137,
|
|
"grad_norm": 0.07717505511292386,
|
|
"learning_rate": 1.5611225838839272e-06,
|
|
"loss": 0.3409,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"grad_norm": 0.07117298078393891,
|
|
"learning_rate": 1.550024509531145e-06,
|
|
"loss": 0.338,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 4.601438848920863,
|
|
"grad_norm": 0.07870449278858095,
|
|
"learning_rate": 1.5389652448540537e-06,
|
|
"loss": 0.338,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 4.602877697841727,
|
|
"grad_norm": 0.07671617056283826,
|
|
"learning_rate": 1.527944801015382e-06,
|
|
"loss": 0.3422,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 4.60431654676259,
|
|
"grad_norm": 0.08166686077096344,
|
|
"learning_rate": 1.5169631891386805e-06,
|
|
"loss": 0.3446,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 4.605755395683453,
|
|
"grad_norm": 0.07420698159181902,
|
|
"learning_rate": 1.506020420308274e-06,
|
|
"loss": 0.3344,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 4.607194244604317,
|
|
"grad_norm": 0.07264625710704506,
|
|
"learning_rate": 1.495116505569314e-06,
|
|
"loss": 0.3334,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 4.60863309352518,
|
|
"grad_norm": 0.07863700679279616,
|
|
"learning_rate": 1.4842514559277254e-06,
|
|
"loss": 0.3375,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 4.6100719424460435,
|
|
"grad_norm": 0.07786506165371339,
|
|
"learning_rate": 1.4734252823501894e-06,
|
|
"loss": 0.339,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 4.611510791366906,
|
|
"grad_norm": 0.07848683079739098,
|
|
"learning_rate": 1.4626379957641646e-06,
|
|
"loss": 0.3404,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 4.61294964028777,
|
|
"grad_norm": 0.07272650619602501,
|
|
"learning_rate": 1.451889607057848e-06,
|
|
"loss": 0.338,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 4.614388489208633,
|
|
"grad_norm": 0.07914954016112813,
|
|
"learning_rate": 1.4411801270801885e-06,
|
|
"loss": 0.3467,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 4.615827338129496,
|
|
"grad_norm": 0.07714157293126453,
|
|
"learning_rate": 1.4305095666408453e-06,
|
|
"loss": 0.3322,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 4.61726618705036,
|
|
"grad_norm": 0.1387597489395609,
|
|
"learning_rate": 1.4198779365102077e-06,
|
|
"loss": 0.3329,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 4.618705035971223,
|
|
"grad_norm": 0.07343647519706754,
|
|
"learning_rate": 1.409285247419363e-06,
|
|
"loss": 0.3347,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 4.620143884892086,
|
|
"grad_norm": 0.07361496404583943,
|
|
"learning_rate": 1.3987315100600961e-06,
|
|
"loss": 0.3419,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 4.62158273381295,
|
|
"grad_norm": 0.07255891350641182,
|
|
"learning_rate": 1.3882167350848686e-06,
|
|
"loss": 0.332,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 4.623021582733813,
|
|
"grad_norm": 0.07772898430885429,
|
|
"learning_rate": 1.3777409331068258e-06,
|
|
"loss": 0.3381,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 4.6244604316546765,
|
|
"grad_norm": 0.07496992806173428,
|
|
"learning_rate": 1.3673041146997768e-06,
|
|
"loss": 0.3433,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 4.625899280575539,
|
|
"grad_norm": 0.07540960347793695,
|
|
"learning_rate": 1.35690629039817e-06,
|
|
"loss": 0.3435,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 4.627338129496403,
|
|
"grad_norm": 0.07379174463709964,
|
|
"learning_rate": 1.346547470697095e-06,
|
|
"loss": 0.3425,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 4.6287769784172665,
|
|
"grad_norm": 0.07422793154761914,
|
|
"learning_rate": 1.3362276660522943e-06,
|
|
"loss": 0.337,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 4.630215827338129,
|
|
"grad_norm": 0.07504393110160347,
|
|
"learning_rate": 1.325946886880103e-06,
|
|
"loss": 0.3301,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 4.631654676258993,
|
|
"grad_norm": 0.07517399092897552,
|
|
"learning_rate": 1.315705143557482e-06,
|
|
"loss": 0.3479,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 4.633093525179856,
|
|
"grad_norm": 0.07742645202423526,
|
|
"learning_rate": 1.3055024464219846e-06,
|
|
"loss": 0.3411,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 4.634532374100719,
|
|
"grad_norm": 0.0729927182833301,
|
|
"learning_rate": 1.295338805771751e-06,
|
|
"loss": 0.3458,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 4.635971223021583,
|
|
"grad_norm": 0.0767211095528576,
|
|
"learning_rate": 1.285214231865508e-06,
|
|
"loss": 0.34,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 4.637410071942446,
|
|
"grad_norm": 0.07441491683750855,
|
|
"learning_rate": 1.2751287349225484e-06,
|
|
"loss": 0.3407,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 4.6388489208633095,
|
|
"grad_norm": 0.07910836702752078,
|
|
"learning_rate": 1.2650823251227062e-06,
|
|
"loss": 0.3424,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 4.640287769784173,
|
|
"grad_norm": 0.07143806340470796,
|
|
"learning_rate": 1.255075012606386e-06,
|
|
"loss": 0.3377,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 4.641726618705036,
|
|
"grad_norm": 0.07278214584048101,
|
|
"learning_rate": 1.2451068074745254e-06,
|
|
"loss": 0.3376,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 4.6431654676258995,
|
|
"grad_norm": 0.07443543265237368,
|
|
"learning_rate": 1.2351777197885606e-06,
|
|
"loss": 0.3423,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 4.644604316546762,
|
|
"grad_norm": 0.07598782355833514,
|
|
"learning_rate": 1.2252877595704838e-06,
|
|
"loss": 0.3382,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 4.646043165467626,
|
|
"grad_norm": 0.07525855717177064,
|
|
"learning_rate": 1.2154369368027763e-06,
|
|
"loss": 0.3411,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 4.647482014388489,
|
|
"grad_norm": 0.07135255654629626,
|
|
"learning_rate": 1.2056252614284047e-06,
|
|
"loss": 0.3355,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 4.648920863309352,
|
|
"grad_norm": 0.07772177191161711,
|
|
"learning_rate": 1.1958527433508381e-06,
|
|
"loss": 0.3368,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 4.650359712230216,
|
|
"grad_norm": 0.07415415297379552,
|
|
"learning_rate": 1.1861193924340176e-06,
|
|
"loss": 0.3464,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 4.651798561151079,
|
|
"grad_norm": 0.07195595605097793,
|
|
"learning_rate": 1.176425218502346e-06,
|
|
"loss": 0.3355,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 4.6532374100719425,
|
|
"grad_norm": 0.07512048540608449,
|
|
"learning_rate": 1.1667702313406903e-06,
|
|
"loss": 0.3324,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 4.654676258992806,
|
|
"grad_norm": 0.07618358169975284,
|
|
"learning_rate": 1.1571544406943614e-06,
|
|
"loss": 0.3342,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 4.656115107913669,
|
|
"grad_norm": 0.07529611445137313,
|
|
"learning_rate": 1.147577856269102e-06,
|
|
"loss": 0.3399,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 4.6575539568345325,
|
|
"grad_norm": 0.07352043587904812,
|
|
"learning_rate": 1.1380404877310957e-06,
|
|
"loss": 0.3347,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 4.658992805755395,
|
|
"grad_norm": 0.07423900218181873,
|
|
"learning_rate": 1.1285423447069133e-06,
|
|
"loss": 0.3371,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 4.660431654676259,
|
|
"grad_norm": 0.07460566992546153,
|
|
"learning_rate": 1.1190834367835701e-06,
|
|
"loss": 0.3423,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 4.661870503597123,
|
|
"grad_norm": 0.07741318384984146,
|
|
"learning_rate": 1.1096637735084602e-06,
|
|
"loss": 0.3367,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 4.663309352517985,
|
|
"grad_norm": 0.07184966828873665,
|
|
"learning_rate": 1.1002833643893606e-06,
|
|
"loss": 0.335,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 4.664748201438849,
|
|
"grad_norm": 0.07118947060450682,
|
|
"learning_rate": 1.0909422188944308e-06,
|
|
"loss": 0.3432,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 4.666187050359712,
|
|
"grad_norm": 0.07351627449524818,
|
|
"learning_rate": 1.0816403464522262e-06,
|
|
"loss": 0.3313,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 4.6676258992805755,
|
|
"grad_norm": 0.07662083903900367,
|
|
"learning_rate": 1.0723777564516148e-06,
|
|
"loss": 0.3386,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 4.669064748201439,
|
|
"grad_norm": 0.07225056921018504,
|
|
"learning_rate": 1.0631544582418463e-06,
|
|
"loss": 0.3336,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 4.670503597122302,
|
|
"grad_norm": 0.07635787007874706,
|
|
"learning_rate": 1.0539704611325008e-06,
|
|
"loss": 0.3433,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 4.6719424460431656,
|
|
"grad_norm": 0.07108048461879242,
|
|
"learning_rate": 1.0448257743934964e-06,
|
|
"loss": 0.3334,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 4.673381294964029,
|
|
"grad_norm": 0.07569441958211506,
|
|
"learning_rate": 1.0357204072550676e-06,
|
|
"loss": 0.3329,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 4.674820143884892,
|
|
"grad_norm": 0.07577710999224627,
|
|
"learning_rate": 1.0266543689077602e-06,
|
|
"loss": 0.3418,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 4.676258992805756,
|
|
"grad_norm": 0.07245750403517706,
|
|
"learning_rate": 1.0176276685024233e-06,
|
|
"loss": 0.3373,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 4.677697841726618,
|
|
"grad_norm": 0.07595488539375203,
|
|
"learning_rate": 1.0086403151502088e-06,
|
|
"loss": 0.3434,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 4.679136690647482,
|
|
"grad_norm": 0.07392728377794754,
|
|
"learning_rate": 9.996923179225448e-07,
|
|
"loss": 0.3385,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 4.680575539568346,
|
|
"grad_norm": 0.08646828714653161,
|
|
"learning_rate": 9.90783685851131e-07,
|
|
"loss": 0.3433,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 4.6820143884892085,
|
|
"grad_norm": 0.07385858504891947,
|
|
"learning_rate": 9.81914427927948e-07,
|
|
"loss": 0.3363,
|
|
"step": 3254
|
|
},
|
|
{
|
|
"epoch": 4.683453237410072,
|
|
"grad_norm": 0.0734870825557715,
|
|
"learning_rate": 9.730845531052214e-07,
|
|
"loss": 0.3384,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 4.684892086330935,
|
|
"grad_norm": 0.07196612240682561,
|
|
"learning_rate": 9.642940702954306e-07,
|
|
"loss": 0.3454,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 4.686330935251799,
|
|
"grad_norm": 0.07306287572274306,
|
|
"learning_rate": 9.555429883712963e-07,
|
|
"loss": 0.3364,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 4.687769784172662,
|
|
"grad_norm": 0.07488607500357483,
|
|
"learning_rate": 9.468313161657617e-07,
|
|
"loss": 0.3423,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 4.689208633093525,
|
|
"grad_norm": 0.07680733480946132,
|
|
"learning_rate": 9.381590624719972e-07,
|
|
"loss": 0.3402,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 4.690647482014389,
|
|
"grad_norm": 0.0733057339735744,
|
|
"learning_rate": 9.295262360433921e-07,
|
|
"loss": 0.3359,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 4.692086330935252,
|
|
"grad_norm": 0.07465883269164474,
|
|
"learning_rate": 9.209328455935274e-07,
|
|
"loss": 0.3371,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 4.693525179856115,
|
|
"grad_norm": 0.0744661896429338,
|
|
"learning_rate": 9.123788997961847e-07,
|
|
"loss": 0.3394,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 4.694964028776979,
|
|
"grad_norm": 0.07511771560453312,
|
|
"learning_rate": 9.038644072853331e-07,
|
|
"loss": 0.3314,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 4.6964028776978415,
|
|
"grad_norm": 0.07601342917253862,
|
|
"learning_rate": 8.953893766551203e-07,
|
|
"loss": 0.3405,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 4.697841726618705,
|
|
"grad_norm": 0.07294150254569454,
|
|
"learning_rate": 8.86953816459859e-07,
|
|
"loss": 0.3414,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 4.699280575539568,
|
|
"grad_norm": 0.07567462326529949,
|
|
"learning_rate": 8.785577352140317e-07,
|
|
"loss": 0.3364,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 4.700719424460432,
|
|
"grad_norm": 0.07910672257618175,
|
|
"learning_rate": 8.702011413922506e-07,
|
|
"loss": 0.3411,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 4.702158273381295,
|
|
"grad_norm": 0.07626137074119264,
|
|
"learning_rate": 8.61884043429293e-07,
|
|
"loss": 0.3349,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 4.703597122302158,
|
|
"grad_norm": 0.07618899494016042,
|
|
"learning_rate": 8.536064497200702e-07,
|
|
"loss": 0.3366,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 4.705035971223022,
|
|
"grad_norm": 0.072781587610881,
|
|
"learning_rate": 8.453683686196012e-07,
|
|
"loss": 0.3387,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 4.706474820143885,
|
|
"grad_norm": 0.07743757691805236,
|
|
"learning_rate": 8.371698084430346e-07,
|
|
"loss": 0.3355,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 4.707913669064748,
|
|
"grad_norm": 0.07965771882602032,
|
|
"learning_rate": 8.290107774656441e-07,
|
|
"loss": 0.3435,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 4.709352517985612,
|
|
"grad_norm": 0.07431764305131706,
|
|
"learning_rate": 8.208912839227712e-07,
|
|
"loss": 0.3303,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 4.7107913669064745,
|
|
"grad_norm": 0.07654361286491726,
|
|
"learning_rate": 8.128113360098777e-07,
|
|
"loss": 0.3396,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 4.712230215827338,
|
|
"grad_norm": 0.07239179661063891,
|
|
"learning_rate": 8.047709418824934e-07,
|
|
"loss": 0.3422,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 4.713669064748202,
|
|
"grad_norm": 0.07309768354208275,
|
|
"learning_rate": 7.96770109656233e-07,
|
|
"loss": 0.3456,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 4.715107913669065,
|
|
"grad_norm": 0.07550125672874092,
|
|
"learning_rate": 7.88808847406779e-07,
|
|
"loss": 0.34,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 4.716546762589928,
|
|
"grad_norm": 0.07164010197519863,
|
|
"learning_rate": 7.808871631698723e-07,
|
|
"loss": 0.3467,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 4.717985611510791,
|
|
"grad_norm": 0.07042308057525253,
|
|
"learning_rate": 7.730050649412946e-07,
|
|
"loss": 0.3336,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 4.719424460431655,
|
|
"grad_norm": 0.07904259102678277,
|
|
"learning_rate": 7.651625606768908e-07,
|
|
"loss": 0.3388,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 4.720863309352518,
|
|
"grad_norm": 0.07271438092404038,
|
|
"learning_rate": 7.573596582925291e-07,
|
|
"loss": 0.329,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 4.722302158273381,
|
|
"grad_norm": 0.07826349264953744,
|
|
"learning_rate": 7.495963656641048e-07,
|
|
"loss": 0.3486,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 4.723741007194245,
|
|
"grad_norm": 0.07158695176007285,
|
|
"learning_rate": 7.418726906275497e-07,
|
|
"loss": 0.3445,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 4.725179856115108,
|
|
"grad_norm": 0.07428090944789502,
|
|
"learning_rate": 7.341886409787746e-07,
|
|
"loss": 0.3303,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 4.726618705035971,
|
|
"grad_norm": 0.07543792056934653,
|
|
"learning_rate": 7.265442244737264e-07,
|
|
"loss": 0.3313,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 4.728057553956835,
|
|
"grad_norm": 0.07063401244385142,
|
|
"learning_rate": 7.189394488283307e-07,
|
|
"loss": 0.3396,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 4.729496402877698,
|
|
"grad_norm": 0.07464363486715006,
|
|
"learning_rate": 7.113743217185099e-07,
|
|
"loss": 0.3346,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 4.730935251798561,
|
|
"grad_norm": 0.07519415727264557,
|
|
"learning_rate": 7.0384885078016e-07,
|
|
"loss": 0.3487,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 4.732374100719424,
|
|
"grad_norm": 0.07238800697494471,
|
|
"learning_rate": 6.963630436091518e-07,
|
|
"loss": 0.3384,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 4.733812949640288,
|
|
"grad_norm": 0.35764799861020935,
|
|
"learning_rate": 6.889169077613212e-07,
|
|
"loss": 0.3412,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 4.735251798561151,
|
|
"grad_norm": 0.07142562718720785,
|
|
"learning_rate": 6.815104507524695e-07,
|
|
"loss": 0.3359,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 4.736690647482014,
|
|
"grad_norm": 0.07479273502234299,
|
|
"learning_rate": 6.741436800583367e-07,
|
|
"loss": 0.3475,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 4.738129496402878,
|
|
"grad_norm": 0.07178553850172956,
|
|
"learning_rate": 6.668166031146062e-07,
|
|
"loss": 0.3413,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 4.739568345323741,
|
|
"grad_norm": 0.07249117524241656,
|
|
"learning_rate": 6.595292273169041e-07,
|
|
"loss": 0.3373,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 4.741007194244604,
|
|
"grad_norm": 0.8112901459470919,
|
|
"learning_rate": 6.522815600207866e-07,
|
|
"loss": 0.3429,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 4.742446043165468,
|
|
"grad_norm": 0.0722865459063432,
|
|
"learning_rate": 6.450736085417086e-07,
|
|
"loss": 0.334,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 4.743884892086331,
|
|
"grad_norm": 0.07854644408822169,
|
|
"learning_rate": 6.379053801550594e-07,
|
|
"loss": 0.3361,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 4.745323741007194,
|
|
"grad_norm": 0.07366570052337461,
|
|
"learning_rate": 6.307768820961269e-07,
|
|
"loss": 0.3365,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 4.746762589928058,
|
|
"grad_norm": 0.07098826512538522,
|
|
"learning_rate": 6.236881215600976e-07,
|
|
"loss": 0.3384,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 4.748201438848921,
|
|
"grad_norm": 0.07211764277349217,
|
|
"learning_rate": 6.166391057020438e-07,
|
|
"loss": 0.3379,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 4.749640287769784,
|
|
"grad_norm": 0.07239029722125066,
|
|
"learning_rate": 6.096298416369273e-07,
|
|
"loss": 0.3476,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 4.751079136690647,
|
|
"grad_norm": 0.07328340538376953,
|
|
"learning_rate": 6.026603364395867e-07,
|
|
"loss": 0.3418,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 4.752517985611511,
|
|
"grad_norm": 0.0736017815406655,
|
|
"learning_rate": 5.957305971447192e-07,
|
|
"loss": 0.334,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 4.753956834532374,
|
|
"grad_norm": 0.07740292930619243,
|
|
"learning_rate": 5.888406307468986e-07,
|
|
"loss": 0.3433,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 4.755395683453237,
|
|
"grad_norm": 0.07237403850856305,
|
|
"learning_rate": 5.819904442005442e-07,
|
|
"loss": 0.3397,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 4.756834532374101,
|
|
"grad_norm": 0.07493296433228153,
|
|
"learning_rate": 5.751800444199295e-07,
|
|
"loss": 0.3382,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 4.7582733812949645,
|
|
"grad_norm": 0.07380120973190582,
|
|
"learning_rate": 5.684094382791605e-07,
|
|
"loss": 0.3528,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 4.759712230215827,
|
|
"grad_norm": 0.07682311518971242,
|
|
"learning_rate": 5.616786326121837e-07,
|
|
"loss": 0.3398,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 4.761151079136691,
|
|
"grad_norm": 0.07286471512968075,
|
|
"learning_rate": 5.54987634212778e-07,
|
|
"loss": 0.3378,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 4.762589928057554,
|
|
"grad_norm": 0.07328395710084569,
|
|
"learning_rate": 5.483364498345279e-07,
|
|
"loss": 0.3342,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 4.764028776978417,
|
|
"grad_norm": 0.07072044453818775,
|
|
"learning_rate": 5.417250861908452e-07,
|
|
"loss": 0.3398,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 4.76546762589928,
|
|
"grad_norm": 0.073515313330534,
|
|
"learning_rate": 5.351535499549387e-07,
|
|
"loss": 0.3407,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 4.766906474820144,
|
|
"grad_norm": 0.07423139154998544,
|
|
"learning_rate": 5.286218477598226e-07,
|
|
"loss": 0.3431,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 4.768345323741007,
|
|
"grad_norm": 0.07452704365897643,
|
|
"learning_rate": 5.221299861983075e-07,
|
|
"loss": 0.3381,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 4.76978417266187,
|
|
"grad_norm": 0.0714005043922764,
|
|
"learning_rate": 5.156779718229787e-07,
|
|
"loss": 0.3392,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 4.771223021582734,
|
|
"grad_norm": 0.07578586539590883,
|
|
"learning_rate": 5.092658111462179e-07,
|
|
"loss": 0.3375,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 4.7726618705035975,
|
|
"grad_norm": 0.07281137702224516,
|
|
"learning_rate": 5.028935106401678e-07,
|
|
"loss": 0.3406,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 4.77410071942446,
|
|
"grad_norm": 0.07642975839486812,
|
|
"learning_rate": 4.965610767367413e-07,
|
|
"loss": 0.338,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 4.775539568345324,
|
|
"grad_norm": 0.07196386685696313,
|
|
"learning_rate": 4.902685158276078e-07,
|
|
"loss": 0.3423,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 4.7769784172661875,
|
|
"grad_norm": 0.07257624387987198,
|
|
"learning_rate": 4.840158342642021e-07,
|
|
"loss": 0.3379,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 4.77841726618705,
|
|
"grad_norm": 0.07344158188188277,
|
|
"learning_rate": 4.778030383577026e-07,
|
|
"loss": 0.3372,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 4.779856115107914,
|
|
"grad_norm": 0.07417159476456575,
|
|
"learning_rate": 4.716301343790175e-07,
|
|
"loss": 0.3492,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 4.781294964028777,
|
|
"grad_norm": 0.0762812725214866,
|
|
"learning_rate": 4.6549712855879837e-07,
|
|
"loss": 0.3304,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 4.78273381294964,
|
|
"grad_norm": 0.07123416876768943,
|
|
"learning_rate": 4.5940402708744005e-07,
|
|
"loss": 0.3398,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 4.784172661870503,
|
|
"grad_norm": 0.07393372087953605,
|
|
"learning_rate": 4.5335083611502293e-07,
|
|
"loss": 0.3478,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 4.785611510791367,
|
|
"grad_norm": 0.0738600358624016,
|
|
"learning_rate": 4.473375617513842e-07,
|
|
"loss": 0.3441,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 4.7870503597122305,
|
|
"grad_norm": 0.07376542983928906,
|
|
"learning_rate": 4.41364210066042e-07,
|
|
"loss": 0.3388,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 4.788489208633093,
|
|
"grad_norm": 0.07177589250051176,
|
|
"learning_rate": 4.3543078708823126e-07,
|
|
"loss": 0.3426,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 4.789928057553957,
|
|
"grad_norm": 0.0725422540926415,
|
|
"learning_rate": 4.295372988068813e-07,
|
|
"loss": 0.3446,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 4.7913669064748206,
|
|
"grad_norm": 0.07153068209529502,
|
|
"learning_rate": 4.2368375117062043e-07,
|
|
"loss": 0.3397,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 4.792805755395683,
|
|
"grad_norm": 0.07299880611422803,
|
|
"learning_rate": 4.178701500877491e-07,
|
|
"loss": 0.332,
|
|
"step": 3331
|
|
},
|
|
{
|
|
"epoch": 4.794244604316547,
|
|
"grad_norm": 0.07097634636391109,
|
|
"learning_rate": 4.120965014262579e-07,
|
|
"loss": 0.3437,
|
|
"step": 3332
|
|
},
|
|
{
|
|
"epoch": 4.79568345323741,
|
|
"grad_norm": 0.07292469412024352,
|
|
"learning_rate": 4.063628110138096e-07,
|
|
"loss": 0.3447,
|
|
"step": 3333
|
|
},
|
|
{
|
|
"epoch": 4.797122302158273,
|
|
"grad_norm": 0.07000660025133346,
|
|
"learning_rate": 4.0066908463772593e-07,
|
|
"loss": 0.3342,
|
|
"step": 3334
|
|
},
|
|
{
|
|
"epoch": 4.798561151079137,
|
|
"grad_norm": 0.07641738103959955,
|
|
"learning_rate": 3.9501532804500974e-07,
|
|
"loss": 0.3445,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"grad_norm": 0.07303678100771258,
|
|
"learning_rate": 3.894015469423007e-07,
|
|
"loss": 0.3422,
|
|
"step": 3336
|
|
},
|
|
{
|
|
"epoch": 4.8014388489208635,
|
|
"grad_norm": 0.07050192283800265,
|
|
"learning_rate": 3.838277469958973e-07,
|
|
"loss": 0.3397,
|
|
"step": 3337
|
|
},
|
|
{
|
|
"epoch": 4.802877697841726,
|
|
"grad_norm": 0.0738892380252446,
|
|
"learning_rate": 3.7829393383174375e-07,
|
|
"loss": 0.3464,
|
|
"step": 3338
|
|
},
|
|
{
|
|
"epoch": 4.80431654676259,
|
|
"grad_norm": 0.0724670159391283,
|
|
"learning_rate": 3.7280011303542084e-07,
|
|
"loss": 0.341,
|
|
"step": 3339
|
|
},
|
|
{
|
|
"epoch": 4.805755395683454,
|
|
"grad_norm": 0.0751690109170617,
|
|
"learning_rate": 3.673462901521463e-07,
|
|
"loss": 0.3369,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 4.807194244604316,
|
|
"grad_norm": 0.06911355963625368,
|
|
"learning_rate": 3.619324706867655e-07,
|
|
"loss": 0.3378,
|
|
"step": 3341
|
|
},
|
|
{
|
|
"epoch": 4.80863309352518,
|
|
"grad_norm": 0.5247530226428313,
|
|
"learning_rate": 3.5655866010373853e-07,
|
|
"loss": 0.341,
|
|
"step": 3342
|
|
},
|
|
{
|
|
"epoch": 4.810071942446044,
|
|
"grad_norm": 0.07443038217705054,
|
|
"learning_rate": 3.5122486382715314e-07,
|
|
"loss": 0.3413,
|
|
"step": 3343
|
|
},
|
|
{
|
|
"epoch": 4.811510791366906,
|
|
"grad_norm": 0.07494878528269994,
|
|
"learning_rate": 3.459310872407029e-07,
|
|
"loss": 0.3377,
|
|
"step": 3344
|
|
},
|
|
{
|
|
"epoch": 4.81294964028777,
|
|
"grad_norm": 0.07153024713317388,
|
|
"learning_rate": 3.4067733568768246e-07,
|
|
"loss": 0.3464,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 4.814388489208633,
|
|
"grad_norm": 0.07267467926066022,
|
|
"learning_rate": 3.3546361447099664e-07,
|
|
"loss": 0.3416,
|
|
"step": 3346
|
|
},
|
|
{
|
|
"epoch": 4.8158273381294965,
|
|
"grad_norm": 0.07242482460224721,
|
|
"learning_rate": 3.3028992885314247e-07,
|
|
"loss": 0.3435,
|
|
"step": 3347
|
|
},
|
|
{
|
|
"epoch": 4.817266187050359,
|
|
"grad_norm": 0.0695779281123221,
|
|
"learning_rate": 3.2515628405620503e-07,
|
|
"loss": 0.3368,
|
|
"step": 3348
|
|
},
|
|
{
|
|
"epoch": 4.818705035971223,
|
|
"grad_norm": 0.06903721044225027,
|
|
"learning_rate": 3.2006268526184824e-07,
|
|
"loss": 0.3392,
|
|
"step": 3349
|
|
},
|
|
{
|
|
"epoch": 4.820143884892087,
|
|
"grad_norm": 0.07086480803592914,
|
|
"learning_rate": 3.150091376113329e-07,
|
|
"loss": 0.3331,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 4.821582733812949,
|
|
"grad_norm": 0.0749622825241667,
|
|
"learning_rate": 3.0999564620547207e-07,
|
|
"loss": 0.3391,
|
|
"step": 3351
|
|
},
|
|
{
|
|
"epoch": 4.823021582733813,
|
|
"grad_norm": 0.0725005489385286,
|
|
"learning_rate": 3.0502221610465786e-07,
|
|
"loss": 0.3509,
|
|
"step": 3352
|
|
},
|
|
{
|
|
"epoch": 4.824460431654677,
|
|
"grad_norm": 0.07380310521158122,
|
|
"learning_rate": 3.0008885232886144e-07,
|
|
"loss": 0.3419,
|
|
"step": 3353
|
|
},
|
|
{
|
|
"epoch": 4.825899280575539,
|
|
"grad_norm": 0.07084506708605391,
|
|
"learning_rate": 2.95195559857584e-07,
|
|
"loss": 0.3304,
|
|
"step": 3354
|
|
},
|
|
{
|
|
"epoch": 4.827338129496403,
|
|
"grad_norm": 0.0701086583352957,
|
|
"learning_rate": 2.9034234362989687e-07,
|
|
"loss": 0.3377,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 4.828776978417266,
|
|
"grad_norm": 0.07099288505354957,
|
|
"learning_rate": 2.855292085444239e-07,
|
|
"loss": 0.3423,
|
|
"step": 3356
|
|
},
|
|
{
|
|
"epoch": 4.8302158273381295,
|
|
"grad_norm": 0.1285097978042475,
|
|
"learning_rate": 2.8075615945932333e-07,
|
|
"loss": 0.3401,
|
|
"step": 3357
|
|
},
|
|
{
|
|
"epoch": 4.831654676258993,
|
|
"grad_norm": 0.07296833851731571,
|
|
"learning_rate": 2.7602320119229254e-07,
|
|
"loss": 0.3364,
|
|
"step": 3358
|
|
},
|
|
{
|
|
"epoch": 4.833093525179856,
|
|
"grad_norm": 0.07325568868025965,
|
|
"learning_rate": 2.7133033852057675e-07,
|
|
"loss": 0.3448,
|
|
"step": 3359
|
|
},
|
|
{
|
|
"epoch": 4.83453237410072,
|
|
"grad_norm": 0.07062146195928415,
|
|
"learning_rate": 2.666775761809337e-07,
|
|
"loss": 0.3412,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 4.835971223021582,
|
|
"grad_norm": 0.0709098406472014,
|
|
"learning_rate": 2.620649188696511e-07,
|
|
"loss": 0.341,
|
|
"step": 3361
|
|
},
|
|
{
|
|
"epoch": 4.837410071942446,
|
|
"grad_norm": 0.07405077359731799,
|
|
"learning_rate": 2.574923712425426e-07,
|
|
"loss": 0.3463,
|
|
"step": 3362
|
|
},
|
|
{
|
|
"epoch": 4.83884892086331,
|
|
"grad_norm": 0.07637014507461627,
|
|
"learning_rate": 2.52959937914925e-07,
|
|
"loss": 0.3466,
|
|
"step": 3363
|
|
},
|
|
{
|
|
"epoch": 4.840287769784172,
|
|
"grad_norm": 0.07213843218307364,
|
|
"learning_rate": 2.484676234616412e-07,
|
|
"loss": 0.3457,
|
|
"step": 3364
|
|
},
|
|
{
|
|
"epoch": 4.841726618705036,
|
|
"grad_norm": 0.07236647626947157,
|
|
"learning_rate": 2.440154324170285e-07,
|
|
"loss": 0.3427,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 4.8431654676259,
|
|
"grad_norm": 0.07298977673984589,
|
|
"learning_rate": 2.3960336927492333e-07,
|
|
"loss": 0.3427,
|
|
"step": 3366
|
|
},
|
|
{
|
|
"epoch": 4.8446043165467625,
|
|
"grad_norm": 0.07225342788634946,
|
|
"learning_rate": 2.3523143848867003e-07,
|
|
"loss": 0.3363,
|
|
"step": 3367
|
|
},
|
|
{
|
|
"epoch": 4.846043165467626,
|
|
"grad_norm": 0.07327398551342061,
|
|
"learning_rate": 2.3089964447109425e-07,
|
|
"loss": 0.3357,
|
|
"step": 3368
|
|
},
|
|
{
|
|
"epoch": 4.847482014388489,
|
|
"grad_norm": 0.07230004596197907,
|
|
"learning_rate": 2.2660799159451629e-07,
|
|
"loss": 0.3375,
|
|
"step": 3369
|
|
},
|
|
{
|
|
"epoch": 4.848920863309353,
|
|
"grad_norm": 0.0717989156609734,
|
|
"learning_rate": 2.2235648419073773e-07,
|
|
"loss": 0.3345,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 4.850359712230215,
|
|
"grad_norm": 0.07221183524270315,
|
|
"learning_rate": 2.1814512655103703e-07,
|
|
"loss": 0.3381,
|
|
"step": 3371
|
|
},
|
|
{
|
|
"epoch": 4.851798561151079,
|
|
"grad_norm": 0.06844619507605493,
|
|
"learning_rate": 2.1397392292617392e-07,
|
|
"loss": 0.3344,
|
|
"step": 3372
|
|
},
|
|
{
|
|
"epoch": 4.853237410071943,
|
|
"grad_norm": 0.07414004997346904,
|
|
"learning_rate": 2.0984287752636722e-07,
|
|
"loss": 0.3396,
|
|
"step": 3373
|
|
},
|
|
{
|
|
"epoch": 4.854676258992805,
|
|
"grad_norm": 0.07363538673888881,
|
|
"learning_rate": 2.0575199452131268e-07,
|
|
"loss": 0.3381,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 4.856115107913669,
|
|
"grad_norm": 0.06976212658966592,
|
|
"learning_rate": 2.017012780401606e-07,
|
|
"loss": 0.3387,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 4.857553956834533,
|
|
"grad_norm": 0.07200059699956848,
|
|
"learning_rate": 1.9769073217152933e-07,
|
|
"loss": 0.3348,
|
|
"step": 3376
|
|
},
|
|
{
|
|
"epoch": 4.8589928057553955,
|
|
"grad_norm": 0.07186316129623146,
|
|
"learning_rate": 1.9372036096347414e-07,
|
|
"loss": 0.3502,
|
|
"step": 3377
|
|
},
|
|
{
|
|
"epoch": 4.860431654676259,
|
|
"grad_norm": 0.07198447493054352,
|
|
"learning_rate": 1.8979016842350928e-07,
|
|
"loss": 0.3348,
|
|
"step": 3378
|
|
},
|
|
{
|
|
"epoch": 4.861870503597122,
|
|
"grad_norm": 0.07437674615300546,
|
|
"learning_rate": 1.8590015851860376e-07,
|
|
"loss": 0.3347,
|
|
"step": 3379
|
|
},
|
|
{
|
|
"epoch": 4.863309352517986,
|
|
"grad_norm": 0.07381920175626645,
|
|
"learning_rate": 1.8205033517515015e-07,
|
|
"loss": 0.3436,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 4.864748201438849,
|
|
"grad_norm": 0.07051114068633912,
|
|
"learning_rate": 1.7824070227899115e-07,
|
|
"loss": 0.3399,
|
|
"step": 3381
|
|
},
|
|
{
|
|
"epoch": 4.866187050359712,
|
|
"grad_norm": 0.07339410689034713,
|
|
"learning_rate": 1.7447126367539313e-07,
|
|
"loss": 0.3445,
|
|
"step": 3382
|
|
},
|
|
{
|
|
"epoch": 4.867625899280576,
|
|
"grad_norm": 0.07062195738599672,
|
|
"learning_rate": 1.7074202316906374e-07,
|
|
"loss": 0.3436,
|
|
"step": 3383
|
|
},
|
|
{
|
|
"epoch": 4.869064748201438,
|
|
"grad_norm": 0.07045392970674895,
|
|
"learning_rate": 1.6705298452412978e-07,
|
|
"loss": 0.3373,
|
|
"step": 3384
|
|
},
|
|
{
|
|
"epoch": 4.870503597122302,
|
|
"grad_norm": 0.07298309630175809,
|
|
"learning_rate": 1.6340415146414157e-07,
|
|
"loss": 0.3431,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 4.871942446043166,
|
|
"grad_norm": 0.07245966102937863,
|
|
"learning_rate": 1.597955276720642e-07,
|
|
"loss": 0.3325,
|
|
"step": 3386
|
|
},
|
|
{
|
|
"epoch": 4.8733812949640285,
|
|
"grad_norm": 0.07070486387275271,
|
|
"learning_rate": 1.562271167902818e-07,
|
|
"loss": 0.3351,
|
|
"step": 3387
|
|
},
|
|
{
|
|
"epoch": 4.874820143884892,
|
|
"grad_norm": 0.07150260485367149,
|
|
"learning_rate": 1.526989224205888e-07,
|
|
"loss": 0.342,
|
|
"step": 3388
|
|
},
|
|
{
|
|
"epoch": 4.876258992805756,
|
|
"grad_norm": 0.06953976955584909,
|
|
"learning_rate": 1.4921094812418103e-07,
|
|
"loss": 0.3425,
|
|
"step": 3389
|
|
},
|
|
{
|
|
"epoch": 4.877697841726619,
|
|
"grad_norm": 0.0721449016026273,
|
|
"learning_rate": 1.457631974216689e-07,
|
|
"loss": 0.3436,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 4.879136690647482,
|
|
"grad_norm": 0.07298563312883435,
|
|
"learning_rate": 1.4235567379305536e-07,
|
|
"loss": 0.3392,
|
|
"step": 3391
|
|
},
|
|
{
|
|
"epoch": 4.880575539568345,
|
|
"grad_norm": 0.07037571596695216,
|
|
"learning_rate": 1.389883806777359e-07,
|
|
"loss": 0.3408,
|
|
"step": 3392
|
|
},
|
|
{
|
|
"epoch": 4.882014388489209,
|
|
"grad_norm": 0.0737784105748141,
|
|
"learning_rate": 1.356613214745117e-07,
|
|
"loss": 0.342,
|
|
"step": 3393
|
|
},
|
|
{
|
|
"epoch": 4.883453237410072,
|
|
"grad_norm": 0.07137915303139238,
|
|
"learning_rate": 1.3237449954156767e-07,
|
|
"loss": 0.3404,
|
|
"step": 3394
|
|
},
|
|
{
|
|
"epoch": 4.884892086330935,
|
|
"grad_norm": 0.0739015447371161,
|
|
"learning_rate": 1.2912791819646774e-07,
|
|
"loss": 0.3443,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 4.886330935251799,
|
|
"grad_norm": 0.07138469215553753,
|
|
"learning_rate": 1.2592158071616844e-07,
|
|
"loss": 0.3392,
|
|
"step": 3396
|
|
},
|
|
{
|
|
"epoch": 4.8877697841726615,
|
|
"grad_norm": 0.0712079637515095,
|
|
"learning_rate": 1.2275549033700097e-07,
|
|
"loss": 0.3323,
|
|
"step": 3397
|
|
},
|
|
{
|
|
"epoch": 4.889208633093525,
|
|
"grad_norm": 0.0689221669637899,
|
|
"learning_rate": 1.1962965025467564e-07,
|
|
"loss": 0.3392,
|
|
"step": 3398
|
|
},
|
|
{
|
|
"epoch": 4.890647482014389,
|
|
"grad_norm": 0.07003262472053341,
|
|
"learning_rate": 1.1654406362427762e-07,
|
|
"loss": 0.342,
|
|
"step": 3399
|
|
},
|
|
{
|
|
"epoch": 4.892086330935252,
|
|
"grad_norm": 0.07127884410461295,
|
|
"learning_rate": 1.1349873356025332e-07,
|
|
"loss": 0.3408,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 4.893525179856115,
|
|
"grad_norm": 0.07076053721644888,
|
|
"learning_rate": 1.1049366313642395e-07,
|
|
"loss": 0.3436,
|
|
"step": 3401
|
|
},
|
|
{
|
|
"epoch": 4.894964028776979,
|
|
"grad_norm": 0.07045391983696048,
|
|
"learning_rate": 1.0752885538598102e-07,
|
|
"loss": 0.3372,
|
|
"step": 3402
|
|
},
|
|
{
|
|
"epoch": 4.896402877697842,
|
|
"grad_norm": 0.06729384070897462,
|
|
"learning_rate": 1.0460431330145515e-07,
|
|
"loss": 0.3297,
|
|
"step": 3403
|
|
},
|
|
{
|
|
"epoch": 4.897841726618705,
|
|
"grad_norm": 0.07244226990594317,
|
|
"learning_rate": 1.0172003983475176e-07,
|
|
"loss": 0.3414,
|
|
"step": 3404
|
|
},
|
|
{
|
|
"epoch": 4.899280575539568,
|
|
"grad_norm": 0.06948324969308772,
|
|
"learning_rate": 9.887603789712875e-08,
|
|
"loss": 0.3326,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 4.900719424460432,
|
|
"grad_norm": 0.14394094761760298,
|
|
"learning_rate": 9.607231035919651e-08,
|
|
"loss": 0.3574,
|
|
"step": 3406
|
|
},
|
|
{
|
|
"epoch": 4.9021582733812945,
|
|
"grad_norm": 0.07163785927680609,
|
|
"learning_rate": 9.330886005090467e-08,
|
|
"loss": 0.3341,
|
|
"step": 3407
|
|
},
|
|
{
|
|
"epoch": 4.903597122302158,
|
|
"grad_norm": 0.06834269104062363,
|
|
"learning_rate": 9.058568976155979e-08,
|
|
"loss": 0.3379,
|
|
"step": 3408
|
|
},
|
|
{
|
|
"epoch": 4.905035971223022,
|
|
"grad_norm": 0.07077005468157228,
|
|
"learning_rate": 8.790280223980763e-08,
|
|
"loss": 0.3416,
|
|
"step": 3409
|
|
},
|
|
{
|
|
"epoch": 4.906474820143885,
|
|
"grad_norm": 0.07197711650767392,
|
|
"learning_rate": 8.526020019363313e-08,
|
|
"loss": 0.3414,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 4.907913669064748,
|
|
"grad_norm": 0.0699129818003512,
|
|
"learning_rate": 8.265788629036043e-08,
|
|
"loss": 0.3348,
|
|
"step": 3411
|
|
},
|
|
{
|
|
"epoch": 4.909352517985612,
|
|
"grad_norm": 0.07204778109461402,
|
|
"learning_rate": 8.009586315664842e-08,
|
|
"loss": 0.3396,
|
|
"step": 3412
|
|
},
|
|
{
|
|
"epoch": 4.910791366906475,
|
|
"grad_norm": 0.0714094429217027,
|
|
"learning_rate": 7.757413337848629e-08,
|
|
"loss": 0.3411,
|
|
"step": 3413
|
|
},
|
|
{
|
|
"epoch": 4.912230215827338,
|
|
"grad_norm": 0.07276854128191725,
|
|
"learning_rate": 7.509269950119358e-08,
|
|
"loss": 0.3402,
|
|
"step": 3414
|
|
},
|
|
{
|
|
"epoch": 4.913669064748201,
|
|
"grad_norm": 0.0726187824469424,
|
|
"learning_rate": 7.265156402942452e-08,
|
|
"loss": 0.3438,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 4.915107913669065,
|
|
"grad_norm": 0.06945238463520567,
|
|
"learning_rate": 7.025072942714595e-08,
|
|
"loss": 0.3408,
|
|
"step": 3416
|
|
},
|
|
{
|
|
"epoch": 4.916546762589928,
|
|
"grad_norm": 0.07057806968899183,
|
|
"learning_rate": 6.789019811765052e-08,
|
|
"loss": 0.34,
|
|
"step": 3417
|
|
},
|
|
{
|
|
"epoch": 4.917985611510791,
|
|
"grad_norm": 0.07365845297871071,
|
|
"learning_rate": 6.556997248355679e-08,
|
|
"loss": 0.3463,
|
|
"step": 3418
|
|
},
|
|
{
|
|
"epoch": 4.919424460431655,
|
|
"grad_norm": 0.07282040465178004,
|
|
"learning_rate": 6.329005486679584e-08,
|
|
"loss": 0.3461,
|
|
"step": 3419
|
|
},
|
|
{
|
|
"epoch": 4.920863309352518,
|
|
"grad_norm": 0.06894641152043732,
|
|
"learning_rate": 6.105044756861134e-08,
|
|
"loss": 0.3358,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 4.922302158273381,
|
|
"grad_norm": 0.07114168789866467,
|
|
"learning_rate": 5.8851152849563886e-08,
|
|
"loss": 0.3379,
|
|
"step": 3421
|
|
},
|
|
{
|
|
"epoch": 4.923741007194245,
|
|
"grad_norm": 0.07026850885921519,
|
|
"learning_rate": 5.669217292952223e-08,
|
|
"loss": 0.3369,
|
|
"step": 3422
|
|
},
|
|
{
|
|
"epoch": 4.925179856115108,
|
|
"grad_norm": 0.0703483117973689,
|
|
"learning_rate": 5.4573509987663196e-08,
|
|
"loss": 0.3396,
|
|
"step": 3423
|
|
},
|
|
{
|
|
"epoch": 4.926618705035971,
|
|
"grad_norm": 0.070156042049294,
|
|
"learning_rate": 5.2495166162471747e-08,
|
|
"loss": 0.3296,
|
|
"step": 3424
|
|
},
|
|
{
|
|
"epoch": 4.928057553956835,
|
|
"grad_norm": 0.06957433178549864,
|
|
"learning_rate": 5.045714355173203e-08,
|
|
"loss": 0.3499,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 4.929496402877698,
|
|
"grad_norm": 0.07238347817005694,
|
|
"learning_rate": 4.845944421253634e-08,
|
|
"loss": 0.3454,
|
|
"step": 3426
|
|
},
|
|
{
|
|
"epoch": 4.930935251798561,
|
|
"grad_norm": 0.07078043039778184,
|
|
"learning_rate": 4.650207016126729e-08,
|
|
"loss": 0.3423,
|
|
"step": 3427
|
|
},
|
|
{
|
|
"epoch": 4.932374100719424,
|
|
"grad_norm": 0.07047681731986824,
|
|
"learning_rate": 4.458502337361115e-08,
|
|
"loss": 0.3376,
|
|
"step": 3428
|
|
},
|
|
{
|
|
"epoch": 4.933812949640288,
|
|
"grad_norm": 0.06919752792292813,
|
|
"learning_rate": 4.270830578455343e-08,
|
|
"loss": 0.3329,
|
|
"step": 3429
|
|
},
|
|
{
|
|
"epoch": 4.935251798561151,
|
|
"grad_norm": 0.0697140964536224,
|
|
"learning_rate": 4.087191928836554e-08,
|
|
"loss": 0.3311,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 4.936690647482014,
|
|
"grad_norm": 0.07061489249271885,
|
|
"learning_rate": 3.907586573860922e-08,
|
|
"loss": 0.3423,
|
|
"step": 3431
|
|
},
|
|
{
|
|
"epoch": 4.938129496402878,
|
|
"grad_norm": 0.07179831934460396,
|
|
"learning_rate": 3.7320146948149894e-08,
|
|
"loss": 0.335,
|
|
"step": 3432
|
|
},
|
|
{
|
|
"epoch": 4.939568345323741,
|
|
"grad_norm": 0.08946359830652675,
|
|
"learning_rate": 3.560476468912111e-08,
|
|
"loss": 0.342,
|
|
"step": 3433
|
|
},
|
|
{
|
|
"epoch": 4.941007194244604,
|
|
"grad_norm": 0.07254369202984635,
|
|
"learning_rate": 3.392972069295564e-08,
|
|
"loss": 0.3444,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 4.942446043165468,
|
|
"grad_norm": 0.07289201050608793,
|
|
"learning_rate": 3.229501665037216e-08,
|
|
"loss": 0.3395,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 4.943884892086331,
|
|
"grad_norm": 0.0701816338072883,
|
|
"learning_rate": 3.0700654211361925e-08,
|
|
"loss": 0.3428,
|
|
"step": 3436
|
|
},
|
|
{
|
|
"epoch": 4.945323741007194,
|
|
"grad_norm": 0.07355827010682237,
|
|
"learning_rate": 2.9146634985206535e-08,
|
|
"loss": 0.3354,
|
|
"step": 3437
|
|
},
|
|
{
|
|
"epoch": 4.946762589928057,
|
|
"grad_norm": 0.07325587884591869,
|
|
"learning_rate": 2.7632960540460162e-08,
|
|
"loss": 0.3451,
|
|
"step": 3438
|
|
},
|
|
{
|
|
"epoch": 4.948201438848921,
|
|
"grad_norm": 0.07129507516690547,
|
|
"learning_rate": 2.6159632404958447e-08,
|
|
"loss": 0.3489,
|
|
"step": 3439
|
|
},
|
|
{
|
|
"epoch": 4.9496402877697845,
|
|
"grad_norm": 0.06925834141292335,
|
|
"learning_rate": 2.472665206581404e-08,
|
|
"loss": 0.3401,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 4.951079136690647,
|
|
"grad_norm": 0.0694479397018319,
|
|
"learning_rate": 2.3334020969407733e-08,
|
|
"loss": 0.3377,
|
|
"step": 3441
|
|
},
|
|
{
|
|
"epoch": 4.952517985611511,
|
|
"grad_norm": 0.07015444631362944,
|
|
"learning_rate": 2.1981740521406226e-08,
|
|
"loss": 0.3392,
|
|
"step": 3442
|
|
},
|
|
{
|
|
"epoch": 4.953956834532374,
|
|
"grad_norm": 0.07069428998914262,
|
|
"learning_rate": 2.0669812086735464e-08,
|
|
"loss": 0.3444,
|
|
"step": 3443
|
|
},
|
|
{
|
|
"epoch": 4.955395683453237,
|
|
"grad_norm": 0.0710197704628013,
|
|
"learning_rate": 1.9398236989598418e-08,
|
|
"loss": 0.3368,
|
|
"step": 3444
|
|
},
|
|
{
|
|
"epoch": 4.956834532374101,
|
|
"grad_norm": 0.06831082896843226,
|
|
"learning_rate": 1.8167016513470636e-08,
|
|
"loss": 0.3368,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 4.958273381294964,
|
|
"grad_norm": 0.07051136677957565,
|
|
"learning_rate": 1.697615190107804e-08,
|
|
"loss": 0.34,
|
|
"step": 3446
|
|
},
|
|
{
|
|
"epoch": 4.959712230215827,
|
|
"grad_norm": 0.06962476342115365,
|
|
"learning_rate": 1.582564435444134e-08,
|
|
"loss": 0.3363,
|
|
"step": 3447
|
|
},
|
|
{
|
|
"epoch": 4.961151079136691,
|
|
"grad_norm": 0.07377815167190645,
|
|
"learning_rate": 1.4715495034818284e-08,
|
|
"loss": 0.345,
|
|
"step": 3448
|
|
},
|
|
{
|
|
"epoch": 4.962589928057554,
|
|
"grad_norm": 0.07094712761866803,
|
|
"learning_rate": 1.3645705062748094e-08,
|
|
"loss": 0.3456,
|
|
"step": 3449
|
|
},
|
|
{
|
|
"epoch": 4.9640287769784175,
|
|
"grad_norm": 0.07216082899688765,
|
|
"learning_rate": 1.2616275518033683e-08,
|
|
"loss": 0.3422,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 4.96546762589928,
|
|
"grad_norm": 0.06841421390401461,
|
|
"learning_rate": 1.1627207439728339e-08,
|
|
"loss": 0.3378,
|
|
"step": 3451
|
|
},
|
|
{
|
|
"epoch": 4.966906474820144,
|
|
"grad_norm": 0.07369616307893008,
|
|
"learning_rate": 1.0678501826153486e-08,
|
|
"loss": 0.3459,
|
|
"step": 3452
|
|
},
|
|
{
|
|
"epoch": 4.968345323741008,
|
|
"grad_norm": 0.07641013355472023,
|
|
"learning_rate": 9.770159634894249e-09,
|
|
"loss": 0.3422,
|
|
"step": 3453
|
|
},
|
|
{
|
|
"epoch": 4.96978417266187,
|
|
"grad_norm": 0.07342239542843579,
|
|
"learning_rate": 8.902181782786124e-09,
|
|
"loss": 0.3381,
|
|
"step": 3454
|
|
},
|
|
{
|
|
"epoch": 4.971223021582734,
|
|
"grad_norm": 0.07050469275986423,
|
|
"learning_rate": 8.074569145928301e-09,
|
|
"loss": 0.3432,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 4.972661870503597,
|
|
"grad_norm": 0.07235582837524834,
|
|
"learning_rate": 7.287322559679233e-09,
|
|
"loss": 0.339,
|
|
"step": 3456
|
|
},
|
|
{
|
|
"epoch": 4.97410071942446,
|
|
"grad_norm": 0.07553178234992268,
|
|
"learning_rate": 6.5404428186433e-09,
|
|
"loss": 0.3493,
|
|
"step": 3457
|
|
},
|
|
{
|
|
"epoch": 4.975539568345324,
|
|
"grad_norm": 0.07137437897993666,
|
|
"learning_rate": 5.833930676693023e-09,
|
|
"loss": 0.3423,
|
|
"step": 3458
|
|
},
|
|
{
|
|
"epoch": 4.976978417266187,
|
|
"grad_norm": 0.07117577241492566,
|
|
"learning_rate": 5.167786846946854e-09,
|
|
"loss": 0.3398,
|
|
"step": 3459
|
|
},
|
|
{
|
|
"epoch": 4.9784172661870505,
|
|
"grad_norm": 0.07065101069587568,
|
|
"learning_rate": 4.542012001778062e-09,
|
|
"loss": 0.34,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 4.979856115107914,
|
|
"grad_norm": 0.07148007521327346,
|
|
"learning_rate": 3.956606772823613e-09,
|
|
"loss": 0.3434,
|
|
"step": 3461
|
|
},
|
|
{
|
|
"epoch": 4.981294964028777,
|
|
"grad_norm": 0.0681956465681191,
|
|
"learning_rate": 3.4115717509619616e-09,
|
|
"loss": 0.3386,
|
|
"step": 3462
|
|
},
|
|
{
|
|
"epoch": 4.982733812949641,
|
|
"grad_norm": 0.0715606502909257,
|
|
"learning_rate": 2.9069074863219414e-09,
|
|
"loss": 0.3373,
|
|
"step": 3463
|
|
},
|
|
{
|
|
"epoch": 4.984172661870503,
|
|
"grad_norm": 0.07263384255801741,
|
|
"learning_rate": 2.4426144882916392e-09,
|
|
"loss": 0.3404,
|
|
"step": 3464
|
|
},
|
|
{
"epoch": 4.985611510791367,
"grad_norm": 0.07119548290979529,
"learning_rate": 2.018693225509516e-09,
"loss": 0.3447,
"step": 3465
},
{
"epoch": 4.98705035971223,
"grad_norm": 0.07440181713167043,
"learning_rate": 1.6351441258644073e-09,
"loss": 0.339,
"step": 3466
},
{
"epoch": 4.988489208633093,
"grad_norm": 0.07199407873576316,
"learning_rate": 1.2919675764910823e-09,
"loss": 0.3298,
"step": 3467
},
{
"epoch": 4.989928057553957,
"grad_norm": 0.06990723761858454,
"learning_rate": 9.89163923770242e-10,
"loss": 0.3325,
"step": 3468
},
{
"epoch": 4.99136690647482,
"grad_norm": 0.07083402032897038,
"learning_rate": 7.26733473350727e-10,
"loss": 0.3447,
"step": 3469
},
{
"epoch": 4.9928057553956835,
"grad_norm": 0.07405922885558312,
"learning_rate": 5.046764901095457e-10,
"loss": 0.3362,
"step": 3470
},
{
"epoch": 4.994244604316547,
"grad_norm": 0.07070447970027792,
"learning_rate": 3.2299319817852283e-10,
"loss": 0.3398,
"step": 3471
},
{
"epoch": 4.99568345323741,
"grad_norm": 0.07104321685523374,
"learning_rate": 1.816837809487382e-10,
"loss": 0.3455,
"step": 3472
},
{
"epoch": 4.997122302158274,
"grad_norm": 0.07119746291706079,
"learning_rate": 8.074838104832338e-11,
"loss": 0.3395,
"step": 3473
},
{
"epoch": 4.998561151079136,
"grad_norm": 0.07378004034244585,
"learning_rate": 2.0187100355784085e-11,
"loss": 0.3369,
"step": 3474
},
{
"epoch": 5.0,
"grad_norm": 0.07027203874272335,
"learning_rate": 0.0,
"loss": 0.3309,
"step": 3475
},
{
"epoch": 5.0,
"step": 3475,
"total_flos": 4.771898416222647e+19,
"train_loss": 0.3984597723861392,
"train_runtime": 127190.7012,
"train_samples_per_second": 13.986,
"train_steps_per_second": 0.027
}
],
"logging_steps": 1.0,
"max_steps": 3475,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.771898416222647e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}