{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 834,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0036019810895992796,
|
|
"grad_norm": 8.959743121022326,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.8318,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.007203962179198559,
|
|
"grad_norm": 8.599514774705224,
|
|
"learning_rate": 1.1904761904761906e-07,
|
|
"loss": 1.8629,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.010805943268797838,
|
|
"grad_norm": 8.27099405489232,
|
|
"learning_rate": 2.3809523809523811e-07,
|
|
"loss": 1.8047,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.014407924358397118,
|
|
"grad_norm": 8.807297661164533,
|
|
"learning_rate": 3.5714285714285716e-07,
|
|
"loss": 1.838,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.018009905447996397,
|
|
"grad_norm": 9.118038313686268,
|
|
"learning_rate": 4.7619047619047623e-07,
|
|
"loss": 1.8445,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.021611886537595677,
|
|
"grad_norm": 9.342871560722859,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 1.8716,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.025213867627194957,
|
|
"grad_norm": 8.104634918034305,
|
|
"learning_rate": 7.142857142857143e-07,
|
|
"loss": 1.7625,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.028815848716794237,
|
|
"grad_norm": 8.497419415075033,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": 1.8662,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.03241782980639352,
|
|
"grad_norm": 8.997271493582199,
|
|
"learning_rate": 9.523809523809525e-07,
|
|
"loss": 1.8798,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.03601981089599279,
|
|
"grad_norm": 7.186846943308143,
|
|
"learning_rate": 1.0714285714285714e-06,
|
|
"loss": 1.7647,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.03962179198559208,
|
|
"grad_norm": 7.023004901959916,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 1.8083,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.04322377307519135,
|
|
"grad_norm": 7.3197143781084195,
|
|
"learning_rate": 1.3095238095238096e-06,
|
|
"loss": 1.8125,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.04682575416479064,
|
|
"grad_norm": 5.742362390459089,
|
|
"learning_rate": 1.4285714285714286e-06,
|
|
"loss": 1.7355,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.05042773525438991,
|
|
"grad_norm": 5.054071547886992,
|
|
"learning_rate": 1.5476190476190479e-06,
|
|
"loss": 1.7257,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.0540297163439892,
|
|
"grad_norm": 5.035497816412668,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 1.744,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.05763169743358847,
|
|
"grad_norm": 4.701882228340393,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": 1.6886,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.06123367852318776,
|
|
"grad_norm": 4.331238204850519,
|
|
"learning_rate": 1.904761904761905e-06,
|
|
"loss": 1.6805,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.06483565961278703,
|
|
"grad_norm": 2.767544145879741,
|
|
"learning_rate": 2.023809523809524e-06,
|
|
"loss": 1.6131,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.06843764070238631,
|
|
"grad_norm": 2.6868680980231345,
|
|
"learning_rate": 2.1428571428571427e-06,
|
|
"loss": 1.5981,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.07203962179198559,
|
|
"grad_norm": 2.4108574787799673,
|
|
"learning_rate": 2.261904761904762e-06,
|
|
"loss": 1.5926,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.07564160288158488,
|
|
"grad_norm": 2.4672291267909237,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": 1.5968,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.07924358397118415,
|
|
"grad_norm": 2.1181123521738914,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 1.5456,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.08284556506078343,
|
|
"grad_norm": 2.3108498703640565,
|
|
"learning_rate": 2.6190476190476192e-06,
|
|
"loss": 1.5908,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.0864475461503827,
|
|
"grad_norm": 1.593382451878654,
|
|
"learning_rate": 2.7380952380952387e-06,
|
|
"loss": 1.5203,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.090049527239982,
|
|
"grad_norm": 1.8594286026551032,
|
|
"learning_rate": 2.8571428571428573e-06,
|
|
"loss": 1.4988,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.09365150832958127,
|
|
"grad_norm": 1.812932314030098,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": 1.5038,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.09725348941918055,
|
|
"grad_norm": 1.6854819502367606,
|
|
"learning_rate": 3.0952380952380957e-06,
|
|
"loss": 1.5037,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.10085547050877983,
|
|
"grad_norm": 1.4363843088807504,
|
|
"learning_rate": 3.2142857142857147e-06,
|
|
"loss": 1.4492,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.1044574515983791,
|
|
"grad_norm": 1.2720392687132038,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 1.4398,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.1080594326879784,
|
|
"grad_norm": 1.151633877445218,
|
|
"learning_rate": 3.4523809523809528e-06,
|
|
"loss": 1.427,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.11166141377757767,
|
|
"grad_norm": 0.9908597997660091,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": 1.4175,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.11526339486717695,
|
|
"grad_norm": 0.8183196011685436,
|
|
"learning_rate": 3.690476190476191e-06,
|
|
"loss": 1.3983,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.11886537595677622,
|
|
"grad_norm": 0.8635221903260426,
|
|
"learning_rate": 3.80952380952381e-06,
|
|
"loss": 1.4133,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.12246735704637551,
|
|
"grad_norm": 0.8674489391428222,
|
|
"learning_rate": 3.928571428571429e-06,
|
|
"loss": 1.3498,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.1260693381359748,
|
|
"grad_norm": 0.9238078524289024,
|
|
"learning_rate": 4.047619047619048e-06,
|
|
"loss": 1.3676,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.12967131922557407,
|
|
"grad_norm": 0.8890230038452177,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": 1.3671,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.13327330031517334,
|
|
"grad_norm": 0.7855135929116516,
|
|
"learning_rate": 4.2857142857142855e-06,
|
|
"loss": 1.3752,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.13687528140477262,
|
|
"grad_norm": 0.7376255448971343,
|
|
"learning_rate": 4.404761904761905e-06,
|
|
"loss": 1.3343,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.1404772624943719,
|
|
"grad_norm": 0.594654333690764,
|
|
"learning_rate": 4.523809523809524e-06,
|
|
"loss": 1.3251,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.14407924358397117,
|
|
"grad_norm": 0.5613128279020647,
|
|
"learning_rate": 4.642857142857144e-06,
|
|
"loss": 1.3126,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.14768122467357048,
|
|
"grad_norm": 0.560019251447276,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": 1.3397,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.15128320576316975,
|
|
"grad_norm": 0.5314471256259126,
|
|
"learning_rate": 4.880952380952381e-06,
|
|
"loss": 1.2782,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.15488518685276903,
|
|
"grad_norm": 0.5332998102282385,
|
|
"learning_rate": 5e-06,
|
|
"loss": 1.3021,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.1584871679423683,
|
|
"grad_norm": 0.5282474767077582,
|
|
"learning_rate": 5.119047619047619e-06,
|
|
"loss": 1.2855,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.16208914903196758,
|
|
"grad_norm": 0.5050735166019568,
|
|
"learning_rate": 5.2380952380952384e-06,
|
|
"loss": 1.2798,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.16569113012156686,
|
|
"grad_norm": 0.5264954959532085,
|
|
"learning_rate": 5.357142857142857e-06,
|
|
"loss": 1.2685,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.16929311121116614,
|
|
"grad_norm": 0.49891809978749935,
|
|
"learning_rate": 5.476190476190477e-06,
|
|
"loss": 1.3067,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.1728950923007654,
|
|
"grad_norm": 0.43713019827527205,
|
|
"learning_rate": 5.595238095238096e-06,
|
|
"loss": 1.2912,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.1764970733903647,
|
|
"grad_norm": 0.42268295993435495,
|
|
"learning_rate": 5.7142857142857145e-06,
|
|
"loss": 1.2677,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.180099054479964,
|
|
"grad_norm": 0.42192747607841885,
|
|
"learning_rate": 5.833333333333334e-06,
|
|
"loss": 1.2945,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.18370103556956327,
|
|
"grad_norm": 0.37897863688600525,
|
|
"learning_rate": 5.9523809523809525e-06,
|
|
"loss": 1.2431,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.18730301665916255,
|
|
"grad_norm": 0.40405168811471465,
|
|
"learning_rate": 6.071428571428571e-06,
|
|
"loss": 1.2804,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.19090499774876182,
|
|
"grad_norm": 0.3934601575838702,
|
|
"learning_rate": 6.1904761904761914e-06,
|
|
"loss": 1.2597,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.1945069788383611,
|
|
"grad_norm": 0.41031465320208005,
|
|
"learning_rate": 6.30952380952381e-06,
|
|
"loss": 1.2451,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.19810895992796038,
|
|
"grad_norm": 0.3681773008428082,
|
|
"learning_rate": 6.4285714285714295e-06,
|
|
"loss": 1.26,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.20171094101755965,
|
|
"grad_norm": 0.36871798332732425,
|
|
"learning_rate": 6.547619047619048e-06,
|
|
"loss": 1.2251,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.20531292210715893,
|
|
"grad_norm": 0.35510983491031706,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 1.2455,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.2089149031967582,
|
|
"grad_norm": 0.3348595552564557,
|
|
"learning_rate": 6.785714285714287e-06,
|
|
"loss": 1.2582,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.2125168842863575,
|
|
"grad_norm": 0.33479239236035946,
|
|
"learning_rate": 6.9047619047619055e-06,
|
|
"loss": 1.229,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.2161188653759568,
|
|
"grad_norm": 0.35235529909722807,
|
|
"learning_rate": 7.023809523809524e-06,
|
|
"loss": 1.194,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.21972084646555606,
|
|
"grad_norm": 0.3612868104143937,
|
|
"learning_rate": 7.1428571428571436e-06,
|
|
"loss": 1.216,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.22332282755515534,
|
|
"grad_norm": 0.33124367306424535,
|
|
"learning_rate": 7.261904761904762e-06,
|
|
"loss": 1.2322,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.22692480864475462,
|
|
"grad_norm": 0.31005107875726384,
|
|
"learning_rate": 7.380952380952382e-06,
|
|
"loss": 1.1965,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.2305267897343539,
|
|
"grad_norm": 0.35704168784229545,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 1.2472,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.23412877082395317,
|
|
"grad_norm": 0.3009141198350835,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 1.2042,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.23773075191355245,
|
|
"grad_norm": 0.3361466848573483,
|
|
"learning_rate": 7.738095238095238e-06,
|
|
"loss": 1.1972,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.24133273300315172,
|
|
"grad_norm": 0.32300651355695426,
|
|
"learning_rate": 7.857142857142858e-06,
|
|
"loss": 1.1821,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.24493471409275103,
|
|
"grad_norm": 0.34432276589140803,
|
|
"learning_rate": 7.976190476190477e-06,
|
|
"loss": 1.2158,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.2485366951823503,
|
|
"grad_norm": 0.32159883103552483,
|
|
"learning_rate": 8.095238095238097e-06,
|
|
"loss": 1.2273,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.2521386762719496,
|
|
"grad_norm": 0.3132280942086526,
|
|
"learning_rate": 8.214285714285714e-06,
|
|
"loss": 1.217,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.25574065736154883,
|
|
"grad_norm": 0.34857390044798864,
|
|
"learning_rate": 8.333333333333334e-06,
|
|
"loss": 1.1963,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.25934263845114813,
|
|
"grad_norm": 0.28499241440527673,
|
|
"learning_rate": 8.452380952380953e-06,
|
|
"loss": 1.1921,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.26294461954074744,
|
|
"grad_norm": 0.3170111251216066,
|
|
"learning_rate": 8.571428571428571e-06,
|
|
"loss": 1.1975,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.2665466006303467,
|
|
"grad_norm": 0.3211922078756118,
|
|
"learning_rate": 8.690476190476192e-06,
|
|
"loss": 1.1704,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.270148581719946,
|
|
"grad_norm": 0.30453515336097836,
|
|
"learning_rate": 8.80952380952381e-06,
|
|
"loss": 1.2062,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.27375056280954524,
|
|
"grad_norm": 0.3064941559502552,
|
|
"learning_rate": 8.92857142857143e-06,
|
|
"loss": 1.1928,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.27735254389914454,
|
|
"grad_norm": 0.33218232714495777,
|
|
"learning_rate": 9.047619047619049e-06,
|
|
"loss": 1.205,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.2809545249887438,
|
|
"grad_norm": 0.29079080164563587,
|
|
"learning_rate": 9.166666666666666e-06,
|
|
"loss": 1.2031,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.2845565060783431,
|
|
"grad_norm": 0.31159296882004955,
|
|
"learning_rate": 9.285714285714288e-06,
|
|
"loss": 1.212,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.28815848716794235,
|
|
"grad_norm": 0.2950167931965713,
|
|
"learning_rate": 9.404761904761905e-06,
|
|
"loss": 1.21,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.29176046825754165,
|
|
"grad_norm": 0.3168487800792039,
|
|
"learning_rate": 9.523809523809525e-06,
|
|
"loss": 1.1467,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.29536244934714095,
|
|
"grad_norm": 0.31180168015480736,
|
|
"learning_rate": 9.642857142857144e-06,
|
|
"loss": 1.1904,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.2989644304367402,
|
|
"grad_norm": 0.31394964544202014,
|
|
"learning_rate": 9.761904761904762e-06,
|
|
"loss": 1.1742,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.3025664115263395,
|
|
"grad_norm": 0.28380748068760736,
|
|
"learning_rate": 9.880952380952381e-06,
|
|
"loss": 1.2007,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.30616839261593876,
|
|
"grad_norm": 0.3122970930157758,
|
|
"learning_rate": 1e-05,
|
|
"loss": 1.1607,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.30977037370553806,
|
|
"grad_norm": 0.30341382381199433,
|
|
"learning_rate": 9.999956135155688e-06,
|
|
"loss": 1.1943,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.3133723547951373,
|
|
"grad_norm": 0.29699266871397906,
|
|
"learning_rate": 9.999824541392404e-06,
|
|
"loss": 1.156,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.3169743358847366,
|
|
"grad_norm": 0.3370219209966227,
|
|
"learning_rate": 9.999605221019082e-06,
|
|
"loss": 1.166,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.32057631697433586,
|
|
"grad_norm": 0.30301135573648547,
|
|
"learning_rate": 9.999298177883902e-06,
|
|
"loss": 1.186,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.32417829806393517,
|
|
"grad_norm": 0.30025420893856164,
|
|
"learning_rate": 9.998903417374228e-06,
|
|
"loss": 1.1832,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.32778027915353447,
|
|
"grad_norm": 0.3211722926193115,
|
|
"learning_rate": 9.9984209464165e-06,
|
|
"loss": 1.1309,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.3313822602431337,
|
|
"grad_norm": 0.34378918525170493,
|
|
"learning_rate": 9.997850773476126e-06,
|
|
"loss": 1.1822,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.334984241332733,
|
|
"grad_norm": 0.3202545861207382,
|
|
"learning_rate": 9.997192908557322e-06,
|
|
"loss": 1.1644,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.3385862224223323,
|
|
"grad_norm": 0.3192066377734346,
|
|
"learning_rate": 9.996447363202947e-06,
|
|
"loss": 1.1827,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.3421882035119316,
|
|
"grad_norm": 0.32504247647618456,
|
|
"learning_rate": 9.995614150494293e-06,
|
|
"loss": 1.16,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.3457901846015308,
|
|
"grad_norm": 0.37075579456497426,
|
|
"learning_rate": 9.994693285050858e-06,
|
|
"loss": 1.1813,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.34939216569113013,
|
|
"grad_norm": 0.3604600871283949,
|
|
"learning_rate": 9.99368478303009e-06,
|
|
"loss": 1.1535,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.3529941467807294,
|
|
"grad_norm": 0.3272940747117161,
|
|
"learning_rate": 9.9925886621271e-06,
|
|
"loss": 1.1636,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.3565961278703287,
|
|
"grad_norm": 0.3449509830414838,
|
|
"learning_rate": 9.99140494157436e-06,
|
|
"loss": 1.1575,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.360198108959928,
|
|
"grad_norm": 0.35962181776653873,
|
|
"learning_rate": 9.990133642141359e-06,
|
|
"loss": 1.1756,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.36380009004952724,
|
|
"grad_norm": 0.3255881417609746,
|
|
"learning_rate": 9.988774786134235e-06,
|
|
"loss": 1.1751,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.36740207113912654,
|
|
"grad_norm": 0.3466801749265495,
|
|
"learning_rate": 9.987328397395389e-06,
|
|
"loss": 1.148,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.3710040522287258,
|
|
"grad_norm": 0.3781154633191771,
|
|
"learning_rate": 9.98579450130307e-06,
|
|
"loss": 1.1672,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.3746060333183251,
|
|
"grad_norm": 0.3177289518646908,
|
|
"learning_rate": 9.984173124770924e-06,
|
|
"loss": 1.1767,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.37820801440792434,
|
|
"grad_norm": 0.3287531127302142,
|
|
"learning_rate": 9.982464296247523e-06,
|
|
"loss": 1.1729,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.38180999549752365,
|
|
"grad_norm": 0.35236574446805197,
|
|
"learning_rate": 9.980668045715864e-06,
|
|
"loss": 1.162,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.3854119765871229,
|
|
"grad_norm": 0.32366666300178654,
|
|
"learning_rate": 9.978784404692847e-06,
|
|
"loss": 1.1541,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.3890139576767222,
|
|
"grad_norm": 0.3441412737476968,
|
|
"learning_rate": 9.97681340622872e-06,
|
|
"loss": 1.1483,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.3926159387663215,
|
|
"grad_norm": 0.3368352565729486,
|
|
"learning_rate": 9.974755084906503e-06,
|
|
"loss": 1.1587,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.39621791985592075,
|
|
"grad_norm": 0.3146485277926942,
|
|
"learning_rate": 9.972609476841368e-06,
|
|
"loss": 1.1603,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.39981990094552006,
|
|
"grad_norm": 0.34336750676307926,
|
|
"learning_rate": 9.970376619680024e-06,
|
|
"loss": 1.1793,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.4034218820351193,
|
|
"grad_norm": 0.3079869001100948,
|
|
"learning_rate": 9.968056552600043e-06,
|
|
"loss": 1.1601,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.4070238631247186,
|
|
"grad_norm": 0.3194899482588308,
|
|
"learning_rate": 9.965649316309178e-06,
|
|
"loss": 1.1931,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.41062584421431786,
|
|
"grad_norm": 0.31236725178854713,
|
|
"learning_rate": 9.963154953044646e-06,
|
|
"loss": 1.1157,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.41422782530391716,
|
|
"grad_norm": 0.3641377454803935,
|
|
"learning_rate": 9.960573506572391e-06,
|
|
"loss": 1.1257,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.4178298063935164,
|
|
"grad_norm": 0.3367690084363564,
|
|
"learning_rate": 9.957905022186309e-06,
|
|
"loss": 1.1332,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.4214317874831157,
|
|
"grad_norm": 0.3282965716002517,
|
|
"learning_rate": 9.955149546707465e-06,
|
|
"loss": 1.0959,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.425033768572715,
|
|
"grad_norm": 0.3764974200013322,
|
|
"learning_rate": 9.952307128483257e-06,
|
|
"loss": 1.168,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.42863574966231427,
|
|
"grad_norm": 0.332077880161025,
|
|
"learning_rate": 9.94937781738658e-06,
|
|
"loss": 1.1847,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.4322377307519136,
|
|
"grad_norm": 0.3122087138952814,
|
|
"learning_rate": 9.946361664814942e-06,
|
|
"loss": 1.1214,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.4358397118415128,
|
|
"grad_norm": 0.321004643708737,
|
|
"learning_rate": 9.94325872368957e-06,
|
|
"loss": 1.1235,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.4394416929311121,
|
|
"grad_norm": 0.35397785938333604,
|
|
"learning_rate": 9.940069048454478e-06,
|
|
"loss": 1.1792,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.4430436740207114,
|
|
"grad_norm": 0.34751295835336804,
|
|
"learning_rate": 9.936792695075502e-06,
|
|
"loss": 1.1626,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.4466456551103107,
|
|
"grad_norm": 0.33334683436011303,
|
|
"learning_rate": 9.93342972103934e-06,
|
|
"loss": 1.1156,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.45024763619990993,
|
|
"grad_norm": 0.35572890521109984,
|
|
"learning_rate": 9.929980185352525e-06,
|
|
"loss": 1.134,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.45384961728950923,
|
|
"grad_norm": 0.3600252106382079,
|
|
"learning_rate": 9.926444148540394e-06,
|
|
"loss": 1.1552,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.45745159837910854,
|
|
"grad_norm": 0.31574099694060664,
|
|
"learning_rate": 9.922821672646028e-06,
|
|
"loss": 1.1294,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.4610535794687078,
|
|
"grad_norm": 0.3384836348033959,
|
|
"learning_rate": 9.919112821229165e-06,
|
|
"loss": 1.1415,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.4646555605583071,
|
|
"grad_norm": 0.36082038793653404,
|
|
"learning_rate": 9.915317659365078e-06,
|
|
"loss": 1.1486,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.46825754164790634,
|
|
"grad_norm": 0.3475974165432403,
|
|
"learning_rate": 9.911436253643445e-06,
|
|
"loss": 1.1265,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.47185952273750564,
|
|
"grad_norm": 0.3635545773479418,
|
|
"learning_rate": 9.907468672167165e-06,
|
|
"loss": 1.1549,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.4754615038271049,
|
|
"grad_norm": 0.347794452842081,
|
|
"learning_rate": 9.903414984551178e-06,
|
|
"loss": 1.1461,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.4790634849167042,
|
|
"grad_norm": 0.32822120698172536,
|
|
"learning_rate": 9.899275261921236e-06,
|
|
"loss": 1.1649,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.48266546600630345,
|
|
"grad_norm": 0.29837185314643394,
|
|
"learning_rate": 9.89504957691265e-06,
|
|
"loss": 1.1571,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.48626744709590275,
|
|
"grad_norm": 0.38306110575116503,
|
|
"learning_rate": 9.890738003669029e-06,
|
|
"loss": 1.1252,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.48986942818550205,
|
|
"grad_norm": 0.34086844442205383,
|
|
"learning_rate": 9.886340617840968e-06,
|
|
"loss": 1.1382,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.4934714092751013,
|
|
"grad_norm": 0.30015621390458924,
|
|
"learning_rate": 9.881857496584726e-06,
|
|
"loss": 1.1275,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.4970733903647006,
|
|
"grad_norm": 0.3614100125796906,
|
|
"learning_rate": 9.877288718560866e-06,
|
|
"loss": 1.1161,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.5006753714542999,
|
|
"grad_norm": 0.3267608562772437,
|
|
"learning_rate": 9.872634363932887e-06,
|
|
"loss": 1.1316,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.5042773525438992,
|
|
"grad_norm": 0.30874876519759176,
|
|
"learning_rate": 9.867894514365802e-06,
|
|
"loss": 1.1485,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.5078793336334985,
|
|
"grad_norm": 0.3155720475990474,
|
|
"learning_rate": 9.863069253024719e-06,
|
|
"loss": 1.1481,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.5114813147230977,
|
|
"grad_norm": 0.33367944966848107,
|
|
"learning_rate": 9.85815866457337e-06,
|
|
"loss": 1.108,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.515083295812697,
|
|
"grad_norm": 0.337631206990529,
|
|
"learning_rate": 9.853162835172638e-06,
|
|
"loss": 1.1292,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.5186852769022963,
|
|
"grad_norm": 0.353550852505754,
|
|
"learning_rate": 9.84808185247903e-06,
|
|
"loss": 1.1378,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.5222872579918956,
|
|
"grad_norm": 0.3545306891033168,
|
|
"learning_rate": 9.842915805643156e-06,
|
|
"loss": 1.108,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.5258892390814949,
|
|
"grad_norm": 0.42557248608508313,
|
|
"learning_rate": 9.83766478530815e-06,
|
|
"loss": 1.1334,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.5294912201710941,
|
|
"grad_norm": 0.3728030570909934,
|
|
"learning_rate": 9.832328883608088e-06,
|
|
"loss": 1.1381,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.5330932012606934,
|
|
"grad_norm": 0.49924286590903955,
|
|
"learning_rate": 9.82690819416637e-06,
|
|
"loss": 1.097,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.5366951823502927,
|
|
"grad_norm": 0.3439764618050822,
|
|
"learning_rate": 9.821402812094074e-06,
|
|
"loss": 1.1577,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.540297163439892,
|
|
"grad_norm": 0.4159129574954092,
|
|
"learning_rate": 9.815812833988292e-06,
|
|
"loss": 1.132,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.5438991445294912,
|
|
"grad_norm": 0.4129107171971605,
|
|
"learning_rate": 9.81013835793043e-06,
|
|
"loss": 1.1574,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.5475011256190905,
|
|
"grad_norm": 0.4296938671727486,
|
|
"learning_rate": 9.804379483484493e-06,
|
|
"loss": 1.1459,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.5511031067086898,
|
|
"grad_norm": 0.3766243233797345,
|
|
"learning_rate": 9.798536311695334e-06,
|
|
"loss": 1.1545,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.5547050877982891,
|
|
"grad_norm": 0.35435312036719,
|
|
"learning_rate": 9.79260894508688e-06,
|
|
"loss": 1.1171,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.5583070688878884,
|
|
"grad_norm": 0.37652608806645427,
|
|
"learning_rate": 9.786597487660336e-06,
|
|
"loss": 1.111,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.5619090499774876,
|
|
"grad_norm": 0.3453394867906792,
|
|
"learning_rate": 9.780502044892363e-06,
|
|
"loss": 1.1749,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.5655110310670869,
|
|
"grad_norm": 0.379890664116539,
|
|
"learning_rate": 9.774322723733216e-06,
|
|
"loss": 1.1481,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.5691130121566862,
|
|
"grad_norm": 0.39501637379719035,
|
|
"learning_rate": 9.768059632604881e-06,
|
|
"loss": 1.1061,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.5727149932462855,
|
|
"grad_norm": 0.3326705156892084,
|
|
"learning_rate": 9.761712881399164e-06,
|
|
"loss": 1.1412,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.5763169743358847,
|
|
"grad_norm": 0.3617516742176726,
|
|
"learning_rate": 9.755282581475769e-06,
|
|
"loss": 1.1789,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.579918955425484,
|
|
"grad_norm": 0.39263243654387053,
|
|
"learning_rate": 9.748768845660335e-06,
|
|
"loss": 1.152,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.5835209365150833,
|
|
"grad_norm": 0.34780105035332837,
|
|
"learning_rate": 9.742171788242468e-06,
|
|
"loss": 1.1267,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.5871229176046826,
|
|
"grad_norm": 0.45976305202766604,
|
|
"learning_rate": 9.735491524973723e-06,
|
|
"loss": 1.1043,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.5907248986942819,
|
|
"grad_norm": 0.3604888952284618,
|
|
"learning_rate": 9.728728173065584e-06,
|
|
"loss": 1.1105,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.5943268797838811,
|
|
"grad_norm": 0.4168296170026391,
|
|
"learning_rate": 9.721881851187406e-06,
|
|
"loss": 1.1362,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.5979288608734804,
|
|
"grad_norm": 0.3757592230549266,
|
|
"learning_rate": 9.714952679464324e-06,
|
|
"loss": 1.1405,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.6015308419630797,
|
|
"grad_norm": 0.40888701962345614,
|
|
"learning_rate": 9.707940779475151e-06,
|
|
"loss": 1.0968,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.605132823052679,
|
|
"grad_norm": 0.36126726128985687,
|
|
"learning_rate": 9.700846274250252e-06,
|
|
"loss": 1.1197,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.6087348041422782,
|
|
"grad_norm": 0.3792632202992676,
|
|
"learning_rate": 9.693669288269371e-06,
|
|
"loss": 1.1129,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.6123367852318775,
|
|
"grad_norm": 0.4094377957060575,
|
|
"learning_rate": 9.68640994745946e-06,
|
|
"loss": 1.1235,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.6159387663214768,
|
|
"grad_norm": 0.39486664256207166,
|
|
"learning_rate": 9.679068379192455e-06,
|
|
"loss": 1.1189,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.6195407474110761,
|
|
"grad_norm": 0.37256294824637853,
|
|
"learning_rate": 9.671644712283061e-06,
|
|
"loss": 1.0951,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.6231427285006754,
|
|
"grad_norm": 0.3627397815654964,
|
|
"learning_rate": 9.664139076986473e-06,
|
|
"loss": 1.1321,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.6267447095902746,
|
|
"grad_norm": 0.4248545009770537,
|
|
"learning_rate": 9.656551604996102e-06,
|
|
"loss": 1.1338,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.6303466906798739,
|
|
"grad_norm": 0.3492982058791957,
|
|
"learning_rate": 9.648882429441258e-06,
|
|
"loss": 1.1383,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.6339486717694732,
|
|
"grad_norm": 0.37748320014327436,
|
|
"learning_rate": 9.641131684884817e-06,
|
|
"loss": 1.1316,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.6375506528590725,
|
|
"grad_norm": 0.35682755754679785,
|
|
"learning_rate": 9.633299507320862e-06,
|
|
"loss": 1.09,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.6411526339486717,
|
|
"grad_norm": 0.34106929576049877,
|
|
"learning_rate": 9.62538603417229e-06,
|
|
"loss": 1.136,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.644754615038271,
|
|
"grad_norm": 0.34746889086894356,
|
|
"learning_rate": 9.617391404288412e-06,
|
|
"loss": 1.0943,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.6483565961278703,
|
|
"grad_norm": 0.36723025633050504,
|
|
"learning_rate": 9.609315757942504e-06,
|
|
"loss": 1.182,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.6519585772174696,
|
|
"grad_norm": 0.33451032057782015,
|
|
"learning_rate": 9.601159236829353e-06,
|
|
"loss": 1.1351,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.6555605583070689,
|
|
"grad_norm": 0.33248672869162116,
|
|
"learning_rate": 9.592921984062771e-06,
|
|
"loss": 1.1187,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.6591625393966681,
|
|
"grad_norm": 0.3331636644805929,
|
|
"learning_rate": 9.584604144173084e-06,
|
|
"loss": 1.1009,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.6627645204862674,
|
|
"grad_norm": 0.3268063515189842,
|
|
"learning_rate": 9.576205863104588e-06,
|
|
"loss": 1.1048,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.6663665015758667,
|
|
"grad_norm": 0.3651627841761447,
|
|
"learning_rate": 9.567727288213005e-06,
|
|
"loss": 1.1534,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.669968482665466,
|
|
"grad_norm": 0.3347297887818968,
|
|
"learning_rate": 9.55916856826288e-06,
|
|
"loss": 1.1567,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.6735704637550652,
|
|
"grad_norm": 0.3615161248575356,
|
|
"learning_rate": 9.550529853424979e-06,
|
|
"loss": 1.1278,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.6771724448446645,
|
|
"grad_norm": 0.3713014349141257,
|
|
"learning_rate": 9.541811295273657e-06,
|
|
"loss": 1.1101,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.6807744259342638,
|
|
"grad_norm": 0.34004372649926684,
|
|
"learning_rate": 9.53301304678419e-06,
|
|
"loss": 1.1276,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.6843764070238632,
|
|
"grad_norm": 0.35758706995009787,
|
|
"learning_rate": 9.524135262330098e-06,
|
|
"loss": 1.1109,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.6879783881134625,
|
|
"grad_norm": 0.3220236515093335,
|
|
"learning_rate": 9.515178097680437e-06,
|
|
"loss": 1.119,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.6915803692030617,
|
|
"grad_norm": 0.35195981649297625,
|
|
"learning_rate": 9.506141709997058e-06,
|
|
"loss": 1.0968,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.695182350292661,
|
|
"grad_norm": 0.3960324035518941,
|
|
"learning_rate": 9.497026257831856e-06,
|
|
"loss": 1.1396,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.6987843313822603,
|
|
"grad_norm": 0.36557327721045585,
|
|
"learning_rate": 9.487831901123989e-06,
|
|
"loss": 1.1238,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.7023863124718596,
|
|
"grad_norm": 0.3343162963854113,
|
|
"learning_rate": 9.478558801197065e-06,
|
|
"loss": 1.1293,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.7059882935614588,
|
|
"grad_norm": 0.40726069321694147,
|
|
"learning_rate": 9.46920712075632e-06,
|
|
"loss": 1.1103,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.7095902746510581,
|
|
"grad_norm": 0.3381887572157023,
|
|
"learning_rate": 9.459777023885754e-06,
|
|
"loss": 1.0944,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.7131922557406574,
|
|
"grad_norm": 0.3733440854292112,
|
|
"learning_rate": 9.450268676045261e-06,
|
|
"loss": 1.1909,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.7167942368302567,
|
|
"grad_norm": 0.3755723398224028,
|
|
"learning_rate": 9.440682244067724e-06,
|
|
"loss": 1.0909,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.720396217919856,
|
|
"grad_norm": 0.38426065259470316,
|
|
"learning_rate": 9.431017896156074e-06,
|
|
"loss": 1.136,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.7239981990094552,
|
|
"grad_norm": 0.379071589505192,
|
|
"learning_rate": 9.421275801880363e-06,
|
|
"loss": 1.1121,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.7276001800990545,
|
|
"grad_norm": 0.34461539082999465,
|
|
"learning_rate": 9.411456132174768e-06,
|
|
"loss": 1.0988,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.7312021611886538,
|
|
"grad_norm": 0.3654979888018069,
|
|
"learning_rate": 9.401559059334601e-06,
|
|
"loss": 1.1238,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.7348041422782531,
|
|
"grad_norm": 0.34977548124519137,
|
|
"learning_rate": 9.39158475701329e-06,
|
|
"loss": 1.11,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.7384061233678523,
|
|
"grad_norm": 0.3738440708255404,
|
|
"learning_rate": 9.381533400219319e-06,
|
|
"loss": 1.1023,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.7420081044574516,
|
|
"grad_norm": 0.34015234663312666,
|
|
"learning_rate": 9.371405165313169e-06,
|
|
"loss": 1.1271,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.7456100855470509,
|
|
"grad_norm": 0.344237085861616,
|
|
"learning_rate": 9.361200230004219e-06,
|
|
"loss": 1.1192,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.7492120666366502,
|
|
"grad_norm": 0.3485062946685098,
|
|
"learning_rate": 9.35091877334763e-06,
|
|
"loss": 1.147,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.7528140477262495,
|
|
"grad_norm": 0.3458232266553943,
|
|
"learning_rate": 9.340560975741198e-06,
|
|
"loss": 1.15,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.7564160288158487,
|
|
"grad_norm": 0.338158493876411,
|
|
"learning_rate": 9.330127018922195e-06,
|
|
"loss": 1.1367,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.760018009905448,
|
|
"grad_norm": 0.34361214214695057,
|
|
"learning_rate": 9.319617085964177e-06,
|
|
"loss": 1.0956,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.7636199909950473,
|
|
"grad_norm": 0.3321785776797715,
|
|
"learning_rate": 9.309031361273775e-06,
|
|
"loss": 1.138,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.7672219720846466,
|
|
"grad_norm": 0.36852431928120577,
|
|
"learning_rate": 9.298370030587456e-06,
|
|
"loss": 1.1271,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.7708239531742458,
|
|
"grad_norm": 0.36986638396232147,
|
|
"learning_rate": 9.287633280968263e-06,
|
|
"loss": 1.112,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.7744259342638451,
|
|
"grad_norm": 0.32561497084045427,
|
|
"learning_rate": 9.276821300802535e-06,
|
|
"loss": 1.08,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.7780279153534444,
|
|
"grad_norm": 0.3746970517644171,
|
|
"learning_rate": 9.265934279796602e-06,
|
|
"loss": 1.1136,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.7816298964430437,
|
|
"grad_norm": 0.4173073842610304,
|
|
"learning_rate": 9.25497240897346e-06,
|
|
"loss": 1.1273,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.785231877532643,
|
|
"grad_norm": 0.37809222920052,
|
|
"learning_rate": 9.24393588066941e-06,
|
|
"loss": 1.0955,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.7888338586222422,
|
|
"grad_norm": 0.35280564760591715,
|
|
"learning_rate": 9.232824888530689e-06,
|
|
"loss": 1.1037,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.7924358397118415,
|
|
"grad_norm": 0.4219603934746488,
|
|
"learning_rate": 9.221639627510076e-06,
|
|
"loss": 1.1389,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.7960378208014408,
|
|
"grad_norm": 0.34392171607237565,
|
|
"learning_rate": 9.210380293863462e-06,
|
|
"loss": 1.1329,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.7996398018910401,
|
|
"grad_norm": 0.3553225127256251,
|
|
"learning_rate": 9.199047085146415e-06,
|
|
"loss": 1.0945,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.8032417829806393,
|
|
"grad_norm": 0.3492982055796588,
|
|
"learning_rate": 9.18764020021071e-06,
|
|
"loss": 1.1537,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.8068437640702386,
|
|
"grad_norm": 0.329347516548987,
|
|
"learning_rate": 9.176159839200838e-06,
|
|
"loss": 1.0952,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.8104457451598379,
|
|
"grad_norm": 0.3256059168725448,
|
|
"learning_rate": 9.164606203550498e-06,
|
|
"loss": 1.1352,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.8140477262494372,
|
|
"grad_norm": 0.40382790613119113,
|
|
"learning_rate": 9.152979495979064e-06,
|
|
"loss": 1.1412,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.8176497073390365,
|
|
"grad_norm": 0.3575693980124408,
|
|
"learning_rate": 9.141279920488021e-06,
|
|
"loss": 1.1295,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.8212516884286357,
|
|
"grad_norm": 0.3488694495678355,
|
|
"learning_rate": 9.129507682357393e-06,
|
|
"loss": 1.0832,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.824853669518235,
|
|
"grad_norm": 0.3539230531756584,
|
|
"learning_rate": 9.117662988142138e-06,
|
|
"loss": 1.1281,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.8284556506078343,
|
|
"grad_norm": 0.38111067495290035,
|
|
"learning_rate": 9.10574604566852e-06,
|
|
"loss": 1.1212,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.8320576316974336,
|
|
"grad_norm": 0.3883299352729627,
|
|
"learning_rate": 9.093757064030473e-06,
|
|
"loss": 1.138,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.8356596127870328,
|
|
"grad_norm": 0.32925848731485013,
|
|
"learning_rate": 9.08169625358592e-06,
|
|
"loss": 1.1304,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.8392615938766321,
|
|
"grad_norm": 0.4180598977166807,
|
|
"learning_rate": 9.069563825953092e-06,
|
|
"loss": 1.1038,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.8428635749662314,
|
|
"grad_norm": 0.34456574190208067,
|
|
"learning_rate": 9.057359994006806e-06,
|
|
"loss": 1.0855,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.8464655560558307,
|
|
"grad_norm": 0.39699049033870243,
|
|
"learning_rate": 9.045084971874738e-06,
|
|
"loss": 1.1254,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.85006753714543,
|
|
"grad_norm": 0.40763691295552823,
|
|
"learning_rate": 9.032738974933663e-06,
|
|
"loss": 1.0794,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.8536695182350292,
|
|
"grad_norm": 0.36236914147842597,
|
|
"learning_rate": 9.020322219805674e-06,
|
|
"loss": 1.1203,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.8572714993246285,
|
|
"grad_norm": 0.3833655304553943,
|
|
"learning_rate": 9.007834924354384e-06,
|
|
"loss": 1.1262,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.8608734804142278,
|
|
"grad_norm": 0.37172135259336675,
|
|
"learning_rate": 8.9952773076811e-06,
|
|
"loss": 1.0626,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.8644754615038271,
|
|
"grad_norm": 0.3334721033923123,
|
|
"learning_rate": 8.982649590120982e-06,
|
|
"loss": 1.0791,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.8680774425934263,
|
|
"grad_norm": 0.34086631317663335,
|
|
"learning_rate": 8.969951993239177e-06,
|
|
"loss": 1.1154,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.8716794236830256,
|
|
"grad_norm": 0.3371727774416591,
|
|
"learning_rate": 8.957184739826929e-06,
|
|
"loss": 1.1387,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.875281404772625,
|
|
"grad_norm": 0.3633470123731805,
|
|
"learning_rate": 8.944348053897672e-06,
|
|
"loss": 1.1505,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.8788833858622243,
|
|
"grad_norm": 0.3695807933678788,
|
|
"learning_rate": 8.931442160683094e-06,
|
|
"loss": 1.1351,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.8824853669518236,
|
|
"grad_norm": 0.38189837725904396,
|
|
"learning_rate": 8.9184672866292e-06,
|
|
"loss": 1.099,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.8860873480414228,
|
|
"grad_norm": 0.38223867232950853,
|
|
"learning_rate": 8.905423659392316e-06,
|
|
"loss": 1.0558,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.889689329131022,
|
|
"grad_norm": 0.33464001960015227,
|
|
"learning_rate": 8.892311507835118e-06,
|
|
"loss": 1.1106,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.8932913102206214,
|
|
"grad_norm": 0.3500335015064184,
|
|
"learning_rate": 8.879131062022598e-06,
|
|
"loss": 1.0943,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.8968932913102207,
|
|
"grad_norm": 0.38615622184380055,
|
|
"learning_rate": 8.865882553218036e-06,
|
|
"loss": 1.1362,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.9004952723998199,
|
|
"grad_norm": 0.3549431372544129,
|
|
"learning_rate": 8.852566213878947e-06,
|
|
"loss": 1.1453,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.9040972534894192,
|
|
"grad_norm": 0.3324763448668655,
|
|
"learning_rate": 8.83918227765299e-06,
|
|
"loss": 1.1157,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.9076992345790185,
|
|
"grad_norm": 0.35594427834572845,
|
|
"learning_rate": 8.825730979373873e-06,
|
|
"loss": 1.1095,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.9113012156686178,
|
|
"grad_norm": 0.34567215550933944,
|
|
"learning_rate": 8.81221255505724e-06,
|
|
"loss": 1.1086,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.9149031967582171,
|
|
"grad_norm": 0.3653541234957228,
|
|
"learning_rate": 8.798627241896524e-06,
|
|
"loss": 1.0936,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.9185051778478163,
|
|
"grad_norm": 0.3795042606149247,
|
|
"learning_rate": 8.784975278258783e-06,
|
|
"loss": 1.1185,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.9221071589374156,
|
|
"grad_norm": 0.3660926060585707,
|
|
"learning_rate": 8.77125690368052e-06,
|
|
"loss": 1.1117,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.9257091400270149,
|
|
"grad_norm": 0.377064805408519,
|
|
"learning_rate": 8.757472358863481e-06,
|
|
"loss": 1.1265,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.9293111211166142,
|
|
"grad_norm": 0.36507443565402037,
|
|
"learning_rate": 8.743621885670431e-06,
|
|
"loss": 1.1493,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.9329131022062134,
|
|
"grad_norm": 0.36199439427145674,
|
|
"learning_rate": 8.729705727120911e-06,
|
|
"loss": 1.0902,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.9365150832958127,
|
|
"grad_norm": 0.365398128659048,
|
|
"learning_rate": 8.715724127386971e-06,
|
|
"loss": 1.1199,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.940117064385412,
|
|
"grad_norm": 0.3597974956274622,
|
|
"learning_rate": 8.701677331788891e-06,
|
|
"loss": 1.1349,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.9437190454750113,
|
|
"grad_norm": 0.34565112893965727,
|
|
"learning_rate": 8.68756558679087e-06,
|
|
"loss": 1.1093,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.9473210265646106,
|
|
"grad_norm": 0.35741324005523417,
|
|
"learning_rate": 8.673389139996708e-06,
|
|
"loss": 1.0965,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.9509230076542098,
|
|
"grad_norm": 0.32598922479655423,
|
|
"learning_rate": 8.659148240145456e-06,
|
|
"loss": 1.105,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.9545249887438091,
|
|
"grad_norm": 0.3329117672496498,
|
|
"learning_rate": 8.644843137107058e-06,
|
|
"loss": 1.0749,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.9581269698334084,
|
|
"grad_norm": 0.3937476383650613,
|
|
"learning_rate": 8.630474081877959e-06,
|
|
"loss": 1.1018,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.9617289509230077,
|
|
"grad_norm": 0.31471194111230605,
|
|
"learning_rate": 8.616041326576711e-06,
|
|
"loss": 1.1058,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.9653309320126069,
|
|
"grad_norm": 0.36368215737547877,
|
|
"learning_rate": 8.601545124439535e-06,
|
|
"loss": 1.1285,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.9689329131022062,
|
|
"grad_norm": 0.3473607661443015,
|
|
"learning_rate": 8.586985729815895e-06,
|
|
"loss": 1.1326,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.9725348941918055,
|
|
"grad_norm": 0.377774377998179,
|
|
"learning_rate": 8.572363398164017e-06,
|
|
"loss": 1.1253,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.9761368752814048,
|
|
"grad_norm": 0.3611382706567514,
|
|
"learning_rate": 8.557678386046429e-06,
|
|
"loss": 1.1152,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.9797388563710041,
|
|
"grad_norm": 0.360467222912271,
|
|
"learning_rate": 8.542930951125432e-06,
|
|
"loss": 1.0612,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.9833408374606033,
|
|
"grad_norm": 0.36148714655712805,
|
|
"learning_rate": 8.528121352158604e-06,
|
|
"loss": 1.1254,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.9869428185502026,
|
|
"grad_norm": 0.4620821380277117,
|
|
"learning_rate": 8.513249848994248e-06,
|
|
"loss": 1.1151,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.9905447996398019,
|
|
"grad_norm": 0.3699494680308604,
|
|
"learning_rate": 8.498316702566828e-06,
|
|
"loss": 1.1331,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.9941467807294012,
|
|
"grad_norm": 0.3944738632436142,
|
|
"learning_rate": 8.483322174892404e-06,
|
|
"loss": 1.1218,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.9977487618190004,
|
|
"grad_norm": 0.37443116561016265,
|
|
"learning_rate": 8.468266529064025e-06,
|
|
"loss": 1.0858,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.37443116561016265,
|
|
"learning_rate": 8.453150029247115e-06,
|
|
"loss": 1.1388,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 1.0036019810895993,
|
|
"grad_norm": 0.5412674452239241,
|
|
"learning_rate": 8.437972940674838e-06,
|
|
"loss": 1.0955,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 1.0072039621791986,
|
|
"grad_norm": 0.3606994742407109,
|
|
"learning_rate": 8.422735529643445e-06,
|
|
"loss": 1.0508,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 1.010805943268798,
|
|
"grad_norm": 0.500891921192956,
|
|
"learning_rate": 8.4074380635076e-06,
|
|
"loss": 1.0681,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 1.0144079243583972,
|
|
"grad_norm": 0.3250502662600929,
|
|
"learning_rate": 8.392080810675692e-06,
|
|
"loss": 1.0734,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 1.0180099054479963,
|
|
"grad_norm": 0.4496303032317973,
|
|
"learning_rate": 8.376664040605122e-06,
|
|
"loss": 1.0971,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 1.0216118865375956,
|
|
"grad_norm": 0.3582282301299312,
|
|
"learning_rate": 8.361188023797581e-06,
|
|
"loss": 1.1034,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 1.025213867627195,
|
|
"grad_norm": 0.4887190983762863,
|
|
"learning_rate": 8.345653031794292e-06,
|
|
"loss": 1.0937,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 1.0288158487167942,
|
|
"grad_norm": 0.419655234473874,
|
|
"learning_rate": 8.33005933717126e-06,
|
|
"loss": 1.0917,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 1.0324178298063935,
|
|
"grad_norm": 0.4289753872257514,
|
|
"learning_rate": 8.314407213534477e-06,
|
|
"loss": 1.0846,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 1.0360198108959928,
|
|
"grad_norm": 0.3868135415082272,
|
|
"learning_rate": 8.298696935515132e-06,
|
|
"loss": 1.0901,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 1.0396217919855921,
|
|
"grad_norm": 0.39062885945304754,
|
|
"learning_rate": 8.282928778764783e-06,
|
|
"loss": 1.1005,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 1.0432237730751914,
|
|
"grad_norm": 0.3750887902646042,
|
|
"learning_rate": 8.267103019950529e-06,
|
|
"loss": 1.092,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 1.0468257541647907,
|
|
"grad_norm": 0.3944074092851082,
|
|
"learning_rate": 8.251219936750145e-06,
|
|
"loss": 1.0559,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 1.0504277352543898,
|
|
"grad_norm": 0.442523607996674,
|
|
"learning_rate": 8.235279807847223e-06,
|
|
"loss": 1.0879,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 1.0540297163439891,
|
|
"grad_norm": 0.400701104226115,
|
|
"learning_rate": 8.21928291292627e-06,
|
|
"loss": 1.0761,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 1.0576316974335884,
|
|
"grad_norm": 0.3929872862213915,
|
|
"learning_rate": 8.203229532667808e-06,
|
|
"loss": 1.1122,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 1.0612336785231877,
|
|
"grad_norm": 0.4802600223498965,
|
|
"learning_rate": 8.18711994874345e-06,
|
|
"loss": 1.0431,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 1.064835659612787,
|
|
"grad_norm": 0.3896837748940082,
|
|
"learning_rate": 8.170954443810947e-06,
|
|
"loss": 1.0706,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 1.0684376407023863,
|
|
"grad_norm": 0.46461703487742634,
|
|
"learning_rate": 8.154733301509249e-06,
|
|
"loss": 1.1189,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 1.0720396217919856,
|
|
"grad_norm": 0.37467583421393963,
|
|
"learning_rate": 8.138456806453503e-06,
|
|
"loss": 1.0592,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 1.075641602881585,
|
|
"grad_norm": 0.4532029044825418,
|
|
"learning_rate": 8.12212524423008e-06,
|
|
"loss": 1.0787,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 1.0792435839711843,
|
|
"grad_norm": 0.3732477898818737,
|
|
"learning_rate": 8.105738901391553e-06,
|
|
"loss": 1.0592,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 1.0828455650607833,
|
|
"grad_norm": 0.39110742601041953,
|
|
"learning_rate": 8.089298065451673e-06,
|
|
"loss": 1.0412,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 1.0864475461503826,
|
|
"grad_norm": 0.4819959554895645,
|
|
"learning_rate": 8.072803024880322e-06,
|
|
"loss": 1.1164,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 1.090049527239982,
|
|
"grad_norm": 0.40121766412624266,
|
|
"learning_rate": 8.05625406909846e-06,
|
|
"loss": 1.1179,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 1.0936515083295812,
|
|
"grad_norm": 0.43991989176939206,
|
|
"learning_rate": 8.039651488473028e-06,
|
|
"loss": 1.0665,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 1.0972534894191805,
|
|
"grad_norm": 0.3655171144729394,
|
|
"learning_rate": 8.022995574311876e-06,
|
|
"loss": 1.0892,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 1.1008554705087799,
|
|
"grad_norm": 0.33944440562879685,
|
|
"learning_rate": 8.006286618858634e-06,
|
|
"loss": 1.0412,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 1.1044574515983792,
|
|
"grad_norm": 0.4079616754346733,
|
|
"learning_rate": 7.989524915287595e-06,
|
|
"loss": 1.0643,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 1.1080594326879785,
|
|
"grad_norm": 0.345780124558647,
|
|
"learning_rate": 7.972710757698567e-06,
|
|
"loss": 1.0982,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 1.1116614137775778,
|
|
"grad_norm": 0.42242099900664976,
|
|
"learning_rate": 7.95584444111171e-06,
|
|
"loss": 1.1034,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 1.1152633948671768,
|
|
"grad_norm": 0.4005977051798865,
|
|
"learning_rate": 7.938926261462366e-06,
|
|
"loss": 1.0703,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 1.1188653759567762,
|
|
"grad_norm": 0.36549179047552754,
|
|
"learning_rate": 7.921956515595861e-06,
|
|
"loss": 1.1015,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 1.1224673570463755,
|
|
"grad_norm": 0.4262110579462978,
|
|
"learning_rate": 7.904935501262301e-06,
|
|
"loss": 1.0648,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 1.1260693381359748,
|
|
"grad_norm": 0.37185963973126124,
|
|
"learning_rate": 7.887863517111337e-06,
|
|
"loss": 1.1019,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 1.129671319225574,
|
|
"grad_norm": 0.34072768994255465,
|
|
"learning_rate": 7.87074086268695e-06,
|
|
"loss": 1.049,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 1.1332733003151734,
|
|
"grad_norm": 0.38829026634405583,
|
|
"learning_rate": 7.85356783842216e-06,
|
|
"loss": 1.099,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 1.1368752814047727,
|
|
"grad_norm": 0.3454891333781692,
|
|
"learning_rate": 7.836344745633785e-06,
|
|
"loss": 1.0896,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 1.140477262494372,
|
|
"grad_norm": 0.40407073745828537,
|
|
"learning_rate": 7.819071886517134e-06,
|
|
"loss": 1.0885,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 1.1440792435839713,
|
|
"grad_norm": 0.40693031599207263,
|
|
"learning_rate": 7.801749564140724e-06,
|
|
"loss": 1.0621,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 1.1476812246735704,
|
|
"grad_norm": 0.41527967835091,
|
|
"learning_rate": 7.78437808244094e-06,
|
|
"loss": 1.0661,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 1.1512832057631697,
|
|
"grad_norm": 0.32461739079088076,
|
|
"learning_rate": 7.76695774621672e-06,
|
|
"loss": 1.0738,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 1.154885186852769,
|
|
"grad_norm": 0.369117935448055,
|
|
"learning_rate": 7.7494888611242e-06,
|
|
"loss": 1.0507,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 1.1584871679423683,
|
|
"grad_norm": 0.40286832831280445,
|
|
"learning_rate": 7.731971733671347e-06,
|
|
"loss": 1.0115,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 1.1620891490319676,
|
|
"grad_norm": 0.3562069260238656,
|
|
"learning_rate": 7.714406671212589e-06,
|
|
"loss": 1.0678,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 1.1656911301215669,
|
|
"grad_norm": 0.4156873725872057,
|
|
"learning_rate": 7.696793981943418e-06,
|
|
"loss": 1.0846,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 1.1692931112111662,
|
|
"grad_norm": 0.40288662850406076,
|
|
"learning_rate": 7.679133974894984e-06,
|
|
"loss": 1.0544,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 1.1728950923007655,
|
|
"grad_norm": 0.3739155064577844,
|
|
"learning_rate": 7.66142695992867e-06,
|
|
"loss": 1.0737,
|
|
"step": 326
|
|
},
{
"epoch": 1.1764970733903648,
"grad_norm": 0.4406680015338461,
"learning_rate": 7.64367324773066e-06,
"loss": 1.1106,
"step": 327
},
{
"epoch": 1.1800990544799639,
"grad_norm": 0.34345904522451653,
"learning_rate": 7.6258731498064796e-06,
"loss": 1.0759,
"step": 328
},
{
"epoch": 1.1837010355695632,
"grad_norm": 0.4365590987719929,
"learning_rate": 7.6080269784755405e-06,
"loss": 1.0738,
"step": 329
},
{
"epoch": 1.1873030166591625,
"grad_norm": 0.3633076504956387,
"learning_rate": 7.590135046865652e-06,
"loss": 1.1089,
"step": 330
},
{
"epoch": 1.1909049977487618,
"grad_norm": 0.395068007524633,
"learning_rate": 7.572197668907533e-06,
"loss": 1.1244,
"step": 331
},
{
"epoch": 1.194506978838361,
"grad_norm": 0.39729351127387735,
"learning_rate": 7.5542151593293e-06,
"loss": 1.0889,
"step": 332
},
{
"epoch": 1.1981089599279604,
"grad_norm": 2.5701237358423255,
"learning_rate": 7.536187833650947e-06,
"loss": 1.1059,
"step": 333
},
{
"epoch": 1.2017109410175597,
"grad_norm": 0.47480486252921483,
"learning_rate": 7.518116008178805e-06,
"loss": 1.0482,
"step": 334
},
{
"epoch": 1.205312922107159,
"grad_norm": 0.40028253752269954,
"learning_rate": 7.500000000000001e-06,
"loss": 1.0897,
"step": 335
},
{
"epoch": 1.2089149031967583,
"grad_norm": 0.45760958914786226,
"learning_rate": 7.481840126976885e-06,
"loss": 1.0907,
"step": 336
},
{
"epoch": 1.2125168842863574,
"grad_norm": 0.40820185657609664,
"learning_rate": 7.463636707741458e-06,
"loss": 1.0526,
"step": 337
},
{
"epoch": 1.2161188653759567,
"grad_norm": 0.4393002377107819,
"learning_rate": 7.445390061689782e-06,
"loss": 1.1063,
"step": 338
},
{
"epoch": 1.219720846465556,
"grad_norm": 0.3828530976851444,
"learning_rate": 7.42710050897637e-06,
"loss": 1.0735,
"step": 339
},
{
"epoch": 1.2233228275551553,
"grad_norm": 0.43353384279187324,
"learning_rate": 7.408768370508577e-06,
"loss": 1.0893,
"step": 340
},
{
"epoch": 1.2269248086447546,
"grad_norm": 0.3888399760706739,
"learning_rate": 7.390393967940962e-06,
"loss": 1.0666,
"step": 341
},
{
"epoch": 1.230526789734354,
"grad_norm": 0.4143338997564131,
"learning_rate": 7.371977623669646e-06,
"loss": 1.1293,
"step": 342
},
{
"epoch": 1.2341287708239532,
"grad_norm": 0.32879022172398614,
"learning_rate": 7.353519660826665e-06,
"loss": 1.0879,
"step": 343
},
{
"epoch": 1.2377307519135525,
"grad_norm": 0.357811781692411,
"learning_rate": 7.335020403274277e-06,
"loss": 1.0792,
"step": 344
},
{
"epoch": 1.2413327330031518,
"grad_norm": 0.37966132927138474,
"learning_rate": 7.31648017559931e-06,
"loss": 1.0718,
"step": 345
},
{
"epoch": 1.244934714092751,
"grad_norm": 0.3968902781929655,
"learning_rate": 7.297899303107441e-06,
"loss": 1.0676,
"step": 346
},
{
"epoch": 1.2485366951823502,
"grad_norm": 0.35750082909244935,
"learning_rate": 7.279278111817502e-06,
"loss": 1.0227,
"step": 347
},
{
"epoch": 1.2521386762719495,
"grad_norm": 0.42773056447698266,
"learning_rate": 7.260616928455754e-06,
"loss": 1.0496,
"step": 348
},
{
"epoch": 1.2557406573615488,
"grad_norm": 0.40151554123482264,
"learning_rate": 7.241916080450163e-06,
"loss": 1.0696,
"step": 349
},
{
"epoch": 1.2593426384511481,
"grad_norm": 0.4131771160185376,
"learning_rate": 7.223175895924638e-06,
"loss": 1.0915,
"step": 350
},
{
"epoch": 1.2629446195407474,
"grad_norm": 0.4106261415779341,
"learning_rate": 7.2043967036932935e-06,
"loss": 1.0328,
"step": 351
},
{
"epoch": 1.2665466006303467,
"grad_norm": 0.34879774503343425,
"learning_rate": 7.185578833254665e-06,
"loss": 1.057,
"step": 352
},
{
"epoch": 1.270148581719946,
"grad_norm": 0.3836853786701187,
"learning_rate": 7.166722614785937e-06,
"loss": 1.0754,
"step": 353
},
{
"epoch": 1.2737505628095454,
"grad_norm": 0.3800741116171829,
"learning_rate": 7.1478283791371415e-06,
"loss": 1.0841,
"step": 354
},
{
"epoch": 1.2773525438991444,
"grad_norm": 0.34355233879245345,
"learning_rate": 7.128896457825364e-06,
"loss": 1.0632,
"step": 355
},
{
"epoch": 1.2809545249887437,
"grad_norm": 0.42485239689947135,
"learning_rate": 7.1099271830289155e-06,
"loss": 1.0985,
"step": 356
},
{
"epoch": 1.284556506078343,
"grad_norm": 0.4215135642816349,
"learning_rate": 7.090920887581507e-06,
"loss": 1.0707,
"step": 357
},
{
"epoch": 1.2881584871679423,
"grad_norm": 0.35779792395285226,
"learning_rate": 7.071877904966422e-06,
"loss": 1.0815,
"step": 358
},
{
"epoch": 1.2917604682575416,
"grad_norm": 0.43513590600966623,
"learning_rate": 7.052798569310641e-06,
"loss": 1.1024,
"step": 359
},
{
"epoch": 1.295362449347141,
"grad_norm": 0.3709332183690677,
"learning_rate": 7.033683215379002e-06,
"loss": 1.0788,
"step": 360
},
{
"epoch": 1.2989644304367403,
"grad_norm": 0.3951949031993291,
"learning_rate": 7.014532178568314e-06,
"loss": 1.05,
"step": 361
},
{
"epoch": 1.3025664115263396,
"grad_norm": 0.33987216437327716,
"learning_rate": 6.995345794901477e-06,
"loss": 1.0697,
"step": 362
},
{
"epoch": 1.3061683926159389,
"grad_norm": 0.3780771954200527,
"learning_rate": 6.976124401021583e-06,
"loss": 1.0729,
"step": 363
},
{
"epoch": 1.309770373705538,
"grad_norm": 0.3908638367202069,
"learning_rate": 6.9568683341860135e-06,
"loss": 1.0328,
"step": 364
},
{
"epoch": 1.3133723547951373,
"grad_norm": 0.38289027631075667,
"learning_rate": 6.9375779322605154e-06,
"loss": 1.0928,
"step": 365
},
{
"epoch": 1.3169743358847366,
"grad_norm": 0.3614859878801377,
"learning_rate": 6.9182535337132824e-06,
"loss": 1.0756,
"step": 366
},
{
"epoch": 1.3205763169743359,
"grad_norm": 0.3598395419280253,
"learning_rate": 6.898895477609007e-06,
"loss": 1.103,
"step": 367
},
{
"epoch": 1.3241782980639352,
"grad_norm": 0.37455771512012165,
"learning_rate": 6.879504103602934e-06,
"loss": 1.0758,
"step": 368
},
{
"epoch": 1.3277802791535345,
"grad_norm": 0.40574624953367006,
"learning_rate": 6.860079751934908e-06,
"loss": 1.0794,
"step": 369
},
{
"epoch": 1.3313822602431338,
"grad_norm": 0.3725008186416374,
"learning_rate": 6.840622763423391e-06,
"loss": 1.0756,
"step": 370
},
{
"epoch": 1.334984241332733,
"grad_norm": 0.3646005212300081,
"learning_rate": 6.821133479459492e-06,
"loss": 1.0959,
"step": 371
},
{
"epoch": 1.3385862224223324,
"grad_norm": 0.4113247656948934,
"learning_rate": 6.8016122420009745e-06,
"loss": 1.0835,
"step": 372
},
{
"epoch": 1.3421882035119315,
"grad_norm": 0.35986780686629694,
"learning_rate": 6.782059393566254e-06,
"loss": 1.0589,
"step": 373
},
{
"epoch": 1.3457901846015308,
"grad_norm": 0.40465760674379114,
"learning_rate": 6.762475277228393e-06,
"loss": 1.0825,
"step": 374
},
{
"epoch": 1.34939216569113,
"grad_norm": 0.3988091604755472,
"learning_rate": 6.7428602366090764e-06,
"loss": 1.0716,
"step": 375
},
{
"epoch": 1.3529941467807294,
"grad_norm": 0.40966197148471073,
"learning_rate": 6.723214615872585e-06,
"loss": 1.0922,
"step": 376
},
{
"epoch": 1.3565961278703287,
"grad_norm": 0.42278479417615644,
"learning_rate": 6.70353875971976e-06,
"loss": 1.0802,
"step": 377
},
{
"epoch": 1.360198108959928,
"grad_norm": 0.35541048253819246,
"learning_rate": 6.683833013381942e-06,
"loss": 1.0872,
"step": 378
},
{
"epoch": 1.3638000900495273,
"grad_norm": 0.3502392276010618,
"learning_rate": 6.664097722614934e-06,
"loss": 1.0583,
"step": 379
},
{
"epoch": 1.3674020711391266,
"grad_norm": 0.38138433478225825,
"learning_rate": 6.644333233692917e-06,
"loss": 1.0692,
"step": 380
},
{
"epoch": 1.371004052228726,
"grad_norm": 0.3377587100898131,
"learning_rate": 6.624539893402383e-06,
"loss": 1.079,
"step": 381
},
{
"epoch": 1.374606033318325,
"grad_norm": 0.3409130145543055,
"learning_rate": 6.604718049036047e-06,
"loss": 1.0794,
"step": 382
},
{
"epoch": 1.3782080144079243,
"grad_norm": 0.37652977172080476,
"learning_rate": 6.58486804838676e-06,
"loss": 1.0908,
"step": 383
},
{
"epoch": 1.3818099954975236,
"grad_norm": 0.3257917788013114,
"learning_rate": 6.5649902397413915e-06,
"loss": 1.0856,
"step": 384
},
{
"epoch": 1.385411976587123,
"grad_norm": 0.3195784470315217,
"learning_rate": 6.545084971874738e-06,
"loss": 1.1038,
"step": 385
},
{
"epoch": 1.3890139576767222,
"grad_norm": 0.3616341536622016,
"learning_rate": 6.525152594043389e-06,
"loss": 1.0814,
"step": 386
},
{
"epoch": 1.3926159387663215,
"grad_norm": 0.40527856810901475,
"learning_rate": 6.505193455979603e-06,
"loss": 1.0994,
"step": 387
},
{
"epoch": 1.3962179198559208,
"grad_norm": 0.35495797940458634,
"learning_rate": 6.485207907885175e-06,
"loss": 1.063,
"step": 388
},
{
"epoch": 1.3998199009455201,
"grad_norm": 0.3327326350539732,
"learning_rate": 6.465196300425287e-06,
"loss": 1.1113,
"step": 389
},
{
"epoch": 1.4034218820351194,
"grad_norm": 0.36061962924782925,
"learning_rate": 6.445158984722358e-06,
"loss": 1.0644,
"step": 390
},
{
"epoch": 1.4070238631247185,
"grad_norm": 0.34339629638301855,
"learning_rate": 6.425096312349881e-06,
"loss": 1.0904,
"step": 391
},
{
"epoch": 1.4106258442143178,
"grad_norm": 0.34978035011668274,
"learning_rate": 6.4050086353262565e-06,
"loss": 1.0788,
"step": 392
},
{
"epoch": 1.414227825303917,
"grad_norm": 0.38376522345500463,
"learning_rate": 6.384896306108612e-06,
"loss": 1.0564,
"step": 393
},
{
"epoch": 1.4178298063935164,
"grad_norm": 0.35566301025630226,
"learning_rate": 6.364759677586627e-06,
"loss": 1.083,
"step": 394
},
{
"epoch": 1.4214317874831157,
"grad_norm": 0.32888274597150713,
"learning_rate": 6.344599103076329e-06,
"loss": 1.0868,
"step": 395
},
{
"epoch": 1.425033768572715,
"grad_norm": 0.3669493819260475,
"learning_rate": 6.324414936313904e-06,
"loss": 1.0402,
"step": 396
},
{
"epoch": 1.4286357496623143,
"grad_norm": 0.3617926547245432,
"learning_rate": 6.304207531449486e-06,
"loss": 1.0905,
"step": 397
},
{
"epoch": 1.4322377307519136,
"grad_norm": 0.36414284887525294,
"learning_rate": 6.28397724304094e-06,
"loss": 1.0827,
"step": 398
},
{
"epoch": 1.435839711841513,
"grad_norm": 0.3386723453983854,
"learning_rate": 6.2637244260476474e-06,
"loss": 1.061,
"step": 399
},
{
"epoch": 1.439441692931112,
"grad_norm": 0.34781644394622324,
"learning_rate": 6.243449435824276e-06,
"loss": 1.0802,
"step": 400
},
{
"epoch": 1.4430436740207113,
"grad_norm": 0.3503485349700194,
"learning_rate": 6.223152628114537e-06,
"loss": 1.0664,
"step": 401
},
{
"epoch": 1.4466456551103106,
"grad_norm": 0.3328702702553546,
"learning_rate": 6.202834359044959e-06,
"loss": 1.0701,
"step": 402
},
{
"epoch": 1.45024763619991,
"grad_norm": 0.33914752395171327,
"learning_rate": 6.182494985118625e-06,
"loss": 1.0538,
"step": 403
},
{
"epoch": 1.4538496172895092,
"grad_norm": 0.39570376225734244,
"learning_rate": 6.1621348632089205e-06,
"loss": 1.0608,
"step": 404
},
{
"epoch": 1.4574515983791085,
"grad_norm": 0.3515088932100447,
"learning_rate": 6.141754350553279e-06,
"loss": 1.0665,
"step": 405
},
{
"epoch": 1.4610535794687078,
"grad_norm": 0.3393163686581987,
"learning_rate": 6.121353804746907e-06,
"loss": 1.0678,
"step": 406
},
{
"epoch": 1.4646555605583071,
"grad_norm": 0.363682561032409,
"learning_rate": 6.100933583736508e-06,
"loss": 1.0712,
"step": 407
},
{
"epoch": 1.4682575416479065,
"grad_norm": 0.3852133533106999,
"learning_rate": 6.080494045814011e-06,
"loss": 1.0675,
"step": 408
},
{
"epoch": 1.4718595227375055,
"grad_norm": 0.33227273331193696,
"learning_rate": 6.060035549610275e-06,
"loss": 1.0702,
"step": 409
},
{
"epoch": 1.4754615038271048,
"grad_norm": 0.34242254777920744,
"learning_rate": 6.039558454088796e-06,
"loss": 1.0887,
"step": 410
},
{
"epoch": 1.4790634849167041,
"grad_norm": 0.36440240139479513,
"learning_rate": 6.019063118539425e-06,
"loss": 1.0629,
"step": 411
},
{
"epoch": 1.4826654660063034,
"grad_norm": 0.34522394073241824,
"learning_rate": 5.9985499025720354e-06,
"loss": 1.0604,
"step": 412
},
{
"epoch": 1.4862674470959027,
"grad_norm": 0.35469984623846545,
"learning_rate": 5.978019166110242e-06,
"loss": 1.0732,
"step": 413
},
{
"epoch": 1.489869428185502,
"grad_norm": 0.3327229153625461,
"learning_rate": 5.957471269385065e-06,
"loss": 1.0916,
"step": 414
},
{
"epoch": 1.4934714092751014,
"grad_norm": 0.35947455556579844,
"learning_rate": 5.936906572928625e-06,
"loss": 1.0857,
"step": 415
},
{
"epoch": 1.4970733903647007,
"grad_norm": 0.3523660373102713,
"learning_rate": 5.9163254375677995e-06,
"loss": 1.0354,
"step": 416
},
{
"epoch": 1.5006753714543,
"grad_norm": 0.33259457908461304,
"learning_rate": 5.8957282244179125e-06,
"loss": 1.081,
"step": 417
},
{
"epoch": 1.504277352543899,
"grad_norm": 0.36043116131180003,
"learning_rate": 5.8751152948763815e-06,
"loss": 1.0882,
"step": 418
},
{
"epoch": 1.5078793336334986,
"grad_norm": 0.3644427491180247,
"learning_rate": 5.854487010616384e-06,
"loss": 1.0753,
"step": 419
},
{
"epoch": 1.5114813147230977,
"grad_norm": 0.35395707576036406,
"learning_rate": 5.8338437335805124e-06,
"loss": 1.0953,
"step": 420
},
{
"epoch": 1.515083295812697,
"grad_norm": 0.48229948381614823,
"learning_rate": 5.813185825974419e-06,
"loss": 1.1207,
"step": 421
},
{
"epoch": 1.5186852769022963,
"grad_norm": 0.38738601915939525,
"learning_rate": 5.792513650260465e-06,
"loss": 1.0958,
"step": 422
},
{
"epoch": 1.5222872579918956,
"grad_norm": 0.3259233561804611,
"learning_rate": 5.771827569151357e-06,
"loss": 1.0954,
"step": 423
},
{
"epoch": 1.5258892390814949,
"grad_norm": 0.3332133749381523,
"learning_rate": 5.751127945603786e-06,
"loss": 1.0927,
"step": 424
},
{
"epoch": 1.529491220171094,
"grad_norm": 0.32989085101834736,
"learning_rate": 5.730415142812059e-06,
"loss": 1.0527,
"step": 425
},
{
"epoch": 1.5330932012606935,
"grad_norm": 0.36851444118947263,
"learning_rate": 5.709689524201723e-06,
"loss": 1.0583,
"step": 426
},
{
"epoch": 1.5366951823502926,
"grad_norm": 0.35379667109046153,
"learning_rate": 5.68895145342319e-06,
"loss": 1.0943,
"step": 427
},
{
"epoch": 1.540297163439892,
"grad_norm": 0.37779417160624446,
"learning_rate": 5.668201294345363e-06,
"loss": 1.0812,
"step": 428
},
{
"epoch": 1.5438991445294912,
"grad_norm": 0.3256143401933206,
"learning_rate": 5.647439411049235e-06,
"loss": 1.0646,
"step": 429
},
{
"epoch": 1.5475011256190905,
"grad_norm": 0.34874005725965246,
"learning_rate": 5.626666167821522e-06,
"loss": 1.0753,
"step": 430
},
{
"epoch": 1.5511031067086898,
"grad_norm": 0.315911724351532,
"learning_rate": 5.605881929148254e-06,
"loss": 1.062,
"step": 431
},
{
"epoch": 1.554705087798289,
"grad_norm": 0.3684384723712539,
"learning_rate": 5.585087059708389e-06,
"loss": 1.0853,
"step": 432
},
{
"epoch": 1.5583070688878884,
"grad_norm": 0.3803316237726428,
"learning_rate": 5.5642819243674085e-06,
"loss": 1.0471,
"step": 433
},
{
"epoch": 1.5619090499774875,
"grad_norm": 0.3322373067202158,
"learning_rate": 5.543466888170927e-06,
"loss": 1.0472,
"step": 434
},
{
"epoch": 1.565511031067087,
"grad_norm": 0.35808606269858695,
"learning_rate": 5.522642316338268e-06,
"loss": 1.0435,
"step": 435
},
{
"epoch": 1.569113012156686,
"grad_norm": 0.3651095676354401,
"learning_rate": 5.5018085742560745e-06,
"loss": 1.0415,
"step": 436
},
{
"epoch": 1.5727149932462856,
"grad_norm": 0.3434911579987937,
"learning_rate": 5.480966027471889e-06,
"loss": 1.0683,
"step": 437
},
{
"epoch": 1.5763169743358847,
"grad_norm": 0.3704214813504543,
"learning_rate": 5.460115041687737e-06,
"loss": 1.0413,
"step": 438
},
{
"epoch": 1.579918955425484,
"grad_norm": 0.34635012302936774,
"learning_rate": 5.439255982753717e-06,
"loss": 1.097,
"step": 439
},
{
"epoch": 1.5835209365150833,
"grad_norm": 0.3894870354521757,
"learning_rate": 5.41838921666158e-06,
"loss": 1.0078,
"step": 440
},
{
"epoch": 1.5871229176046826,
"grad_norm": 0.32628973132390515,
"learning_rate": 5.3975151095383e-06,
"loss": 1.0708,
"step": 441
},
{
"epoch": 1.590724898694282,
"grad_norm": 0.353493468436304,
"learning_rate": 5.376634027639664e-06,
"loss": 1.0893,
"step": 442
},
{
"epoch": 1.594326879783881,
"grad_norm": 0.35574519333131815,
"learning_rate": 5.355746337343835e-06,
"loss": 1.0992,
"step": 443
},
{
"epoch": 1.5979288608734805,
"grad_norm": 0.3721178525169961,
"learning_rate": 5.334852405144926e-06,
"loss": 1.08,
"step": 444
},
{
"epoch": 1.6015308419630796,
"grad_norm": 0.35260152951310547,
"learning_rate": 5.3139525976465675e-06,
"loss": 1.0573,
"step": 445
},
{
"epoch": 1.6051328230526791,
"grad_norm": 0.3102871481104457,
"learning_rate": 5.293047281555482e-06,
"loss": 1.0845,
"step": 446
},
{
"epoch": 1.6087348041422782,
"grad_norm": 0.34166757066038683,
"learning_rate": 5.272136823675046e-06,
"loss": 1.0644,
"step": 447
},
{
"epoch": 1.6123367852318775,
"grad_norm": 0.3555363375900774,
"learning_rate": 5.251221590898848e-06,
"loss": 1.086,
"step": 448
},
{
"epoch": 1.6159387663214768,
"grad_norm": 0.3487085514723649,
"learning_rate": 5.230301950204261e-06,
"loss": 1.0655,
"step": 449
},
{
"epoch": 1.6195407474110761,
"grad_norm": 0.3481330917315912,
"learning_rate": 5.209378268645998e-06,
"loss": 1.0804,
"step": 450
},
{
"epoch": 1.6231427285006754,
"grad_norm": 0.3479613181209226,
"learning_rate": 5.188450913349674e-06,
"loss": 1.0802,
"step": 451
},
{
"epoch": 1.6267447095902745,
"grad_norm": 0.329411221852206,
"learning_rate": 5.167520251505358e-06,
"loss": 1.049,
"step": 452
},
{
"epoch": 1.630346690679874,
"grad_norm": 0.373931387358175,
"learning_rate": 5.146586650361143e-06,
"loss": 1.05,
"step": 453
},
{
"epoch": 1.6339486717694731,
"grad_norm": 0.33179661919955405,
"learning_rate": 5.1256504772166885e-06,
"loss": 1.068,
"step": 454
},
{
"epoch": 1.6375506528590726,
"grad_norm": 0.3248298243422403,
"learning_rate": 5.1047120994167855e-06,
"loss": 1.0459,
"step": 455
},
{
"epoch": 1.6411526339486717,
"grad_norm": 0.32872162641417635,
"learning_rate": 5.083771884344908e-06,
"loss": 1.1005,
"step": 456
},
{
"epoch": 1.644754615038271,
"grad_norm": 0.3350940372390754,
"learning_rate": 5.062830199416764e-06,
"loss": 1.0729,
"step": 457
},
{
"epoch": 1.6483565961278703,
"grad_norm": 0.3500027893173043,
"learning_rate": 5.041887412073853e-06,
"loss": 1.056,
"step": 458
},
{
"epoch": 1.6519585772174696,
"grad_norm": 0.352966799403548,
"learning_rate": 5.0209438897770205e-06,
"loss": 1.0368,
"step": 459
},
{
"epoch": 1.655560558307069,
"grad_norm": 0.3570213767690983,
"learning_rate": 5e-06,
"loss": 1.1005,
"step": 460
},
{
"epoch": 1.659162539396668,
"grad_norm": 0.35681386612113786,
"learning_rate": 4.979056110222982e-06,
"loss": 1.0552,
"step": 461
},
{
"epoch": 1.6627645204862675,
"grad_norm": 0.37777499026486816,
"learning_rate": 4.9581125879261476e-06,
"loss": 1.0655,
"step": 462
},
{
"epoch": 1.6663665015758666,
"grad_norm": 0.3720854550185324,
"learning_rate": 4.937169800583237e-06,
"loss": 1.0905,
"step": 463
},
{
"epoch": 1.6699684826654662,
"grad_norm": 0.35871679077028334,
"learning_rate": 4.9162281156550945e-06,
"loss": 1.0735,
"step": 464
},
{
"epoch": 1.6735704637550652,
"grad_norm": 0.32746933618519874,
"learning_rate": 4.895287900583216e-06,
"loss": 1.0375,
"step": 465
},
{
"epoch": 1.6771724448446645,
"grad_norm": 0.38118331962443885,
"learning_rate": 4.874349522783313e-06,
"loss": 1.0646,
"step": 466
},
{
"epoch": 1.6807744259342638,
"grad_norm": 0.35946893815132736,
"learning_rate": 4.853413349638859e-06,
"loss": 1.0828,
"step": 467
},
{
"epoch": 1.6843764070238632,
"grad_norm": 0.3666759251106939,
"learning_rate": 4.832479748494643e-06,
"loss": 1.0551,
"step": 468
},
{
"epoch": 1.6879783881134625,
"grad_norm": 0.3508414333413744,
"learning_rate": 4.811549086650327e-06,
"loss": 1.0599,
"step": 469
},
{
"epoch": 1.6915803692030615,
"grad_norm": 0.35707827749845356,
"learning_rate": 4.7906217313540035e-06,
"loss": 1.0759,
"step": 470
},
{
"epoch": 1.695182350292661,
"grad_norm": 0.3788611425520442,
"learning_rate": 4.769698049795739e-06,
"loss": 1.0357,
"step": 471
},
{
"epoch": 1.6987843313822601,
"grad_norm": 0.31406557358200793,
"learning_rate": 4.748778409101153e-06,
"loss": 1.0835,
"step": 472
},
{
"epoch": 1.7023863124718597,
"grad_norm": 0.330042469142287,
"learning_rate": 4.727863176324955e-06,
"loss": 1.0356,
"step": 473
},
{
"epoch": 1.7059882935614588,
"grad_norm": 0.3692982637487251,
"learning_rate": 4.706952718444518e-06,
"loss": 1.0224,
"step": 474
},
{
"epoch": 1.709590274651058,
"grad_norm": 0.3401427800549121,
"learning_rate": 4.686047402353433e-06,
"loss": 1.056,
"step": 475
},
{
"epoch": 1.7131922557406574,
"grad_norm": 0.32559689457568264,
"learning_rate": 4.6651475948550765e-06,
"loss": 1.072,
"step": 476
},
{
"epoch": 1.7167942368302567,
"grad_norm": 0.33516871825696326,
"learning_rate": 4.644253662656167e-06,
"loss": 1.056,
"step": 477
},
{
"epoch": 1.720396217919856,
"grad_norm": 0.34667604652264583,
"learning_rate": 4.6233659723603374e-06,
"loss": 1.0555,
"step": 478
},
{
"epoch": 1.723998199009455,
"grad_norm": 0.37161554561132193,
"learning_rate": 4.602484890461702e-06,
"loss": 1.0563,
"step": 479
},
{
"epoch": 1.7276001800990546,
"grad_norm": 0.31848164234901527,
"learning_rate": 4.581610783338424e-06,
"loss": 1.0941,
"step": 480
},
{
"epoch": 1.7312021611886537,
"grad_norm": 0.3594252163107309,
"learning_rate": 4.560744017246284e-06,
"loss": 1.0751,
"step": 481
},
{
"epoch": 1.7348041422782532,
"grad_norm": 0.3544059030193539,
"learning_rate": 4.539884958312265e-06,
"loss": 1.0469,
"step": 482
},
{
"epoch": 1.7384061233678523,
"grad_norm": 0.32959836916197055,
"learning_rate": 4.519033972528114e-06,
"loss": 1.063,
"step": 483
},
{
"epoch": 1.7420081044574516,
"grad_norm": 0.31875669846466737,
"learning_rate": 4.4981914257439254e-06,
"loss": 1.0575,
"step": 484
},
{
"epoch": 1.7456100855470509,
"grad_norm": 0.31894865236781544,
"learning_rate": 4.477357683661734e-06,
"loss": 1.0591,
"step": 485
},
{
"epoch": 1.7492120666366502,
"grad_norm": 0.355450564535886,
"learning_rate": 4.456533111829076e-06,
"loss": 1.1063,
"step": 486
},
{
"epoch": 1.7528140477262495,
"grad_norm": 0.35660689731655426,
"learning_rate": 4.4357180756325915e-06,
"loss": 1.025,
"step": 487
},
{
"epoch": 1.7564160288158486,
"grad_norm": 0.35786503142864406,
"learning_rate": 4.414912940291614e-06,
"loss": 1.0731,
"step": 488
},
{
"epoch": 1.760018009905448,
"grad_norm": 0.33168551352286857,
"learning_rate": 4.394118070851749e-06,
"loss": 1.0152,
"step": 489
},
{
"epoch": 1.7636199909950472,
"grad_norm": 0.31557735871932313,
"learning_rate": 4.373333832178478e-06,
"loss": 1.0658,
"step": 490
},
{
"epoch": 1.7672219720846467,
"grad_norm": 0.3643796172518758,
"learning_rate": 4.352560588950766e-06,
"loss": 1.0788,
"step": 491
},
{
"epoch": 1.7708239531742458,
"grad_norm": 0.3342250830340594,
"learning_rate": 4.331798705654639e-06,
"loss": 1.0929,
"step": 492
},
{
"epoch": 1.774425934263845,
"grad_norm": 0.8828365078514346,
"learning_rate": 4.31104854657681e-06,
"loss": 1.0723,
"step": 493
},
{
"epoch": 1.7780279153534444,
"grad_norm": 0.32325289629440884,
"learning_rate": 4.290310475798278e-06,
"loss": 1.0472,
"step": 494
},
{
"epoch": 1.7816298964430437,
"grad_norm": 0.36167307979154,
"learning_rate": 4.269584857187942e-06,
"loss": 1.0803,
"step": 495
},
{
"epoch": 1.785231877532643,
"grad_norm": 0.3112535067026799,
"learning_rate": 4.248872054396215e-06,
"loss": 1.0522,
"step": 496
},
{
"epoch": 1.788833858622242,
"grad_norm": 0.34086457664496106,
"learning_rate": 4.228172430848645e-06,
"loss": 1.0775,
"step": 497
},
{
"epoch": 1.7924358397118416,
"grad_norm": 0.30310958551948036,
"learning_rate": 4.207486349739538e-06,
"loss": 1.0487,
"step": 498
},
{
"epoch": 1.7960378208014407,
"grad_norm": 0.3511261941310994,
"learning_rate": 4.186814174025582e-06,
"loss": 1.0844,
"step": 499
},
{
"epoch": 1.7996398018910402,
"grad_norm": 0.3503896982707501,
"learning_rate": 4.166156266419489e-06,
"loss": 1.0132,
"step": 500
},
{
"epoch": 1.8032417829806393,
"grad_norm": 0.3436670120686724,
"learning_rate": 4.145512989383618e-06,
"loss": 1.0598,
"step": 501
},
{
"epoch": 1.8068437640702386,
"grad_norm": 0.34785159088570466,
"learning_rate": 4.124884705123619e-06,
"loss": 1.035,
"step": 502
},
{
"epoch": 1.810445745159838,
"grad_norm": 0.33165542782534474,
"learning_rate": 4.104271775582089e-06,
"loss": 1.0358,
"step": 503
},
{
"epoch": 1.8140477262494372,
"grad_norm": 0.4070554454819295,
"learning_rate": 4.083674562432203e-06,
"loss": 1.0434,
"step": 504
},
{
"epoch": 1.8176497073390365,
"grad_norm": 0.3624273928106286,
"learning_rate": 4.063093427071376e-06,
"loss": 1.0995,
"step": 505
},
{
"epoch": 1.8212516884286356,
"grad_norm": 0.32728343459448644,
"learning_rate": 4.042528730614935e-06,
"loss": 1.0178,
"step": 506
},
{
"epoch": 1.8248536695182351,
"grad_norm": 0.31687128264594894,
"learning_rate": 4.02198083388976e-06,
"loss": 1.0649,
"step": 507
},
{
"epoch": 1.8284556506078342,
"grad_norm": 0.3744296721151693,
"learning_rate": 4.001450097427965e-06,
"loss": 1.0252,
"step": 508
},
{
"epoch": 1.8320576316974337,
"grad_norm": 0.3132845490517284,
"learning_rate": 3.980936881460576e-06,
"loss": 1.0795,
"step": 509
},
{
"epoch": 1.8356596127870328,
"grad_norm": 0.3420985303611589,
"learning_rate": 3.960441545911205e-06,
"loss": 1.0746,
"step": 510
},
{
"epoch": 1.8392615938766321,
"grad_norm": 0.307478947705401,
"learning_rate": 3.939964450389728e-06,
"loss": 1.0853,
"step": 511
},
{
"epoch": 1.8428635749662314,
"grad_norm": 0.33071208183560175,
"learning_rate": 3.91950595418599e-06,
"loss": 1.0528,
"step": 512
},
{
"epoch": 1.8464655560558307,
"grad_norm": 0.3340631255697292,
"learning_rate": 3.899066416263493e-06,
"loss": 1.0198,
"step": 513
},
{
"epoch": 1.85006753714543,
"grad_norm": 0.32856275060694273,
"learning_rate": 3.8786461952530955e-06,
"loss": 1.0969,
"step": 514
},
{
"epoch": 1.8536695182350291,
"grad_norm": 0.3416526016934508,
"learning_rate": 3.8582456494467214e-06,
"loss": 1.0863,
"step": 515
},
{
"epoch": 1.8572714993246286,
"grad_norm": 0.3185489633634486,
"learning_rate": 3.83786513679108e-06,
"loss": 1.0385,
"step": 516
},
{
"epoch": 1.8608734804142277,
"grad_norm": 0.32623706503747446,
"learning_rate": 3.817505014881378e-06,
"loss": 1.0144,
"step": 517
},
{
"epoch": 1.8644754615038273,
"grad_norm": 0.32218552881112167,
"learning_rate": 3.797165640955041e-06,
"loss": 1.0705,
"step": 518
},
{
"epoch": 1.8680774425934263,
"grad_norm": 0.30139922286880777,
"learning_rate": 3.776847371885464e-06,
"loss": 1.0493,
"step": 519
},
{
"epoch": 1.8716794236830256,
"grad_norm": 0.377029678977555,
"learning_rate": 3.756550564175727e-06,
"loss": 1.0494,
"step": 520
},
{
"epoch": 1.875281404772625,
"grad_norm": 0.345992186114263,
"learning_rate": 3.736275573952354e-06,
"loss": 1.0228,
"step": 521
},
{
"epoch": 1.8788833858622243,
"grad_norm": 0.3033717191767872,
"learning_rate": 3.716022756959061e-06,
"loss": 1.0896,
"step": 522
},
{
"epoch": 1.8824853669518236,
"grad_norm": 0.32124744536039596,
"learning_rate": 3.695792468550517e-06,
"loss": 1.0467,
"step": 523
},
{
"epoch": 1.8860873480414226,
"grad_norm": 0.3470918739977987,
"learning_rate": 3.6755850636860956e-06,
"loss": 1.0524,
"step": 524
},
{
"epoch": 1.8896893291310222,
"grad_norm": 0.38530316140769727,
"learning_rate": 3.655400896923672e-06,
"loss": 1.0685,
"step": 525
},
{
"epoch": 1.8932913102206212,
"grad_norm": 0.34989092164325525,
"learning_rate": 3.635240322413375e-06,
"loss": 1.0953,
"step": 526
},
{
"epoch": 1.8968932913102208,
"grad_norm": 0.30619682455422553,
"learning_rate": 3.6151036938913887e-06,
"loss": 1.0866,
"step": 527
},
{
"epoch": 1.9004952723998199,
"grad_norm": 0.33603759713948095,
"learning_rate": 3.5949913646737456e-06,
"loss": 1.0562,
"step": 528
},
{
"epoch": 1.9040972534894192,
"grad_norm": 0.36283301326571393,
"learning_rate": 3.5749036876501196e-06,
"loss": 1.0877,
"step": 529
},
{
"epoch": 1.9076992345790185,
"grad_norm": 0.32449495469021755,
"learning_rate": 3.5548410152776414e-06,
"loss": 1.0926,
"step": 530
},
{
"epoch": 1.9113012156686178,
"grad_norm": 0.30909965481121454,
"learning_rate": 3.5348036995747135e-06,
"loss": 1.0924,
"step": 531
},
{
"epoch": 1.914903196758217,
"grad_norm": 0.31489637458548564,
"learning_rate": 3.5147920921148267e-06,
"loss": 1.0828,
"step": 532
},
{
"epoch": 1.9185051778478162,
"grad_norm": 0.3330350499732472,
"learning_rate": 3.4948065440203982e-06,
"loss": 1.0685,
"step": 533
},
{
"epoch": 1.9221071589374157,
"grad_norm": 0.3107239605147675,
"learning_rate": 3.474847405956613e-06,
"loss": 1.0618,
"step": 534
},
{
"epoch": 1.9257091400270148,
"grad_norm": 0.34315983029146,
"learning_rate": 3.4549150281252635e-06,
"loss": 1.1048,
"step": 535
},
{
"epoch": 1.9293111211166143,
"grad_norm": 0.3227737291510128,
"learning_rate": 3.4350097602586085e-06,
"loss": 1.0491,
"step": 536
},
{
"epoch": 1.9329131022062134,
"grad_norm": 0.30320716585058866,
"learning_rate": 3.4151319516132414e-06,
"loss": 1.0179,
"step": 537
},
{
"epoch": 1.9365150832958127,
"grad_norm": 0.32851520671529355,
"learning_rate": 3.3952819509639534e-06,
"loss": 1.0495,
"step": 538
},
{
"epoch": 1.940117064385412,
"grad_norm": 0.33703603920644415,
"learning_rate": 3.375460106597619e-06,
"loss": 1.0148,
"step": 539
},
{
"epoch": 1.9437190454750113,
"grad_norm": 0.3097947001373875,
"learning_rate": 3.355666766307084e-06,
"loss": 1.1156,
"step": 540
},
{
"epoch": 1.9473210265646106,
"grad_norm": 0.3068291690397491,
"learning_rate": 3.3359022773850673e-06,
"loss": 1.0315,
"step": 541
},
{
"epoch": 1.9509230076542097,
"grad_norm": 0.3376120770611421,
"learning_rate": 3.31616698661806e-06,
"loss": 1.0564,
"step": 542
},
{
"epoch": 1.9545249887438092,
"grad_norm": 0.29124426624612915,
"learning_rate": 3.2964612402802422e-06,
"loss": 1.0689,
"step": 543
},
{
"epoch": 1.9581269698334083,
"grad_norm": 0.2948609004870433,
"learning_rate": 3.2767853841274154e-06,
"loss": 1.0684,
"step": 544
},
{
"epoch": 1.9617289509230078,
"grad_norm": 0.3558137852450314,
"learning_rate": 3.2571397633909252e-06,
"loss": 1.0452,
"step": 545
},
{
"epoch": 1.965330932012607,
"grad_norm": 0.3008490022930337,
"learning_rate": 3.2375247227716077e-06,
"loss": 1.0235,
"step": 546
},
{
"epoch": 1.9689329131022062,
"grad_norm": 0.31102274440087274,
"learning_rate": 3.217940606433747e-06,
"loss": 1.0575,
"step": 547
},
{
"epoch": 1.9725348941918055,
"grad_norm": 0.33011139669007306,
"learning_rate": 3.1983877579990276e-06,
"loss": 1.0419,
"step": 548
},
{
"epoch": 1.9761368752814048,
"grad_norm": 0.314573659163516,
"learning_rate": 3.178866520540509e-06,
"loss": 1.0364,
"step": 549
},
{
"epoch": 1.979738856371004,
"grad_norm": 0.313348256207242,
"learning_rate": 3.1593772365766107e-06,
"loss": 1.0615,
"step": 550
},
{
"epoch": 1.9833408374606032,
"grad_norm": 0.32258850263277733,
"learning_rate": 3.139920248065095e-06,
"loss": 1.0896,
"step": 551
},
{
"epoch": 1.9869428185502027,
"grad_norm": 0.3154271741355316,
"learning_rate": 3.1204958963970666e-06,
"loss": 1.0501,
"step": 552
},
{
"epoch": 1.9905447996398018,
"grad_norm": 0.3406321762516789,
"learning_rate": 3.1011045223909954e-06,
"loss": 1.0761,
"step": 553
},
{
"epoch": 1.9941467807294013,
"grad_norm": 0.31522039092493703,
"learning_rate": 3.0817464662867192e-06,
"loss": 1.0556,
"step": 554
},
{
"epoch": 1.9977487618190004,
"grad_norm": 0.3147135586324213,
"learning_rate": 3.0624220677394854e-06,
"loss": 1.0857,
"step": 555
},
{
"epoch": 2.0,
"grad_norm": 0.44015502627582576,
"learning_rate": 3.043131665813988e-06,
"loss": 1.0463,
"step": 556
},
{
"epoch": 2.003601981089599,
"grad_norm": 0.3983894180930443,
"learning_rate": 3.023875598978419e-06,
"loss": 1.0393,
"step": 557
},
{
"epoch": 2.0072039621791986,
"grad_norm": 0.3067099309247845,
"learning_rate": 3.004654205098524e-06,
"loss": 1.0605,
"step": 558
},
{
"epoch": 2.0108059432687977,
"grad_norm": 0.3357353560695444,
"learning_rate": 2.9854678214316875e-06,
"loss": 1.0417,
"step": 559
},
{
"epoch": 2.014407924358397,
"grad_norm": 0.33999056854523163,
"learning_rate": 2.966316784621e-06,
"loss": 1.024,
"step": 560
},
{
"epoch": 2.0180099054479963,
"grad_norm": 0.31226386349104857,
"learning_rate": 2.9472014306893605e-06,
"loss": 1.0485,
"step": 561
},
{
"epoch": 2.021611886537596,
"grad_norm": 0.31603080640243814,
"learning_rate": 2.92812209503358e-06,
"loss": 1.0414,
"step": 562
},
{
"epoch": 2.025213867627195,
"grad_norm": 0.3459067464369587,
"learning_rate": 2.9090791124184934e-06,
"loss": 1.0756,
"step": 563
},
{
"epoch": 2.0288158487167944,
"grad_norm": 0.3330987662321279,
"learning_rate": 2.8900728169710866e-06,
"loss": 1.054,
"step": 564
},
{
"epoch": 2.0324178298063935,
"grad_norm": 0.2927527791629305,
"learning_rate": 2.871103542174637e-06,
"loss": 1.0665,
"step": 565
},
{
"epoch": 2.0360198108959926,
"grad_norm": 0.33886103259967365,
"learning_rate": 2.8521716208628597e-06,
"loss": 1.0595,
"step": 566
},
{
"epoch": 2.039621791985592,
"grad_norm": 0.31642624101440003,
"learning_rate": 2.8332773852140644e-06,
"loss": 1.0177,
"step": 567
},
{
"epoch": 2.043223773075191,
"grad_norm": 0.32001337639878913,
"learning_rate": 2.814421166745337e-06,
"loss": 1.0461,
"step": 568
},
{
"epoch": 2.0468257541647907,
"grad_norm": 0.3024517860403017,
"learning_rate": 2.795603296306708e-06,
"loss": 1.0665,
"step": 569
},
{
"epoch": 2.05042773525439,
"grad_norm": 0.3442352491720829,
"learning_rate": 2.776824104075364e-06,
"loss": 1.0368,
"step": 570
},
{
"epoch": 2.0540297163439893,
"grad_norm": 0.3293545677344158,
"learning_rate": 2.7580839195498397e-06,
"loss": 1.043,
"step": 571
},
{
"epoch": 2.0576316974335884,
"grad_norm": 0.31332600205637107,
"learning_rate": 2.739383071544246e-06,
"loss": 1.0476,
"step": 572
},
{
"epoch": 2.061233678523188,
"grad_norm": 0.32030515645666624,
"learning_rate": 2.7207218881825016e-06,
"loss": 1.0542,
"step": 573
},
{
"epoch": 2.064835659612787,
"grad_norm": 0.3364547680473557,
"learning_rate": 2.7021006968925613e-06,
"loss": 1.0364,
"step": 574
},
{
"epoch": 2.068437640702386,
"grad_norm": 0.3145267889190637,
"learning_rate": 2.683519824400693e-06,
"loss": 1.0621,
"step": 575
},
{
"epoch": 2.0720396217919856,
"grad_norm": 0.3221194724938311,
"learning_rate": 2.6649795967257243e-06,
"loss": 1.0827,
"step": 576
},
{
"epoch": 2.0756416028815847,
"grad_norm": 0.31171845327350106,
"learning_rate": 2.646480339173337e-06,
"loss": 1.0327,
"step": 577
},
{
"epoch": 2.0792435839711843,
"grad_norm": 0.2884769700670282,
"learning_rate": 2.6280223763303546e-06,
"loss": 1.0488,
"step": 578
},
{
"epoch": 2.0828455650607833,
"grad_norm": 0.3214670803689811,
"learning_rate": 2.6096060320590393e-06,
"loss": 1.0175,
"step": 579
},
{
"epoch": 2.086447546150383,
"grad_norm": 0.29588927971342016,
"learning_rate": 2.5912316294914232e-06,
"loss": 1.0299,
"step": 580
},
{
"epoch": 2.090049527239982,
"grad_norm": 0.3179276570664923,
"learning_rate": 2.5728994910236304e-06,
"loss": 1.0416,
"step": 581
},
{
"epoch": 2.0936515083295815,
"grad_norm": 0.32306719861333466,
"learning_rate": 2.5546099383102206e-06,
"loss": 1.043,
"step": 582
},
{
"epoch": 2.0972534894191805,
"grad_norm": 0.3201304980987189,
"learning_rate": 2.536363292258543e-06,
"loss": 1.0612,
"step": 583
},
{
"epoch": 2.1008554705087796,
"grad_norm": 0.2925431078505077,
"learning_rate": 2.518159873023116e-06,
"loss": 1.0317,
"step": 584
},
{
"epoch": 2.104457451598379,
"grad_norm": 0.33029491252375176,
"learning_rate": 2.5000000000000015e-06,
"loss": 1.0468,
"step": 585
},
{
"epoch": 2.1080594326879782,
"grad_norm": 0.3093075735098707,
"learning_rate": 2.4818839918211963e-06,
"loss": 1.0264,
"step": 586
},
{
"epoch": 2.1116614137775778,
"grad_norm": 0.33517262514779006,
"learning_rate": 2.4638121663490546e-06,
"loss": 1.0125,
"step": 587
},
{
"epoch": 2.115263394867177,
"grad_norm": 0.29875408693671646,
"learning_rate": 2.4457848406707014e-06,
"loss": 1.0145,
"step": 588
},
{
"epoch": 2.1188653759567764,
"grad_norm": 0.3461841979966471,
"learning_rate": 2.4278023310924676e-06,
"loss": 1.0526,
"step": 589
},
{
"epoch": 2.1224673570463755,
"grad_norm": 0.316595399237612,
"learning_rate": 2.40986495313435e-06,
"loss": 1.0276,
"step": 590
},
{
"epoch": 2.126069338135975,
"grad_norm": 0.35509810185127894,
"learning_rate": 2.391973021524461e-06,
"loss": 1.0236,
"step": 591
},
{
"epoch": 2.129671319225574,
"grad_norm": 0.31577714871143664,
"learning_rate": 2.3741268501935212e-06,
"loss": 1.0668,
"step": 592
},
{
"epoch": 2.133273300315173,
"grad_norm": 0.30363778483757914,
"learning_rate": 2.356326752269342e-06,
"loss": 1.0399,
"step": 593
},
{
"epoch": 2.1368752814047727,
"grad_norm": 0.2951535358604565,
"learning_rate": 2.338573040071332e-06,
"loss": 1.0674,
"step": 594
},
{
"epoch": 2.1404772624943718,
"grad_norm": 0.2916733821330133,
"learning_rate": 2.320866025105016e-06,
"loss": 1.0251,
"step": 595
},
{
"epoch": 2.1440792435839713,
"grad_norm": 0.28186913326636054,
"learning_rate": 2.303206018056583e-06,
"loss": 1.04,
"step": 596
},
{
"epoch": 2.1476812246735704,
"grad_norm": 0.3291151473315765,
"learning_rate": 2.285593328787414e-06,
"loss": 1.0173,
"step": 597
},
{
"epoch": 2.15128320576317,
"grad_norm": 0.3065042887064959,
"learning_rate": 2.268028266328655e-06,
"loss": 1.0294,
"step": 598
},
{
"epoch": 2.154885186852769,
"grad_norm": 0.3025327291635849,
"learning_rate": 2.250511138875801e-06,
"loss": 1.046,
"step": 599
},
{
"epoch": 2.1584871679423685,
"grad_norm": 0.28100597708822905,
"learning_rate": 2.23304225378328e-06,
"loss": 1.0289,
"step": 600
},
{
"epoch": 2.1620891490319676,
"grad_norm": 0.31006702418509163,
"learning_rate": 2.2156219175590623e-06,
"loss": 1.023,
"step": 601
},
{
"epoch": 2.1656911301215667,
"grad_norm": 0.2962970912734003,
"learning_rate": 2.1982504358592777e-06,
"loss": 1.0775,
"step": 602
},
{
"epoch": 2.169293111211166,
"grad_norm": 0.32597592835782413,
"learning_rate": 2.1809281134828663e-06,
"loss": 1.0409,
"step": 603
},
{
"epoch": 2.1728950923007653,
"grad_norm": 0.30783288748012444,
"learning_rate": 2.1636552543662187e-06,
"loss": 1.0609,
"step": 604
},
{
"epoch": 2.176497073390365,
"grad_norm": 0.3197715534590088,
"learning_rate": 2.146432161577842e-06,
"loss": 1.0341,
"step": 605
},
{
"epoch": 2.180099054479964,
"grad_norm": 0.30989671790932927,
"learning_rate": 2.1292591373130515e-06,
"loss": 1.0292,
"step": 606
},
{
"epoch": 2.1837010355695634,
"grad_norm": 0.315438048629008,
"learning_rate": 2.112136482888663e-06,
"loss": 1.0171,
"step": 607
},
{
"epoch": 2.1873030166591625,
"grad_norm": 0.3140514935443536,
"learning_rate": 2.095064498737701e-06,
"loss": 1.0406,
"step": 608
},
{
"epoch": 2.190904997748762,
"grad_norm": 0.36444536122323545,
"learning_rate": 2.07804348440414e-06,
"loss": 1.0474,
"step": 609
},
{
"epoch": 2.194506978838361,
"grad_norm": 0.32038773790017444,
"learning_rate": 2.061073738537635e-06,
"loss": 1.0406,
"step": 610
},
{
"epoch": 2.19810895992796,
"grad_norm": 0.3327293671870662,
"learning_rate": 2.04415555888829e-06,
"loss": 1.0473,
"step": 611
},
{
"epoch": 2.2017109410175597,
"grad_norm": 0.3110981786314029,
"learning_rate": 2.027289242301435e-06,
"loss": 1.0674,
"step": 612
},
{
"epoch": 2.205312922107159,
"grad_norm": 0.3001638141607491,
"learning_rate": 2.0104750847124075e-06,
"loss": 1.0543,
"step": 613
},
{
"epoch": 2.2089149031967583,
"grad_norm": 0.3336943922980599,
"learning_rate": 1.9937133811413666e-06,
"loss": 1.0378,
"step": 614
},
{
"epoch": 2.2125168842863574,
"grad_norm": 0.3189387269628659,
"learning_rate": 1.977004425688126e-06,
"loss": 1.0182,
"step": 615
},
{
"epoch": 2.216118865375957,
"grad_norm": 0.2953897029068166,
"learning_rate": 1.9603485115269743e-06,
"loss": 1.0307,
"step": 616
},
{
"epoch": 2.219720846465556,
"grad_norm": 0.3354775874836228,
"learning_rate": 1.9437459309015426e-06,
"loss": 1.0582,
"step": 617
},
{
"epoch": 2.2233228275551555,
"grad_norm": 0.3155756131913934,
"learning_rate": 1.927196975119678e-06,
"loss": 1.0729,
"step": 618
},
{
"epoch": 2.2269248086447546,
"grad_norm": 0.30314021638523436,
"learning_rate": 1.910701934548329e-06,
"loss": 1.0675,
"step": 619
},
{
"epoch": 2.2305267897343537,
"grad_norm": 0.3178641177812712,
"learning_rate": 1.8942610986084487e-06,
"loss": 1.0193,
"step": 620
},
{
"epoch": 2.2341287708239532,
"grad_norm": 0.27497288823347316,
"learning_rate": 1.8778747557699223e-06,
"loss": 1.0397,
"step": 621
},
{
"epoch": 2.2377307519135523,
"grad_norm": 0.27473090413221113,
"learning_rate": 1.8615431935464984e-06,
"loss": 1.0814,
"step": 622
},
{
"epoch": 2.241332733003152,
"grad_norm": 0.29969676069735396,
"learning_rate": 1.8452666984907519e-06,
"loss": 1.0464,
"step": 623
},
{
"epoch": 2.244934714092751,
"grad_norm": 0.29678899593418184,
"learning_rate": 1.829045556189053e-06,
"loss": 1.0635,
"step": 624
},
{
"epoch": 2.2485366951823504,
"grad_norm": 0.30067878966920947,
"learning_rate": 1.8128800512565514e-06,
"loss": 1.0325,
"step": 625
},
{
"epoch": 2.2521386762719495,
"grad_norm": 0.28275287186797854,
"learning_rate": 1.7967704673321917e-06,
"loss": 1.0606,
"step": 626
},
{
"epoch": 2.2557406573615486,
"grad_norm": 0.27780010911864134,
"learning_rate": 1.7807170870737317e-06,
"loss": 1.061,
"step": 627
},
{
"epoch": 2.259342638451148,
"grad_norm": 0.30625432629700144,
"learning_rate": 1.7647201921527802e-06,
"loss": 1.0008,
"step": 628
},
{
"epoch": 2.2629446195407477,
"grad_norm": 0.30785488054179105,
"learning_rate": 1.7487800632498547e-06,
"loss": 1.0072,
"step": 629
},
{
"epoch": 2.2665466006303467,
"grad_norm": 0.3052922709373008,
"learning_rate": 1.7328969800494727e-06,
"loss": 1.0549,
"step": 630
},
{
"epoch": 2.270148581719946,
"grad_norm": 0.3040902290300385,
"learning_rate": 1.7170712212352187e-06,
"loss": 1.0484,
"step": 631
},
{
"epoch": 2.2737505628095454,
"grad_norm": 0.3114270358030963,
"learning_rate": 1.7013030644848698e-06,
"loss": 1.065,
"step": 632
},
{
"epoch": 2.2773525438991444,
"grad_norm": 0.30134193406734877,
"learning_rate": 1.6855927864655241e-06,
"loss": 1.0003,
"step": 633
},
{
"epoch": 2.280954524988744,
"grad_norm": 0.29795611858634163,
"learning_rate": 1.6699406628287423e-06,
"loss": 1.0447,
"step": 634
},
{
"epoch": 2.284556506078343,
"grad_norm": 0.3148996427378843,
"learning_rate": 1.6543469682057105e-06,
"loss": 1.0709,
"step": 635
},
{
"epoch": 2.2881584871679426,
"grad_norm": 0.29121080028920326,
"learning_rate": 1.6388119762024213e-06,
"loss": 1.0482,
"step": 636
},
{
"epoch": 2.2917604682575416,
"grad_norm": 0.288859982023708,
"learning_rate": 1.6233359593948777e-06,
"loss": 1.0803,
"step": 637
},
{
"epoch": 2.2953624493471407,
"grad_norm": 0.33078409972348893,
"learning_rate": 1.6079191893243102e-06,
"loss": 1.0652,
"step": 638
},
{
"epoch": 2.2989644304367403,
"grad_norm": 0.306929918665794,
"learning_rate": 1.5925619364924016e-06,
"loss": 1.0426,
"step": 639
},
{
"epoch": 2.3025664115263393,
"grad_norm": 0.27619745792679457,
"learning_rate": 1.5772644703565564e-06,
"loss": 1.0295,
"step": 640
},
{
"epoch": 2.306168392615939,
"grad_norm": 0.2889850236837387,
"learning_rate": 1.5620270593251635e-06,
"loss": 1.0017,
"step": 641
},
{
"epoch": 2.309770373705538,
"grad_norm": 0.2997580312987809,
"learning_rate": 1.5468499707528856e-06,
"loss": 1.0433,
"step": 642
},
{
"epoch": 2.3133723547951375,
"grad_norm": 0.2934596363171497,
"learning_rate": 1.531733470935976e-06,
"loss": 1.1025,
"step": 643
},
{
"epoch": 2.3169743358847366,
"grad_norm": 0.2984641308954964,
"learning_rate": 1.5166778251075964e-06,
"loss": 1.0295,
"step": 644
},
{
"epoch": 2.3205763169743356,
"grad_norm": 0.30803103965283024,
"learning_rate": 1.5016832974331725e-06,
"loss": 1.0625,
"step": 645
},
{
"epoch": 2.324178298063935,
"grad_norm": 0.3075838465758903,
"learning_rate": 1.4867501510057548e-06,
"loss": 1.0274,
"step": 646
},
{
"epoch": 2.3277802791535347,
"grad_norm": 0.2794304614904837,
"learning_rate": 1.4718786478413983e-06,
"loss": 1.0705,
"step": 647
},
{
"epoch": 2.3313822602431338,
"grad_norm": 0.2854931127642351,
"learning_rate": 1.4570690488745687e-06,
"loss": 1.072,
"step": 648
},
{
"epoch": 2.334984241332733,
"grad_norm": 0.2976715865621331,
"learning_rate": 1.4423216139535735e-06,
"loss": 1.0519,
"step": 649
},
{
"epoch": 2.3385862224223324,
"grad_norm": 0.2867454551588936,
"learning_rate": 1.4276366018359845e-06,
"loss": 1.0656,
"step": 650
},
{
"epoch": 2.3421882035119315,
"grad_norm": 0.2997877110355175,
"learning_rate": 1.4130142701841076e-06,
"loss": 1.0207,
"step": 651
},
{
"epoch": 2.345790184601531,
"grad_norm": 0.29242665391081074,
"learning_rate": 1.3984548755604655e-06,
"loss": 1.0295,
"step": 652
},
{
"epoch": 2.34939216569113,
"grad_norm": 0.28957351738255677,
"learning_rate": 1.3839586734232907e-06,
"loss": 1.0187,
"step": 653
},
{
"epoch": 2.3529941467807296,
"grad_norm": 0.31920070289406366,
"learning_rate": 1.3695259181220405e-06,
"loss": 1.0309,
"step": 654
},
{
"epoch": 2.3565961278703287,
"grad_norm": 0.3015410819374185,
"learning_rate": 1.3551568628929434e-06,
"loss": 1.0334,
"step": 655
},
{
"epoch": 2.3601981089599278,
"grad_norm": 0.3076515876213567,
"learning_rate": 1.3408517598545446e-06,
"loss": 0.9868,
"step": 656
},
{
"epoch": 2.3638000900495273,
"grad_norm": 0.3176154294797028,
"learning_rate": 1.3266108600032928e-06,
"loss": 1.0543,
"step": 657
},
{
"epoch": 2.3674020711391264,
"grad_norm": 0.3071265193798959,
"learning_rate": 1.312434413209131e-06,
"loss": 1.0443,
"step": 658
},
{
"epoch": 2.371004052228726,
"grad_norm": 0.2868126401515119,
"learning_rate": 1.2983226682111094e-06,
"loss": 1.0031,
"step": 659
},
{
"epoch": 2.374606033318325,
"grad_norm": 0.30911382077034116,
"learning_rate": 1.2842758726130283e-06,
"loss": 1.0161,
"step": 660
},
{
"epoch": 2.3782080144079245,
"grad_norm": 0.292638404203282,
"learning_rate": 1.2702942728790897e-06,
"loss": 1.0435,
"step": 661
},
{
"epoch": 2.3818099954975236,
"grad_norm": 0.3131805047178825,
"learning_rate": 1.2563781143295705e-06,
"loss": 1.0817,
"step": 662
},
{
"epoch": 2.3854119765871227,
"grad_norm": 0.27540737813728455,
"learning_rate": 1.24252764113652e-06,
"loss": 1.0299,
"step": 663
},
{
"epoch": 2.389013957676722,
"grad_norm": 0.36871741578270023,
"learning_rate": 1.2287430963194807e-06,
"loss": 1.0207,
"step": 664
},
{
"epoch": 2.3926159387663217,
"grad_norm": 0.2795983680218255,
"learning_rate": 1.2150247217412186e-06,
"loss": 1.0227,
"step": 665
},
{
"epoch": 2.396217919855921,
"grad_norm": 0.32352488852148475,
"learning_rate": 1.2013727581034783e-06,
"loss": 1.0234,
"step": 666
},
{
"epoch": 2.39981990094552,
"grad_norm": 0.297035764973451,
"learning_rate": 1.18778744494276e-06,
"loss": 1.0286,
"step": 667
},
{
"epoch": 2.4034218820351194,
"grad_norm": 0.3019576240092055,
"learning_rate": 1.1742690206261293e-06,
"loss": 1.0221,
"step": 668
},
{
"epoch": 2.4070238631247185,
"grad_norm": 0.2935186392415227,
"learning_rate": 1.160817722347014e-06,
"loss": 1.0341,
"step": 669
},
{
"epoch": 2.410625844214318,
"grad_norm": 0.29550059263202566,
"learning_rate": 1.1474337861210543e-06,
"loss": 1.0312,
"step": 670
},
{
"epoch": 2.414227825303917,
"grad_norm": 0.2919473066109118,
"learning_rate": 1.1341174467819637e-06,
"loss": 1.0145,
"step": 671
},
{
"epoch": 2.4178298063935166,
"grad_norm": 0.2770789920290827,
"learning_rate": 1.120868937977404e-06,
"loss": 1.0233,
"step": 672
},
{
"epoch": 2.4214317874831157,
"grad_norm": 0.2726692177451945,
"learning_rate": 1.1076884921648834e-06,
"loss": 1.0763,
"step": 673
},
{
"epoch": 2.425033768572715,
"grad_norm": 0.2955147912539993,
"learning_rate": 1.0945763406076837e-06,
"loss": 1.0431,
"step": 674
},
{
"epoch": 2.4286357496623143,
"grad_norm": 0.2937917795969739,
"learning_rate": 1.0815327133708015e-06,
"loss": 1.0238,
"step": 675
},
{
"epoch": 2.4322377307519134,
"grad_norm": 0.26163762430548565,
"learning_rate": 1.0685578393169054e-06,
"loss": 1.0572,
"step": 676
},
{
"epoch": 2.435839711841513,
"grad_norm": 0.30900006884034514,
"learning_rate": 1.0556519461023301e-06,
"loss": 1.0148,
"step": 677
},
{
"epoch": 2.439441692931112,
"grad_norm": 0.28768364202012864,
"learning_rate": 1.0428152601730718e-06,
"loss": 1.0526,
"step": 678
},
{
"epoch": 2.4430436740207115,
"grad_norm": 0.31064843044382456,
"learning_rate": 1.0300480067608232e-06,
"loss": 1.0131,
"step": 679
},
{
"epoch": 2.4466456551103106,
"grad_norm": 0.2790738585338088,
"learning_rate": 1.0173504098790188e-06,
"loss": 1.0432,
"step": 680
},
{
"epoch": 2.4502476361999097,
"grad_norm": 0.2675802235255538,
"learning_rate": 1.0047226923189024e-06,
"loss": 1.0592,
"step": 681
},
{
"epoch": 2.4538496172895092,
"grad_norm": 0.2820500392897806,
"learning_rate": 9.921650756456164e-07,
"loss": 1.0406,
"step": 682
},
{
"epoch": 2.4574515983791088,
"grad_norm": 0.2712979205650806,
"learning_rate": 9.79677780194327e-07,
"loss": 1.0469,
"step": 683
},
{
"epoch": 2.461053579468708,
"grad_norm": 0.28678767820139744,
"learning_rate": 9.67261025066339e-07,
"loss": 0.9984,
"step": 684
},
{
"epoch": 2.464655560558307,
"grad_norm": 0.2826669677633045,
"learning_rate": 9.549150281252633e-07,
"loss": 1.0439,
"step": 685
},
{
"epoch": 2.4682575416479065,
"grad_norm": 0.2898206571793001,
"learning_rate": 9.426400059931956e-07,
"loss": 1.0046,
"step": 686
},
{
"epoch": 2.4718595227375055,
"grad_norm": 0.3055004594875761,
"learning_rate": 9.304361740469103e-07,
"loss": 1.0269,
"step": 687
},
{
"epoch": 2.475461503827105,
"grad_norm": 0.28000409560086253,
"learning_rate": 9.183037464140804e-07,
"loss": 1.0274,
"step": 688
},
{
"epoch": 2.479063484916704,
"grad_norm": 0.2956303701365367,
"learning_rate": 9.06242935969528e-07,
"loss": 1.0458,
"step": 689
},
{
"epoch": 2.4826654660063037,
"grad_norm": 0.29216773948217756,
"learning_rate": 8.942539543314799e-07,
"loss": 1.0254,
"step": 690
},
{
"epoch": 2.4862674470959027,
"grad_norm": 0.3038042079243165,
"learning_rate": 8.823370118578628e-07,
"loss": 1.048,
"step": 691
},
{
"epoch": 2.489869428185502,
"grad_norm": 0.27942852348483804,
"learning_rate": 8.704923176426072e-07,
"loss": 1.0092,
"step": 692
},
{
"epoch": 2.4934714092751014,
"grad_norm": 0.28829213899925576,
"learning_rate": 8.587200795119793e-07,
"loss": 1.0443,
"step": 693
},
{
"epoch": 2.4970733903647004,
"grad_norm": 0.3075901120188406,
"learning_rate": 8.470205040209362e-07,
"loss": 1.0754,
"step": 694
},
{
"epoch": 2.5006753714543,
"grad_norm": 0.28958620497520454,
"learning_rate": 8.353937964495029e-07,
"loss": 1.0209,
"step": 695
},
{
"epoch": 2.504277352543899,
"grad_norm": 0.27671142949543276,
"learning_rate": 8.238401607991647e-07,
"loss": 1.0168,
"step": 696
},
{
"epoch": 2.5078793336334986,
"grad_norm": 0.273803663136562,
"learning_rate": 8.123597997892918e-07,
"loss": 1.0222,
"step": 697
},
{
"epoch": 2.5114813147230977,
"grad_norm": 0.2822637921207432,
"learning_rate": 8.009529148535855e-07,
"loss": 1.0219,
"step": 698
},
{
"epoch": 2.5150832958126967,
"grad_norm": 0.2943865446548967,
"learning_rate": 7.89619706136539e-07,
"loss": 1.0429,
"step": 699
},
{
"epoch": 2.5186852769022963,
"grad_norm": 0.27277951808804135,
"learning_rate": 7.783603724899258e-07,
"loss": 1.0446,
"step": 700
},
{
"epoch": 2.522287257991896,
"grad_norm": 0.27077579945237057,
"learning_rate": 7.671751114693104e-07,
"loss": 1.0381,
"step": 701
},
{
"epoch": 2.525889239081495,
"grad_norm": 0.27280532919773465,
"learning_rate": 7.560641193305912e-07,
"loss": 1.0239,
"step": 702
},
{
"epoch": 2.529491220171094,
"grad_norm": 0.2790422908370001,
"learning_rate": 7.450275910265415e-07,
"loss": 1.0249,
"step": 703
},
{
"epoch": 2.5330932012606935,
"grad_norm": 0.27523036662265893,
"learning_rate": 7.34065720203399e-07,
"loss": 1.0051,
"step": 704
},
{
"epoch": 2.5366951823502926,
"grad_norm": 0.2771023164701056,
"learning_rate": 7.23178699197467e-07,
"loss": 1.0354,
"step": 705
},
{
"epoch": 2.540297163439892,
"grad_norm": 0.2666702640648939,
"learning_rate": 7.123667190317396e-07,
"loss": 1.0277,
"step": 706
},
{
"epoch": 2.543899144529491,
"grad_norm": 0.2781381560124755,
"learning_rate": 7.01629969412545e-07,
"loss": 1.0658,
"step": 707
},
{
"epoch": 2.5475011256190907,
"grad_norm": 0.2880670528582805,
"learning_rate": 6.909686387262255e-07,
"loss": 1.0257,
"step": 708
},
{
"epoch": 2.55110310670869,
"grad_norm": 0.2772242207866922,
"learning_rate": 6.803829140358237e-07,
"loss": 1.0496,
"step": 709
},
{
"epoch": 2.554705087798289,
"grad_norm": 0.34213950210212746,
"learning_rate": 6.698729810778065e-07,
"loss": 1.0317,
"step": 710
},
{
"epoch": 2.5583070688878884,
"grad_norm": 0.29889700157480303,
"learning_rate": 6.594390242588044e-07,
"loss": 1.0357,
"step": 711
},
{
"epoch": 2.5619090499774875,
"grad_norm": 0.281098345760278,
"learning_rate": 6.490812266523716e-07,
"loss": 1.0532,
"step": 712
},
{
"epoch": 2.565511031067087,
"grad_norm": 0.27502660732949663,
"learning_rate": 6.387997699957815e-07,
"loss": 1.0581,
"step": 713
},
{
"epoch": 2.569113012156686,
"grad_norm": 0.2914484204504316,
"learning_rate": 6.28594834686832e-07,
"loss": 0.9895,
"step": 714
},
{
"epoch": 2.5727149932462856,
"grad_norm": 0.3129296314272526,
"learning_rate": 6.184665997806832e-07,
"loss": 1.0147,
"step": 715
},
{
"epoch": 2.5763169743358847,
"grad_norm": 0.32079210829379234,
"learning_rate": 6.084152429867113e-07,
"loss": 1.0406,
"step": 716
},
{
"epoch": 2.5799189554254838,
"grad_norm": 0.29202037625742366,
"learning_rate": 5.98440940665399e-07,
"loss": 1.0466,
"step": 717
},
{
"epoch": 2.5835209365150833,
"grad_norm": 0.295396320039525,
"learning_rate": 5.885438678252342e-07,
"loss": 1.0549,
"step": 718
},
|
|
{
|
|
"epoch": 2.587122917604683,
|
|
"grad_norm": 0.3054962097094399,
|
|
"learning_rate": 5.787241981196384e-07,
|
|
"loss": 1.0325,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 2.590724898694282,
|
|
"grad_norm": 0.28026222206941487,
|
|
"learning_rate": 5.689821038439264e-07,
|
|
"loss": 1.051,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 2.594326879783881,
|
|
"grad_norm": 0.32032760630187107,
|
|
"learning_rate": 5.593177559322776e-07,
|
|
"loss": 1.0006,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 2.5979288608734805,
|
|
"grad_norm": 0.3213596256851929,
|
|
"learning_rate": 5.497313239547374e-07,
|
|
"loss": 0.9861,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 2.6015308419630796,
|
|
"grad_norm": 0.28144430111804236,
|
|
"learning_rate": 5.402229761142464e-07,
|
|
"loss": 1.0751,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 2.605132823052679,
|
|
"grad_norm": 0.31364935141684536,
|
|
"learning_rate": 5.307928792436812e-07,
|
|
"loss": 1.0723,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 2.608734804142278,
|
|
"grad_norm": 0.27300612039452254,
|
|
"learning_rate": 5.214411988029355e-07,
|
|
"loss": 1.0382,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 2.6123367852318777,
|
|
"grad_norm": 0.2856505846764118,
|
|
"learning_rate": 5.121680988760125e-07,
|
|
"loss": 1.0649,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 2.615938766321477,
|
|
"grad_norm": 0.30242925411320504,
|
|
"learning_rate": 5.029737421681446e-07,
|
|
"loss": 1.0253,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 2.619540747411076,
|
|
"grad_norm": 0.2760307219806458,
|
|
"learning_rate": 4.938582900029437e-07,
|
|
"loss": 1.0117,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 2.6231427285006754,
|
|
"grad_norm": 0.2813571189889987,
|
|
"learning_rate": 4.848219023195644e-07,
|
|
"loss": 1.0558,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 2.6267447095902745,
|
|
"grad_norm": 0.27273042137937875,
|
|
"learning_rate": 4.758647376699033e-07,
|
|
"loss": 1.044,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 2.630346690679874,
|
|
"grad_norm": 0.2962356696193387,
|
|
"learning_rate": 4.6698695321581165e-07,
|
|
"loss": 1.0591,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 2.633948671769473,
|
|
"grad_norm": 0.3021705699786841,
|
|
"learning_rate": 4.581887047263445e-07,
|
|
"loss": 1.0098,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 2.6375506528590726,
|
|
"grad_norm": 0.3116020937459149,
|
|
"learning_rate": 4.494701465750217e-07,
|
|
"loss": 0.9851,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 2.6411526339486717,
|
|
"grad_norm": 0.26030040597442616,
|
|
"learning_rate": 4.4083143173712207e-07,
|
|
"loss": 1.0507,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 2.644754615038271,
|
|
"grad_norm": 0.2754248870408236,
|
|
"learning_rate": 4.322727117869951e-07,
|
|
"loss": 1.0644,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 2.6483565961278703,
|
|
"grad_norm": 0.28205843108275575,
|
|
"learning_rate": 4.237941368954124e-07,
|
|
"loss": 1.0424,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 2.65195857721747,
|
|
"grad_norm": 0.29294591358526023,
|
|
"learning_rate": 4.153958558269189e-07,
|
|
"loss": 1.0482,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 2.655560558307069,
|
|
"grad_norm": 0.27169863680028755,
|
|
"learning_rate": 4.0707801593723006e-07,
|
|
"loss": 1.0163,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 2.659162539396668,
|
|
"grad_norm": 0.27860960386729466,
|
|
"learning_rate": 3.9884076317064813e-07,
|
|
"loss": 1.0333,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 2.6627645204862675,
|
|
"grad_norm": 0.2770379539586451,
|
|
"learning_rate": 3.90684242057498e-07,
|
|
"loss": 1.0574,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 2.6663665015758666,
|
|
"grad_norm": 0.2668295100673454,
|
|
"learning_rate": 3.8260859571158883e-07,
|
|
"loss": 1.018,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 2.669968482665466,
|
|
"grad_norm": 0.30903619747795014,
|
|
"learning_rate": 3.7461396582771035e-07,
|
|
"loss": 1.033,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 2.6735704637550652,
|
|
"grad_norm": 0.28672971110408385,
|
|
"learning_rate": 3.6670049267913954e-07,
|
|
"loss": 1.0349,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 2.6771724448446648,
|
|
"grad_norm": 0.276249475350017,
|
|
"learning_rate": 3.5886831511518336e-07,
|
|
"loss": 1.0317,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 2.680774425934264,
|
|
"grad_norm": 0.2676884521307514,
|
|
"learning_rate": 3.511175705587433e-07,
|
|
"loss": 1.0254,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 2.684376407023863,
|
|
"grad_norm": 0.2983580659797731,
|
|
"learning_rate": 3.434483950038986e-07,
|
|
"loss": 1.0412,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 2.6879783881134625,
|
|
"grad_norm": 0.28438744098885343,
|
|
"learning_rate": 3.358609230135268e-07,
|
|
"loss": 1.0502,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 2.6915803692030615,
|
|
"grad_norm": 0.2715735909059339,
|
|
"learning_rate": 3.283552877169399e-07,
|
|
"loss": 1.0289,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 2.695182350292661,
|
|
"grad_norm": 0.2970172751919579,
|
|
"learning_rate": 3.2093162080754634e-07,
|
|
"loss": 1.0279,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 2.69878433138226,
|
|
"grad_norm": 0.2845334892172726,
|
|
"learning_rate": 3.135900525405428e-07,
|
|
"loss": 1.0137,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 2.7023863124718597,
|
|
"grad_norm": 0.27255472615309784,
|
|
"learning_rate": 3.0633071173062966e-07,
|
|
"loss": 1.0262,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 2.7059882935614588,
|
|
"grad_norm": 0.27752394798698354,
|
|
"learning_rate": 2.99153725749749e-07,
|
|
"loss": 1.0454,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 2.709590274651058,
|
|
"grad_norm": 0.276728317098971,
|
|
"learning_rate": 2.920592205248496e-07,
|
|
"loss": 0.9963,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 2.7131922557406574,
|
|
"grad_norm": 0.27889890261263245,
|
|
"learning_rate": 2.850473205356774e-07,
|
|
"loss": 1.0716,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 2.716794236830257,
|
|
"grad_norm": 0.31799452745614826,
|
|
"learning_rate": 2.7811814881259503e-07,
|
|
"loss": 1.0446,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 2.720396217919856,
|
|
"grad_norm": 0.27681370096335217,
|
|
"learning_rate": 2.712718269344161e-07,
|
|
"loss": 1.0388,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 2.723998199009455,
|
|
"grad_norm": 0.36693230305260593,
|
|
"learning_rate": 2.6450847502627883e-07,
|
|
"loss": 1.0537,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 2.7276001800990546,
|
|
"grad_norm": 0.29241171362928314,
|
|
"learning_rate": 2.578282117575343e-07,
|
|
"loss": 1.0404,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 2.7312021611886537,
|
|
"grad_norm": 0.2579639214579812,
|
|
"learning_rate": 2.5123115433966615e-07,
|
|
"loss": 1.0798,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 2.734804142278253,
|
|
"grad_norm": 0.27185445345296794,
|
|
"learning_rate": 2.447174185242324e-07,
|
|
"loss": 1.0262,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 2.7384061233678523,
|
|
"grad_norm": 0.2886015166476546,
|
|
"learning_rate": 2.3828711860083676e-07,
|
|
"loss": 1.0316,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 2.742008104457452,
|
|
"grad_norm": 0.3031146928198293,
|
|
"learning_rate": 2.319403673951204e-07,
|
|
"loss": 1.0525,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 2.745610085547051,
|
|
"grad_norm": 0.2703340838231308,
|
|
"learning_rate": 2.2567727626678527e-07,
|
|
"loss": 1.0723,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 2.74921206663665,
|
|
"grad_norm": 0.2650868418407948,
|
|
"learning_rate": 2.1949795510763872e-07,
|
|
"loss": 1.0205,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 2.7528140477262495,
|
|
"grad_norm": 0.26760082891770176,
|
|
"learning_rate": 2.134025123396638e-07,
|
|
"loss": 1.0261,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 2.7564160288158486,
|
|
"grad_norm": 0.2717529102257122,
|
|
"learning_rate": 2.0739105491312028e-07,
|
|
"loss": 1.044,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 2.760018009905448,
|
|
"grad_norm": 0.26225450814352563,
|
|
"learning_rate": 2.0146368830466668e-07,
|
|
"loss": 1.0793,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 2.763619990995047,
|
|
"grad_norm": 0.3063150762890465,
|
|
"learning_rate": 1.9562051651550784e-07,
|
|
"loss": 1.0673,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 2.7672219720846467,
|
|
"grad_norm": 0.2774099125784962,
|
|
"learning_rate": 1.8986164206957037e-07,
|
|
"loss": 1.0673,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 2.770823953174246,
|
|
"grad_norm": 0.2847835154599416,
|
|
"learning_rate": 1.841871660117095e-07,
|
|
"loss": 1.0311,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 2.774425934263845,
|
|
"grad_norm": 0.25146022356844167,
|
|
"learning_rate": 1.785971879059273e-07,
|
|
"loss": 1.0194,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 2.7780279153534444,
|
|
"grad_norm": 0.3024944481082907,
|
|
"learning_rate": 1.7309180583363062e-07,
|
|
"loss": 0.9938,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 2.781629896443044,
|
|
"grad_norm": 0.2875295564587982,
|
|
"learning_rate": 1.6767111639191202e-07,
|
|
"loss": 1.0043,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 2.785231877532643,
|
|
"grad_norm": 0.28086121255020596,
|
|
"learning_rate": 1.6233521469185054e-07,
|
|
"loss": 1.0393,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 2.788833858622242,
|
|
"grad_norm": 0.28337014562304264,
|
|
"learning_rate": 1.5708419435684463e-07,
|
|
"loss": 1.0467,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 2.7924358397118416,
|
|
"grad_norm": 0.2695975671118399,
|
|
"learning_rate": 1.5191814752097024e-07,
|
|
"loss": 1.0277,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 2.7960378208014407,
|
|
"grad_norm": 0.2596950818220317,
|
|
"learning_rate": 1.4683716482736364e-07,
|
|
"loss": 1.0375,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 2.7996398018910402,
|
|
"grad_norm": 0.2833370915529864,
|
|
"learning_rate": 1.4184133542663014e-07,
|
|
"loss": 1.0402,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 2.8032417829806393,
|
|
"grad_norm": 0.2837127477632618,
|
|
"learning_rate": 1.3693074697528231e-07,
|
|
"loss": 1.0663,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 2.806843764070239,
|
|
"grad_norm": 0.258408242099485,
|
|
"learning_rate": 1.3210548563419857e-07,
|
|
"loss": 1.0245,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 2.810445745159838,
|
|
"grad_norm": 0.29442506294804527,
|
|
"learning_rate": 1.2736563606711384e-07,
|
|
"loss": 0.9917,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 2.814047726249437,
|
|
"grad_norm": 0.29244090698076763,
|
|
"learning_rate": 1.2271128143913458e-07,
|
|
"loss": 1.0409,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 2.8176497073390365,
|
|
"grad_norm": 0.26403315463361793,
|
|
"learning_rate": 1.1814250341527611e-07,
|
|
"loss": 1.0486,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 2.8212516884286356,
|
|
"grad_norm": 0.2723144354203953,
|
|
"learning_rate": 1.136593821590326e-07,
|
|
"loss": 1.0543,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 2.824853669518235,
|
|
"grad_norm": 0.2850711230800217,
|
|
"learning_rate": 1.0926199633097156e-07,
|
|
"loss": 1.0095,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 2.828455650607834,
|
|
"grad_norm": 0.2771326731497052,
|
|
"learning_rate": 1.0495042308735104e-07,
|
|
"loss": 1.0138,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 2.8320576316974337,
|
|
"grad_norm": 0.2667788079924329,
|
|
"learning_rate": 1.007247380787657e-07,
|
|
"loss": 1.0354,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 2.835659612787033,
|
|
"grad_norm": 0.27205262075900666,
|
|
"learning_rate": 9.658501544882182e-08,
|
|
"loss": 1.008,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 2.839261593876632,
|
|
"grad_norm": 0.278153414604589,
|
|
"learning_rate": 9.253132783283548e-08,
|
|
"loss": 1.0575,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 2.8428635749662314,
|
|
"grad_norm": 0.2722033011712004,
|
|
"learning_rate": 8.856374635655696e-08,
|
|
"loss": 1.0373,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 2.846465556055831,
|
|
"grad_norm": 0.2643963535331226,
|
|
"learning_rate": 8.468234063492287e-08,
|
|
"loss": 1.0331,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 2.85006753714543,
|
|
"grad_norm": 0.2772917134427986,
|
|
"learning_rate": 8.088717877083706e-08,
|
|
"loss": 0.9933,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 2.853669518235029,
|
|
"grad_norm": 0.2567788777229302,
|
|
"learning_rate": 7.717832735397335e-08,
|
|
"loss": 1.039,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 2.8572714993246286,
|
|
"grad_norm": 0.3279492936255398,
|
|
"learning_rate": 7.355585145960743e-08,
|
|
"loss": 1.066,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 2.8608734804142277,
|
|
"grad_norm": 0.26103624246805707,
|
|
"learning_rate": 7.001981464747565e-08,
|
|
"loss": 1.0502,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 2.8644754615038273,
|
|
"grad_norm": 0.28523237313847577,
|
|
"learning_rate": 6.657027896065982e-08,
|
|
"loss": 1.0071,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 2.8680774425934263,
|
|
"grad_norm": 0.2714272091000359,
|
|
"learning_rate": 6.3207304924498e-08,
|
|
"loss": 1.0614,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 2.871679423683026,
|
|
"grad_norm": 0.28016748152731064,
|
|
"learning_rate": 5.993095154552431e-08,
|
|
"loss": 1.0324,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 2.875281404772625,
|
|
"grad_norm": 0.28138701855091797,
|
|
"learning_rate": 5.674127631043025e-08,
|
|
"loss": 1.0357,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 2.878883385862224,
|
|
"grad_norm": 0.259827571346895,
|
|
"learning_rate": 5.363833518505834e-08,
|
|
"loss": 1.0203,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 2.8824853669518236,
|
|
"grad_norm": 0.30527969392963294,
|
|
"learning_rate": 5.062218261342122e-08,
|
|
"loss": 1.04,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 2.8860873480414226,
|
|
"grad_norm": 0.3336787676428948,
|
|
"learning_rate": 4.769287151674407e-08,
|
|
"loss": 1.0177,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 2.889689329131022,
|
|
"grad_norm": 0.2673565531562563,
|
|
"learning_rate": 4.485045329253646e-08,
|
|
"loss": 0.9941,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 2.8932913102206212,
|
|
"grad_norm": 0.27644924732915116,
|
|
"learning_rate": 4.209497781369143e-08,
|
|
"loss": 1.047,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 2.8968932913102208,
|
|
"grad_norm": 0.2762539625768617,
|
|
"learning_rate": 3.9426493427611177e-08,
|
|
"loss": 1.0618,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 2.90049527239982,
|
|
"grad_norm": 0.279438570935992,
|
|
"learning_rate": 3.684504695535496e-08,
|
|
"loss": 1.0407,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 2.904097253489419,
|
|
"grad_norm": 0.25093375155659065,
|
|
"learning_rate": 3.435068369082306e-08,
|
|
"loss": 1.0263,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 2.9076992345790185,
|
|
"grad_norm": 0.28318467682760556,
|
|
"learning_rate": 3.194344739995803e-08,
|
|
"loss": 1.0519,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 2.911301215668618,
|
|
"grad_norm": 0.2904491497208285,
|
|
"learning_rate": 2.9623380319976912e-08,
|
|
"loss": 1.0549,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 2.914903196758217,
|
|
"grad_norm": 0.2605165703364476,
|
|
"learning_rate": 2.7390523158633552e-08,
|
|
"loss": 1.0166,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 2.918505177847816,
|
|
"grad_norm": 0.27714338429064156,
|
|
"learning_rate": 2.5244915093499134e-08,
|
|
"loss": 1.0533,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 2.9221071589374157,
|
|
"grad_norm": 0.275239630057375,
|
|
"learning_rate": 2.3186593771280518e-08,
|
|
"loss": 1.0233,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 2.9257091400270148,
|
|
"grad_norm": 0.27858108381395574,
|
|
"learning_rate": 2.1215595307154667e-08,
|
|
"loss": 0.9939,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 2.9293111211166143,
|
|
"grad_norm": 0.2685380458847235,
|
|
"learning_rate": 1.9331954284137476e-08,
|
|
"loss": 1.0429,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 2.9329131022062134,
|
|
"grad_norm": 0.2721576649644654,
|
|
"learning_rate": 1.753570375247815e-08,
|
|
"loss": 1.0401,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 2.936515083295813,
|
|
"grad_norm": 0.25219694366538187,
|
|
"learning_rate": 1.582687522907633e-08,
|
|
"loss": 1.0321,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 2.940117064385412,
|
|
"grad_norm": 0.31225958064880055,
|
|
"learning_rate": 1.4205498696930332e-08,
|
|
"loss": 1.0564,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 2.943719045475011,
|
|
"grad_norm": 0.2907802700059505,
|
|
"learning_rate": 1.2671602604612531e-08,
|
|
"loss": 1.0549,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 2.9473210265646106,
|
|
"grad_norm": 0.2620211100717753,
|
|
"learning_rate": 1.1225213865767026e-08,
|
|
"loss": 1.0135,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 2.9509230076542097,
|
|
"grad_norm": 0.2662621532694749,
|
|
"learning_rate": 9.866357858642206e-09,
|
|
"loss": 1.0217,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 2.954524988743809,
|
|
"grad_norm": 0.26026540743415805,
|
|
"learning_rate": 8.595058425640012e-09,
|
|
"loss": 1.0242,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 2.9581269698334083,
|
|
"grad_norm": 0.27157295590276637,
|
|
"learning_rate": 7.411337872900715e-09,
|
|
"loss": 0.9976,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 2.961728950923008,
|
|
"grad_norm": 0.2810030913242284,
|
|
"learning_rate": 6.315216969912663e-09,
|
|
"loss": 1.0426,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 2.965330932012607,
|
|
"grad_norm": 0.27067627376772047,
|
|
"learning_rate": 5.306714949143699e-09,
|
|
"loss": 1.0691,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 2.968932913102206,
|
|
"grad_norm": 0.26312224124888717,
|
|
"learning_rate": 4.385849505708084e-09,
|
|
"loss": 1.0305,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 2.9725348941918055,
|
|
"grad_norm": 0.28983516142663135,
|
|
"learning_rate": 3.5526367970539765e-09,
|
|
"loss": 1.0732,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 2.976136875281405,
|
|
"grad_norm": 0.2824286593101622,
|
|
"learning_rate": 2.8070914426786555e-09,
|
|
"loss": 1.0435,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 2.979738856371004,
|
|
"grad_norm": 0.26087637492866167,
|
|
"learning_rate": 2.149226523874837e-09,
|
|
"loss": 1.0591,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 2.983340837460603,
|
|
"grad_norm": 0.2909906050244136,
|
|
"learning_rate": 1.5790535835003006e-09,
|
|
"loss": 1.0739,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 2.9869428185502027,
|
|
"grad_norm": 0.26693864211195434,
|
|
"learning_rate": 1.096582625772502e-09,
|
|
"loss": 1.037,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 2.990544799639802,
|
|
"grad_norm": 0.28489493631726986,
|
|
"learning_rate": 7.018221160981498e-10,
|
|
"loss": 1.0347,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 2.9941467807294013,
|
|
"grad_norm": 0.2886886682421759,
|
|
"learning_rate": 3.9477898091944135e-10,
|
|
"loss": 1.0298,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 2.9977487618190004,
|
|
"grad_norm": 0.28032318013340757,
|
|
"learning_rate": 1.7545860759693446e-10,
|
|
"loss": 0.9798,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.28032318013340757,
|
|
"learning_rate": 4.3864844311847235e-11,
|
|
"loss": 1.0169,
|
|
"step": 834
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 834,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1614971456716800.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|