Model: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz512_lr4e5_epochs5 Source: Original Platform
6868 lines
166 KiB
JSON
6868 lines
166 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 4.987212276214834,
|
|
"eval_steps": 500,
|
|
"global_step": 975,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.005115089514066497,
|
|
"grad_norm": 2.9552626224042746,
|
|
"learning_rate": 4.0816326530612243e-07,
|
|
"loss": 1.4957,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.010230179028132993,
|
|
"grad_norm": 2.9842329763367634,
|
|
"learning_rate": 8.163265306122449e-07,
|
|
"loss": 1.4889,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.015345268542199489,
|
|
"grad_norm": 2.9894673729833756,
|
|
"learning_rate": 1.2244897959183673e-06,
|
|
"loss": 1.4995,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.020460358056265986,
|
|
"grad_norm": 2.9406348527530266,
|
|
"learning_rate": 1.6326530612244897e-06,
|
|
"loss": 1.4851,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.02557544757033248,
|
|
"grad_norm": 2.884254781810192,
|
|
"learning_rate": 2.0408163265306125e-06,
|
|
"loss": 1.5193,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.030690537084398978,
|
|
"grad_norm": 2.6620670413572483,
|
|
"learning_rate": 2.4489795918367347e-06,
|
|
"loss": 1.4843,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.03580562659846547,
|
|
"grad_norm": 2.5965076114745362,
|
|
"learning_rate": 2.8571428571428573e-06,
|
|
"loss": 1.4878,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.04092071611253197,
|
|
"grad_norm": 2.0170252553614705,
|
|
"learning_rate": 3.2653061224489794e-06,
|
|
"loss": 1.4657,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.04603580562659847,
|
|
"grad_norm": 1.9679168334183053,
|
|
"learning_rate": 3.6734693877551024e-06,
|
|
"loss": 1.4733,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.05115089514066496,
|
|
"grad_norm": 1.8694892639210836,
|
|
"learning_rate": 4.081632653061225e-06,
|
|
"loss": 1.4641,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.056265984654731455,
|
|
"grad_norm": 1.8685294422005332,
|
|
"learning_rate": 4.489795918367348e-06,
|
|
"loss": 1.4459,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.061381074168797956,
|
|
"grad_norm": 1.8352337338017686,
|
|
"learning_rate": 4.897959183673469e-06,
|
|
"loss": 1.4347,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.06649616368286446,
|
|
"grad_norm": 1.715529029874086,
|
|
"learning_rate": 5.306122448979593e-06,
|
|
"loss": 1.4253,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.07161125319693094,
|
|
"grad_norm": 1.4603873855315377,
|
|
"learning_rate": 5.7142857142857145e-06,
|
|
"loss": 1.4073,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.07672634271099744,
|
|
"grad_norm": 1.8889329281960052,
|
|
"learning_rate": 6.122448979591837e-06,
|
|
"loss": 1.4073,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.08184143222506395,
|
|
"grad_norm": 1.8256444813829467,
|
|
"learning_rate": 6.530612244897959e-06,
|
|
"loss": 1.402,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.08695652173913043,
|
|
"grad_norm": 1.5107616554904217,
|
|
"learning_rate": 6.938775510204082e-06,
|
|
"loss": 1.4226,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.09207161125319693,
|
|
"grad_norm": 1.2135260104218866,
|
|
"learning_rate": 7.346938775510205e-06,
|
|
"loss": 1.3806,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.09718670076726342,
|
|
"grad_norm": 0.9783065195702848,
|
|
"learning_rate": 7.755102040816327e-06,
|
|
"loss": 1.3779,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.10230179028132992,
|
|
"grad_norm": 1.0913652122620938,
|
|
"learning_rate": 8.16326530612245e-06,
|
|
"loss": 1.3479,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.10741687979539642,
|
|
"grad_norm": 0.9002918394031234,
|
|
"learning_rate": 8.571428571428571e-06,
|
|
"loss": 1.3421,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.11253196930946291,
|
|
"grad_norm": 0.8291547537993212,
|
|
"learning_rate": 8.979591836734695e-06,
|
|
"loss": 1.3199,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 0.807288721347729,
|
|
"learning_rate": 9.387755102040818e-06,
|
|
"loss": 1.3344,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.12276214833759591,
|
|
"grad_norm": 0.6809772051446031,
|
|
"learning_rate": 9.795918367346939e-06,
|
|
"loss": 1.3248,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.1278772378516624,
|
|
"grad_norm": 0.6460286130501192,
|
|
"learning_rate": 1.0204081632653063e-05,
|
|
"loss": 1.3101,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.1329923273657289,
|
|
"grad_norm": 0.7041553190299802,
|
|
"learning_rate": 1.0612244897959186e-05,
|
|
"loss": 1.2905,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.13810741687979539,
|
|
"grad_norm": 0.5916151711963107,
|
|
"learning_rate": 1.1020408163265306e-05,
|
|
"loss": 1.2957,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.1432225063938619,
|
|
"grad_norm": 0.4814903541835213,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 1.3205,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.1483375959079284,
|
|
"grad_norm": 0.5453307778014714,
|
|
"learning_rate": 1.1836734693877552e-05,
|
|
"loss": 1.3054,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.1534526854219949,
|
|
"grad_norm": 0.5416929825329082,
|
|
"learning_rate": 1.2244897959183674e-05,
|
|
"loss": 1.2876,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.1585677749360614,
|
|
"grad_norm": 0.47860645294978243,
|
|
"learning_rate": 1.2653061224489798e-05,
|
|
"loss": 1.3007,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.1636828644501279,
|
|
"grad_norm": 0.4486371377266475,
|
|
"learning_rate": 1.3061224489795918e-05,
|
|
"loss": 1.2954,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.16879795396419436,
|
|
"grad_norm": 0.44450958377830624,
|
|
"learning_rate": 1.3469387755102042e-05,
|
|
"loss": 1.2676,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.17391304347826086,
|
|
"grad_norm": 0.44087698545639353,
|
|
"learning_rate": 1.3877551020408165e-05,
|
|
"loss": 1.2574,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.17902813299232737,
|
|
"grad_norm": 0.40237081708363603,
|
|
"learning_rate": 1.4285714285714287e-05,
|
|
"loss": 1.2344,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.18414322250639387,
|
|
"grad_norm": 0.36218165247096934,
|
|
"learning_rate": 1.469387755102041e-05,
|
|
"loss": 1.2553,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.18925831202046037,
|
|
"grad_norm": 0.3828961442249037,
|
|
"learning_rate": 1.510204081632653e-05,
|
|
"loss": 1.2678,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.19437340153452684,
|
|
"grad_norm": 0.37416837544698317,
|
|
"learning_rate": 1.5510204081632655e-05,
|
|
"loss": 1.2406,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.19948849104859334,
|
|
"grad_norm": 0.373896446093631,
|
|
"learning_rate": 1.5918367346938776e-05,
|
|
"loss": 1.2431,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.20460358056265984,
|
|
"grad_norm": 0.3008904471337201,
|
|
"learning_rate": 1.63265306122449e-05,
|
|
"loss": 1.2324,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.20971867007672634,
|
|
"grad_norm": 0.30387006502929764,
|
|
"learning_rate": 1.673469387755102e-05,
|
|
"loss": 1.2238,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.21483375959079284,
|
|
"grad_norm": 0.3165654505340095,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 1.2344,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.21994884910485935,
|
|
"grad_norm": 0.28748071942522757,
|
|
"learning_rate": 1.7551020408163266e-05,
|
|
"loss": 1.2559,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.22506393861892582,
|
|
"grad_norm": 0.2664919173232997,
|
|
"learning_rate": 1.795918367346939e-05,
|
|
"loss": 1.2501,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.23017902813299232,
|
|
"grad_norm": 0.2716330693107337,
|
|
"learning_rate": 1.836734693877551e-05,
|
|
"loss": 1.235,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 0.2758197763809488,
|
|
"learning_rate": 1.8775510204081636e-05,
|
|
"loss": 1.2463,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.24040920716112532,
|
|
"grad_norm": 0.24892493271713195,
|
|
"learning_rate": 1.9183673469387756e-05,
|
|
"loss": 1.2415,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.24552429667519182,
|
|
"grad_norm": 0.2755681817261238,
|
|
"learning_rate": 1.9591836734693877e-05,
|
|
"loss": 1.2186,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.2506393861892583,
|
|
"grad_norm": 0.24083989397516156,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.2224,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.2557544757033248,
|
|
"grad_norm": 0.24122206872487506,
|
|
"learning_rate": 2.0408163265306126e-05,
|
|
"loss": 1.2012,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.2608695652173913,
|
|
"grad_norm": 0.25341193733481965,
|
|
"learning_rate": 2.0816326530612247e-05,
|
|
"loss": 1.2221,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.2659846547314578,
|
|
"grad_norm": 0.22903243976655355,
|
|
"learning_rate": 2.122448979591837e-05,
|
|
"loss": 1.1924,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.2710997442455243,
|
|
"grad_norm": 0.21677309278156626,
|
|
"learning_rate": 2.1632653061224492e-05,
|
|
"loss": 1.1889,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.27621483375959077,
|
|
"grad_norm": 0.21471884542317143,
|
|
"learning_rate": 2.2040816326530613e-05,
|
|
"loss": 1.2008,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.2813299232736573,
|
|
"grad_norm": 0.24214035073106985,
|
|
"learning_rate": 2.2448979591836734e-05,
|
|
"loss": 1.2144,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.2864450127877238,
|
|
"grad_norm": 0.2498234706393149,
|
|
"learning_rate": 2.2857142857142858e-05,
|
|
"loss": 1.2106,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.2915601023017903,
|
|
"grad_norm": 0.2946584201665625,
|
|
"learning_rate": 2.3265306122448982e-05,
|
|
"loss": 1.1915,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.2966751918158568,
|
|
"grad_norm": 0.31814731087212356,
|
|
"learning_rate": 2.3673469387755103e-05,
|
|
"loss": 1.1919,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.30179028132992325,
|
|
"grad_norm": 0.2535281231928839,
|
|
"learning_rate": 2.4081632653061227e-05,
|
|
"loss": 1.2135,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.3069053708439898,
|
|
"grad_norm": 0.28231587803629354,
|
|
"learning_rate": 2.448979591836735e-05,
|
|
"loss": 1.2029,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.31202046035805625,
|
|
"grad_norm": 0.2010585704472117,
|
|
"learning_rate": 2.4897959183673473e-05,
|
|
"loss": 1.1958,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.3171355498721228,
|
|
"grad_norm": 0.2261450242992654,
|
|
"learning_rate": 2.5306122448979597e-05,
|
|
"loss": 1.1903,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.32225063938618925,
|
|
"grad_norm": 0.23939206047592157,
|
|
"learning_rate": 2.5714285714285718e-05,
|
|
"loss": 1.1947,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.3273657289002558,
|
|
"grad_norm": 0.2629186812479751,
|
|
"learning_rate": 2.6122448979591835e-05,
|
|
"loss": 1.1738,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.33248081841432225,
|
|
"grad_norm": 0.25820871123390515,
|
|
"learning_rate": 2.653061224489796e-05,
|
|
"loss": 1.1918,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.3375959079283887,
|
|
"grad_norm": 0.2860124405371331,
|
|
"learning_rate": 2.6938775510204084e-05,
|
|
"loss": 1.2015,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.34271099744245526,
|
|
"grad_norm": 0.37417972707557,
|
|
"learning_rate": 2.7346938775510205e-05,
|
|
"loss": 1.1877,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.34782608695652173,
|
|
"grad_norm": 0.5554510331993981,
|
|
"learning_rate": 2.775510204081633e-05,
|
|
"loss": 1.2075,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 0.8032264545180317,
|
|
"learning_rate": 2.816326530612245e-05,
|
|
"loss": 1.1954,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.35805626598465473,
|
|
"grad_norm": 0.8465110589364025,
|
|
"learning_rate": 2.8571428571428574e-05,
|
|
"loss": 1.1948,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.3631713554987212,
|
|
"grad_norm": 0.6056567908053532,
|
|
"learning_rate": 2.89795918367347e-05,
|
|
"loss": 1.1608,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.36828644501278773,
|
|
"grad_norm": 1.0339865612062735,
|
|
"learning_rate": 2.938775510204082e-05,
|
|
"loss": 1.198,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.3734015345268542,
|
|
"grad_norm": 1.0360361783633478,
|
|
"learning_rate": 2.9795918367346944e-05,
|
|
"loss": 1.1873,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.37851662404092073,
|
|
"grad_norm": 0.5781784281108936,
|
|
"learning_rate": 3.020408163265306e-05,
|
|
"loss": 1.1919,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.3836317135549872,
|
|
"grad_norm": 0.9260593669478031,
|
|
"learning_rate": 3.061224489795918e-05,
|
|
"loss": 1.1672,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.3887468030690537,
|
|
"grad_norm": 0.8443095651460022,
|
|
"learning_rate": 3.102040816326531e-05,
|
|
"loss": 1.1785,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.3938618925831202,
|
|
"grad_norm": 0.5037173461965399,
|
|
"learning_rate": 3.142857142857143e-05,
|
|
"loss": 1.1696,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.3989769820971867,
|
|
"grad_norm": 0.83134727018411,
|
|
"learning_rate": 3.183673469387755e-05,
|
|
"loss": 1.1913,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.4040920716112532,
|
|
"grad_norm": 0.43515185597618067,
|
|
"learning_rate": 3.224489795918368e-05,
|
|
"loss": 1.1998,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.4092071611253197,
|
|
"grad_norm": 0.5605879393832622,
|
|
"learning_rate": 3.26530612244898e-05,
|
|
"loss": 1.159,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.4143222506393862,
|
|
"grad_norm": 0.5401839074014898,
|
|
"learning_rate": 3.306122448979592e-05,
|
|
"loss": 1.1614,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.4194373401534527,
|
|
"grad_norm": 0.6023459415610618,
|
|
"learning_rate": 3.346938775510204e-05,
|
|
"loss": 1.1681,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.42455242966751916,
|
|
"grad_norm": 0.5818804857404473,
|
|
"learning_rate": 3.387755102040817e-05,
|
|
"loss": 1.1598,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.4296675191815857,
|
|
"grad_norm": 0.6214983408408011,
|
|
"learning_rate": 3.4285714285714284e-05,
|
|
"loss": 1.1719,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.43478260869565216,
|
|
"grad_norm": 0.6889766714463987,
|
|
"learning_rate": 3.469387755102041e-05,
|
|
"loss": 1.1652,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.4398976982097187,
|
|
"grad_norm": 0.5760202894156686,
|
|
"learning_rate": 3.510204081632653e-05,
|
|
"loss": 1.1662,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.44501278772378516,
|
|
"grad_norm": 0.6061942284235254,
|
|
"learning_rate": 3.551020408163265e-05,
|
|
"loss": 1.1879,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.45012787723785164,
|
|
"grad_norm": 0.4851908724925709,
|
|
"learning_rate": 3.591836734693878e-05,
|
|
"loss": 1.1804,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.45524296675191817,
|
|
"grad_norm": 0.558284089379506,
|
|
"learning_rate": 3.63265306122449e-05,
|
|
"loss": 1.1667,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.46035805626598464,
|
|
"grad_norm": 0.5249911631612906,
|
|
"learning_rate": 3.673469387755102e-05,
|
|
"loss": 1.1551,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.46547314578005117,
|
|
"grad_norm": 0.6405503624893562,
|
|
"learning_rate": 3.714285714285715e-05,
|
|
"loss": 1.1732,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 0.5784785007056862,
|
|
"learning_rate": 3.755102040816327e-05,
|
|
"loss": 1.1713,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.47570332480818417,
|
|
"grad_norm": 0.8586793710160441,
|
|
"learning_rate": 3.795918367346939e-05,
|
|
"loss": 1.1749,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.48081841432225064,
|
|
"grad_norm": 0.7501542028722769,
|
|
"learning_rate": 3.836734693877551e-05,
|
|
"loss": 1.1661,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.4859335038363171,
|
|
"grad_norm": 0.5945696896064088,
|
|
"learning_rate": 3.8775510204081634e-05,
|
|
"loss": 1.1613,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.49104859335038364,
|
|
"grad_norm": 0.7440586521483908,
|
|
"learning_rate": 3.9183673469387755e-05,
|
|
"loss": 1.1322,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.4961636828644501,
|
|
"grad_norm": 1.114624130882715,
|
|
"learning_rate": 3.959183673469388e-05,
|
|
"loss": 1.1255,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.5012787723785166,
|
|
"grad_norm": 1.7108401028613913,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.1813,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.5063938618925832,
|
|
"grad_norm": 0.44659523303682175,
|
|
"learning_rate": 3.9999871678303026e-05,
|
|
"loss": 1.16,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.5115089514066496,
|
|
"grad_norm": 1.9551355398623464,
|
|
"learning_rate": 3.999948671485876e-05,
|
|
"loss": 1.1552,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.5166240409207161,
|
|
"grad_norm": 0.7446799337776974,
|
|
"learning_rate": 3.9998845114607106e-05,
|
|
"loss": 1.1362,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.5217391304347826,
|
|
"grad_norm": 1.50554061275789,
|
|
"learning_rate": 3.99979468857812e-05,
|
|
"loss": 1.1578,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.5268542199488491,
|
|
"grad_norm": 0.7841214706789117,
|
|
"learning_rate": 3.9996792039907254e-05,
|
|
"loss": 1.1512,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.5319693094629157,
|
|
"grad_norm": 1.307842675480053,
|
|
"learning_rate": 3.999538059180445e-05,
|
|
"loss": 1.1637,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.5370843989769821,
|
|
"grad_norm": 0.9212444211121106,
|
|
"learning_rate": 3.9993712559584736e-05,
|
|
"loss": 1.1395,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.5421994884910486,
|
|
"grad_norm": 1.1124085438820968,
|
|
"learning_rate": 3.9991787964652576e-05,
|
|
"loss": 1.1416,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.5473145780051151,
|
|
"grad_norm": 1.1650621890898196,
|
|
"learning_rate": 3.9989606831704704e-05,
|
|
"loss": 1.142,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.5524296675191815,
|
|
"grad_norm": 0.7803938686688274,
|
|
"learning_rate": 3.998716918872979e-05,
|
|
"loss": 1.1412,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.5575447570332481,
|
|
"grad_norm": 1.2099933733931747,
|
|
"learning_rate": 3.998447506700807e-05,
|
|
"loss": 1.119,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.5626598465473146,
|
|
"grad_norm": 0.5996285589701097,
|
|
"learning_rate": 3.998152450111099e-05,
|
|
"loss": 1.1538,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.5677749360613811,
|
|
"grad_norm": 1.0736932260251144,
|
|
"learning_rate": 3.9978317528900704e-05,
|
|
"loss": 1.1249,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.5728900255754475,
|
|
"grad_norm": 0.5804871130419977,
|
|
"learning_rate": 3.9974854191529616e-05,
|
|
"loss": 1.1441,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.578005115089514,
|
|
"grad_norm": 0.7862604118769216,
|
|
"learning_rate": 3.997113453343987e-05,
|
|
"loss": 1.1421,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.5831202046035806,
|
|
"grad_norm": 0.6885271538338327,
|
|
"learning_rate": 3.996715860236275e-05,
|
|
"loss": 1.1343,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.6788644129844003,
|
|
"learning_rate": 3.996292644931807e-05,
|
|
"loss": 1.1469,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.5933503836317136,
|
|
"grad_norm": 0.684753398463379,
|
|
"learning_rate": 3.995843812861353e-05,
|
|
"loss": 1.1399,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.59846547314578,
|
|
"grad_norm": 0.5126160753180327,
|
|
"learning_rate": 3.9953693697844036e-05,
|
|
"loss": 1.1149,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.6035805626598465,
|
|
"grad_norm": 0.5713048245116996,
|
|
"learning_rate": 3.994869321789093e-05,
|
|
"loss": 1.1294,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.6086956521739131,
|
|
"grad_norm": 0.4824046734986782,
|
|
"learning_rate": 3.994343675292121e-05,
|
|
"loss": 1.1194,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.6138107416879796,
|
|
"grad_norm": 0.5772818973773468,
|
|
"learning_rate": 3.9937924370386733e-05,
|
|
"loss": 1.1382,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.618925831202046,
|
|
"grad_norm": 0.5640558910780374,
|
|
"learning_rate": 3.9932156141023325e-05,
|
|
"loss": 1.1288,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.6240409207161125,
|
|
"grad_norm": 0.6644942399025234,
|
|
"learning_rate": 3.992613213884989e-05,
|
|
"loss": 1.159,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.629156010230179,
|
|
"grad_norm": 0.7622498053014113,
|
|
"learning_rate": 3.9919852441167426e-05,
|
|
"loss": 1.1412,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.6342710997442456,
|
|
"grad_norm": 0.642371805679136,
|
|
"learning_rate": 3.99133171285581e-05,
|
|
"loss": 1.1547,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.639386189258312,
|
|
"grad_norm": 0.47445759803876414,
|
|
"learning_rate": 3.9906526284884156e-05,
|
|
"loss": 1.1463,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.6445012787723785,
|
|
"grad_norm": 0.3468568785754221,
|
|
"learning_rate": 3.989947999728683e-05,
|
|
"loss": 1.1473,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.649616368286445,
|
|
"grad_norm": 0.3826230766226846,
|
|
"learning_rate": 3.98921783561853e-05,
|
|
"loss": 1.1523,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.6547314578005116,
|
|
"grad_norm": 0.40385322488596853,
|
|
"learning_rate": 3.988462145527545e-05,
|
|
"loss": 1.1275,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.659846547314578,
|
|
"grad_norm": 0.5075478796367268,
|
|
"learning_rate": 3.9876809391528724e-05,
|
|
"loss": 1.1392,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.6649616368286445,
|
|
"grad_norm": 0.5208684496947872,
|
|
"learning_rate": 3.986874226519085e-05,
|
|
"loss": 1.14,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.670076726342711,
|
|
"grad_norm": 0.5624315662158746,
|
|
"learning_rate": 3.986042017978055e-05,
|
|
"loss": 1.1099,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.6751918158567775,
|
|
"grad_norm": 0.5780512451990953,
|
|
"learning_rate": 3.985184324208826e-05,
|
|
"loss": 1.1511,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.680306905370844,
|
|
"grad_norm": 0.6247395333389373,
|
|
"learning_rate": 3.984301156217467e-05,
|
|
"loss": 1.1515,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.6854219948849105,
|
|
"grad_norm": 0.5133917066342321,
|
|
"learning_rate": 3.9833925253369415e-05,
|
|
"loss": 1.1117,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.690537084398977,
|
|
"grad_norm": 0.3219649704869207,
|
|
"learning_rate": 3.982458443226955e-05,
|
|
"loss": 1.1305,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.6956521739130435,
|
|
"grad_norm": 0.2522321720635661,
|
|
"learning_rate": 3.9814989218738074e-05,
|
|
"loss": 1.129,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.7007672634271099,
|
|
"grad_norm": 0.2980429052508869,
|
|
"learning_rate": 3.980513973590239e-05,
|
|
"loss": 1.1332,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 0.34290982806032505,
|
|
"learning_rate": 3.9795036110152745e-05,
|
|
"loss": 1.1296,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.710997442455243,
|
|
"grad_norm": 0.44091227459865195,
|
|
"learning_rate": 3.978467847114057e-05,
|
|
"loss": 1.1392,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.7161125319693095,
|
|
"grad_norm": 0.5949165816794791,
|
|
"learning_rate": 3.977406695177684e-05,
|
|
"loss": 1.1363,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.7212276214833759,
|
|
"grad_norm": 0.7738088490588236,
|
|
"learning_rate": 3.9763201688230385e-05,
|
|
"loss": 1.13,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.7263427109974424,
|
|
"grad_norm": 0.9415135094116259,
|
|
"learning_rate": 3.975208281992611e-05,
|
|
"loss": 1.1054,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.731457800511509,
|
|
"grad_norm": 1.013667201332667,
|
|
"learning_rate": 3.974071048954322e-05,
|
|
"loss": 1.1335,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.7365728900255755,
|
|
"grad_norm": 0.918574474686866,
|
|
"learning_rate": 3.972908484301338e-05,
|
|
"loss": 1.1225,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.7416879795396419,
|
|
"grad_norm": 1.0028928008507987,
|
|
"learning_rate": 3.971720602951886e-05,
|
|
"loss": 1.1394,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.7468030690537084,
|
|
"grad_norm": 1.01371150070985,
|
|
"learning_rate": 3.9705074201490614e-05,
|
|
"loss": 1.1135,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.7519181585677749,
|
|
"grad_norm": 0.6829183814095904,
|
|
"learning_rate": 3.9692689514606326e-05,
|
|
"loss": 1.1195,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.7570332480818415,
|
|
"grad_norm": 0.4995779268853437,
|
|
"learning_rate": 3.9680052127788386e-05,
|
|
"loss": 1.1096,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.7621483375959079,
|
|
"grad_norm": 0.5466194571724614,
|
|
"learning_rate": 3.96671622032019e-05,
|
|
"loss": 1.1438,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.7672634271099744,
|
|
"grad_norm": 0.4993258831722339,
|
|
"learning_rate": 3.965401990625255e-05,
|
|
"loss": 1.1246,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.7723785166240409,
|
|
"grad_norm": 0.5394473074799043,
|
|
"learning_rate": 3.964062540558454e-05,
|
|
"loss": 1.1367,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.7774936061381074,
|
|
"grad_norm": 0.6014091892453812,
|
|
"learning_rate": 3.962697887307836e-05,
|
|
"loss": 1.1074,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.782608695652174,
|
|
"grad_norm": 0.44042594454750733,
|
|
"learning_rate": 3.961308048384863e-05,
|
|
"loss": 1.1203,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.7877237851662404,
|
|
"grad_norm": 0.41663585280574117,
|
|
"learning_rate": 3.9598930416241857e-05,
|
|
"loss": 1.1,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.7928388746803069,
|
|
"grad_norm": 0.43073314610132185,
|
|
"learning_rate": 3.9584528851834096e-05,
|
|
"loss": 1.1176,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.7979539641943734,
|
|
"grad_norm": 0.4025238410612463,
|
|
"learning_rate": 3.956987597542867e-05,
|
|
"loss": 1.1404,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.80306905370844,
|
|
"grad_norm": 0.31467711628825223,
|
|
"learning_rate": 3.955497197505377e-05,
|
|
"loss": 1.1151,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.8081841432225064,
|
|
"grad_norm": 0.3777897458256624,
|
|
"learning_rate": 3.953981704196007e-05,
|
|
"loss": 1.1064,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.8132992327365729,
|
|
"grad_norm": 0.4263089414332897,
|
|
"learning_rate": 3.952441137061823e-05,
|
|
"loss": 1.1317,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.8184143222506394,
|
|
"grad_norm": 0.44871938263927913,
|
|
"learning_rate": 3.9508755158716445e-05,
|
|
"loss": 1.1006,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 0.39666587385431135,
|
|
"learning_rate": 3.949284860715787e-05,
|
|
"loss": 1.106,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.8286445012787724,
|
|
"grad_norm": 0.37222061819294466,
|
|
"learning_rate": 3.94766919200581e-05,
|
|
"loss": 1.1424,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.8337595907928389,
|
|
"grad_norm": 0.4106985425516431,
|
|
"learning_rate": 3.946028530474247e-05,
|
|
"loss": 1.1066,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.8388746803069054,
|
|
"grad_norm": 0.34174723556345027,
|
|
"learning_rate": 3.944362897174345e-05,
|
|
"loss": 1.1377,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.8439897698209718,
|
|
"grad_norm": 0.31421437045922873,
|
|
"learning_rate": 3.942672313479794e-05,
|
|
"loss": 1.1481,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.8491048593350383,
|
|
"grad_norm": 0.4554074855575182,
|
|
"learning_rate": 3.9409568010844504e-05,
|
|
"loss": 1.1021,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.8542199488491049,
|
|
"grad_norm": 0.4862902618582567,
|
|
"learning_rate": 3.9392163820020596e-05,
|
|
"loss": 1.1136,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.8593350383631714,
|
|
"grad_norm": 0.5031898914893316,
|
|
"learning_rate": 3.937451078565975e-05,
|
|
"loss": 1.1041,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.8644501278772379,
|
|
"grad_norm": 0.4901545096169435,
|
|
"learning_rate": 3.935660913428871e-05,
|
|
"loss": 1.1283,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.8695652173913043,
|
|
"grad_norm": 0.4686488348433868,
|
|
"learning_rate": 3.93384590956245e-05,
|
|
"loss": 1.1188,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.8746803069053708,
|
|
"grad_norm": 0.382892444800895,
|
|
"learning_rate": 3.932006090257149e-05,
|
|
"loss": 1.1068,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.8797953964194374,
|
|
"grad_norm": 0.37159943638867027,
|
|
"learning_rate": 3.930141479121841e-05,
|
|
"loss": 1.1208,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.8849104859335039,
|
|
"grad_norm": 0.42709573093618236,
|
|
"learning_rate": 3.9282521000835343e-05,
|
|
"loss": 1.1383,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.8900255754475703,
|
|
"grad_norm": 0.47346272108755927,
|
|
"learning_rate": 3.9263379773870595e-05,
|
|
"loss": 1.0882,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.8951406649616368,
|
|
"grad_norm": 0.5285371745037324,
|
|
"learning_rate": 3.9243991355947654e-05,
|
|
"loss": 1.112,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.9002557544757033,
|
|
"grad_norm": 0.5906366765469058,
|
|
"learning_rate": 3.9224355995861976e-05,
|
|
"loss": 1.1198,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.9053708439897699,
|
|
"grad_norm": 0.6390366442616775,
|
|
"learning_rate": 3.9204473945577844e-05,
|
|
"loss": 1.1165,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.9104859335038363,
|
|
"grad_norm": 0.7619939625989861,
|
|
"learning_rate": 3.9184345460225086e-05,
|
|
"loss": 1.0976,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.9156010230179028,
|
|
"grad_norm": 0.8274597084021534,
|
|
"learning_rate": 3.916397079809587e-05,
|
|
"loss": 1.1263,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.9207161125319693,
|
|
"grad_norm": 0.8670812778043089,
|
|
"learning_rate": 3.914335022064129e-05,
|
|
"loss": 1.1038,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.9258312020460358,
|
|
"grad_norm": 0.7987111262536127,
|
|
"learning_rate": 3.91224839924681e-05,
|
|
"loss": 1.0991,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.9309462915601023,
|
|
"grad_norm": 0.6789089254585002,
|
|
"learning_rate": 3.91013723813353e-05,
|
|
"loss": 1.1153,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.9360613810741688,
|
|
"grad_norm": 0.5247353582861676,
|
|
"learning_rate": 3.9080015658150644e-05,
|
|
"loss": 1.1346,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 0.3644802006853034,
|
|
"learning_rate": 3.905841409696724e-05,
|
|
"loss": 1.1203,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.9462915601023018,
|
|
"grad_norm": 0.302951995016467,
|
|
"learning_rate": 3.903656797497998e-05,
|
|
"loss": 1.1085,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.9514066496163683,
|
|
"grad_norm": 0.40803915020390363,
|
|
"learning_rate": 3.901447757252202e-05,
|
|
"loss": 1.1249,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.9565217391304348,
|
|
"grad_norm": 0.4575912395967058,
|
|
"learning_rate": 3.899214317306114e-05,
|
|
"loss": 1.1207,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.9616368286445013,
|
|
"grad_norm": 0.4384857639291277,
|
|
"learning_rate": 3.896956506319615e-05,
|
|
"loss": 1.1175,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.9667519181585678,
|
|
"grad_norm": 0.37810315759266455,
|
|
"learning_rate": 3.894674353265319e-05,
|
|
"loss": 1.1263,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.9718670076726342,
|
|
"grad_norm": 0.33040746591886666,
|
|
"learning_rate": 3.8923678874282005e-05,
|
|
"loss": 1.0891,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.9769820971867008,
|
|
"grad_norm": 0.2970896158791436,
|
|
"learning_rate": 3.890037138405221e-05,
|
|
"loss": 1.1118,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.9820971867007673,
|
|
"grad_norm": 0.2786192370878433,
|
|
"learning_rate": 3.887682136104948e-05,
|
|
"loss": 1.0955,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.9872122762148338,
|
|
"grad_norm": 0.37616961887523437,
|
|
"learning_rate": 3.88530291074717e-05,
|
|
"loss": 1.1309,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.9923273657289002,
|
|
"grad_norm": 0.30458475511135424,
|
|
"learning_rate": 3.88289949286251e-05,
|
|
"loss": 1.1288,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.9974424552429667,
|
|
"grad_norm": 0.27006171785266825,
|
|
"learning_rate": 3.880471913292035e-05,
|
|
"loss": 1.1067,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 1.0025575447570332,
|
|
"grad_norm": 0.641130344717003,
|
|
"learning_rate": 3.878020203186858e-05,
|
|
"loss": 1.6856,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 1.0076726342710998,
|
|
"grad_norm": 1.12901892252244,
|
|
"learning_rate": 3.875544394007739e-05,
|
|
"loss": 1.1049,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 1.0127877237851663,
|
|
"grad_norm": 0.8810192018583497,
|
|
"learning_rate": 3.8730445175246815e-05,
|
|
"loss": 1.0976,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 1.0179028132992327,
|
|
"grad_norm": 0.5060443892157782,
|
|
"learning_rate": 3.8705206058165244e-05,
|
|
"loss": 1.1224,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 1.0230179028132993,
|
|
"grad_norm": 0.45555273554867565,
|
|
"learning_rate": 3.8679726912705315e-05,
|
|
"loss": 1.1104,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 1.0281329923273657,
|
|
"grad_norm": 0.5987124980388637,
|
|
"learning_rate": 3.865400806581975e-05,
|
|
"loss": 1.1236,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 1.0332480818414322,
|
|
"grad_norm": 0.6102266868676671,
|
|
"learning_rate": 3.862804984753714e-05,
|
|
"loss": 1.0929,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 1.0383631713554988,
|
|
"grad_norm": 0.4715043183631868,
|
|
"learning_rate": 3.8601852590957766e-05,
|
|
"loss": 1.1314,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 1.0434782608695652,
|
|
"grad_norm": 0.35678120553758946,
|
|
"learning_rate": 3.857541663224926e-05,
|
|
"loss": 1.0817,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 1.0485933503836318,
|
|
"grad_norm": 0.5779239076939061,
|
|
"learning_rate": 3.8548742310642334e-05,
|
|
"loss": 1.0744,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 1.0537084398976981,
|
|
"grad_norm": 0.4640287441279096,
|
|
"learning_rate": 3.852182996842641e-05,
|
|
"loss": 1.0778,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 1.0588235294117647,
|
|
"grad_norm": 0.42150823261105713,
|
|
"learning_rate": 3.849467995094522e-05,
|
|
"loss": 1.1283,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 1.0639386189258313,
|
|
"grad_norm": 0.5821601085737635,
|
|
"learning_rate": 3.846729260659241e-05,
|
|
"loss": 1.0972,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 1.0690537084398977,
|
|
"grad_norm": 0.4704548470212048,
|
|
"learning_rate": 3.843966828680702e-05,
|
|
"loss": 1.0747,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 1.0741687979539642,
|
|
"grad_norm": 0.38693655375739405,
|
|
"learning_rate": 3.841180734606902e-05,
|
|
"loss": 1.1187,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 1.0792838874680306,
|
|
"grad_norm": 0.3630491063461679,
|
|
"learning_rate": 3.838371014189472e-05,
|
|
"loss": 1.0892,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 1.0843989769820972,
|
|
"grad_norm": 0.4405195407898948,
|
|
"learning_rate": 3.835537703483221e-05,
|
|
"loss": 1.1067,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 1.0895140664961638,
|
|
"grad_norm": 0.44452544270846567,
|
|
"learning_rate": 3.832680838845674e-05,
|
|
"loss": 1.0756,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 1.0946291560102301,
|
|
"grad_norm": 0.5401710772090864,
|
|
"learning_rate": 3.8298004569366016e-05,
|
|
"loss": 1.0847,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 1.0997442455242967,
|
|
"grad_norm": 0.7049028690595734,
|
|
"learning_rate": 3.8268965947175545e-05,
|
|
"loss": 1.1182,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 1.104859335038363,
|
|
"grad_norm": 0.7116280073130989,
|
|
"learning_rate": 3.823969289451384e-05,
|
|
"loss": 1.126,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 1.1099744245524297,
|
|
"grad_norm": 0.6441754688478455,
|
|
"learning_rate": 3.821018578701769e-05,
|
|
"loss": 1.1012,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 1.1150895140664963,
|
|
"grad_norm": 0.5733417711636994,
|
|
"learning_rate": 3.8180445003327296e-05,
|
|
"loss": 1.0919,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 1.1202046035805626,
|
|
"grad_norm": 0.5168592814231329,
|
|
"learning_rate": 3.815047092508146e-05,
|
|
"loss": 1.0708,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 1.1253196930946292,
|
|
"grad_norm": 0.40791830250588856,
|
|
"learning_rate": 3.812026393691262e-05,
|
|
"loss": 1.1016,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 1.1304347826086956,
|
|
"grad_norm": 0.3935626219257489,
|
|
"learning_rate": 3.808982442644199e-05,
|
|
"loss": 1.1062,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 1.1355498721227621,
|
|
"grad_norm": 0.42991493583618057,
|
|
"learning_rate": 3.8059152784274526e-05,
|
|
"loss": 1.0787,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 1.1406649616368287,
|
|
"grad_norm": 0.381545106483542,
|
|
"learning_rate": 3.802824940399395e-05,
|
|
"loss": 1.076,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 1.145780051150895,
|
|
"grad_norm": 0.43466891058485496,
|
|
"learning_rate": 3.799711468215767e-05,
|
|
"loss": 1.0788,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 1.1508951406649617,
|
|
"grad_norm": 0.5444427949474252,
|
|
"learning_rate": 3.796574901829173e-05,
|
|
"loss": 1.0916,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 1.156010230179028,
|
|
"grad_norm": 0.597694612097652,
|
|
"learning_rate": 3.793415281488566e-05,
|
|
"loss": 1.1063,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 1.1611253196930946,
|
|
"grad_norm": 0.5644442647832518,
|
|
"learning_rate": 3.790232647738728e-05,
|
|
"loss": 1.1242,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 1.1662404092071612,
|
|
"grad_norm": 0.5088885436714243,
|
|
"learning_rate": 3.7870270414197566e-05,
|
|
"loss": 1.0651,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 1.1713554987212276,
|
|
"grad_norm": 0.45669824317435515,
|
|
"learning_rate": 3.783798503666537e-05,
|
|
"loss": 1.0788,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 0.5158511102992952,
|
|
"learning_rate": 3.780547075908213e-05,
|
|
"loss": 1.0897,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 1.1815856777493605,
|
|
"grad_norm": 0.5509778938951233,
|
|
"learning_rate": 3.777272799867657e-05,
|
|
"loss": 1.1186,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 1.186700767263427,
|
|
"grad_norm": 0.6199044676759673,
|
|
"learning_rate": 3.773975717560934e-05,
|
|
"loss": 1.0839,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 1.1918158567774937,
|
|
"grad_norm": 0.5867928689399399,
|
|
"learning_rate": 3.7706558712967656e-05,
|
|
"loss": 1.0994,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 1.19693094629156,
|
|
"grad_norm": 0.43551312787108826,
|
|
"learning_rate": 3.76731330367598e-05,
|
|
"loss": 1.1112,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 1.2020460358056266,
|
|
"grad_norm": 0.3428798495589015,
|
|
"learning_rate": 3.763948057590975e-05,
|
|
"loss": 1.1025,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 1.207161125319693,
|
|
"grad_norm": 0.3420589750674672,
|
|
"learning_rate": 3.760560176225157e-05,
|
|
"loss": 1.064,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 1.2122762148337596,
|
|
"grad_norm": 0.47964929329378947,
|
|
"learning_rate": 3.757149703052395e-05,
|
|
"loss": 1.0982,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 1.2173913043478262,
|
|
"grad_norm": 0.5164961726841073,
|
|
"learning_rate": 3.753716681836461e-05,
|
|
"loss": 1.0698,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 1.2225063938618925,
|
|
"grad_norm": 0.5004726718437401,
|
|
"learning_rate": 3.750261156630465e-05,
|
|
"loss": 1.1286,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 1.227621483375959,
|
|
"grad_norm": 0.47794124988866565,
|
|
"learning_rate": 3.7467831717762925e-05,
|
|
"loss": 1.0846,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 1.2327365728900257,
|
|
"grad_norm": 0.44111411888891566,
|
|
"learning_rate": 3.743282771904035e-05,
|
|
"loss": 1.0834,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 1.237851662404092,
|
|
"grad_norm": 0.5079015451764834,
|
|
"learning_rate": 3.739760001931419e-05,
|
|
"loss": 1.104,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 1.2429667519181586,
|
|
"grad_norm": 0.5075602497339351,
|
|
"learning_rate": 3.7362149070632255e-05,
|
|
"loss": 1.0931,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 1.248081841432225,
|
|
"grad_norm": 0.3971531368129775,
|
|
"learning_rate": 3.732647532790713e-05,
|
|
"loss": 1.0792,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 1.2531969309462916,
|
|
"grad_norm": 0.33746211887589067,
|
|
"learning_rate": 3.729057924891035e-05,
|
|
"loss": 1.1235,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 1.258312020460358,
|
|
"grad_norm": 0.3650974996204564,
|
|
"learning_rate": 3.7254461294266483e-05,
|
|
"loss": 1.0974,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 1.2634271099744245,
|
|
"grad_norm": 0.377421756851408,
|
|
"learning_rate": 3.721812192744725e-05,
|
|
"loss": 1.0831,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 1.2685421994884911,
|
|
"grad_norm": 0.3795307978882048,
|
|
"learning_rate": 3.718156161476558e-05,
|
|
"loss": 1.0957,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 1.2736572890025575,
|
|
"grad_norm": 0.3800374146414857,
|
|
"learning_rate": 3.7144780825369615e-05,
|
|
"loss": 1.0709,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 1.278772378516624,
|
|
"grad_norm": 0.4007139140880588,
|
|
"learning_rate": 3.710778003123667e-05,
|
|
"loss": 1.0833,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 1.2838874680306906,
|
|
"grad_norm": 0.40407343707960797,
|
|
"learning_rate": 3.707055970716722e-05,
|
|
"loss": 1.0888,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 1.289002557544757,
|
|
"grad_norm": 0.2994034629230688,
|
|
"learning_rate": 3.703312033077878e-05,
|
|
"loss": 1.1002,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 1.2941176470588236,
|
|
"grad_norm": 0.3254270151573406,
|
|
"learning_rate": 3.699546238249979e-05,
|
|
"loss": 1.0737,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 1.29923273657289,
|
|
"grad_norm": 0.3911289495044072,
|
|
"learning_rate": 3.6957586345563417e-05,
|
|
"loss": 1.0991,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 1.3043478260869565,
|
|
"grad_norm": 0.38966577888960535,
|
|
"learning_rate": 3.691949270600141e-05,
|
|
"loss": 1.0791,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 1.309462915601023,
|
|
"grad_norm": 0.39562876092393817,
|
|
"learning_rate": 3.68811819526378e-05,
|
|
"loss": 1.1031,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 1.3145780051150895,
|
|
"grad_norm": 0.4269451879598667,
|
|
"learning_rate": 3.6842654577082686e-05,
|
|
"loss": 1.075,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 1.319693094629156,
|
|
"grad_norm": 0.4819004002486693,
|
|
"learning_rate": 3.6803911073725895e-05,
|
|
"loss": 1.0701,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 1.3248081841432224,
|
|
"grad_norm": 0.504125026080002,
|
|
"learning_rate": 3.6764951939730624e-05,
|
|
"loss": 1.0864,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 1.329923273657289,
|
|
"grad_norm": 0.4870192901488172,
|
|
"learning_rate": 3.6725777675027095e-05,
|
|
"loss": 1.1046,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 1.3350383631713556,
|
|
"grad_norm": 0.43270225897597664,
|
|
"learning_rate": 3.668638878230613e-05,
|
|
"loss": 1.0962,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 1.340153452685422,
|
|
"grad_norm": 0.3494948322409856,
|
|
"learning_rate": 3.664678576701267e-05,
|
|
"loss": 1.1036,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 1.3452685421994885,
|
|
"grad_norm": 0.29197512485904487,
|
|
"learning_rate": 3.660696913733934e-05,
|
|
"loss": 1.0955,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 1.350383631713555,
|
|
"grad_norm": 0.26565447956578164,
|
|
"learning_rate": 3.6566939404219874e-05,
|
|
"loss": 1.1045,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 1.3554987212276215,
|
|
"grad_norm": 0.29428244421319627,
|
|
"learning_rate": 3.652669708132261e-05,
|
|
"loss": 1.0592,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 1.3606138107416879,
|
|
"grad_norm": 0.2791238009074844,
|
|
"learning_rate": 3.648624268504387e-05,
|
|
"loss": 1.0825,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 1.3657289002557544,
|
|
"grad_norm": 0.2836207345495186,
|
|
"learning_rate": 3.644557673450133e-05,
|
|
"loss": 1.1182,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 1.370843989769821,
|
|
"grad_norm": 0.2642712963707173,
|
|
"learning_rate": 3.6404699751527365e-05,
|
|
"loss": 1.1036,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 1.3759590792838874,
|
|
"grad_norm": 0.2569721937955015,
|
|
"learning_rate": 3.6363612260662346e-05,
|
|
"loss": 1.1037,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 1.381074168797954,
|
|
"grad_norm": 0.2626548247202908,
|
|
"learning_rate": 3.632231478914794e-05,
|
|
"loss": 1.0752,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 1.3861892583120206,
|
|
"grad_norm": 0.33628877791383244,
|
|
"learning_rate": 3.628080786692032e-05,
|
|
"loss": 1.1241,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 1.391304347826087,
|
|
"grad_norm": 0.39928469448042375,
|
|
"learning_rate": 3.623909202660333e-05,
|
|
"loss": 1.0692,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 1.3964194373401535,
|
|
"grad_norm": 0.41762818228894244,
|
|
"learning_rate": 3.619716780350174e-05,
|
|
"loss": 1.0635,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 1.40153452685422,
|
|
"grad_norm": 0.5007186791076494,
|
|
"learning_rate": 3.615503573559426e-05,
|
|
"loss": 1.0869,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 1.4066496163682864,
|
|
"grad_norm": 0.6087223158210029,
|
|
"learning_rate": 3.6112696363526774e-05,
|
|
"loss": 1.0802,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 1.4117647058823528,
|
|
"grad_norm": 0.8238974925650274,
|
|
"learning_rate": 3.6070150230605264e-05,
|
|
"loss": 1.0911,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 1.4168797953964194,
|
|
"grad_norm": 1.0295392330906121,
|
|
"learning_rate": 3.6027397882788944e-05,
|
|
"loss": 1.0882,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 1.421994884910486,
|
|
"grad_norm": 0.9094341485583135,
|
|
"learning_rate": 3.5984439868683185e-05,
|
|
"loss": 1.0985,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 1.4271099744245523,
|
|
"grad_norm": 0.6431552527387483,
|
|
"learning_rate": 3.594127673953251e-05,
|
|
"loss": 1.0917,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 1.432225063938619,
|
|
"grad_norm": 0.39730871502755355,
|
|
"learning_rate": 3.589790904921353e-05,
|
|
"loss": 1.0649,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 1.4373401534526855,
|
|
"grad_norm": 0.25033607318319917,
|
|
"learning_rate": 3.585433735422779e-05,
|
|
"loss": 1.0704,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 1.4424552429667519,
|
|
"grad_norm": 0.2688599706833758,
|
|
"learning_rate": 3.581056221369469e-05,
|
|
"loss": 1.1132,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 1.4475703324808185,
|
|
"grad_norm": 0.36659477814922325,
|
|
"learning_rate": 3.5766584189344255e-05,
|
|
"loss": 1.0987,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 1.452685421994885,
|
|
"grad_norm": 0.4117135879547577,
|
|
"learning_rate": 3.572240384550996e-05,
|
|
"loss": 1.0782,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 1.4578005115089514,
|
|
"grad_norm": 0.3858803623667727,
|
|
"learning_rate": 3.5678021749121465e-05,
|
|
"loss": 1.0792,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 1.4629156010230178,
|
|
"grad_norm": 0.3491502416990134,
|
|
"learning_rate": 3.563343846969738e-05,
|
|
"loss": 1.0868,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 1.4680306905370843,
|
|
"grad_norm": 0.2782819121589572,
|
|
"learning_rate": 3.558865457933789e-05,
|
|
"loss": 1.0874,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 1.473145780051151,
|
|
"grad_norm": 0.2855530481636196,
|
|
"learning_rate": 3.5543670652717485e-05,
|
|
"loss": 1.1035,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 1.4782608695652173,
|
|
"grad_norm": 0.2506736083535957,
|
|
"learning_rate": 3.5498487267077556e-05,
|
|
"loss": 1.0889,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 1.4833759590792839,
|
|
"grad_norm": 0.3217152717517136,
|
|
"learning_rate": 3.545310500221896e-05,
|
|
"loss": 1.0743,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 1.4884910485933505,
|
|
"grad_norm": 0.36398355801272403,
|
|
"learning_rate": 3.540752444049463e-05,
|
|
"loss": 1.1007,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 1.4936061381074168,
|
|
"grad_norm": 0.4534774518127883,
|
|
"learning_rate": 3.536174616680206e-05,
|
|
"loss": 1.0998,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 1.4987212276214834,
|
|
"grad_norm": 0.5782489918794124,
|
|
"learning_rate": 3.531577076857584e-05,
|
|
"loss": 1.0707,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 1.50383631713555,
|
|
"grad_norm": 0.6360084025806246,
|
|
"learning_rate": 3.5269598835780074e-05,
|
|
"loss": 1.0882,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 1.5089514066496164,
|
|
"grad_norm": 0.6564145997881213,
|
|
"learning_rate": 3.522323096090083e-05,
|
|
"loss": 1.0822,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 1.5140664961636827,
|
|
"grad_norm": 0.6579103014127573,
|
|
"learning_rate": 3.517666773893856e-05,
|
|
"loss": 1.0659,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 1.5191815856777495,
|
|
"grad_norm": 0.588797087024321,
|
|
"learning_rate": 3.512990976740043e-05,
|
|
"loss": 1.0701,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 1.5242966751918159,
|
|
"grad_norm": 0.507691256755524,
|
|
"learning_rate": 3.5082957646292656e-05,
|
|
"loss": 1.0703,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 1.5294117647058822,
|
|
"grad_norm": 0.46225068995130947,
|
|
"learning_rate": 3.503581197811283e-05,
|
|
"loss": 1.0998,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 1.5345268542199488,
|
|
"grad_norm": 0.46321293283271464,
|
|
"learning_rate": 3.498847336784217e-05,
|
|
"loss": 1.1118,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 1.5396419437340154,
|
|
"grad_norm": 0.47673226500293,
|
|
"learning_rate": 3.4940942422937745e-05,
|
|
"loss": 1.062,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 1.5447570332480818,
|
|
"grad_norm": 0.4938233436299125,
|
|
"learning_rate": 3.4893219753324715e-05,
|
|
"loss": 1.0796,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 1.5498721227621484,
|
|
"grad_norm": 0.454989222644801,
|
|
"learning_rate": 3.4845305971388474e-05,
|
|
"loss": 1.0599,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 1.554987212276215,
|
|
"grad_norm": 0.42196538938547673,
|
|
"learning_rate": 3.4797201691966804e-05,
|
|
"loss": 1.0976,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 1.5601023017902813,
|
|
"grad_norm": 0.438633061741324,
|
|
"learning_rate": 3.4748907532341974e-05,
|
|
"loss": 1.069,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 1.5652173913043477,
|
|
"grad_norm": 0.35425686678327734,
|
|
"learning_rate": 3.470042411223284e-05,
|
|
"loss": 1.0876,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 1.5703324808184145,
|
|
"grad_norm": 0.333738264976303,
|
|
"learning_rate": 3.465175205378688e-05,
|
|
"loss": 1.0832,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 1.5754475703324808,
|
|
"grad_norm": 0.3167972686191401,
|
|
"learning_rate": 3.46028919815722e-05,
|
|
"loss": 1.0822,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 1.5805626598465472,
|
|
"grad_norm": 0.30610984275864866,
|
|
"learning_rate": 3.4553844522569545e-05,
|
|
"loss": 1.0812,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 1.5856777493606138,
|
|
"grad_norm": 0.3880104515073711,
|
|
"learning_rate": 3.4504610306164235e-05,
|
|
"loss": 1.0556,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 1.5907928388746804,
|
|
"grad_norm": 0.5025355791144405,
|
|
"learning_rate": 3.4455189964138076e-05,
|
|
"loss": 1.0977,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 1.5959079283887467,
|
|
"grad_norm": 0.4731775995897406,
|
|
"learning_rate": 3.4405584130661294e-05,
|
|
"loss": 1.0999,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 1.6010230179028133,
|
|
"grad_norm": 0.3302367508833335,
|
|
"learning_rate": 3.435579344228436e-05,
|
|
"loss": 1.0947,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 1.60613810741688,
|
|
"grad_norm": 0.3079090812550143,
|
|
"learning_rate": 3.430581853792983e-05,
|
|
"loss": 1.0855,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 1.6112531969309463,
|
|
"grad_norm": 0.3346134543184726,
|
|
"learning_rate": 3.425566005888418e-05,
|
|
"loss": 1.0705,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 1.6163682864450126,
|
|
"grad_norm": 0.306822833635453,
|
|
"learning_rate": 3.42053186487895e-05,
|
|
"loss": 1.0677,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 1.6214833759590794,
|
|
"grad_norm": 0.2893288371945462,
|
|
"learning_rate": 3.4154794953635314e-05,
|
|
"loss": 1.0867,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 1.6265984654731458,
|
|
"grad_norm": 0.3179300501326337,
|
|
"learning_rate": 3.410408962175026e-05,
|
|
"loss": 1.0751,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 1.6317135549872122,
|
|
"grad_norm": 0.4094677547905539,
|
|
"learning_rate": 3.405320330379374e-05,
|
|
"loss": 1.0974,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 1.6368286445012787,
|
|
"grad_norm": 0.44398101804683016,
|
|
"learning_rate": 3.4002136652747654e-05,
|
|
"loss": 1.068,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 1.6419437340153453,
|
|
"grad_norm": 0.3672002423818593,
|
|
"learning_rate": 3.3950890323907906e-05,
|
|
"loss": 1.0595,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 1.6470588235294117,
|
|
"grad_norm": 0.34831887181846766,
|
|
"learning_rate": 3.3899464974876095e-05,
|
|
"loss": 1.1265,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 1.6521739130434783,
|
|
"grad_norm": 0.33785733953151414,
|
|
"learning_rate": 3.3847861265551034e-05,
|
|
"loss": 1.0635,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 1.6572890025575449,
|
|
"grad_norm": 0.300108569750646,
|
|
"learning_rate": 3.379607985812026e-05,
|
|
"loss": 1.0663,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 1.6624040920716112,
|
|
"grad_norm": 0.3168223492831874,
|
|
"learning_rate": 3.37441214170516e-05,
|
|
"loss": 1.0703,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 1.6675191815856778,
|
|
"grad_norm": 0.30519376911285095,
|
|
"learning_rate": 3.369198660908457e-05,
|
|
"loss": 1.0989,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 1.6726342710997444,
|
|
"grad_norm": 0.27340255530098717,
|
|
"learning_rate": 3.3639676103221885e-05,
|
|
"loss": 1.083,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 1.6777493606138107,
|
|
"grad_norm": 0.24237557891013498,
|
|
"learning_rate": 3.358719057072082e-05,
|
|
"loss": 1.0888,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 1.682864450127877,
|
|
"grad_norm": 0.33187922158308414,
|
|
"learning_rate": 3.353453068508465e-05,
|
|
"loss": 1.078,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 1.6879795396419437,
|
|
"grad_norm": 0.33136779692393137,
|
|
"learning_rate": 3.348169712205396e-05,
|
|
"loss": 1.0955,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 1.6930946291560103,
|
|
"grad_norm": 0.3896824184636105,
|
|
"learning_rate": 3.342869055959799e-05,
|
|
"loss": 1.0919,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 1.6982097186700766,
|
|
"grad_norm": 0.4690457383285477,
|
|
"learning_rate": 3.337551167790594e-05,
|
|
"loss": 1.0834,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 1.7033248081841432,
|
|
"grad_norm": 0.46902429127424217,
|
|
"learning_rate": 3.3322161159378266e-05,
|
|
"loss": 1.078,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 1.7084398976982098,
|
|
"grad_norm": 0.5077584309818212,
|
|
"learning_rate": 3.326863968861785e-05,
|
|
"loss": 1.0797,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 1.7135549872122762,
|
|
"grad_norm": 0.5218252773609796,
|
|
"learning_rate": 3.32149479524213e-05,
|
|
"loss": 1.0946,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 1.7186700767263428,
|
|
"grad_norm": 0.4069002876418554,
|
|
"learning_rate": 3.3161086639770096e-05,
|
|
"loss": 1.0765,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 1.7237851662404093,
|
|
"grad_norm": 0.31535846997146344,
|
|
"learning_rate": 3.310705644182172e-05,
|
|
"loss": 1.1063,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 1.7289002557544757,
|
|
"grad_norm": 0.33692698144202765,
|
|
"learning_rate": 3.3052858051900855e-05,
|
|
"loss": 1.0702,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 1.734015345268542,
|
|
"grad_norm": 0.38776356076458385,
|
|
"learning_rate": 3.299849216549043e-05,
|
|
"loss": 1.0671,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 1.7391304347826086,
|
|
"grad_norm": 0.4685217600838493,
|
|
"learning_rate": 3.294395948022274e-05,
|
|
"loss": 1.0741,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 1.7442455242966752,
|
|
"grad_norm": 0.5146785167418015,
|
|
"learning_rate": 3.288926069587043e-05,
|
|
"loss": 1.0872,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 1.7493606138107416,
|
|
"grad_norm": 0.4361055840814472,
|
|
"learning_rate": 3.283439651433761e-05,
|
|
"loss": 1.0854,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 1.7544757033248082,
|
|
"grad_norm": 0.3656787232466876,
|
|
"learning_rate": 3.277936763965076e-05,
|
|
"loss": 1.1032,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 1.7595907928388748,
|
|
"grad_norm": 0.28217873995066556,
|
|
"learning_rate": 3.272417477794973e-05,
|
|
"loss": 1.0793,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 0.3204034304733446,
|
|
"learning_rate": 3.266881863747869e-05,
|
|
"loss": 1.075,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 1.7698209718670077,
|
|
"grad_norm": 0.3079284299575741,
|
|
"learning_rate": 3.261329992857703e-05,
|
|
"loss": 1.072,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 1.7749360613810743,
|
|
"grad_norm": 0.38759228082284236,
|
|
"learning_rate": 3.255761936367025e-05,
|
|
"loss": 1.0739,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 1.7800511508951407,
|
|
"grad_norm": 0.43446165090981037,
|
|
"learning_rate": 3.25017776572608e-05,
|
|
"loss": 1.0796,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 1.785166240409207,
|
|
"grad_norm": 0.3249536808260686,
|
|
"learning_rate": 3.2445775525918934e-05,
|
|
"loss": 1.0863,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 1.7902813299232738,
|
|
"grad_norm": 0.2899437031105078,
|
|
"learning_rate": 3.238961368827351e-05,
|
|
"loss": 1.0835,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 1.7953964194373402,
|
|
"grad_norm": 0.317836365773142,
|
|
"learning_rate": 3.2333292865002754e-05,
|
|
"loss": 1.0826,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 1.8005115089514065,
|
|
"grad_norm": 0.2916579319687349,
|
|
"learning_rate": 3.227681377882503e-05,
|
|
"loss": 1.0751,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 1.8056265984654731,
|
|
"grad_norm": 0.21860087493949837,
|
|
"learning_rate": 3.2220177154489544e-05,
|
|
"loss": 1.0675,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 1.8107416879795397,
|
|
"grad_norm": 0.3359559031535743,
|
|
"learning_rate": 3.216338371876709e-05,
|
|
"loss": 1.0534,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 1.815856777493606,
|
|
"grad_norm": 0.39008315063327503,
|
|
"learning_rate": 3.2106434200440665e-05,
|
|
"loss": 1.0529,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 1.8209718670076727,
|
|
"grad_norm": 0.32916920973912117,
|
|
"learning_rate": 3.204932933029615e-05,
|
|
"loss": 1.1093,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 1.8260869565217392,
|
|
"grad_norm": 0.26659714069662344,
|
|
"learning_rate": 3.1992069841112936e-05,
|
|
"loss": 1.0707,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 1.8312020460358056,
|
|
"grad_norm": 0.2408662731393292,
|
|
"learning_rate": 3.19346564676545e-05,
|
|
"loss": 1.0664,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 1.836317135549872,
|
|
"grad_norm": 0.3149309446405476,
|
|
"learning_rate": 3.187708994665899e-05,
|
|
"loss": 1.0627,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 1.8414322250639388,
|
|
"grad_norm": 0.2922367857765737,
|
|
"learning_rate": 3.181937101682977e-05,
|
|
"loss": 1.0955,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 1.8465473145780051,
|
|
"grad_norm": 0.2732975527492678,
|
|
"learning_rate": 3.1761500418825955e-05,
|
|
"loss": 1.085,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 1.8516624040920715,
|
|
"grad_norm": 0.32951032286545007,
|
|
"learning_rate": 3.170347889525287e-05,
|
|
"loss": 1.0667,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 1.856777493606138,
|
|
"grad_norm": 0.2718568672274621,
|
|
"learning_rate": 3.1645307190652553e-05,
|
|
"loss": 1.0692,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 1.8618925831202047,
|
|
"grad_norm": 0.3038697100173248,
|
|
"learning_rate": 3.1586986051494185e-05,
|
|
"loss": 1.0531,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 1.867007672634271,
|
|
"grad_norm": 0.3482315834294184,
|
|
"learning_rate": 3.152851622616453e-05,
|
|
"loss": 1.0961,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 1.8721227621483376,
|
|
"grad_norm": 0.3635382014306177,
|
|
"learning_rate": 3.146989846495831e-05,
|
|
"loss": 1.0594,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 1.8772378516624042,
|
|
"grad_norm": 0.3801528737374951,
|
|
"learning_rate": 3.1411133520068565e-05,
|
|
"loss": 1.0719,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 1.8823529411764706,
|
|
"grad_norm": 0.35767074568834234,
|
|
"learning_rate": 3.135222214557706e-05,
|
|
"loss": 1.0978,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 1.887468030690537,
|
|
"grad_norm": 0.3177969921797143,
|
|
"learning_rate": 3.1293165097444545e-05,
|
|
"loss": 1.0824,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 1.8925831202046037,
|
|
"grad_norm": 0.23887001529666604,
|
|
"learning_rate": 3.123396313350108e-05,
|
|
"loss": 1.0584,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 1.89769820971867,
|
|
"grad_norm": 0.2180214891603909,
|
|
"learning_rate": 3.11746170134363e-05,
|
|
"loss": 1.0453,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 1.9028132992327365,
|
|
"grad_norm": 0.2767381523731611,
|
|
"learning_rate": 3.111512749878972e-05,
|
|
"loss": 1.0809,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 1.907928388746803,
|
|
"grad_norm": 0.2440349939808095,
|
|
"learning_rate": 3.105549535294086e-05,
|
|
"loss": 1.0767,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 1.9130434782608696,
|
|
"grad_norm": 0.22664308140303832,
|
|
"learning_rate": 3.099572134109955e-05,
|
|
"loss": 1.0689,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 1.918158567774936,
|
|
"grad_norm": 0.215228570214006,
|
|
"learning_rate": 3.093580623029605e-05,
|
|
"loss": 1.0757,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 1.9232736572890026,
|
|
"grad_norm": 0.238491708219821,
|
|
"learning_rate": 3.087575078937121e-05,
|
|
"loss": 1.0821,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 1.9283887468030692,
|
|
"grad_norm": 0.25677725654049566,
|
|
"learning_rate": 3.081555578896667e-05,
|
|
"loss": 1.076,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 1.9335038363171355,
|
|
"grad_norm": 0.27990137738084897,
|
|
"learning_rate": 3.075522200151488e-05,
|
|
"loss": 1.056,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 1.938618925831202,
|
|
"grad_norm": 0.2747809542407332,
|
|
"learning_rate": 3.069475020122923e-05,
|
|
"loss": 1.0657,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 1.9437340153452687,
|
|
"grad_norm": 0.24205044950668198,
|
|
"learning_rate": 3.063414116409413e-05,
|
|
"loss": 1.1051,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 1.948849104859335,
|
|
"grad_norm": 0.2286831747173763,
|
|
"learning_rate": 3.057339566785502e-05,
|
|
"loss": 1.1069,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 1.9539641943734014,
|
|
"grad_norm": 0.21926358704649387,
|
|
"learning_rate": 3.0512514492008437e-05,
|
|
"loss": 1.068,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 1.959079283887468,
|
|
"grad_norm": 0.23417329353885752,
|
|
"learning_rate": 3.045149841779194e-05,
|
|
"loss": 1.0575,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 1.9641943734015346,
|
|
"grad_norm": 0.21864816336590975,
|
|
"learning_rate": 3.039034822817416e-05,
|
|
"loss": 1.0624,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 1.969309462915601,
|
|
"grad_norm": 0.23037762810155873,
|
|
"learning_rate": 3.03290647078447e-05,
|
|
"loss": 1.0841,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 1.9744245524296675,
|
|
"grad_norm": 0.24730861325346853,
|
|
"learning_rate": 3.0267648643204093e-05,
|
|
"loss": 1.0614,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 1.979539641943734,
|
|
"grad_norm": 0.22748550621912642,
|
|
"learning_rate": 3.020610082235371e-05,
|
|
"loss": 1.0807,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 1.9846547314578005,
|
|
"grad_norm": 0.2406930461729291,
|
|
"learning_rate": 3.0144422035085625e-05,
|
|
"loss": 1.0678,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 1.989769820971867,
|
|
"grad_norm": 0.1947510056879471,
|
|
"learning_rate": 3.0082613072872512e-05,
|
|
"loss": 1.0706,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 1.9948849104859336,
|
|
"grad_norm": 0.24668811547574934,
|
|
"learning_rate": 3.0020674728857446e-05,
|
|
"loss": 1.0882,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.3944045454129894,
|
|
"learning_rate": 2.9958607797843783e-05,
|
|
"loss": 1.5854,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 2.0051150895140664,
|
|
"grad_norm": 0.571211331740568,
|
|
"learning_rate": 2.9896413076284915e-05,
|
|
"loss": 1.0541,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 2.010230179028133,
|
|
"grad_norm": 0.6496305605729329,
|
|
"learning_rate": 2.9834091362274055e-05,
|
|
"loss": 1.0948,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 2.0153452685421995,
|
|
"grad_norm": 0.5024579541598875,
|
|
"learning_rate": 2.9771643455534013e-05,
|
|
"loss": 1.0733,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 2.020460358056266,
|
|
"grad_norm": 0.337503688669548,
|
|
"learning_rate": 2.9709070157406932e-05,
|
|
"loss": 1.0575,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 2.0255754475703327,
|
|
"grad_norm": 0.4068958428270923,
|
|
"learning_rate": 2.9646372270843987e-05,
|
|
"loss": 1.0526,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 2.030690537084399,
|
|
"grad_norm": 0.511369275269955,
|
|
"learning_rate": 2.9583550600395106e-05,
|
|
"loss": 1.0499,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 2.0358056265984654,
|
|
"grad_norm": 0.4759521455282091,
|
|
"learning_rate": 2.952060595219861e-05,
|
|
"loss": 1.0576,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 2.040920716112532,
|
|
"grad_norm": 0.30686443389558704,
|
|
"learning_rate": 2.9457539133970923e-05,
|
|
"loss": 1.0703,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 2.0460358056265986,
|
|
"grad_norm": 0.48650529610865,
|
|
"learning_rate": 2.9394350954996147e-05,
|
|
"loss": 1.0805,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 2.051150895140665,
|
|
"grad_norm": 0.536898019942215,
|
|
"learning_rate": 2.9331042226115722e-05,
|
|
"loss": 1.0595,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 2.0562659846547313,
|
|
"grad_norm": 0.2894393958463196,
|
|
"learning_rate": 2.9267613759718002e-05,
|
|
"loss": 1.0819,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 2.061381074168798,
|
|
"grad_norm": 0.35736846132665373,
|
|
"learning_rate": 2.9204066369727826e-05,
|
|
"loss": 1.0516,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 2.0664961636828645,
|
|
"grad_norm": 0.5391010352210259,
|
|
"learning_rate": 2.914040087159609e-05,
|
|
"loss": 1.0873,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 2.071611253196931,
|
|
"grad_norm": 0.4261141488201902,
|
|
"learning_rate": 2.9076618082289272e-05,
|
|
"loss": 1.0517,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 2.0767263427109977,
|
|
"grad_norm": 0.32238839872169717,
|
|
"learning_rate": 2.901271882027894e-05,
|
|
"loss": 1.0624,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 2.081841432225064,
|
|
"grad_norm": 0.4015293185545378,
|
|
"learning_rate": 2.894870390553128e-05,
|
|
"loss": 1.0844,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 2.0869565217391304,
|
|
"grad_norm": 0.38973769415241016,
|
|
"learning_rate": 2.8884574159496524e-05,
|
|
"loss": 1.0455,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 2.0920716112531967,
|
|
"grad_norm": 0.428656757682565,
|
|
"learning_rate": 2.882033040509848e-05,
|
|
"loss": 1.0522,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 2.0971867007672635,
|
|
"grad_norm": 0.4067836857911701,
|
|
"learning_rate": 2.875597346672388e-05,
|
|
"loss": 1.0446,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 2.10230179028133,
|
|
"grad_norm": 0.2658806981254451,
|
|
"learning_rate": 2.8691504170211896e-05,
|
|
"loss": 1.052,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 2.1074168797953963,
|
|
"grad_norm": 0.29345767005827766,
|
|
"learning_rate": 2.862692334284347e-05,
|
|
"loss": 1.071,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 2.112531969309463,
|
|
"grad_norm": 0.31643534871624796,
|
|
"learning_rate": 2.856223181333075e-05,
|
|
"loss": 1.0678,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 2.1176470588235294,
|
|
"grad_norm": 0.30277925647085663,
|
|
"learning_rate": 2.849743041180641e-05,
|
|
"loss": 1.0596,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 2.122762148337596,
|
|
"grad_norm": 0.28360578956825416,
|
|
"learning_rate": 2.8432519969813044e-05,
|
|
"loss": 1.0829,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 2.1278772378516626,
|
|
"grad_norm": 0.25623419269068504,
|
|
"learning_rate": 2.836750132029244e-05,
|
|
"loss": 1.0863,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 2.132992327365729,
|
|
"grad_norm": 0.3028046129485344,
|
|
"learning_rate": 2.8302375297574963e-05,
|
|
"loss": 1.0641,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 2.1381074168797953,
|
|
"grad_norm": 0.243941002386985,
|
|
"learning_rate": 2.8237142737368767e-05,
|
|
"loss": 1.0815,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 2.1432225063938617,
|
|
"grad_norm": 0.2439932508429276,
|
|
"learning_rate": 2.817180447674915e-05,
|
|
"loss": 1.0609,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 2.1483375959079285,
|
|
"grad_norm": 0.33126365061709473,
|
|
"learning_rate": 2.8106361354147754e-05,
|
|
"loss": 1.0782,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 2.153452685421995,
|
|
"grad_norm": 0.2315933256251212,
|
|
"learning_rate": 2.8040814209341834e-05,
|
|
"loss": 1.0732,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 2.1585677749360612,
|
|
"grad_norm": 0.2531413000703873,
|
|
"learning_rate": 2.797516388344348e-05,
|
|
"loss": 1.0776,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 2.163682864450128,
|
|
"grad_norm": 0.24520307389872711,
|
|
"learning_rate": 2.7909411218888805e-05,
|
|
"loss": 1.0575,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 2.1687979539641944,
|
|
"grad_norm": 0.21837227419529545,
|
|
"learning_rate": 2.7843557059427165e-05,
|
|
"loss": 1.0648,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 2.1739130434782608,
|
|
"grad_norm": 0.23257820290766587,
|
|
"learning_rate": 2.7777602250110312e-05,
|
|
"loss": 1.0698,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 2.1790281329923276,
|
|
"grad_norm": 0.24175028544297208,
|
|
"learning_rate": 2.7711547637281547e-05,
|
|
"loss": 1.0827,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 2.184143222506394,
|
|
"grad_norm": 0.24548626955618205,
|
|
"learning_rate": 2.764539406856487e-05,
|
|
"loss": 1.0174,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 2.1892583120204603,
|
|
"grad_norm": 0.2400998129886217,
|
|
"learning_rate": 2.7579142392854108e-05,
|
|
"loss": 1.0736,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 2.1943734015345266,
|
|
"grad_norm": 0.2556650098002111,
|
|
"learning_rate": 2.7512793460301996e-05,
|
|
"loss": 1.0969,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 2.1994884910485935,
|
|
"grad_norm": 0.2657024393852246,
|
|
"learning_rate": 2.7446348122309304e-05,
|
|
"loss": 1.0871,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 2.20460358056266,
|
|
"grad_norm": 0.30061197720631916,
|
|
"learning_rate": 2.7379807231513882e-05,
|
|
"loss": 1.0635,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 2.209718670076726,
|
|
"grad_norm": 0.24434115350001423,
|
|
"learning_rate": 2.7313171641779736e-05,
|
|
"loss": 1.0718,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 2.214833759590793,
|
|
"grad_norm": 0.1943553020243117,
|
|
"learning_rate": 2.724644220818605e-05,
|
|
"loss": 1.0464,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 2.2199488491048593,
|
|
"grad_norm": 0.2409184010564603,
|
|
"learning_rate": 2.7179619787016257e-05,
|
|
"loss": 1.0745,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 2.2250639386189257,
|
|
"grad_norm": 0.2256656815374472,
|
|
"learning_rate": 2.7112705235746985e-05,
|
|
"loss": 1.0747,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 2.2301790281329925,
|
|
"grad_norm": 0.23084899035972747,
|
|
"learning_rate": 2.7045699413037133e-05,
|
|
"loss": 1.057,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 2.235294117647059,
|
|
"grad_norm": 0.21422250948749663,
|
|
"learning_rate": 2.697860317871677e-05,
|
|
"loss": 1.0787,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 2.2404092071611252,
|
|
"grad_norm": 0.22055197289125814,
|
|
"learning_rate": 2.6911417393776172e-05,
|
|
"loss": 1.05,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 2.2455242966751916,
|
|
"grad_norm": 0.2090525921429115,
|
|
"learning_rate": 2.6844142920354722e-05,
|
|
"loss": 1.0525,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 2.2506393861892584,
|
|
"grad_norm": 0.23505089353994232,
|
|
"learning_rate": 2.677678062172989e-05,
|
|
"loss": 1.0435,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 2.2557544757033248,
|
|
"grad_norm": 0.24040274933787345,
|
|
"learning_rate": 2.6709331362306122e-05,
|
|
"loss": 1.0345,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 2.260869565217391,
|
|
"grad_norm": 0.22951772137968907,
|
|
"learning_rate": 2.6641796007603756e-05,
|
|
"loss": 1.0554,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 2.265984654731458,
|
|
"grad_norm": 0.1954752873279388,
|
|
"learning_rate": 2.6574175424247926e-05,
|
|
"loss": 1.0778,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 2.2710997442455243,
|
|
"grad_norm": 0.20768996296631603,
|
|
"learning_rate": 2.6506470479957432e-05,
|
|
"loss": 1.0384,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 2.2762148337595907,
|
|
"grad_norm": 0.2050178399434897,
|
|
"learning_rate": 2.6438682043533606e-05,
|
|
"loss": 1.0819,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 2.2813299232736575,
|
|
"grad_norm": 0.22084478986907694,
|
|
"learning_rate": 2.637081098484918e-05,
|
|
"loss": 1.0681,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 2.286445012787724,
|
|
"grad_norm": 0.2525194084168267,
|
|
"learning_rate": 2.6302858174837084e-05,
|
|
"loss": 1.0766,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 2.29156010230179,
|
|
"grad_norm": 0.20434937850015836,
|
|
"learning_rate": 2.623482448547931e-05,
|
|
"loss": 1.0674,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 2.296675191815857,
|
|
"grad_norm": 0.18111384656350762,
|
|
"learning_rate": 2.6166710789795704e-05,
|
|
"loss": 1.049,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 2.3017902813299234,
|
|
"grad_norm": 0.19742247992531406,
|
|
"learning_rate": 2.6098517961832773e-05,
|
|
"loss": 1.0636,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 2.3069053708439897,
|
|
"grad_norm": 0.23785950833253988,
|
|
"learning_rate": 2.6030246876652445e-05,
|
|
"loss": 1.0504,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 2.312020460358056,
|
|
"grad_norm": 0.2097971952126859,
|
|
"learning_rate": 2.5961898410320894e-05,
|
|
"loss": 1.0361,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 2.317135549872123,
|
|
"grad_norm": 0.2276417405483114,
|
|
"learning_rate": 2.5893473439897215e-05,
|
|
"loss": 1.0411,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 2.3222506393861893,
|
|
"grad_norm": 0.2537489564697595,
|
|
"learning_rate": 2.5824972843422257e-05,
|
|
"loss": 1.0714,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 2.3273657289002556,
|
|
"grad_norm": 0.25653394100856325,
|
|
"learning_rate": 2.5756397499907283e-05,
|
|
"loss": 1.0661,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 2.3324808184143224,
|
|
"grad_norm": 0.22816918192865004,
|
|
"learning_rate": 2.5687748289322744e-05,
|
|
"loss": 1.0596,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 2.337595907928389,
|
|
"grad_norm": 0.21376463186605166,
|
|
"learning_rate": 2.561902609258697e-05,
|
|
"loss": 1.0725,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 2.342710997442455,
|
|
"grad_norm": 0.2242763029015445,
|
|
"learning_rate": 2.5550231791554833e-05,
|
|
"loss": 1.0815,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 2.3478260869565215,
|
|
"grad_norm": 0.17279605910571388,
|
|
"learning_rate": 2.5481366269006497e-05,
|
|
"loss": 1.045,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 0.21321751878224862,
|
|
"learning_rate": 2.541243040863602e-05,
|
|
"loss": 1.0449,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 2.3580562659846547,
|
|
"grad_norm": 0.2068004135775521,
|
|
"learning_rate": 2.5343425095040065e-05,
|
|
"loss": 1.0484,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 2.363171355498721,
|
|
"grad_norm": 0.1767285038930778,
|
|
"learning_rate": 2.527435121370653e-05,
|
|
"loss": 1.0554,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 2.368286445012788,
|
|
"grad_norm": 0.18469974343909754,
|
|
"learning_rate": 2.5205209651003176e-05,
|
|
"loss": 1.0338,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 2.373401534526854,
|
|
"grad_norm": 0.1692343543578853,
|
|
"learning_rate": 2.5136001294166263e-05,
|
|
"loss": 1.0831,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 2.3785166240409206,
|
|
"grad_norm": 0.19527583181910754,
|
|
"learning_rate": 2.506672703128919e-05,
|
|
"loss": 1.0679,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 2.3836317135549874,
|
|
"grad_norm": 0.21170860178119313,
|
|
"learning_rate": 2.4997387751311035e-05,
|
|
"loss": 1.0581,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 2.3887468030690537,
|
|
"grad_norm": 0.21354376020274823,
|
|
"learning_rate": 2.4927984344005212e-05,
|
|
"loss": 1.0488,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 2.39386189258312,
|
|
"grad_norm": 0.16943920612627475,
|
|
"learning_rate": 2.4858517699968027e-05,
|
|
"loss": 1.0498,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 2.398976982097187,
|
|
"grad_norm": 0.2263431725640194,
|
|
"learning_rate": 2.4788988710607232e-05,
|
|
"loss": 1.0649,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 2.4040920716112533,
|
|
"grad_norm": 0.2500711707765182,
|
|
"learning_rate": 2.471939826813063e-05,
|
|
"loss": 1.0549,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 2.4092071611253196,
|
|
"grad_norm": 0.28197002741407323,
|
|
"learning_rate": 2.4649747265534584e-05,
|
|
"loss": 1.0568,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 2.414322250639386,
|
|
"grad_norm": 0.2307560418077442,
|
|
"learning_rate": 2.458003659659257e-05,
|
|
"loss": 1.0643,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 2.419437340153453,
|
|
"grad_norm": 0.2275883404782231,
|
|
"learning_rate": 2.451026715584374e-05,
|
|
"loss": 1.0263,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 2.424552429667519,
|
|
"grad_norm": 0.1837213662498589,
|
|
"learning_rate": 2.4440439838581375e-05,
|
|
"loss": 1.0663,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 2.4296675191815855,
|
|
"grad_norm": 0.2191908919218784,
|
|
"learning_rate": 2.4370555540841477e-05,
|
|
"loss": 1.0508,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 2.4347826086956523,
|
|
"grad_norm": 0.2906689646420887,
|
|
"learning_rate": 2.4300615159391204e-05,
|
|
"loss": 1.0401,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 2.4398976982097187,
|
|
"grad_norm": 0.2883194193305295,
|
|
"learning_rate": 2.423061959171741e-05,
|
|
"loss": 1.0767,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 2.445012787723785,
|
|
"grad_norm": 0.17758442728560847,
|
|
"learning_rate": 2.4160569736015082e-05,
|
|
"loss": 1.0577,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 2.4501278772378514,
|
|
"grad_norm": 0.16717169630312334,
|
|
"learning_rate": 2.4090466491175876e-05,
|
|
"loss": 1.0293,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 2.455242966751918,
|
|
"grad_norm": 0.24782575757267775,
|
|
"learning_rate": 2.4020310756776506e-05,
|
|
"loss": 1.0336,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 2.4603580562659846,
|
|
"grad_norm": 0.2636655812446855,
|
|
"learning_rate": 2.3950103433067273e-05,
|
|
"loss": 1.0809,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 2.4654731457800514,
|
|
"grad_norm": 0.16902242053452235,
|
|
"learning_rate": 2.3879845420960458e-05,
|
|
"loss": 1.0785,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 2.4705882352941178,
|
|
"grad_norm": 0.21078452585148946,
|
|
"learning_rate": 2.3809537622018812e-05,
|
|
"loss": 1.0398,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 2.475703324808184,
|
|
"grad_norm": 0.21973365788168056,
|
|
"learning_rate": 2.373918093844393e-05,
|
|
"loss": 1.0591,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 2.4808184143222505,
|
|
"grad_norm": 0.20412106095476204,
|
|
"learning_rate": 2.3668776273064717e-05,
|
|
"loss": 1.0452,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 2.4859335038363173,
|
|
"grad_norm": 0.20054437938777525,
|
|
"learning_rate": 2.3598324529325783e-05,
|
|
"loss": 1.0632,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 2.4910485933503836,
|
|
"grad_norm": 0.18477265021437198,
|
|
"learning_rate": 2.3527826611275865e-05,
|
|
"loss": 1.0536,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 2.49616368286445,
|
|
"grad_norm": 0.20666512725520833,
|
|
"learning_rate": 2.3457283423556206e-05,
|
|
"loss": 1.0484,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 2.501278772378517,
|
|
"grad_norm": 0.18951044504687325,
|
|
"learning_rate": 2.338669587138897e-05,
|
|
"loss": 1.0577,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 2.506393861892583,
|
|
"grad_norm": 0.16411315959112188,
|
|
"learning_rate": 2.33160648605656e-05,
|
|
"loss": 1.0482,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 2.5115089514066495,
|
|
"grad_norm": 0.20692388287261934,
|
|
"learning_rate": 2.3245391297435208e-05,
|
|
"loss": 1.0296,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 2.516624040920716,
|
|
"grad_norm": 0.22236042948249146,
|
|
"learning_rate": 2.3174676088892955e-05,
|
|
"loss": 1.0461,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 2.5217391304347827,
|
|
"grad_norm": 0.21800230689438133,
|
|
"learning_rate": 2.3103920142368392e-05,
|
|
"loss": 1.0433,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 2.526854219948849,
|
|
"grad_norm": 0.18373456124142468,
|
|
"learning_rate": 2.3033124365813845e-05,
|
|
"loss": 1.0569,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 2.531969309462916,
|
|
"grad_norm": 0.18720140331921853,
|
|
"learning_rate": 2.2962289667692717e-05,
|
|
"loss": 1.0729,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 2.5370843989769822,
|
|
"grad_norm": 0.25688260624393866,
|
|
"learning_rate": 2.2891416956967883e-05,
|
|
"loss": 1.0662,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 2.5421994884910486,
|
|
"grad_norm": 0.22685524650641428,
|
|
"learning_rate": 2.2820507143089986e-05,
|
|
"loss": 1.0482,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 2.547314578005115,
|
|
"grad_norm": 0.22006061149849276,
|
|
"learning_rate": 2.27495611359858e-05,
|
|
"loss": 1.0413,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 2.5524296675191813,
|
|
"grad_norm": 0.2511924102749484,
|
|
"learning_rate": 2.2678579846046526e-05,
|
|
"loss": 1.0525,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 2.557544757033248,
|
|
"grad_norm": 0.2700011728698647,
|
|
"learning_rate": 2.2607564184116125e-05,
|
|
"loss": 1.0477,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 2.5626598465473145,
|
|
"grad_norm": 0.1824818058130156,
|
|
"learning_rate": 2.2536515061479607e-05,
|
|
"loss": 1.0464,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 2.5677749360613813,
|
|
"grad_norm": 0.1815390402019666,
|
|
"learning_rate": 2.2465433389851387e-05,
|
|
"loss": 1.0728,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 2.5728900255754477,
|
|
"grad_norm": 0.20838139814968445,
|
|
"learning_rate": 2.2394320081363527e-05,
|
|
"loss": 1.037,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 2.578005115089514,
|
|
"grad_norm": 0.21820814237765307,
|
|
"learning_rate": 2.2323176048554074e-05,
|
|
"loss": 1.0783,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 2.5831202046035804,
|
|
"grad_norm": 0.18364164718719542,
|
|
"learning_rate": 2.2252002204355333e-05,
|
|
"loss": 1.0616,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 2.588235294117647,
|
|
"grad_norm": 0.1589951601627371,
|
|
"learning_rate": 2.2180799462082145e-05,
|
|
"loss": 1.0597,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 2.5933503836317136,
|
|
"grad_norm": 0.16399196023280369,
|
|
"learning_rate": 2.2109568735420183e-05,
|
|
"loss": 1.0541,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 2.59846547314578,
|
|
"grad_norm": 0.1797478143219866,
|
|
"learning_rate": 2.203831093841422e-05,
|
|
"loss": 1.042,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 2.6035805626598467,
|
|
"grad_norm": 0.17837990860903477,
|
|
"learning_rate": 2.19670269854564e-05,
|
|
"loss": 1.057,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.1824720167101557,
|
|
"learning_rate": 2.189571779127451e-05,
|
|
"loss": 1.0342,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 2.6138107416879794,
|
|
"grad_norm": 0.196252683041784,
|
|
"learning_rate": 2.182438427092022e-05,
|
|
"loss": 1.0468,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 2.618925831202046,
|
|
"grad_norm": 0.18345210192186157,
|
|
"learning_rate": 2.1753027339757367e-05,
|
|
"loss": 1.0555,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 2.6240409207161126,
|
|
"grad_norm": 0.210072150573065,
|
|
"learning_rate": 2.1681647913450208e-05,
|
|
"loss": 1.0474,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 2.629156010230179,
|
|
"grad_norm": 0.1649906960552718,
|
|
"learning_rate": 2.161024690795166e-05,
|
|
"loss": 1.0615,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 2.634271099744246,
|
|
"grad_norm": 0.19273203824222748,
|
|
"learning_rate": 2.1538825239491525e-05,
|
|
"loss": 1.0619,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 2.639386189258312,
|
|
"grad_norm": 0.17231795376570638,
|
|
"learning_rate": 2.1467383824564793e-05,
|
|
"loss": 1.0427,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 2.6445012787723785,
|
|
"grad_norm": 0.2028517262474422,
|
|
"learning_rate": 2.1395923579919805e-05,
|
|
"loss": 1.0736,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 2.649616368286445,
|
|
"grad_norm": 0.18318105559288345,
|
|
"learning_rate": 2.1324445422546562e-05,
|
|
"loss": 1.0541,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 2.6547314578005117,
|
|
"grad_norm": 0.19128403854869414,
|
|
"learning_rate": 2.1252950269664897e-05,
|
|
"loss": 1.0483,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 2.659846547314578,
|
|
"grad_norm": 0.18383821827497523,
|
|
"learning_rate": 2.1181439038712747e-05,
|
|
"loss": 1.0645,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 2.6649616368286444,
|
|
"grad_norm": 0.18884844033700063,
|
|
"learning_rate": 2.1109912647334375e-05,
|
|
"loss": 1.062,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 2.670076726342711,
|
|
"grad_norm": 0.20868590019750766,
|
|
"learning_rate": 2.1038372013368553e-05,
|
|
"loss": 1.0354,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 2.6751918158567776,
|
|
"grad_norm": 0.19337083461260082,
|
|
"learning_rate": 2.0966818054836852e-05,
|
|
"loss": 1.0344,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 2.680306905370844,
|
|
"grad_norm": 0.18868476730256678,
|
|
"learning_rate": 2.08952516899318e-05,
|
|
"loss": 1.0588,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 2.6854219948849103,
|
|
"grad_norm": 0.22485066460796876,
|
|
"learning_rate": 2.0823673837005146e-05,
|
|
"loss": 1.0471,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 2.690537084398977,
|
|
"grad_norm": 0.17863420314473608,
|
|
"learning_rate": 2.075208541455604e-05,
|
|
"loss": 1.0526,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 2.6956521739130435,
|
|
"grad_norm": 0.16122683418094333,
|
|
"learning_rate": 2.068048734121927e-05,
|
|
"loss": 1.0419,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 2.70076726342711,
|
|
"grad_norm": 0.19335277655052013,
|
|
"learning_rate": 2.0608880535753456e-05,
|
|
"loss": 1.0498,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 2.7058823529411766,
|
|
"grad_norm": 0.17128814463154046,
|
|
"learning_rate": 2.0537265917029282e-05,
|
|
"loss": 1.0549,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 2.710997442455243,
|
|
"grad_norm": 0.19141186780590191,
|
|
"learning_rate": 2.046564440401769e-05,
|
|
"loss": 1.0535,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 2.7161125319693094,
|
|
"grad_norm": 0.20405582061795946,
|
|
"learning_rate": 2.039401691577809e-05,
|
|
"loss": 1.0692,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 2.7212276214833757,
|
|
"grad_norm": 0.17731656359333237,
|
|
"learning_rate": 2.0322384371446563e-05,
|
|
"loss": 1.0752,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 2.7263427109974425,
|
|
"grad_norm": 0.21842059415138476,
|
|
"learning_rate": 2.025074769022407e-05,
|
|
"loss": 1.0364,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 2.731457800511509,
|
|
"grad_norm": 0.20651080785780135,
|
|
"learning_rate": 2.0179107791364662e-05,
|
|
"loss": 1.0626,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 2.7365728900255757,
|
|
"grad_norm": 0.17155059878215104,
|
|
"learning_rate": 2.0107465594163686e-05,
|
|
"loss": 1.0715,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 2.741687979539642,
|
|
"grad_norm": 0.191605422088092,
|
|
"learning_rate": 2.0035822017945964e-05,
|
|
"loss": 1.0586,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 2.7468030690537084,
|
|
"grad_norm": 0.20091866570259082,
|
|
"learning_rate": 1.996417798205404e-05,
|
|
"loss": 1.0481,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 2.7519181585677748,
|
|
"grad_norm": 0.1631767258894682,
|
|
"learning_rate": 1.9892534405836314e-05,
|
|
"loss": 1.0727,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 2.7570332480818416,
|
|
"grad_norm": 0.17582792226780383,
|
|
"learning_rate": 1.982089220863534e-05,
|
|
"loss": 1.0516,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 2.762148337595908,
|
|
"grad_norm": 0.21550165289844242,
|
|
"learning_rate": 1.974925230977594e-05,
|
|
"loss": 1.038,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 2.7672634271099743,
|
|
"grad_norm": 0.1535322128634846,
|
|
"learning_rate": 1.9677615628553447e-05,
|
|
"loss": 1.0406,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 2.772378516624041,
|
|
"grad_norm": 0.19027107621098235,
|
|
"learning_rate": 1.9605983084221918e-05,
|
|
"loss": 1.0602,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 2.7774936061381075,
|
|
"grad_norm": 0.18202864426113524,
|
|
"learning_rate": 1.953435559598231e-05,
|
|
"loss": 1.0615,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 2.782608695652174,
|
|
"grad_norm": 0.174647540829116,
|
|
"learning_rate": 1.946273408297072e-05,
|
|
"loss": 1.0261,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 2.78772378516624,
|
|
"grad_norm": 0.1792789344961753,
|
|
"learning_rate": 1.939111946424655e-05,
|
|
"loss": 1.0438,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 2.792838874680307,
|
|
"grad_norm": 0.16977791807638054,
|
|
"learning_rate": 1.9319512658780735e-05,
|
|
"loss": 1.0342,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 2.7979539641943734,
|
|
"grad_norm": 0.184556004403152,
|
|
"learning_rate": 1.9247914585443963e-05,
|
|
"loss": 1.0583,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 2.80306905370844,
|
|
"grad_norm": 0.1657344560842826,
|
|
"learning_rate": 1.9176326162994854e-05,
|
|
"loss": 1.0709,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 2.8081841432225065,
|
|
"grad_norm": 0.22030489192290154,
|
|
"learning_rate": 1.9104748310068203e-05,
|
|
"loss": 1.0708,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 2.813299232736573,
|
|
"grad_norm": 0.16653582773895698,
|
|
"learning_rate": 1.9033181945163158e-05,
|
|
"loss": 1.0528,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 2.8184143222506393,
|
|
"grad_norm": 0.19617601692800635,
|
|
"learning_rate": 1.8961627986631453e-05,
|
|
"loss": 1.0601,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 2.8235294117647056,
|
|
"grad_norm": 0.19528571595370786,
|
|
"learning_rate": 1.889008735266564e-05,
|
|
"loss": 1.0538,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 2.8286445012787724,
|
|
"grad_norm": 0.18171426693241638,
|
|
"learning_rate": 1.8818560961287257e-05,
|
|
"loss": 1.047,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 2.833759590792839,
|
|
"grad_norm": 0.20058111644859636,
|
|
"learning_rate": 1.8747049730335113e-05,
|
|
"loss": 1.0645,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 2.8388746803069056,
|
|
"grad_norm": 0.16553935567855677,
|
|
"learning_rate": 1.8675554577453445e-05,
|
|
"loss": 1.064,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 2.843989769820972,
|
|
"grad_norm": 0.1967672535744814,
|
|
"learning_rate": 1.8604076420080198e-05,
|
|
"loss": 1.0577,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 2.8491048593350383,
|
|
"grad_norm": 0.1756479122957693,
|
|
"learning_rate": 1.8532616175435218e-05,
|
|
"loss": 1.0505,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 2.8542199488491047,
|
|
"grad_norm": 0.17168091235870145,
|
|
"learning_rate": 1.8461174760508475e-05,
|
|
"loss": 1.0519,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 2.8593350383631715,
|
|
"grad_norm": 0.15321315491437887,
|
|
"learning_rate": 1.8389753092048347e-05,
|
|
"loss": 1.0486,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 2.864450127877238,
|
|
"grad_norm": 0.18225430033662582,
|
|
"learning_rate": 1.8318352086549792e-05,
|
|
"loss": 1.066,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 2.869565217391304,
|
|
"grad_norm": 0.16006236619371164,
|
|
"learning_rate": 1.8246972660242636e-05,
|
|
"loss": 1.0515,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 2.874680306905371,
|
|
"grad_norm": 0.16253091851156742,
|
|
"learning_rate": 1.8175615729079795e-05,
|
|
"loss": 1.0541,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 2.8797953964194374,
|
|
"grad_norm": 0.16423315718989012,
|
|
"learning_rate": 1.8104282208725496e-05,
|
|
"loss": 1.0667,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 2.8849104859335037,
|
|
"grad_norm": 0.16926338455307693,
|
|
"learning_rate": 1.8032973014543608e-05,
|
|
"loss": 1.0313,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 2.89002557544757,
|
|
"grad_norm": 0.17421414000562616,
|
|
"learning_rate": 1.7961689061585778e-05,
|
|
"loss": 1.0536,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 2.895140664961637,
|
|
"grad_norm": 0.17451819177181235,
|
|
"learning_rate": 1.7890431264579823e-05,
|
|
"loss": 1.0467,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 2.9002557544757033,
|
|
"grad_norm": 0.15316592607524337,
|
|
"learning_rate": 1.7819200537917865e-05,
|
|
"loss": 1.0362,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 2.90537084398977,
|
|
"grad_norm": 0.15871802556233625,
|
|
"learning_rate": 1.7747997795644673e-05,
|
|
"loss": 1.0464,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 2.9104859335038364,
|
|
"grad_norm": 0.16383928906479925,
|
|
"learning_rate": 1.7676823951445932e-05,
|
|
"loss": 1.0643,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 2.915601023017903,
|
|
"grad_norm": 0.1564037056868544,
|
|
"learning_rate": 1.7605679918636477e-05,
|
|
"loss": 1.0468,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 2.920716112531969,
|
|
"grad_norm": 0.16292314246842024,
|
|
"learning_rate": 1.753456661014862e-05,
|
|
"loss": 1.0627,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 2.9258312020460355,
|
|
"grad_norm": 0.17007157486665242,
|
|
"learning_rate": 1.7463484938520403e-05,
|
|
"loss": 1.0543,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 2.9309462915601023,
|
|
"grad_norm": 0.18572662827402042,
|
|
"learning_rate": 1.7392435815883882e-05,
|
|
"loss": 1.0414,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 2.9360613810741687,
|
|
"grad_norm": 0.17257165847671296,
|
|
"learning_rate": 1.732142015395348e-05,
|
|
"loss": 1.0529,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 0.20412881132319738,
|
|
"learning_rate": 1.72504388640142e-05,
|
|
"loss": 1.0625,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 2.946291560102302,
|
|
"grad_norm": 0.20481457378652648,
|
|
"learning_rate": 1.717949285691002e-05,
|
|
"loss": 1.0555,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 2.9514066496163682,
|
|
"grad_norm": 0.20710495242836005,
|
|
"learning_rate": 1.7108583043032128e-05,
|
|
"loss": 1.0676,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 2.9565217391304346,
|
|
"grad_norm": 0.18506419413597094,
|
|
"learning_rate": 1.703771033230729e-05,
|
|
"loss": 1.073,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 2.9616368286445014,
|
|
"grad_norm": 0.21026685381302263,
|
|
"learning_rate": 1.6966875634186165e-05,
|
|
"loss": 1.0489,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 2.9667519181585678,
|
|
"grad_norm": 0.1426296083677054,
|
|
"learning_rate": 1.6896079857631608e-05,
|
|
"loss": 1.0551,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 2.971867007672634,
|
|
"grad_norm": 0.21547970346674888,
|
|
"learning_rate": 1.682532391110705e-05,
|
|
"loss": 1.0405,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 2.976982097186701,
|
|
"grad_norm": 0.1789073483963138,
|
|
"learning_rate": 1.67546087025648e-05,
|
|
"loss": 1.0597,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 2.9820971867007673,
|
|
"grad_norm": 0.17706811680146436,
|
|
"learning_rate": 1.6683935139434407e-05,
|
|
"loss": 1.0613,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 2.9872122762148337,
|
|
"grad_norm": 0.15139119384163008,
|
|
"learning_rate": 1.6613304128611033e-05,
|
|
"loss": 1.0619,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 2.9923273657289,
|
|
"grad_norm": 0.14879029530531915,
|
|
"learning_rate": 1.6542716576443794e-05,
|
|
"loss": 1.0425,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 2.997442455242967,
|
|
"grad_norm": 0.17723730809616817,
|
|
"learning_rate": 1.647217338872414e-05,
|
|
"loss": 1.0587,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 3.002557544757033,
|
|
"grad_norm": 0.3071736955013866,
|
|
"learning_rate": 1.6401675470674227e-05,
|
|
"loss": 1.6121,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 3.0076726342710995,
|
|
"grad_norm": 0.26492500317946227,
|
|
"learning_rate": 1.633122372693529e-05,
|
|
"loss": 1.0672,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 3.0127877237851663,
|
|
"grad_norm": 0.15890708426469805,
|
|
"learning_rate": 1.626081906155608e-05,
|
|
"loss": 1.0691,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 3.0179028132992327,
|
|
"grad_norm": 0.2443938647122444,
|
|
"learning_rate": 1.6190462377981195e-05,
|
|
"loss": 1.0177,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 3.023017902813299,
|
|
"grad_norm": 0.219676833544469,
|
|
"learning_rate": 1.6120154579039545e-05,
|
|
"loss": 1.059,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 3.028132992327366,
|
|
"grad_norm": 0.1914974178554955,
|
|
"learning_rate": 1.6049896566932734e-05,
|
|
"loss": 1.0517,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 3.0332480818414322,
|
|
"grad_norm": 0.2531309353890888,
|
|
"learning_rate": 1.59796892432235e-05,
|
|
"loss": 1.0297,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 3.0383631713554986,
|
|
"grad_norm": 0.1792627598925816,
|
|
"learning_rate": 1.5909533508824134e-05,
|
|
"loss": 1.0276,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.2125713178736253,
|
|
"learning_rate": 1.5839430263984918e-05,
|
|
"loss": 1.04,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 3.0485933503836318,
|
|
"grad_norm": 0.25332731139582426,
|
|
"learning_rate": 1.5769380408282597e-05,
|
|
"loss": 1.0311,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 3.053708439897698,
|
|
"grad_norm": 0.14333190077941838,
|
|
"learning_rate": 1.5699384840608796e-05,
|
|
"loss": 1.0431,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 3.0588235294117645,
|
|
"grad_norm": 0.22250304106653512,
|
|
"learning_rate": 1.5629444459158526e-05,
|
|
"loss": 1.0448,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 3.0639386189258313,
|
|
"grad_norm": 0.15763841427027347,
|
|
"learning_rate": 1.5559560161418635e-05,
|
|
"loss": 1.0447,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 3.0690537084398977,
|
|
"grad_norm": 0.18154306752668684,
|
|
"learning_rate": 1.5489732844156267e-05,
|
|
"loss": 1.058,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 3.074168797953964,
|
|
"grad_norm": 0.17175626290889617,
|
|
"learning_rate": 1.5419963403407437e-05,
|
|
"loss": 1.0456,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 3.079283887468031,
|
|
"grad_norm": 0.15750320891130676,
|
|
"learning_rate": 1.535025273446542e-05,
|
|
"loss": 1.0565,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 3.084398976982097,
|
|
"grad_norm": 0.1587964508315697,
|
|
"learning_rate": 1.5280601731869375e-05,
|
|
"loss": 1.0545,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 3.0895140664961636,
|
|
"grad_norm": 0.15466633117165798,
|
|
"learning_rate": 1.5211011289392775e-05,
|
|
"loss": 1.0793,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 3.0946291560102304,
|
|
"grad_norm": 0.16479484418980225,
|
|
"learning_rate": 1.514148230003198e-05,
|
|
"loss": 1.0527,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 3.0997442455242967,
|
|
"grad_norm": 0.14980835645414117,
|
|
"learning_rate": 1.5072015655994793e-05,
|
|
"loss": 1.0261,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 3.104859335038363,
|
|
"grad_norm": 0.14149499986126193,
|
|
"learning_rate": 1.500261224868897e-05,
|
|
"loss": 1.0639,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 3.10997442455243,
|
|
"grad_norm": 0.14663927333662252,
|
|
"learning_rate": 1.4933272968710819e-05,
|
|
"loss": 1.0533,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 3.1150895140664963,
|
|
"grad_norm": 0.12780353135571323,
|
|
"learning_rate": 1.486399870583374e-05,
|
|
"loss": 1.0507,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 3.1202046035805626,
|
|
"grad_norm": 0.1568073693632928,
|
|
"learning_rate": 1.4794790348996833e-05,
|
|
"loss": 1.0581,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 3.125319693094629,
|
|
"grad_norm": 0.1384187110673024,
|
|
"learning_rate": 1.4725648786293478e-05,
|
|
"loss": 1.0005,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 3.130434782608696,
|
|
"grad_norm": 0.15518779732930923,
|
|
"learning_rate": 1.4656574904959937e-05,
|
|
"loss": 1.0461,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 3.135549872122762,
|
|
"grad_norm": 0.1377191810212374,
|
|
"learning_rate": 1.4587569591363988e-05,
|
|
"loss": 1.039,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 3.1406649616368285,
|
|
"grad_norm": 0.14717408978482713,
|
|
"learning_rate": 1.4518633730993515e-05,
|
|
"loss": 1.0585,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 3.1457800511508953,
|
|
"grad_norm": 0.14545458947244394,
|
|
"learning_rate": 1.444976820844517e-05,
|
|
"loss": 1.0589,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 3.1508951406649617,
|
|
"grad_norm": 0.15779651560328295,
|
|
"learning_rate": 1.438097390741304e-05,
|
|
"loss": 1.0507,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 3.156010230179028,
|
|
"grad_norm": 0.16768466110672925,
|
|
"learning_rate": 1.431225171067726e-05,
|
|
"loss": 1.0798,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 3.1611253196930944,
|
|
"grad_norm": 0.1360648364823117,
|
|
"learning_rate": 1.4243602500092725e-05,
|
|
"loss": 1.0532,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 3.166240409207161,
|
|
"grad_norm": 0.16595760551607416,
|
|
"learning_rate": 1.4175027156577757e-05,
|
|
"loss": 1.0399,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 3.1713554987212276,
|
|
"grad_norm": 0.1698846080059336,
|
|
"learning_rate": 1.4106526560102788e-05,
|
|
"loss": 1.0342,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 3.176470588235294,
|
|
"grad_norm": 0.14590952827196066,
|
|
"learning_rate": 1.4038101589679115e-05,
|
|
"loss": 1.0259,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 3.1815856777493607,
|
|
"grad_norm": 0.14992604919353308,
|
|
"learning_rate": 1.3969753123347553e-05,
|
|
"loss": 1.0317,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 3.186700767263427,
|
|
"grad_norm": 0.15939032054557312,
|
|
"learning_rate": 1.3901482038167235e-05,
|
|
"loss": 1.0294,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 3.1918158567774935,
|
|
"grad_norm": 0.14501109411782664,
|
|
"learning_rate": 1.3833289210204299e-05,
|
|
"loss": 1.0515,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 3.1969309462915603,
|
|
"grad_norm": 0.1539538622034391,
|
|
"learning_rate": 1.3765175514520697e-05,
|
|
"loss": 1.0351,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 3.2020460358056266,
|
|
"grad_norm": 0.13029762347002058,
|
|
"learning_rate": 1.3697141825162928e-05,
|
|
"loss": 1.0483,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 3.207161125319693,
|
|
"grad_norm": 0.14812456235087262,
|
|
"learning_rate": 1.3629189015150824e-05,
|
|
"loss": 1.0208,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 3.21227621483376,
|
|
"grad_norm": 0.13396106035236727,
|
|
"learning_rate": 1.3561317956466397e-05,
|
|
"loss": 1.0326,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 3.217391304347826,
|
|
"grad_norm": 0.1341030709959724,
|
|
"learning_rate": 1.3493529520042574e-05,
|
|
"loss": 1.0398,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 3.2225063938618925,
|
|
"grad_norm": 0.13625306065057746,
|
|
"learning_rate": 1.3425824575752082e-05,
|
|
"loss": 1.0564,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 3.227621483375959,
|
|
"grad_norm": 0.14453871577407487,
|
|
"learning_rate": 1.3358203992396253e-05,
|
|
"loss": 1.0316,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 3.2327365728900257,
|
|
"grad_norm": 0.12896562231375275,
|
|
"learning_rate": 1.3290668637693883e-05,
|
|
"loss": 1.0368,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 3.237851662404092,
|
|
"grad_norm": 0.138959236048212,
|
|
"learning_rate": 1.3223219378270114e-05,
|
|
"loss": 1.0375,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 3.2429667519181584,
|
|
"grad_norm": 0.1395052519380716,
|
|
"learning_rate": 1.315585707964528e-05,
|
|
"loss": 1.0195,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 3.2480818414322252,
|
|
"grad_norm": 0.1374425398825904,
|
|
"learning_rate": 1.3088582606223836e-05,
|
|
"loss": 1.0337,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 3.2531969309462916,
|
|
"grad_norm": 0.15730060552663264,
|
|
"learning_rate": 1.3021396821283242e-05,
|
|
"loss": 1.045,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 3.258312020460358,
|
|
"grad_norm": 0.13746345154314507,
|
|
"learning_rate": 1.295430058696287e-05,
|
|
"loss": 1.0303,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 3.2634271099744243,
|
|
"grad_norm": 0.15421183051065532,
|
|
"learning_rate": 1.288729476425302e-05,
|
|
"loss": 1.0418,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 3.268542199488491,
|
|
"grad_norm": 0.1598425847647116,
|
|
"learning_rate": 1.2820380212983748e-05,
|
|
"loss": 1.0481,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 3.2736572890025575,
|
|
"grad_norm": 0.1390236215037552,
|
|
"learning_rate": 1.2753557791813953e-05,
|
|
"loss": 1.0649,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 3.2787723785166243,
|
|
"grad_norm": 0.19941059675590572,
|
|
"learning_rate": 1.2686828358220273e-05,
|
|
"loss": 1.0573,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 3.2838874680306906,
|
|
"grad_norm": 0.14878037647197712,
|
|
"learning_rate": 1.2620192768486121e-05,
|
|
"loss": 1.0395,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 3.289002557544757,
|
|
"grad_norm": 0.15545058295048259,
|
|
"learning_rate": 1.25536518776907e-05,
|
|
"loss": 1.0382,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 3.2941176470588234,
|
|
"grad_norm": 0.14131847135345135,
|
|
"learning_rate": 1.2487206539698007e-05,
|
|
"loss": 1.0397,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 3.29923273657289,
|
|
"grad_norm": 0.16100758922061306,
|
|
"learning_rate": 1.2420857607145897e-05,
|
|
"loss": 1.0117,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 3.3043478260869565,
|
|
"grad_norm": 0.17269827345207395,
|
|
"learning_rate": 1.2354605931435133e-05,
|
|
"loss": 1.0463,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 3.309462915601023,
|
|
"grad_norm": 0.1569003313816037,
|
|
"learning_rate": 1.2288452362718454e-05,
|
|
"loss": 1.0166,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 3.3145780051150897,
|
|
"grad_norm": 0.16537040330310332,
|
|
"learning_rate": 1.2222397749889691e-05,
|
|
"loss": 1.0399,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 3.319693094629156,
|
|
"grad_norm": 0.13784997627726225,
|
|
"learning_rate": 1.2156442940572835e-05,
|
|
"loss": 1.0235,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 3.3248081841432224,
|
|
"grad_norm": 0.1545373822751002,
|
|
"learning_rate": 1.2090588781111197e-05,
|
|
"loss": 1.0643,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 3.329923273657289,
|
|
"grad_norm": 0.14181454877401659,
|
|
"learning_rate": 1.202483611655653e-05,
|
|
"loss": 1.0407,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 3.3350383631713556,
|
|
"grad_norm": 0.15648818999987013,
|
|
"learning_rate": 1.195918579065817e-05,
|
|
"loss": 1.0333,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 3.340153452685422,
|
|
"grad_norm": 0.13765084151466453,
|
|
"learning_rate": 1.1893638645852254e-05,
|
|
"loss": 1.0534,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 3.3452685421994883,
|
|
"grad_norm": 0.12576483675459751,
|
|
"learning_rate": 1.1828195523250857e-05,
|
|
"loss": 1.0149,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 3.350383631713555,
|
|
"grad_norm": 0.13956835034375759,
|
|
"learning_rate": 1.176285726263124e-05,
|
|
"loss": 1.052,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 3.3554987212276215,
|
|
"grad_norm": 0.1335543619596467,
|
|
"learning_rate": 1.1697624702425045e-05,
|
|
"loss": 1.035,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 3.360613810741688,
|
|
"grad_norm": 0.1572699150312706,
|
|
"learning_rate": 1.1632498679707562e-05,
|
|
"loss": 1.0416,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 3.3657289002557547,
|
|
"grad_norm": 0.14309357126008015,
|
|
"learning_rate": 1.1567480030186968e-05,
|
|
"loss": 1.0237,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 3.370843989769821,
|
|
"grad_norm": 0.14894097005492454,
|
|
"learning_rate": 1.1502569588193586e-05,
|
|
"loss": 1.0584,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 3.3759590792838874,
|
|
"grad_norm": 0.12923588532019384,
|
|
"learning_rate": 1.1437768186669253e-05,
|
|
"loss": 1.0327,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 3.381074168797954,
|
|
"grad_norm": 0.16813432468192543,
|
|
"learning_rate": 1.1373076657156532e-05,
|
|
"loss": 1.0162,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 3.3861892583120206,
|
|
"grad_norm": 0.13954296082267167,
|
|
"learning_rate": 1.1308495829788115e-05,
|
|
"loss": 1.0317,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 3.391304347826087,
|
|
"grad_norm": 0.16248894290277624,
|
|
"learning_rate": 1.1244026533276127e-05,
|
|
"loss": 1.0551,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 3.3964194373401533,
|
|
"grad_norm": 0.16453679177936717,
|
|
"learning_rate": 1.1179669594901528e-05,
|
|
"loss": 1.0159,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 3.40153452685422,
|
|
"grad_norm": 0.14385678092390486,
|
|
"learning_rate": 1.1115425840503482e-05,
|
|
"loss": 1.065,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 3.4066496163682864,
|
|
"grad_norm": 0.1465819532723495,
|
|
"learning_rate": 1.1051296094468729e-05,
|
|
"loss": 1.047,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 3.411764705882353,
|
|
"grad_norm": 0.1695383670185565,
|
|
"learning_rate": 1.098728117972106e-05,
|
|
"loss": 1.0356,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 3.4168797953964196,
|
|
"grad_norm": 0.13657609259702944,
|
|
"learning_rate": 1.0923381917710736e-05,
|
|
"loss": 1.054,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 3.421994884910486,
|
|
"grad_norm": 0.15659973454341425,
|
|
"learning_rate": 1.0859599128403912e-05,
|
|
"loss": 1.0221,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 3.4271099744245523,
|
|
"grad_norm": 0.10542430698183479,
|
|
"learning_rate": 1.0795933630272181e-05,
|
|
"loss": 1.0594,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 3.4322250639386187,
|
|
"grad_norm": 0.17862668891732322,
|
|
"learning_rate": 1.0732386240281998e-05,
|
|
"loss": 1.0591,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 3.4373401534526855,
|
|
"grad_norm": 0.12133606037305075,
|
|
"learning_rate": 1.0668957773884281e-05,
|
|
"loss": 1.0303,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 3.442455242966752,
|
|
"grad_norm": 0.15936735682917638,
|
|
"learning_rate": 1.0605649045003861e-05,
|
|
"loss": 1.0516,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 3.4475703324808182,
|
|
"grad_norm": 0.13145770125675021,
|
|
"learning_rate": 1.0542460866029086e-05,
|
|
"loss": 1.0402,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 3.452685421994885,
|
|
"grad_norm": 0.16728115574775487,
|
|
"learning_rate": 1.0479394047801392e-05,
|
|
"loss": 1.0857,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 3.4578005115089514,
|
|
"grad_norm": 0.13370080173440338,
|
|
"learning_rate": 1.0416449399604898e-05,
|
|
"loss": 1.0455,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 3.4629156010230178,
|
|
"grad_norm": 0.1600795388469388,
|
|
"learning_rate": 1.035362772915602e-05,
|
|
"loss": 1.027,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 3.4680306905370846,
|
|
"grad_norm": 0.15111783496094752,
|
|
"learning_rate": 1.0290929842593074e-05,
|
|
"loss": 1.0377,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 3.473145780051151,
|
|
"grad_norm": 0.1523541044075201,
|
|
"learning_rate": 1.022835654446599e-05,
|
|
"loss": 1.0403,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.14954178374830374,
|
|
"learning_rate": 1.0165908637725957e-05,
|
|
"loss": 1.0701,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 3.483375959079284,
|
|
"grad_norm": 0.1524360548398074,
|
|
"learning_rate": 1.0103586923715092e-05,
|
|
"loss": 1.0332,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 3.4884910485933505,
|
|
"grad_norm": 0.13627756259558588,
|
|
"learning_rate": 1.0041392202156217e-05,
|
|
"loss": 1.0426,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 3.493606138107417,
|
|
"grad_norm": 0.13374904883755645,
|
|
"learning_rate": 9.979325271142561e-06,
|
|
"loss": 1.0804,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 3.498721227621483,
|
|
"grad_norm": 0.1335636067760998,
|
|
"learning_rate": 9.917386927127498e-06,
|
|
"loss": 1.0181,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 3.50383631713555,
|
|
"grad_norm": 0.13186547128081622,
|
|
"learning_rate": 9.855577964914385e-06,
|
|
"loss": 1.0611,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 3.5089514066496164,
|
|
"grad_norm": 0.14160556585225306,
|
|
"learning_rate": 9.793899177646297e-06,
|
|
"loss": 1.0724,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 3.5140664961636827,
|
|
"grad_norm": 0.14343913251962634,
|
|
"learning_rate": 9.73235135679591e-06,
|
|
"loss": 1.0421,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 3.5191815856777495,
|
|
"grad_norm": 0.13744196044235218,
|
|
"learning_rate": 9.670935292155313e-06,
|
|
"loss": 1.0165,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 3.524296675191816,
|
|
"grad_norm": 0.15599826782771067,
|
|
"learning_rate": 9.60965177182585e-06,
|
|
"loss": 1.0441,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 3.5294117647058822,
|
|
"grad_norm": 0.134874002528585,
|
|
"learning_rate": 9.548501582208065e-06,
|
|
"loss": 1.0521,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 3.5345268542199486,
|
|
"grad_norm": 0.1483611542190334,
|
|
"learning_rate": 9.48748550799157e-06,
|
|
"loss": 1.0797,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 3.5396419437340154,
|
|
"grad_norm": 0.13453528613994495,
|
|
"learning_rate": 9.426604332144985e-06,
|
|
"loss": 1.0437,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 3.544757033248082,
|
|
"grad_norm": 0.14159262869376904,
|
|
"learning_rate": 9.365858835905878e-06,
|
|
"loss": 1.0453,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 3.5498721227621486,
|
|
"grad_norm": 0.12925051990957068,
|
|
"learning_rate": 9.305249798770774e-06,
|
|
"loss": 1.0373,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 3.554987212276215,
|
|
"grad_norm": 0.12332769648279926,
|
|
"learning_rate": 9.244777998485129e-06,
|
|
"loss": 1.0673,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 3.5601023017902813,
|
|
"grad_norm": 0.12304869088961108,
|
|
"learning_rate": 9.184444211033333e-06,
|
|
"loss": 1.0324,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 3.5652173913043477,
|
|
"grad_norm": 0.11538632936434459,
|
|
"learning_rate": 9.124249210628795e-06,
|
|
"loss": 1.0584,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 3.5703324808184145,
|
|
"grad_norm": 0.13689773750974635,
|
|
"learning_rate": 9.064193769703957e-06,
|
|
"loss": 1.0391,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 3.575447570332481,
|
|
"grad_norm": 0.11857750937375874,
|
|
"learning_rate": 9.004278658900456e-06,
|
|
"loss": 1.0401,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 3.580562659846547,
|
|
"grad_norm": 0.1492037904190607,
|
|
"learning_rate": 8.94450464705915e-06,
|
|
"loss": 1.0467,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 3.585677749360614,
|
|
"grad_norm": 0.12369105867375763,
|
|
"learning_rate": 8.884872501210288e-06,
|
|
"loss": 1.0372,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 3.5907928388746804,
|
|
"grad_norm": 0.13306031841676783,
|
|
"learning_rate": 8.8253829865637e-06,
|
|
"loss": 1.039,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 3.5959079283887467,
|
|
"grad_norm": 0.14095435133920914,
|
|
"learning_rate": 8.766036866498929e-06,
|
|
"loss": 1.0441,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 3.601023017902813,
|
|
"grad_norm": 0.12562296469541392,
|
|
"learning_rate": 8.706834902555465e-06,
|
|
"loss": 1.0566,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 3.60613810741688,
|
|
"grad_norm": 0.13493082025934391,
|
|
"learning_rate": 8.647777854422945e-06,
|
|
"loss": 1.014,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 3.6112531969309463,
|
|
"grad_norm": 0.12574671572281473,
|
|
"learning_rate": 8.588866479931436e-06,
|
|
"loss": 1.0562,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 3.6163682864450126,
|
|
"grad_norm": 0.12741627369712372,
|
|
"learning_rate": 8.530101535041701e-06,
|
|
"loss": 1.0359,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 3.6214833759590794,
|
|
"grad_norm": 0.13342253434879386,
|
|
"learning_rate": 8.471483773835472e-06,
|
|
"loss": 1.0348,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 3.626598465473146,
|
|
"grad_norm": 0.13078373294734275,
|
|
"learning_rate": 8.413013948505822e-06,
|
|
"loss": 1.0244,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 3.631713554987212,
|
|
"grad_norm": 0.12343477770132286,
|
|
"learning_rate": 8.354692809347455e-06,
|
|
"loss": 1.0624,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 3.6368286445012785,
|
|
"grad_norm": 0.1235318396130475,
|
|
"learning_rate": 8.296521104747135e-06,
|
|
"loss": 1.0422,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 3.6419437340153453,
|
|
"grad_norm": 0.13132079552275194,
|
|
"learning_rate": 8.238499581174055e-06,
|
|
"loss": 1.0344,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 3.6470588235294117,
|
|
"grad_norm": 0.12173789633241248,
|
|
"learning_rate": 8.180628983170235e-06,
|
|
"loss": 1.0552,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 3.6521739130434785,
|
|
"grad_norm": 0.1348770641930468,
|
|
"learning_rate": 8.122910053341018e-06,
|
|
"loss": 1.0255,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 3.657289002557545,
|
|
"grad_norm": 0.1174470741078736,
|
|
"learning_rate": 8.065343532345513e-06,
|
|
"loss": 1.0349,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 3.662404092071611,
|
|
"grad_norm": 0.13327709149809897,
|
|
"learning_rate": 8.00793015888707e-06,
|
|
"loss": 1.061,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 3.6675191815856776,
|
|
"grad_norm": 0.11675888468655334,
|
|
"learning_rate": 7.95067066970385e-06,
|
|
"loss": 1.0161,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 3.6726342710997444,
|
|
"grad_norm": 0.12060066317225653,
|
|
"learning_rate": 7.893565799559335e-06,
|
|
"loss": 1.056,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 3.6777493606138107,
|
|
"grad_norm": 0.12379438618187949,
|
|
"learning_rate": 7.836616281232913e-06,
|
|
"loss": 1.0606,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 3.682864450127877,
|
|
"grad_norm": 0.11445856861073442,
|
|
"learning_rate": 7.779822845510463e-06,
|
|
"loss": 1.0239,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 3.687979539641944,
|
|
"grad_norm": 0.11526838488380206,
|
|
"learning_rate": 7.723186221174976e-06,
|
|
"loss": 1.0248,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 3.6930946291560103,
|
|
"grad_norm": 0.11128815380192054,
|
|
"learning_rate": 7.666707134997255e-06,
|
|
"loss": 1.0227,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 3.6982097186700766,
|
|
"grad_norm": 0.12789433531914984,
|
|
"learning_rate": 7.610386311726494e-06,
|
|
"loss": 1.0391,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 3.703324808184143,
|
|
"grad_norm": 0.12654463075348907,
|
|
"learning_rate": 7.554224474081073e-06,
|
|
"loss": 1.0569,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 3.70843989769821,
|
|
"grad_norm": 0.12120187889801023,
|
|
"learning_rate": 7.498222342739205e-06,
|
|
"loss": 1.0316,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 3.713554987212276,
|
|
"grad_norm": 0.12870715819071762,
|
|
"learning_rate": 7.442380636329754e-06,
|
|
"loss": 1.0272,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 3.718670076726343,
|
|
"grad_norm": 0.12038936558963169,
|
|
"learning_rate": 7.386700071422977e-06,
|
|
"loss": 1.0442,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 3.7237851662404093,
|
|
"grad_norm": 0.12005588437284867,
|
|
"learning_rate": 7.331181362521316e-06,
|
|
"loss": 1.0219,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 3.7289002557544757,
|
|
"grad_norm": 0.13885672282036873,
|
|
"learning_rate": 7.2758252220502766e-06,
|
|
"loss": 1.0549,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 3.734015345268542,
|
|
"grad_norm": 0.114685790777722,
|
|
"learning_rate": 7.220632360349245e-06,
|
|
"loss": 1.0531,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 3.7391304347826084,
|
|
"grad_norm": 0.12796701045791628,
|
|
"learning_rate": 7.165603485662394e-06,
|
|
"loss": 1.0161,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 3.7442455242966752,
|
|
"grad_norm": 0.12255668076944778,
|
|
"learning_rate": 7.110739304129575e-06,
|
|
"loss": 1.0506,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 3.7493606138107416,
|
|
"grad_norm": 0.11147398883269082,
|
|
"learning_rate": 7.056040519777265e-06,
|
|
"loss": 1.0386,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 3.7544757033248084,
|
|
"grad_norm": 0.11571505214353113,
|
|
"learning_rate": 7.001507834509573e-06,
|
|
"loss": 1.0268,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 3.7595907928388748,
|
|
"grad_norm": 0.11338997067713637,
|
|
"learning_rate": 6.9471419480991495e-06,
|
|
"loss": 1.042,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 3.764705882352941,
|
|
"grad_norm": 0.10429396376138998,
|
|
"learning_rate": 6.892943558178289e-06,
|
|
"loss": 1.0491,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 3.7698209718670075,
|
|
"grad_norm": 0.19807030388806532,
|
|
"learning_rate": 6.838913360229913e-06,
|
|
"loss": 1.0402,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 3.7749360613810743,
|
|
"grad_norm": 0.10549982080612258,
|
|
"learning_rate": 6.785052047578697e-06,
|
|
"loss": 1.0358,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 3.7800511508951407,
|
|
"grad_norm": 0.12088290265838489,
|
|
"learning_rate": 6.731360311382156e-06,
|
|
"loss": 1.0291,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 3.785166240409207,
|
|
"grad_norm": 0.10642570829304446,
|
|
"learning_rate": 6.677838840621742e-06,
|
|
"loss": 1.0225,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 3.790281329923274,
|
|
"grad_norm": 0.11324080154482034,
|
|
"learning_rate": 6.624488322094058e-06,
|
|
"loss": 1.0345,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 3.79539641943734,
|
|
"grad_norm": 0.11373484571233722,
|
|
"learning_rate": 6.571309440402021e-06,
|
|
"loss": 1.0839,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 3.8005115089514065,
|
|
"grad_norm": 0.11462945002712063,
|
|
"learning_rate": 6.518302877946048e-06,
|
|
"loss": 1.0669,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 3.805626598465473,
|
|
"grad_norm": 0.11204861716772031,
|
|
"learning_rate": 6.465469314915352e-06,
|
|
"loss": 1.046,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 3.8107416879795397,
|
|
"grad_norm": 0.12022178279314234,
|
|
"learning_rate": 6.412809429279179e-06,
|
|
"loss": 1.0512,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 3.815856777493606,
|
|
"grad_norm": 0.12165382757174402,
|
|
"learning_rate": 6.3603238967781245e-06,
|
|
"loss": 1.0341,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 3.820971867007673,
|
|
"grad_norm": 0.12397109577620231,
|
|
"learning_rate": 6.308013390915439e-06,
|
|
"loss": 1.0286,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 3.8260869565217392,
|
|
"grad_norm": 0.12905831522454317,
|
|
"learning_rate": 6.255878582948409e-06,
|
|
"loss": 1.0732,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 3.8312020460358056,
|
|
"grad_norm": 0.10775412647558373,
|
|
"learning_rate": 6.203920141879742e-06,
|
|
"loss": 1.031,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 3.836317135549872,
|
|
"grad_norm": 0.12988530370192228,
|
|
"learning_rate": 6.1521387344489716e-06,
|
|
"loss": 1.0456,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 3.8414322250639388,
|
|
"grad_norm": 0.1304249761399152,
|
|
"learning_rate": 6.100535025123908e-06,
|
|
"loss": 1.0363,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 3.846547314578005,
|
|
"grad_norm": 0.10738021940802008,
|
|
"learning_rate": 6.049109676092097e-06,
|
|
"loss": 1.0397,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 3.8516624040920715,
|
|
"grad_norm": 0.12589865567427963,
|
|
"learning_rate": 5.9978633472523505e-06,
|
|
"loss": 1.0501,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 3.8567774936061383,
|
|
"grad_norm": 0.11070976622585119,
|
|
"learning_rate": 5.94679669620626e-06,
|
|
"loss": 1.0591,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 3.8618925831202047,
|
|
"grad_norm": 0.1117884773522583,
|
|
"learning_rate": 5.895910378249749e-06,
|
|
"loss": 1.04,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 3.867007672634271,
|
|
"grad_norm": 0.11784667783075373,
|
|
"learning_rate": 5.845205046364688e-06,
|
|
"loss": 1.0224,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 3.8721227621483374,
|
|
"grad_norm": 0.11631341575223461,
|
|
"learning_rate": 5.7946813512105025e-06,
|
|
"loss": 1.0242,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 3.877237851662404,
|
|
"grad_norm": 0.11957689310265991,
|
|
"learning_rate": 5.744339941115826e-06,
|
|
"loss": 1.0501,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 3.8823529411764706,
|
|
"grad_norm": 0.11842983649303031,
|
|
"learning_rate": 5.694181462070172e-06,
|
|
"loss": 1.0725,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 3.887468030690537,
|
|
"grad_norm": 0.11952338302619617,
|
|
"learning_rate": 5.644206557715641e-06,
|
|
"loss": 1.0664,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 3.8925831202046037,
|
|
"grad_norm": 0.11892536713687875,
|
|
"learning_rate": 5.5944158693387116e-06,
|
|
"loss": 0.998,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 3.89769820971867,
|
|
"grad_norm": 0.12406906776609274,
|
|
"learning_rate": 5.54481003586193e-06,
|
|
"loss": 1.0383,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 3.9028132992327365,
|
|
"grad_norm": 0.12004615203676204,
|
|
"learning_rate": 5.495389693835777e-06,
|
|
"loss": 1.0641,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 3.907928388746803,
|
|
"grad_norm": 0.11520566020799254,
|
|
"learning_rate": 5.446155477430459e-06,
|
|
"loss": 1.0434,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.1120035353943794,
|
|
"learning_rate": 5.397108018427804e-06,
|
|
"loss": 1.0196,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 3.918158567774936,
|
|
"grad_norm": 0.13347354730054195,
|
|
"learning_rate": 5.3482479462131295e-06,
|
|
"loss": 1.0531,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 3.923273657289003,
|
|
"grad_norm": 0.11377182994995544,
|
|
"learning_rate": 5.299575887767166e-06,
|
|
"loss": 1.0447,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 3.928388746803069,
|
|
"grad_norm": 0.10277267322916264,
|
|
"learning_rate": 5.251092467658032e-06,
|
|
"loss": 1.0431,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 3.9335038363171355,
|
|
"grad_norm": 0.11344118476671607,
|
|
"learning_rate": 5.202798308033206e-06,
|
|
"loss": 1.0364,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 3.938618925831202,
|
|
"grad_norm": 0.12174309556410545,
|
|
"learning_rate": 5.1546940286115314e-06,
|
|
"loss": 1.027,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 3.9437340153452687,
|
|
"grad_norm": 0.1131547612504401,
|
|
"learning_rate": 5.106780246675293e-06,
|
|
"loss": 1.0181,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 3.948849104859335,
|
|
"grad_norm": 0.11462221970998479,
|
|
"learning_rate": 5.059057577062256e-06,
|
|
"loss": 1.0391,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 3.9539641943734014,
|
|
"grad_norm": 0.10731649265711139,
|
|
"learning_rate": 5.011526632157837e-06,
|
|
"loss": 1.0385,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 3.959079283887468,
|
|
"grad_norm": 0.1068334719582534,
|
|
"learning_rate": 4.9641880218871775e-06,
|
|
"loss": 1.026,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 3.9641943734015346,
|
|
"grad_norm": 0.1086853971132405,
|
|
"learning_rate": 4.917042353707351e-06,
|
|
"loss": 1.0907,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 3.969309462915601,
|
|
"grad_norm": 0.10897664106001581,
|
|
"learning_rate": 4.870090232599576e-06,
|
|
"loss": 1.0469,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 3.9744245524296673,
|
|
"grad_norm": 0.10850930161429445,
|
|
"learning_rate": 4.823332261061442e-06,
|
|
"loss": 1.0442,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 3.979539641943734,
|
|
"grad_norm": 0.10718500232069637,
|
|
"learning_rate": 4.776769039099176e-06,
|
|
"loss": 1.0612,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 3.9846547314578005,
|
|
"grad_norm": 0.10897845999718717,
|
|
"learning_rate": 4.7304011642199355e-06,
|
|
"loss": 1.0457,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 3.9897698209718673,
|
|
"grad_norm": 0.10338741515429783,
|
|
"learning_rate": 4.6842292314241626e-06,
|
|
"loss": 1.0423,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 3.9948849104859336,
|
|
"grad_norm": 0.1046539074162833,
|
|
"learning_rate": 4.638253833197943e-06,
|
|
"loss": 1.0668,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.16987244243701544,
|
|
"learning_rate": 4.592475559505374e-06,
|
|
"loss": 1.5661,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 4.005115089514066,
|
|
"grad_norm": 0.10825655007416356,
|
|
"learning_rate": 4.5468949977810415e-06,
|
|
"loss": 1.0295,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 4.010230179028133,
|
|
"grad_norm": 0.09709211523460262,
|
|
"learning_rate": 4.50151273292245e-06,
|
|
"loss": 1.0199,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 4.015345268542199,
|
|
"grad_norm": 0.1106600074433777,
|
|
"learning_rate": 4.456329347282515e-06,
|
|
"loss": 1.044,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 4.020460358056266,
|
|
"grad_norm": 0.11786657875371895,
|
|
"learning_rate": 4.4113454206621185e-06,
|
|
"loss": 1.0322,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 4.025575447570333,
|
|
"grad_norm": 0.10523548089343922,
|
|
"learning_rate": 4.366561530302631e-06,
|
|
"loss": 1.0252,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 4.030690537084399,
|
|
"grad_norm": 0.10903480563934011,
|
|
"learning_rate": 4.321978250878536e-06,
|
|
"loss": 1.0361,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 4.035805626598465,
|
|
"grad_norm": 0.11279131671447776,
|
|
"learning_rate": 4.277596154490047e-06,
|
|
"loss": 1.0259,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 4.040920716112532,
|
|
"grad_norm": 0.10911611333237252,
|
|
"learning_rate": 4.233415810655748e-06,
|
|
"loss": 1.0512,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 4.046035805626598,
|
|
"grad_norm": 0.10899009342627193,
|
|
"learning_rate": 4.189437786305313e-06,
|
|
"loss": 1.0265,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 4.051150895140665,
|
|
"grad_norm": 0.1208890385053231,
|
|
"learning_rate": 4.14566264577221e-06,
|
|
"loss": 1.0196,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 4.056265984654732,
|
|
"grad_norm": 0.10117429151986053,
|
|
"learning_rate": 4.102090950786479e-06,
|
|
"loss": 1.0488,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 4.061381074168798,
|
|
"grad_norm": 0.10061697427381379,
|
|
"learning_rate": 4.058723260467494e-06,
|
|
"loss": 1.0075,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 4.0664961636828645,
|
|
"grad_norm": 0.1215862753853224,
|
|
"learning_rate": 4.0155601313168204e-06,
|
|
"loss": 1.0446,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 4.071611253196931,
|
|
"grad_norm": 0.1097589288716949,
|
|
"learning_rate": 3.972602117211062e-06,
|
|
"loss": 1.0603,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 4.076726342710997,
|
|
"grad_norm": 0.1021864573752271,
|
|
"learning_rate": 3.929849769394733e-06,
|
|
"loss": 1.0668,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 4.081841432225064,
|
|
"grad_norm": 0.10449100664299484,
|
|
"learning_rate": 3.887303636473232e-06,
|
|
"loss": 1.0411,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 4.086956521739131,
|
|
"grad_norm": 0.10879423779843234,
|
|
"learning_rate": 3.844964264405735e-06,
|
|
"loss": 1.0223,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 4.092071611253197,
|
|
"grad_norm": 0.09703196013660684,
|
|
"learning_rate": 3.802832196498272e-06,
|
|
"loss": 1.0285,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 4.0971867007672635,
|
|
"grad_norm": 0.09959222058834097,
|
|
"learning_rate": 3.760907973396677e-06,
|
|
"loss": 1.0309,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 4.10230179028133,
|
|
"grad_norm": 0.10177075731564252,
|
|
"learning_rate": 3.719192133079692e-06,
|
|
"loss": 1.0318,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 4.107416879795396,
|
|
"grad_norm": 0.10333452571953791,
|
|
"learning_rate": 3.677685210852062e-06,
|
|
"loss": 1.0406,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 4.112531969309463,
|
|
"grad_norm": 0.09844038300469107,
|
|
"learning_rate": 3.636387739337659e-06,
|
|
"loss": 1.0219,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 4.117647058823529,
|
|
"grad_norm": 0.09831929728264319,
|
|
"learning_rate": 3.5953002484726484e-06,
|
|
"loss": 1.0431,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 4.122762148337596,
|
|
"grad_norm": 0.09989497009088748,
|
|
"learning_rate": 3.5544232654986744e-06,
|
|
"loss": 1.0276,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 4.127877237851663,
|
|
"grad_norm": 0.09990437586089519,
|
|
"learning_rate": 3.513757314956128e-06,
|
|
"loss": 1.0428,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 4.132992327365729,
|
|
"grad_norm": 0.09654332882972133,
|
|
"learning_rate": 3.4733029186773905e-06,
|
|
"loss": 1.0392,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 4.138107416879795,
|
|
"grad_norm": 0.09700482096232281,
|
|
"learning_rate": 3.433060595780131e-06,
|
|
"loss": 1.0324,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 4.143222506393862,
|
|
"grad_norm": 0.09733789290361096,
|
|
"learning_rate": 3.3930308626606733e-06,
|
|
"loss": 1.0337,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 4.148337595907928,
|
|
"grad_norm": 0.09679936208456312,
|
|
"learning_rate": 3.3532142329873362e-06,
|
|
"loss": 1.0427,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 4.153452685421995,
|
|
"grad_norm": 0.09850838614448618,
|
|
"learning_rate": 3.3136112176938774e-06,
|
|
"loss": 1.039,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 4.158567774936062,
|
|
"grad_norm": 0.10014402864349467,
|
|
"learning_rate": 3.274222324972909e-06,
|
|
"loss": 1.0397,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 4.163682864450128,
|
|
"grad_norm": 0.09846775188414943,
|
|
"learning_rate": 3.2350480602693813e-06,
|
|
"loss": 1.0438,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 4.168797953964194,
|
|
"grad_norm": 0.09849530091060531,
|
|
"learning_rate": 3.196088926274108e-06,
|
|
"loss": 1.0487,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 4.173913043478261,
|
|
"grad_norm": 0.09894112470378255,
|
|
"learning_rate": 3.1573454229173173e-06,
|
|
"loss": 1.0511,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 4.179028132992327,
|
|
"grad_norm": 0.09585569782795421,
|
|
"learning_rate": 3.1188180473622045e-06,
|
|
"loss": 1.033,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 4.1841432225063935,
|
|
"grad_norm": 0.0907063248106673,
|
|
"learning_rate": 3.080507293998598e-06,
|
|
"loss": 1.0641,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 4.189258312020461,
|
|
"grad_norm": 0.10068952997359636,
|
|
"learning_rate": 3.0424136544365846e-06,
|
|
"loss": 1.041,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 4.194373401534527,
|
|
"grad_norm": 0.09500920205889321,
|
|
"learning_rate": 3.0045376175002185e-06,
|
|
"loss": 1.0282,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 4.1994884910485935,
|
|
"grad_norm": 0.09153412621935421,
|
|
"learning_rate": 2.9668796692212253e-06,
|
|
"loss": 1.0279,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 4.20460358056266,
|
|
"grad_norm": 0.09475771204466948,
|
|
"learning_rate": 2.9294402928327815e-06,
|
|
"loss": 1.029,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 4.209718670076726,
|
|
"grad_norm": 0.09846697389243914,
|
|
"learning_rate": 2.892219968763337e-06,
|
|
"loss": 1.0286,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 4.2148337595907925,
|
|
"grad_norm": 0.09305701318883604,
|
|
"learning_rate": 2.8552191746303904e-06,
|
|
"loss": 1.0262,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 4.21994884910486,
|
|
"grad_norm": 0.09124936813337096,
|
|
"learning_rate": 2.8184383852344212e-06,
|
|
"loss": 1.0185,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 4.225063938618926,
|
|
"grad_norm": 0.10077071629270502,
|
|
"learning_rate": 2.7818780725527505e-06,
|
|
"loss": 1.0366,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 4.2301790281329925,
|
|
"grad_norm": 0.09979949490236956,
|
|
"learning_rate": 2.745538705733519e-06,
|
|
"loss": 1.0383,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 4.235294117647059,
|
|
"grad_norm": 0.09761402809078823,
|
|
"learning_rate": 2.7094207510896574e-06,
|
|
"loss": 1.0508,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 4.240409207161125,
|
|
"grad_norm": 0.0935296858882418,
|
|
"learning_rate": 2.673524672092873e-06,
|
|
"loss": 1.0405,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 4.245524296675192,
|
|
"grad_norm": 0.09509323461150619,
|
|
"learning_rate": 2.6378509293677533e-06,
|
|
"loss": 1.0426,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 4.250639386189258,
|
|
"grad_norm": 0.09606218354815171,
|
|
"learning_rate": 2.602399980685815e-06,
|
|
"loss": 1.0488,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 4.255754475703325,
|
|
"grad_norm": 0.0985764043857404,
|
|
"learning_rate": 2.567172280959653e-06,
|
|
"loss": 1.0257,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 4.260869565217392,
|
|
"grad_norm": 0.09298961777037151,
|
|
"learning_rate": 2.532168282237084e-06,
|
|
"loss": 1.0301,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 4.265984654731458,
|
|
"grad_norm": 0.09501389616813173,
|
|
"learning_rate": 2.4973884336953512e-06,
|
|
"loss": 1.0158,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 4.271099744245524,
|
|
"grad_norm": 0.10060659630523315,
|
|
"learning_rate": 2.462833181635391e-06,
|
|
"loss": 1.0402,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 4.276214833759591,
|
|
"grad_norm": 0.08985813903795348,
|
|
"learning_rate": 2.4285029694760475e-06,
|
|
"loss": 1.0278,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 4.281329923273657,
|
|
"grad_norm": 0.09047049245073066,
|
|
"learning_rate": 2.3943982377484364e-06,
|
|
"loss": 1.007,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 4.286445012787723,
|
|
"grad_norm": 0.09542213115984054,
|
|
"learning_rate": 2.3605194240902575e-06,
|
|
"loss": 1.0349,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 4.291560102301791,
|
|
"grad_norm": 0.09592894134973562,
|
|
"learning_rate": 2.3268669632401997e-06,
|
|
"loss": 1.0461,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 4.296675191815857,
|
|
"grad_norm": 0.09276563388354862,
|
|
"learning_rate": 2.293441287032354e-06,
|
|
"loss": 1.0409,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 4.301790281329923,
|
|
"grad_norm": 0.08621024713151011,
|
|
"learning_rate": 2.2602428243906638e-06,
|
|
"loss": 1.0458,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 4.30690537084399,
|
|
"grad_norm": 0.0947853691754098,
|
|
"learning_rate": 2.2272720013234372e-06,
|
|
"loss": 1.0597,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 4.312020460358056,
|
|
"grad_norm": 0.09738935216253661,
|
|
"learning_rate": 2.1945292409178755e-06,
|
|
"loss": 1.0354,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 4.3171355498721224,
|
|
"grad_norm": 0.08975381339730756,
|
|
"learning_rate": 2.162014963334631e-06,
|
|
"loss": 1.0121,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 4.322250639386189,
|
|
"grad_norm": 0.08820100067894239,
|
|
"learning_rate": 2.1297295858024313e-06,
|
|
"loss": 1.0514,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 4.327365728900256,
|
|
"grad_norm": 0.09617468579113231,
|
|
"learning_rate": 2.097673522612722e-06,
|
|
"loss": 1.0285,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 4.332480818414322,
|
|
"grad_norm": 0.09593751503684586,
|
|
"learning_rate": 2.0658471851143513e-06,
|
|
"loss": 1.039,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 4.337595907928389,
|
|
"grad_norm": 0.09444286356238825,
|
|
"learning_rate": 2.0342509817082747e-06,
|
|
"loss": 1.0414,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 4.342710997442455,
|
|
"grad_norm": 0.09101012732573761,
|
|
"learning_rate": 2.0028853178423356e-06,
|
|
"loss": 1.0177,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.09325815402802133,
|
|
"learning_rate": 1.971750596006059e-06,
|
|
"loss": 1.0409,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 4.352941176470588,
|
|
"grad_norm": 0.08872400279590349,
|
|
"learning_rate": 1.9408472157254765e-06,
|
|
"loss": 1.0268,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 4.358056265984655,
|
|
"grad_norm": 0.0885901270770776,
|
|
"learning_rate": 1.9101755735580128e-06,
|
|
"loss": 1.044,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 4.3631713554987215,
|
|
"grad_norm": 0.0949449985202502,
|
|
"learning_rate": 1.8797360630873806e-06,
|
|
"loss": 1.0452,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 4.368286445012788,
|
|
"grad_norm": 0.08410055531702589,
|
|
"learning_rate": 1.8495290749185435e-06,
|
|
"loss": 1.0325,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 4.373401534526854,
|
|
"grad_norm": 0.09545532227569593,
|
|
"learning_rate": 1.8195549966727054e-06,
|
|
"loss": 1.0481,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 4.378516624040921,
|
|
"grad_norm": 0.08476459696114018,
|
|
"learning_rate": 1.7898142129823171e-06,
|
|
"loss": 1.0639,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 4.383631713554987,
|
|
"grad_norm": 0.08949442544964742,
|
|
"learning_rate": 1.7603071054861653e-06,
|
|
"loss": 1.0371,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 4.388746803069053,
|
|
"grad_norm": 0.08879874215911474,
|
|
"learning_rate": 1.7310340528244607e-06,
|
|
"loss": 1.0405,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 4.3938618925831205,
|
|
"grad_norm": 0.08840226801940593,
|
|
"learning_rate": 1.701995430633987e-06,
|
|
"loss": 1.0422,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 4.398976982097187,
|
|
"grad_norm": 0.08626387602238281,
|
|
"learning_rate": 1.6731916115432678e-06,
|
|
"loss": 1.0362,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 4.404092071611253,
|
|
"grad_norm": 0.0883006081970065,
|
|
"learning_rate": 1.6446229651677903e-06,
|
|
"loss": 1.0594,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 4.40920716112532,
|
|
"grad_norm": 0.08763882002142795,
|
|
"learning_rate": 1.6162898581052866e-06,
|
|
"loss": 1.0402,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 4.414322250639386,
|
|
"grad_norm": 0.08473283566109852,
|
|
"learning_rate": 1.5881926539309845e-06,
|
|
"loss": 1.0369,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 4.419437340153452,
|
|
"grad_norm": 0.08882688131234197,
|
|
"learning_rate": 1.560331713192984e-06,
|
|
"loss": 1.0268,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 4.42455242966752,
|
|
"grad_norm": 0.0919916289952921,
|
|
"learning_rate": 1.5327073934075954e-06,
|
|
"loss": 1.0548,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 4.429667519181586,
|
|
"grad_norm": 0.09199096614028365,
|
|
"learning_rate": 1.5053200490547838e-06,
|
|
"loss": 1.0138,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 4.434782608695652,
|
|
"grad_norm": 0.0871055821541935,
|
|
"learning_rate": 1.4781700315736002e-06,
|
|
"loss": 1.0355,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 4.439897698209719,
|
|
"grad_norm": 0.08976011928625406,
|
|
"learning_rate": 1.4512576893576725e-06,
|
|
"loss": 1.0591,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 4.445012787723785,
|
|
"grad_norm": 0.08644290864743312,
|
|
"learning_rate": 1.4245833677507448e-06,
|
|
"loss": 1.0304,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 4.450127877237851,
|
|
"grad_norm": 0.08435192281249178,
|
|
"learning_rate": 1.3981474090422408e-06,
|
|
"loss": 1.0279,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 4.455242966751918,
|
|
"grad_norm": 0.0846195173709492,
|
|
"learning_rate": 1.3719501524628643e-06,
|
|
"loss": 1.0472,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 4.460358056265985,
|
|
"grad_norm": 0.08696234341868504,
|
|
"learning_rate": 1.3459919341802618e-06,
|
|
"loss": 1.0293,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 4.465473145780051,
|
|
"grad_norm": 0.08749456032856028,
|
|
"learning_rate": 1.3202730872946878e-06,
|
|
"loss": 1.0424,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 4.470588235294118,
|
|
"grad_norm": 0.08679996505251306,
|
|
"learning_rate": 1.2947939418347599e-06,
|
|
"loss": 1.0337,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 4.475703324808184,
|
|
"grad_norm": 0.08763262493448441,
|
|
"learning_rate": 1.269554824753192e-06,
|
|
"loss": 1.053,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 4.4808184143222505,
|
|
"grad_norm": 0.08632279952905321,
|
|
"learning_rate": 1.2445560599226148e-06,
|
|
"loss": 1.0353,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 4.485933503836317,
|
|
"grad_norm": 0.08740638922400544,
|
|
"learning_rate": 1.219797968131422e-06,
|
|
"loss": 1.0573,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 4.491048593350383,
|
|
"grad_norm": 0.08830677643420132,
|
|
"learning_rate": 1.1952808670796511e-06,
|
|
"loss": 1.0372,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 4.4961636828644505,
|
|
"grad_norm": 0.08336534008550968,
|
|
"learning_rate": 1.1710050713749067e-06,
|
|
"loss": 1.0335,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 4.501278772378517,
|
|
"grad_norm": 0.089103955480962,
|
|
"learning_rate": 1.1469708925283095e-06,
|
|
"loss": 1.0326,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 4.506393861892583,
|
|
"grad_norm": 0.08776482694311091,
|
|
"learning_rate": 1.123178638950526e-06,
|
|
"loss": 1.0575,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 4.5115089514066495,
|
|
"grad_norm": 0.08866769991349784,
|
|
"learning_rate": 1.0996286159477943e-06,
|
|
"loss": 1.0354,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 4.516624040920716,
|
|
"grad_norm": 0.08687727068648551,
|
|
"learning_rate": 1.0763211257180007e-06,
|
|
"loss": 1.0327,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 4.521739130434782,
|
|
"grad_norm": 0.0885626504483757,
|
|
"learning_rate": 1.053256467346817e-06,
|
|
"loss": 1.0566,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 4.526854219948849,
|
|
"grad_norm": 0.08688698614964542,
|
|
"learning_rate": 1.030434936803857e-06,
|
|
"loss": 1.0262,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 4.531969309462916,
|
|
"grad_norm": 0.08874324934826401,
|
|
"learning_rate": 1.0078568269388666e-06,
|
|
"loss": 1.0355,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 4.537084398976982,
|
|
"grad_norm": 0.09066610304373438,
|
|
"learning_rate": 9.855224274779894e-07,
|
|
"loss": 1.0242,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 4.542199488491049,
|
|
"grad_norm": 0.0899703471767722,
|
|
"learning_rate": 9.634320250200213e-07,
|
|
"loss": 1.0222,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 4.547314578005115,
|
|
"grad_norm": 0.08777620122531764,
|
|
"learning_rate": 9.415859030327667e-07,
|
|
"loss": 1.049,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 4.552429667519181,
|
|
"grad_norm": 0.08862100817321585,
|
|
"learning_rate": 9.199843418493625e-07,
|
|
"loss": 1.033,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 4.557544757033249,
|
|
"grad_norm": 0.0924502407146251,
|
|
"learning_rate": 8.986276186647092e-07,
|
|
"loss": 1.0474,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 4.562659846547315,
|
|
"grad_norm": 0.08475994721795894,
|
|
"learning_rate": 8.775160075319001e-07,
|
|
"loss": 1.0504,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 4.567774936061381,
|
|
"grad_norm": 0.08839104158168999,
|
|
"learning_rate": 8.566497793587158e-07,
|
|
"loss": 1.0164,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 4.572890025575448,
|
|
"grad_norm": 0.08678786116042993,
|
|
"learning_rate": 8.360292019041405e-07,
|
|
"loss": 1.0253,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 4.578005115089514,
|
|
"grad_norm": 0.09067943165736797,
|
|
"learning_rate": 8.156545397749127e-07,
|
|
"loss": 1.0578,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 4.58312020460358,
|
|
"grad_norm": 0.09064381529233775,
|
|
"learning_rate": 7.955260544221621e-07,
|
|
"loss": 1.0363,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 4.588235294117647,
|
|
"grad_norm": 0.08444296552020542,
|
|
"learning_rate": 7.756440041380297e-07,
|
|
"loss": 1.0339,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 4.593350383631714,
|
|
"grad_norm": 0.0890656704511778,
|
|
"learning_rate": 7.560086440523528e-07,
|
|
"loss": 1.0268,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 4.59846547314578,
|
|
"grad_norm": 0.08823096230866823,
|
|
"learning_rate": 7.366202261294098e-07,
|
|
"loss": 1.0295,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 4.603580562659847,
|
|
"grad_norm": 0.08543654183534846,
|
|
"learning_rate": 7.174789991646602e-07,
|
|
"loss": 1.0403,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 4.608695652173913,
|
|
"grad_norm": 0.08651006361043005,
|
|
"learning_rate": 6.985852087815903e-07,
|
|
"loss": 1.0107,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 4.6138107416879794,
|
|
"grad_norm": 0.0893115824376349,
|
|
"learning_rate": 6.799390974285169e-07,
|
|
"loss": 1.0148,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 4.618925831202046,
|
|
"grad_norm": 0.0905211238552103,
|
|
"learning_rate": 6.615409043755039e-07,
|
|
"loss": 1.0235,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 4.624040920716112,
|
|
"grad_norm": 0.08373868267565032,
|
|
"learning_rate": 6.433908657112886e-07,
|
|
"loss": 1.0583,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 4.629156010230179,
|
|
"grad_norm": 0.08517234012511272,
|
|
"learning_rate": 6.254892143402469e-07,
|
|
"loss": 1.0428,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 4.634271099744246,
|
|
"grad_norm": 0.0829556502483681,
|
|
"learning_rate": 6.078361799794086e-07,
|
|
"loss": 1.0495,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 4.639386189258312,
|
|
"grad_norm": 0.08390852555662921,
|
|
"learning_rate": 5.904319891555021e-07,
|
|
"loss": 1.0353,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 4.6445012787723785,
|
|
"grad_norm": 0.08446045440701189,
|
|
"learning_rate": 5.732768652020615e-07,
|
|
"loss": 1.0622,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 4.649616368286445,
|
|
"grad_norm": 0.09204361483711933,
|
|
"learning_rate": 5.563710282565504e-07,
|
|
"loss": 1.0531,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 4.654731457800511,
|
|
"grad_norm": 0.08394901633656762,
|
|
"learning_rate": 5.397146952575316e-07,
|
|
"loss": 1.0171,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 4.659846547314578,
|
|
"grad_norm": 0.08343494614241122,
|
|
"learning_rate": 5.233080799418999e-07,
|
|
"loss": 1.0396,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 4.664961636828645,
|
|
"grad_norm": 0.07955675153850743,
|
|
"learning_rate": 5.071513928421268e-07,
|
|
"loss": 1.0189,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 4.670076726342711,
|
|
"grad_norm": 0.08101944215370342,
|
|
"learning_rate": 4.912448412835625e-07,
|
|
"loss": 1.0258,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 4.675191815856778,
|
|
"grad_norm": 0.08331490147990027,
|
|
"learning_rate": 4.7558862938177796e-07,
|
|
"loss": 1.0217,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 4.680306905370844,
|
|
"grad_norm": 0.08702803562906056,
|
|
"learning_rate": 4.601829580399364e-07,
|
|
"loss": 1.0396,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 4.68542199488491,
|
|
"grad_norm": 0.0872058124086669,
|
|
"learning_rate": 4.4502802494623023e-07,
|
|
"loss": 1.0455,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 4.690537084398977,
|
|
"grad_norm": 0.08338361238231247,
|
|
"learning_rate": 4.301240245713345e-07,
|
|
"loss": 1.0562,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 4.695652173913043,
|
|
"grad_norm": 0.0851827498369095,
|
|
"learning_rate": 4.1547114816590684e-07,
|
|
"loss": 1.0537,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 4.70076726342711,
|
|
"grad_norm": 0.08046194701441498,
|
|
"learning_rate": 4.010695837581446e-07,
|
|
"loss": 1.0238,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 4.705882352941177,
|
|
"grad_norm": 0.08281618992470802,
|
|
"learning_rate": 3.8691951615136946e-07,
|
|
"loss": 1.0356,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 4.710997442455243,
|
|
"grad_norm": 0.08283580448405972,
|
|
"learning_rate": 3.730211269216488e-07,
|
|
"loss": 1.0239,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 4.716112531969309,
|
|
"grad_norm": 0.08087364868453263,
|
|
"learning_rate": 3.593745944154692e-07,
|
|
"loss": 1.0363,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 4.721227621483376,
|
|
"grad_norm": 0.08333792792360416,
|
|
"learning_rate": 3.459800937474533e-07,
|
|
"loss": 1.0377,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 4.726342710997442,
|
|
"grad_norm": 0.08198078335380529,
|
|
"learning_rate": 3.328377967981089e-07,
|
|
"loss": 1.0525,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 4.731457800511509,
|
|
"grad_norm": 0.08207405352725915,
|
|
"learning_rate": 3.1994787221161674e-07,
|
|
"loss": 1.0487,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 4.736572890025576,
|
|
"grad_norm": 0.08127709502910237,
|
|
"learning_rate": 3.0731048539367924e-07,
|
|
"loss": 1.0317,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 4.741687979539642,
|
|
"grad_norm": 0.08182420357301565,
|
|
"learning_rate": 2.949257985093845e-07,
|
|
"loss": 1.0645,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 4.746803069053708,
|
|
"grad_norm": 0.08540342640125807,
|
|
"learning_rate": 2.827939704811433e-07,
|
|
"loss": 1.0225,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 4.751918158567775,
|
|
"grad_norm": 0.08105962695785912,
|
|
"learning_rate": 2.7091515698662863e-07,
|
|
"loss": 1.0403,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 4.757033248081841,
|
|
"grad_norm": 0.08316360123978976,
|
|
"learning_rate": 2.592895104567861e-07,
|
|
"loss": 1.0377,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 4.762148337595908,
|
|
"grad_norm": 0.08916967768404561,
|
|
"learning_rate": 2.479171800738911e-07,
|
|
"loss": 1.0432,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 4.767263427109975,
|
|
"grad_norm": 0.08337042713060398,
|
|
"learning_rate": 2.3679831176961487e-07,
|
|
"loss": 1.0586,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 4.772378516624041,
|
|
"grad_norm": 0.08366883341035822,
|
|
"learning_rate": 2.2593304822316365e-07,
|
|
"loss": 1.0398,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 4.7774936061381075,
|
|
"grad_norm": 0.08119050050082849,
|
|
"learning_rate": 2.153215288594379e-07,
|
|
"loss": 1.0269,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.0825222553646159,
|
|
"learning_rate": 2.0496388984726056e-07,
|
|
"loss": 1.0378,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 4.78772378516624,
|
|
"grad_norm": 0.08164754045193766,
|
|
"learning_rate": 1.9486026409761162e-07,
|
|
"loss": 1.0507,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 4.792838874680307,
|
|
"grad_norm": 0.08069756368018306,
|
|
"learning_rate": 1.8501078126193172e-07,
|
|
"loss": 1.0141,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 4.797953964194374,
|
|
"grad_norm": 0.08172592463148753,
|
|
"learning_rate": 1.7541556773045255e-07,
|
|
"loss": 1.024,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 4.80306905370844,
|
|
"grad_norm": 0.08172502645150909,
|
|
"learning_rate": 1.6607474663058677e-07,
|
|
"loss": 1.0431,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 4.8081841432225065,
|
|
"grad_norm": 0.08077050134165903,
|
|
"learning_rate": 1.569884378253317e-07,
|
|
"loss": 1.0179,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 4.813299232736573,
|
|
"grad_norm": 0.08523300001438115,
|
|
"learning_rate": 1.4815675791175043e-07,
|
|
"loss": 1.0318,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 4.818414322250639,
|
|
"grad_norm": 0.08046827663058376,
|
|
"learning_rate": 1.3957982021945093e-07,
|
|
"loss": 1.0185,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 4.823529411764706,
|
|
"grad_norm": 0.0826399800178752,
|
|
"learning_rate": 1.3125773480915592e-07,
|
|
"loss": 1.0364,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 4.828644501278772,
|
|
"grad_norm": 0.08143311184926899,
|
|
"learning_rate": 1.2319060847127972e-07,
|
|
"loss": 1.0441,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 4.833759590792839,
|
|
"grad_norm": 0.0812111566830204,
|
|
"learning_rate": 1.1537854472455368e-07,
|
|
"loss": 1.0194,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 4.838874680306906,
|
|
"grad_norm": 0.08087505193021655,
|
|
"learning_rate": 1.0782164381470506e-07,
|
|
"loss": 1.0272,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 4.843989769820972,
|
|
"grad_norm": 0.08202686030913739,
|
|
"learning_rate": 1.0052000271317142e-07,
|
|
"loss": 1.0137,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 4.849104859335038,
|
|
"grad_norm": 0.08725113377075328,
|
|
"learning_rate": 9.347371511585046e-08,
|
|
"loss": 1.0444,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 4.854219948849105,
|
|
"grad_norm": 0.08074935681692166,
|
|
"learning_rate": 8.66828714418988e-08,
|
|
"loss": 1.0367,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 4.859335038363171,
|
|
"grad_norm": 0.0833488513811118,
|
|
"learning_rate": 8.014755883257508e-08,
|
|
"loss": 1.0484,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 4.864450127877237,
|
|
"grad_norm": 0.0811386973621194,
|
|
"learning_rate": 7.386786115011868e-08,
|
|
"loss": 1.0298,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 4.869565217391305,
|
|
"grad_norm": 0.08217471436638513,
|
|
"learning_rate": 6.784385897667723e-08,
|
|
"loss": 1.0481,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 4.874680306905371,
|
|
"grad_norm": 0.08218495988946739,
|
|
"learning_rate": 6.207562961326963e-08,
|
|
"loss": 1.0419,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 4.879795396419437,
|
|
"grad_norm": 0.08060090907067169,
|
|
"learning_rate": 5.656324707879357e-08,
|
|
"loss": 1.0535,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 4.884910485933504,
|
|
"grad_norm": 0.08129135659128915,
|
|
"learning_rate": 5.130678210907514e-08,
|
|
"loss": 1.0504,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 4.89002557544757,
|
|
"grad_norm": 0.08562145837052391,
|
|
"learning_rate": 4.630630215596732e-08,
|
|
"loss": 1.0332,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 4.8951406649616365,
|
|
"grad_norm": 0.08242537780405768,
|
|
"learning_rate": 4.156187138647516e-08,
|
|
"loss": 1.0429,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 4.900255754475703,
|
|
"grad_norm": 0.07982429118069245,
|
|
"learning_rate": 3.707355068194085e-08,
|
|
"loss": 1.0497,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 4.90537084398977,
|
|
"grad_norm": 0.08337322755077417,
|
|
"learning_rate": 3.284139763725769e-08,
|
|
"loss": 1.0039,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 4.910485933503836,
|
|
"grad_norm": 0.08052583915874226,
|
|
"learning_rate": 2.8865466560130673e-08,
|
|
"loss": 1.0475,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 4.915601023017903,
|
|
"grad_norm": 0.08011637722476794,
|
|
"learning_rate": 2.5145808470383727e-08,
|
|
"loss": 1.0436,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 4.920716112531969,
|
|
"grad_norm": 0.07954539736607873,
|
|
"learning_rate": 2.1682471099297996e-08,
|
|
"loss": 1.0232,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 4.9258312020460355,
|
|
"grad_norm": 0.08203718970751432,
|
|
"learning_rate": 1.8475498889010125e-08,
|
|
"loss": 1.0611,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 4.930946291560103,
|
|
"grad_norm": 0.08091342862475163,
|
|
"learning_rate": 1.5524932991928253e-08,
|
|
"loss": 1.0421,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 4.936061381074169,
|
|
"grad_norm": 0.08209037571378622,
|
|
"learning_rate": 1.2830811270214682e-08,
|
|
"loss": 1.0559,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 4.9411764705882355,
|
|
"grad_norm": 0.08253421197403137,
|
|
"learning_rate": 1.0393168295299571e-08,
|
|
"loss": 1.0556,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 4.946291560102302,
|
|
"grad_norm": 0.08010862800679533,
|
|
"learning_rate": 8.212035347427983e-09,
|
|
"loss": 1.0281,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 4.951406649616368,
|
|
"grad_norm": 0.08063925174776879,
|
|
"learning_rate": 6.287440415271295e-09,
|
|
"loss": 1.0279,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 4.956521739130435,
|
|
"grad_norm": 0.08061660864354202,
|
|
"learning_rate": 4.619408195554176e-09,
|
|
"loss": 1.0224,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 4.961636828644501,
|
|
"grad_norm": 2.467052842174403,
|
|
"learning_rate": 3.207960092752593e-09,
|
|
"loss": 1.0545,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 4.966751918158568,
|
|
"grad_norm": 0.08327486179105495,
|
|
"learning_rate": 2.0531142188073837e-09,
|
|
"loss": 1.0395,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 4.971867007672635,
|
|
"grad_norm": 0.07879283157932467,
|
|
"learning_rate": 1.154885392895544e-09,
|
|
"loss": 1.0258,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 4.976982097186701,
|
|
"grad_norm": 0.08591637879884984,
|
|
"learning_rate": 5.132851412437135e-10,
|
|
"loss": 1.0512,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 4.982097186700767,
|
|
"grad_norm": 0.07945597108084437,
|
|
"learning_rate": 1.283216969727441e-10,
|
|
"loss": 1.026,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 4.987212276214834,
|
|
"grad_norm": 0.0809414158911861,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.0639,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 4.987212276214834,
|
|
"step": 975,
|
|
"total_flos": 7100358600687616.0,
|
|
"train_loss": 1.0860105241261995,
|
|
"train_runtime": 35157.9071,
|
|
"train_samples_per_second": 14.222,
|
|
"train_steps_per_second": 0.028
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 975,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 7100358600687616.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|