Model: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz256_lr16e5_epochs5 Source: Original Platform
13728 lines
332 KiB
JSON
13728 lines
332 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1955,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0025575447570332483,
|
|
"grad_norm": 2.9563186457899664,
|
|
"learning_rate": 8.163265306122449e-07,
|
|
"loss": 1.5213,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.005115089514066497,
|
|
"grad_norm": 2.9570403686006705,
|
|
"learning_rate": 1.6326530612244897e-06,
|
|
"loss": 1.4742,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0076726342710997444,
|
|
"grad_norm": 3.010165733072805,
|
|
"learning_rate": 2.4489795918367347e-06,
|
|
"loss": 1.4946,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.010230179028132993,
|
|
"grad_norm": 2.868210172096221,
|
|
"learning_rate": 3.2653061224489794e-06,
|
|
"loss": 1.482,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.01278772378516624,
|
|
"grad_norm": 2.6518374719836477,
|
|
"learning_rate": 4.081632653061225e-06,
|
|
"loss": 1.4866,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.015345268542199489,
|
|
"grad_norm": 2.0719187075670176,
|
|
"learning_rate": 4.897959183673469e-06,
|
|
"loss": 1.4844,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.017902813299232736,
|
|
"grad_norm": 1.8691931463314044,
|
|
"learning_rate": 5.7142857142857145e-06,
|
|
"loss": 1.4533,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.020460358056265986,
|
|
"grad_norm": 1.8092896927352589,
|
|
"learning_rate": 6.530612244897959e-06,
|
|
"loss": 1.4433,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.023017902813299233,
|
|
"grad_norm": 1.7543993002445608,
|
|
"learning_rate": 7.346938775510205e-06,
|
|
"loss": 1.4744,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.02557544757033248,
|
|
"grad_norm": 1.6606628173638305,
|
|
"learning_rate": 8.16326530612245e-06,
|
|
"loss": 1.4586,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.028132992327365727,
|
|
"grad_norm": 2.197985553952399,
|
|
"learning_rate": 8.979591836734695e-06,
|
|
"loss": 1.4315,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.030690537084398978,
|
|
"grad_norm": 2.096672912966444,
|
|
"learning_rate": 9.795918367346939e-06,
|
|
"loss": 1.3907,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.03324808184143223,
|
|
"grad_norm": 1.7669816182231157,
|
|
"learning_rate": 1.0612244897959186e-05,
|
|
"loss": 1.4234,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.03580562659846547,
|
|
"grad_norm": 1.3020764177290665,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 1.3478,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.03836317135549872,
|
|
"grad_norm": 1.2917276833945952,
|
|
"learning_rate": 1.2244897959183674e-05,
|
|
"loss": 1.378,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.04092071611253197,
|
|
"grad_norm": 0.9647900041095249,
|
|
"learning_rate": 1.3061224489795918e-05,
|
|
"loss": 1.3273,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.043478260869565216,
|
|
"grad_norm": 0.998986811649884,
|
|
"learning_rate": 1.3877551020408165e-05,
|
|
"loss": 1.3424,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.04603580562659847,
|
|
"grad_norm": 0.8293785359427173,
|
|
"learning_rate": 1.469387755102041e-05,
|
|
"loss": 1.3354,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.04859335038363171,
|
|
"grad_norm": 0.7442208693017255,
|
|
"learning_rate": 1.5510204081632655e-05,
|
|
"loss": 1.3216,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.05115089514066496,
|
|
"grad_norm": 0.8334097235660463,
|
|
"learning_rate": 1.63265306122449e-05,
|
|
"loss": 1.305,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.05370843989769821,
|
|
"grad_norm": 0.7133053870238929,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 1.2863,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.056265984654731455,
|
|
"grad_norm": 0.5994613004850937,
|
|
"learning_rate": 1.795918367346939e-05,
|
|
"loss": 1.31,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.058823529411764705,
|
|
"grad_norm": 0.6168319603979278,
|
|
"learning_rate": 1.8775510204081636e-05,
|
|
"loss": 1.2652,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.061381074168797956,
|
|
"grad_norm": 0.5934674503101482,
|
|
"learning_rate": 1.9591836734693877e-05,
|
|
"loss": 1.2848,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.0639386189258312,
|
|
"grad_norm": 0.5809141308410171,
|
|
"learning_rate": 2.0408163265306126e-05,
|
|
"loss": 1.2605,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.06649616368286446,
|
|
"grad_norm": 0.5544963829723922,
|
|
"learning_rate": 2.122448979591837e-05,
|
|
"loss": 1.2663,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.06905370843989769,
|
|
"grad_norm": 0.5109525040926751,
|
|
"learning_rate": 2.2040816326530613e-05,
|
|
"loss": 1.2493,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.07161125319693094,
|
|
"grad_norm": 0.47071086900075043,
|
|
"learning_rate": 2.2857142857142858e-05,
|
|
"loss": 1.2725,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.0741687979539642,
|
|
"grad_norm": 0.47760033429842697,
|
|
"learning_rate": 2.3673469387755103e-05,
|
|
"loss": 1.2493,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.07672634271099744,
|
|
"grad_norm": 0.47942640683455684,
|
|
"learning_rate": 2.448979591836735e-05,
|
|
"loss": 1.2635,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0792838874680307,
|
|
"grad_norm": 0.3817784984018378,
|
|
"learning_rate": 2.5306122448979597e-05,
|
|
"loss": 1.2581,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.08184143222506395,
|
|
"grad_norm": 0.41863028873772656,
|
|
"learning_rate": 2.6122448979591835e-05,
|
|
"loss": 1.2319,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.08439897698209718,
|
|
"grad_norm": 0.4561646749370822,
|
|
"learning_rate": 2.6938775510204084e-05,
|
|
"loss": 1.2647,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.08695652173913043,
|
|
"grad_norm": 0.32944852639889954,
|
|
"learning_rate": 2.775510204081633e-05,
|
|
"loss": 1.2828,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.08951406649616368,
|
|
"grad_norm": 0.36090683632534276,
|
|
"learning_rate": 2.8571428571428574e-05,
|
|
"loss": 1.2245,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.09207161125319693,
|
|
"grad_norm": 0.36952861081098753,
|
|
"learning_rate": 2.938775510204082e-05,
|
|
"loss": 1.2383,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.09462915601023018,
|
|
"grad_norm": 0.39714992118388376,
|
|
"learning_rate": 3.020408163265306e-05,
|
|
"loss": 1.2524,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.09718670076726342,
|
|
"grad_norm": 0.3567290279003148,
|
|
"learning_rate": 3.102040816326531e-05,
|
|
"loss": 1.229,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.09974424552429667,
|
|
"grad_norm": 0.3806643799838351,
|
|
"learning_rate": 3.183673469387755e-05,
|
|
"loss": 1.2438,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.10230179028132992,
|
|
"grad_norm": 0.407422548294049,
|
|
"learning_rate": 3.26530612244898e-05,
|
|
"loss": 1.1862,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.10485933503836317,
|
|
"grad_norm": 0.34463209168828013,
|
|
"learning_rate": 3.346938775510204e-05,
|
|
"loss": 1.2127,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.10741687979539642,
|
|
"grad_norm": 0.36477387999624367,
|
|
"learning_rate": 3.4285714285714284e-05,
|
|
"loss": 1.2118,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.10997442455242967,
|
|
"grad_norm": 0.33681318596769666,
|
|
"learning_rate": 3.510204081632653e-05,
|
|
"loss": 1.1849,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.11253196930946291,
|
|
"grad_norm": 0.3683055012446813,
|
|
"learning_rate": 3.591836734693878e-05,
|
|
"loss": 1.1965,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.11508951406649616,
|
|
"grad_norm": 0.3236097196989051,
|
|
"learning_rate": 3.673469387755102e-05,
|
|
"loss": 1.1973,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 0.45336744047964317,
|
|
"learning_rate": 3.755102040816327e-05,
|
|
"loss": 1.219,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.12020460358056266,
|
|
"grad_norm": 0.6485049911187234,
|
|
"learning_rate": 3.836734693877551e-05,
|
|
"loss": 1.22,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.12276214833759591,
|
|
"grad_norm": 0.7308887737693851,
|
|
"learning_rate": 3.9183673469387755e-05,
|
|
"loss": 1.1927,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.12531969309462915,
|
|
"grad_norm": 0.7412779741523179,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.207,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.1278772378516624,
|
|
"grad_norm": 0.61907561491782,
|
|
"learning_rate": 4.081632653061225e-05,
|
|
"loss": 1.1761,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.13043478260869565,
|
|
"grad_norm": 0.5645180027937694,
|
|
"learning_rate": 4.1632653061224494e-05,
|
|
"loss": 1.1828,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.1329923273657289,
|
|
"grad_norm": 0.6097938476878244,
|
|
"learning_rate": 4.244897959183674e-05,
|
|
"loss": 1.1645,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.13554987212276215,
|
|
"grad_norm": 0.68105585214221,
|
|
"learning_rate": 4.3265306122448984e-05,
|
|
"loss": 1.1663,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.13810741687979539,
|
|
"grad_norm": 0.5148592364190684,
|
|
"learning_rate": 4.4081632653061226e-05,
|
|
"loss": 1.2013,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.14066496163682865,
|
|
"grad_norm": 0.6290537917728678,
|
|
"learning_rate": 4.489795918367347e-05,
|
|
"loss": 1.2142,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.1432225063938619,
|
|
"grad_norm": 0.8770682994258979,
|
|
"learning_rate": 4.5714285714285716e-05,
|
|
"loss": 1.2031,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.14578005115089515,
|
|
"grad_norm": 1.211521452597314,
|
|
"learning_rate": 4.6530612244897965e-05,
|
|
"loss": 1.1872,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.1483375959079284,
|
|
"grad_norm": 1.1706192692433377,
|
|
"learning_rate": 4.7346938775510206e-05,
|
|
"loss": 1.2026,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.15089514066496162,
|
|
"grad_norm": 1.0347528096815952,
|
|
"learning_rate": 4.8163265306122455e-05,
|
|
"loss": 1.1698,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.1534526854219949,
|
|
"grad_norm": 0.8917967843832559,
|
|
"learning_rate": 4.89795918367347e-05,
|
|
"loss": 1.1935,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.15601023017902813,
|
|
"grad_norm": 0.8447536110052303,
|
|
"learning_rate": 4.9795918367346945e-05,
|
|
"loss": 1.1816,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.1585677749360614,
|
|
"grad_norm": 1.0808910383761972,
|
|
"learning_rate": 5.0612244897959194e-05,
|
|
"loss": 1.2148,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.16112531969309463,
|
|
"grad_norm": 1.0232789536513451,
|
|
"learning_rate": 5.1428571428571436e-05,
|
|
"loss": 1.1974,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.1636828644501279,
|
|
"grad_norm": 0.870169282881004,
|
|
"learning_rate": 5.224489795918367e-05,
|
|
"loss": 1.1914,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.16624040920716113,
|
|
"grad_norm": 0.7292663989493176,
|
|
"learning_rate": 5.306122448979592e-05,
|
|
"loss": 1.183,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.16879795396419436,
|
|
"grad_norm": 0.8315009268144099,
|
|
"learning_rate": 5.387755102040817e-05,
|
|
"loss": 1.1457,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.17135549872122763,
|
|
"grad_norm": 1.04261775715331,
|
|
"learning_rate": 5.469387755102041e-05,
|
|
"loss": 1.1724,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.17391304347826086,
|
|
"grad_norm": 1.0040970248925822,
|
|
"learning_rate": 5.551020408163266e-05,
|
|
"loss": 1.1469,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.17647058823529413,
|
|
"grad_norm": 1.0399514999943609,
|
|
"learning_rate": 5.63265306122449e-05,
|
|
"loss": 1.1335,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.17902813299232737,
|
|
"grad_norm": 0.9541534570834667,
|
|
"learning_rate": 5.714285714285715e-05,
|
|
"loss": 1.1475,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1815856777493606,
|
|
"grad_norm": 1.155886502502828,
|
|
"learning_rate": 5.79591836734694e-05,
|
|
"loss": 1.1595,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.18414322250639387,
|
|
"grad_norm": 1.4920355778823207,
|
|
"learning_rate": 5.877551020408164e-05,
|
|
"loss": 1.1764,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.1867007672634271,
|
|
"grad_norm": 0.8392580472572768,
|
|
"learning_rate": 5.959183673469389e-05,
|
|
"loss": 1.2046,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.18925831202046037,
|
|
"grad_norm": 1.3327976055634758,
|
|
"learning_rate": 6.040816326530612e-05,
|
|
"loss": 1.1601,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.1918158567774936,
|
|
"grad_norm": 1.2349989957797203,
|
|
"learning_rate": 6.122448979591836e-05,
|
|
"loss": 1.1524,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.19437340153452684,
|
|
"grad_norm": 1.1978584662405511,
|
|
"learning_rate": 6.204081632653062e-05,
|
|
"loss": 1.1559,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1969309462915601,
|
|
"grad_norm": 1.0353931821191475,
|
|
"learning_rate": 6.285714285714286e-05,
|
|
"loss": 1.1657,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.19948849104859334,
|
|
"grad_norm": 0.9094148187384907,
|
|
"learning_rate": 6.36734693877551e-05,
|
|
"loss": 1.1471,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.2020460358056266,
|
|
"grad_norm": 1.187032715727395,
|
|
"learning_rate": 6.448979591836736e-05,
|
|
"loss": 1.1408,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.20460358056265984,
|
|
"grad_norm": 1.0732720468700825,
|
|
"learning_rate": 6.53061224489796e-05,
|
|
"loss": 1.1565,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.2071611253196931,
|
|
"grad_norm": 0.8103161887096414,
|
|
"learning_rate": 6.612244897959184e-05,
|
|
"loss": 1.1394,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.20971867007672634,
|
|
"grad_norm": 1.0683800405517745,
|
|
"learning_rate": 6.693877551020408e-05,
|
|
"loss": 1.1366,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.21227621483375958,
|
|
"grad_norm": 1.0570001635125388,
|
|
"learning_rate": 6.775510204081634e-05,
|
|
"loss": 1.1521,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.21483375959079284,
|
|
"grad_norm": 1.0038253962694932,
|
|
"learning_rate": 6.857142857142857e-05,
|
|
"loss": 1.1485,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.21739130434782608,
|
|
"grad_norm": 1.1691956641199828,
|
|
"learning_rate": 6.938775510204082e-05,
|
|
"loss": 1.199,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.21994884910485935,
|
|
"grad_norm": 1.160205747766507,
|
|
"learning_rate": 7.020408163265306e-05,
|
|
"loss": 1.15,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.22250639386189258,
|
|
"grad_norm": 1.0403901594667788,
|
|
"learning_rate": 7.10204081632653e-05,
|
|
"loss": 1.1547,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.22506393861892582,
|
|
"grad_norm": 1.253302691517826,
|
|
"learning_rate": 7.183673469387756e-05,
|
|
"loss": 1.1808,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.22762148337595908,
|
|
"grad_norm": 1.0181115029064822,
|
|
"learning_rate": 7.26530612244898e-05,
|
|
"loss": 1.1399,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.23017902813299232,
|
|
"grad_norm": 1.179029120883534,
|
|
"learning_rate": 7.346938775510205e-05,
|
|
"loss": 1.1709,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.23273657289002558,
|
|
"grad_norm": 0.8065046535786934,
|
|
"learning_rate": 7.42857142857143e-05,
|
|
"loss": 1.1649,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 0.9920804997259105,
|
|
"learning_rate": 7.510204081632654e-05,
|
|
"loss": 1.1693,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.23785166240409208,
|
|
"grad_norm": 1.4041632222361236,
|
|
"learning_rate": 7.591836734693878e-05,
|
|
"loss": 1.1525,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.24040920716112532,
|
|
"grad_norm": 1.1202267325769892,
|
|
"learning_rate": 7.673469387755103e-05,
|
|
"loss": 1.1727,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.24296675191815856,
|
|
"grad_norm": 0.9214700487119486,
|
|
"learning_rate": 7.755102040816327e-05,
|
|
"loss": 1.1193,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.24552429667519182,
|
|
"grad_norm": 0.9731714014969046,
|
|
"learning_rate": 7.836734693877551e-05,
|
|
"loss": 1.1605,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.24808184143222506,
|
|
"grad_norm": 1.2370098654676154,
|
|
"learning_rate": 7.918367346938776e-05,
|
|
"loss": 1.1663,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.2506393861892583,
|
|
"grad_norm": 0.856182416906324,
|
|
"learning_rate": 8e-05,
|
|
"loss": 1.134,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.2531969309462916,
|
|
"grad_norm": 1.0086218583881408,
|
|
"learning_rate": 8.081632653061225e-05,
|
|
"loss": 1.1307,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.2557544757033248,
|
|
"grad_norm": 1.3360855997576195,
|
|
"learning_rate": 8.16326530612245e-05,
|
|
"loss": 1.1283,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.25831202046035806,
|
|
"grad_norm": 1.1442169715816302,
|
|
"learning_rate": 8.244897959183673e-05,
|
|
"loss": 1.1486,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.2608695652173913,
|
|
"grad_norm": 0.9528964485268216,
|
|
"learning_rate": 8.326530612244899e-05,
|
|
"loss": 1.1539,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.26342710997442453,
|
|
"grad_norm": 1.2014260964822987,
|
|
"learning_rate": 8.408163265306123e-05,
|
|
"loss": 1.1246,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.2659846547314578,
|
|
"grad_norm": 1.2896301281378582,
|
|
"learning_rate": 8.489795918367348e-05,
|
|
"loss": 1.1193,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.26854219948849106,
|
|
"grad_norm": 1.2365104040466046,
|
|
"learning_rate": 8.571428571428571e-05,
|
|
"loss": 1.1257,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.2710997442455243,
|
|
"grad_norm": 0.8909578987791607,
|
|
"learning_rate": 8.653061224489797e-05,
|
|
"loss": 1.1127,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.27365728900255754,
|
|
"grad_norm": 1.170325095506981,
|
|
"learning_rate": 8.734693877551021e-05,
|
|
"loss": 1.1441,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.27621483375959077,
|
|
"grad_norm": 0.8299590325351531,
|
|
"learning_rate": 8.816326530612245e-05,
|
|
"loss": 1.1199,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.27877237851662406,
|
|
"grad_norm": 1.0039851893132474,
|
|
"learning_rate": 8.897959183673471e-05,
|
|
"loss": 1.1454,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.2813299232736573,
|
|
"grad_norm": 1.309094467948393,
|
|
"learning_rate": 8.979591836734694e-05,
|
|
"loss": 1.1534,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.28388746803069054,
|
|
"grad_norm": 1.030513843956652,
|
|
"learning_rate": 9.061224489795919e-05,
|
|
"loss": 1.1518,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.2864450127877238,
|
|
"grad_norm": 1.1548472835092134,
|
|
"learning_rate": 9.142857142857143e-05,
|
|
"loss": 1.1422,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.289002557544757,
|
|
"grad_norm": 1.0781950243182032,
|
|
"learning_rate": 9.224489795918369e-05,
|
|
"loss": 1.1125,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.2915601023017903,
|
|
"grad_norm": 1.4696697800626741,
|
|
"learning_rate": 9.306122448979593e-05,
|
|
"loss": 1.147,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 0.8932168550895682,
|
|
"learning_rate": 9.387755102040817e-05,
|
|
"loss": 1.1225,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.2966751918158568,
|
|
"grad_norm": 1.4609624921794502,
|
|
"learning_rate": 9.469387755102041e-05,
|
|
"loss": 1.1402,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.29923273657289,
|
|
"grad_norm": 1.1608303447004447,
|
|
"learning_rate": 9.551020408163267e-05,
|
|
"loss": 1.1268,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.30179028132992325,
|
|
"grad_norm": 1.3699566135342083,
|
|
"learning_rate": 9.632653061224491e-05,
|
|
"loss": 1.1782,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.30434782608695654,
|
|
"grad_norm": 1.0951988856065036,
|
|
"learning_rate": 9.714285714285714e-05,
|
|
"loss": 1.1383,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.3069053708439898,
|
|
"grad_norm": 1.311071103466961,
|
|
"learning_rate": 9.79591836734694e-05,
|
|
"loss": 1.1485,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.309462915601023,
|
|
"grad_norm": 0.8986951965704776,
|
|
"learning_rate": 9.877551020408164e-05,
|
|
"loss": 1.1604,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.31202046035805625,
|
|
"grad_norm": 1.2243542530871734,
|
|
"learning_rate": 9.959183673469389e-05,
|
|
"loss": 1.1129,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.3145780051150895,
|
|
"grad_norm": 1.3033780963392814,
|
|
"learning_rate": 0.00010040816326530613,
|
|
"loss": 1.1344,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.3171355498721228,
|
|
"grad_norm": 1.1948786110977876,
|
|
"learning_rate": 0.00010122448979591839,
|
|
"loss": 1.1269,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.319693094629156,
|
|
"grad_norm": 1.142953671137177,
|
|
"learning_rate": 0.00010204081632653062,
|
|
"loss": 1.1078,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.32225063938618925,
|
|
"grad_norm": 1.1524456987304121,
|
|
"learning_rate": 0.00010285714285714287,
|
|
"loss": 1.1653,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.3248081841432225,
|
|
"grad_norm": 1.1658601331325984,
|
|
"learning_rate": 0.00010367346938775511,
|
|
"loss": 1.1088,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.3273657289002558,
|
|
"grad_norm": 1.72409589259486,
|
|
"learning_rate": 0.00010448979591836734,
|
|
"loss": 1.1289,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.329923273657289,
|
|
"grad_norm": 0.7330929431707807,
|
|
"learning_rate": 0.0001053061224489796,
|
|
"loss": 1.1191,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.33248081841432225,
|
|
"grad_norm": 1.2646590423606026,
|
|
"learning_rate": 0.00010612244897959184,
|
|
"loss": 1.1527,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.3350383631713555,
|
|
"grad_norm": 1.723349785426215,
|
|
"learning_rate": 0.0001069387755102041,
|
|
"loss": 1.118,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.3375959079283887,
|
|
"grad_norm": 0.8139452555798524,
|
|
"learning_rate": 0.00010775510204081634,
|
|
"loss": 1.1702,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.340153452685422,
|
|
"grad_norm": 1.1603914477308341,
|
|
"learning_rate": 0.00010857142857142859,
|
|
"loss": 1.1467,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.34271099744245526,
|
|
"grad_norm": 1.2110578835398869,
|
|
"learning_rate": 0.00010938775510204082,
|
|
"loss": 1.1174,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.3452685421994885,
|
|
"grad_norm": 1.3576198261777483,
|
|
"learning_rate": 0.00011020408163265307,
|
|
"loss": 1.1746,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.34782608695652173,
|
|
"grad_norm": 1.0573462588351146,
|
|
"learning_rate": 0.00011102040816326532,
|
|
"loss": 1.1333,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.35038363171355497,
|
|
"grad_norm": 1.3340765255200633,
|
|
"learning_rate": 0.00011183673469387757,
|
|
"loss": 1.1482,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 0.9284689786425085,
|
|
"learning_rate": 0.0001126530612244898,
|
|
"loss": 1.1304,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.3554987212276215,
|
|
"grad_norm": 1.4254480759114776,
|
|
"learning_rate": 0.00011346938775510204,
|
|
"loss": 1.1106,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.35805626598465473,
|
|
"grad_norm": 1.3594890583091455,
|
|
"learning_rate": 0.0001142857142857143,
|
|
"loss": 1.1664,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.36061381074168797,
|
|
"grad_norm": 1.086678024627712,
|
|
"learning_rate": 0.00011510204081632654,
|
|
"loss": 1.0802,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.3631713554987212,
|
|
"grad_norm": 1.533977830454029,
|
|
"learning_rate": 0.0001159183673469388,
|
|
"loss": 1.1332,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.3657289002557545,
|
|
"grad_norm": 0.940287237501315,
|
|
"learning_rate": 0.00011673469387755102,
|
|
"loss": 1.1573,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.36828644501278773,
|
|
"grad_norm": 1.2572408225100642,
|
|
"learning_rate": 0.00011755102040816328,
|
|
"loss": 1.1292,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.37084398976982097,
|
|
"grad_norm": 0.9995509690787548,
|
|
"learning_rate": 0.00011836734693877552,
|
|
"loss": 1.1375,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.3734015345268542,
|
|
"grad_norm": 1.6478855533912629,
|
|
"learning_rate": 0.00011918367346938777,
|
|
"loss": 1.1281,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.37595907928388744,
|
|
"grad_norm": 0.9807964464883856,
|
|
"learning_rate": 0.00012000000000000002,
|
|
"loss": 1.1604,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.37851662404092073,
|
|
"grad_norm": 1.3424151204814954,
|
|
"learning_rate": 0.00012081632653061224,
|
|
"loss": 1.1247,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.38107416879795397,
|
|
"grad_norm": 1.1827965041877697,
|
|
"learning_rate": 0.0001216326530612245,
|
|
"loss": 1.1087,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.3836317135549872,
|
|
"grad_norm": 1.374289317317436,
|
|
"learning_rate": 0.00012244897959183673,
|
|
"loss": 1.1174,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.38618925831202044,
|
|
"grad_norm": 1.4462982798920152,
|
|
"learning_rate": 0.00012326530612244898,
|
|
"loss": 1.1243,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.3887468030690537,
|
|
"grad_norm": 1.2338591594860693,
|
|
"learning_rate": 0.00012408163265306124,
|
|
"loss": 1.127,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.391304347826087,
|
|
"grad_norm": 0.9926991217212723,
|
|
"learning_rate": 0.0001248979591836735,
|
|
"loss": 1.1152,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.3938618925831202,
|
|
"grad_norm": 1.6602432782777794,
|
|
"learning_rate": 0.00012571428571428572,
|
|
"loss": 1.129,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.39641943734015345,
|
|
"grad_norm": 1.0710563936657969,
|
|
"learning_rate": 0.00012653061224489798,
|
|
"loss": 1.1347,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.3989769820971867,
|
|
"grad_norm": 1.0203164310897854,
|
|
"learning_rate": 0.0001273469387755102,
|
|
"loss": 1.1398,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.40153452685422,
|
|
"grad_norm": 1.4486120558817688,
|
|
"learning_rate": 0.00012816326530612246,
|
|
"loss": 1.1572,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.4040920716112532,
|
|
"grad_norm": 1.0665461325193415,
|
|
"learning_rate": 0.00012897959183673472,
|
|
"loss": 1.1443,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.40664961636828645,
|
|
"grad_norm": 1.6999184553867208,
|
|
"learning_rate": 0.00012979591836734695,
|
|
"loss": 1.1027,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.4092071611253197,
|
|
"grad_norm": 1.0289801155197138,
|
|
"learning_rate": 0.0001306122448979592,
|
|
"loss": 1.1194,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.4117647058823529,
|
|
"grad_norm": 1.5775539926551432,
|
|
"learning_rate": 0.00013142857142857143,
|
|
"loss": 1.1209,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.4143222506393862,
|
|
"grad_norm": 0.9132827293227751,
|
|
"learning_rate": 0.00013224489795918368,
|
|
"loss": 1.1115,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.41687979539641945,
|
|
"grad_norm": 1.8502610336806449,
|
|
"learning_rate": 0.00013306122448979594,
|
|
"loss": 1.1237,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.4194373401534527,
|
|
"grad_norm": 1.3709322904356605,
|
|
"learning_rate": 0.00013387755102040817,
|
|
"loss": 1.1353,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.4219948849104859,
|
|
"grad_norm": 1.1361849330749851,
|
|
"learning_rate": 0.00013469387755102042,
|
|
"loss": 1.1043,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.42455242966751916,
|
|
"grad_norm": 1.1401579492886242,
|
|
"learning_rate": 0.00013551020408163268,
|
|
"loss": 1.1252,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.42710997442455245,
|
|
"grad_norm": 1.171525164401231,
|
|
"learning_rate": 0.0001363265306122449,
|
|
"loss": 1.1226,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.4296675191815857,
|
|
"grad_norm": 1.7103135890270424,
|
|
"learning_rate": 0.00013714285714285713,
|
|
"loss": 1.1323,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.4322250639386189,
|
|
"grad_norm": 1.0590485560747558,
|
|
"learning_rate": 0.0001379591836734694,
|
|
"loss": 1.1318,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.43478260869565216,
|
|
"grad_norm": 1.1381323068879685,
|
|
"learning_rate": 0.00013877551020408165,
|
|
"loss": 1.1093,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.4373401534526854,
|
|
"grad_norm": 1.8095148756504853,
|
|
"learning_rate": 0.0001395918367346939,
|
|
"loss": 1.1297,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.4398976982097187,
|
|
"grad_norm": 1.022630524722603,
|
|
"learning_rate": 0.00014040816326530613,
|
|
"loss": 1.1217,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.4424552429667519,
|
|
"grad_norm": 1.3822427448836618,
|
|
"learning_rate": 0.00014122448979591838,
|
|
"loss": 1.145,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.44501278772378516,
|
|
"grad_norm": 1.3577366143882001,
|
|
"learning_rate": 0.0001420408163265306,
|
|
"loss": 1.151,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.4475703324808184,
|
|
"grad_norm": 1.1001324929653025,
|
|
"learning_rate": 0.00014285714285714287,
|
|
"loss": 1.1209,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.45012787723785164,
|
|
"grad_norm": 1.7043306971887084,
|
|
"learning_rate": 0.00014367346938775512,
|
|
"loss": 1.155,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.45268542199488493,
|
|
"grad_norm": 0.8908531162714106,
|
|
"learning_rate": 0.00014448979591836735,
|
|
"loss": 1.1264,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.45524296675191817,
|
|
"grad_norm": 2.0064867394818577,
|
|
"learning_rate": 0.0001453061224489796,
|
|
"loss": 1.1339,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.4578005115089514,
|
|
"grad_norm": 1.272017394425661,
|
|
"learning_rate": 0.00014612244897959183,
|
|
"loss": 1.1179,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.46035805626598464,
|
|
"grad_norm": 1.6809099682132984,
|
|
"learning_rate": 0.0001469387755102041,
|
|
"loss": 1.1306,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.4629156010230179,
|
|
"grad_norm": 1.2729546637062361,
|
|
"learning_rate": 0.00014775510204081635,
|
|
"loss": 1.1547,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.46547314578005117,
|
|
"grad_norm": 1.2637405257695475,
|
|
"learning_rate": 0.0001485714285714286,
|
|
"loss": 1.1234,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.4680306905370844,
|
|
"grad_norm": 1.3792667256601667,
|
|
"learning_rate": 0.00014938775510204083,
|
|
"loss": 1.1384,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 1.0581158807496975,
|
|
"learning_rate": 0.00015020408163265308,
|
|
"loss": 1.1308,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.4731457800511509,
|
|
"grad_norm": 1.2395276036732317,
|
|
"learning_rate": 0.0001510204081632653,
|
|
"loss": 1.142,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.47570332480818417,
|
|
"grad_norm": 1.1474988241030795,
|
|
"learning_rate": 0.00015183673469387757,
|
|
"loss": 1.1399,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.4782608695652174,
|
|
"grad_norm": 1.4488607840873033,
|
|
"learning_rate": 0.0001526530612244898,
|
|
"loss": 1.1247,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.48081841432225064,
|
|
"grad_norm": 0.9895262383072666,
|
|
"learning_rate": 0.00015346938775510205,
|
|
"loss": 1.1439,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.4833759590792839,
|
|
"grad_norm": 1.509540789570866,
|
|
"learning_rate": 0.0001542857142857143,
|
|
"loss": 1.1268,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.4859335038363171,
|
|
"grad_norm": 1.2634220572499701,
|
|
"learning_rate": 0.00015510204081632654,
|
|
"loss": 1.1315,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.4884910485933504,
|
|
"grad_norm": 2.03411519572473,
|
|
"learning_rate": 0.0001559183673469388,
|
|
"loss": 1.0859,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.49104859335038364,
|
|
"grad_norm": 1.1783378998438716,
|
|
"learning_rate": 0.00015673469387755102,
|
|
"loss": 1.122,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.4936061381074169,
|
|
"grad_norm": 1.869178693106169,
|
|
"learning_rate": 0.00015755102040816327,
|
|
"loss": 1.0953,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.4961636828644501,
|
|
"grad_norm": 1.4133576585465655,
|
|
"learning_rate": 0.00015836734693877553,
|
|
"loss": 1.0973,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.49872122762148335,
|
|
"grad_norm": 1.1007402607506083,
|
|
"learning_rate": 0.00015918367346938778,
|
|
"loss": 1.1666,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.5012787723785166,
|
|
"grad_norm": 1.0455333445001125,
|
|
"learning_rate": 0.00016,
|
|
"loss": 1.1244,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.5038363171355499,
|
|
"grad_norm": 1.1414091012657146,
|
|
"learning_rate": 0.00015999987240667874,
|
|
"loss": 1.118,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.5063938618925832,
|
|
"grad_norm": 1.1934725533176622,
|
|
"learning_rate": 0.0001599994896271219,
|
|
"loss": 1.1489,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.5089514066496164,
|
|
"grad_norm": 1.3418673611629677,
|
|
"learning_rate": 0.0001599988516625505,
|
|
"loss": 1.1172,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.5115089514066496,
|
|
"grad_norm": 1.2281301450926736,
|
|
"learning_rate": 0.00015999795851499954,
|
|
"loss": 1.124,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.5140664961636828,
|
|
"grad_norm": 1.4232277874832118,
|
|
"learning_rate": 0.000159996810187318,
|
|
"loss": 1.1087,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.5166240409207161,
|
|
"grad_norm": 1.2445810609035501,
|
|
"learning_rate": 0.0001599954066831689,
|
|
"loss": 1.0977,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.5191815856777494,
|
|
"grad_norm": 1.4902156849341144,
|
|
"learning_rate": 0.00015999374800702916,
|
|
"loss": 1.1278,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.5217391304347826,
|
|
"grad_norm": 0.9117749926569193,
|
|
"learning_rate": 0.00015999183416418963,
|
|
"loss": 1.0978,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.5242966751918159,
|
|
"grad_norm": 1.521914055307176,
|
|
"learning_rate": 0.0001599896651607552,
|
|
"loss": 1.1255,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.5268542199488491,
|
|
"grad_norm": 1.675086821646465,
|
|
"learning_rate": 0.00015998724100364464,
|
|
"loss": 1.1117,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.5294117647058824,
|
|
"grad_norm": 1.0370916213463357,
|
|
"learning_rate": 0.00015998456170059059,
|
|
"loss": 1.1269,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.5319693094629157,
|
|
"grad_norm": 1.4543936507994073,
|
|
"learning_rate": 0.00015998162726013954,
|
|
"loss": 1.1159,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.5345268542199488,
|
|
"grad_norm": 1.628168132567413,
|
|
"learning_rate": 0.00015997843769165193,
|
|
"loss": 1.1025,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.5370843989769821,
|
|
"grad_norm": 1.114123127352084,
|
|
"learning_rate": 0.0001599749930053019,
|
|
"loss": 1.0962,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.5396419437340153,
|
|
"grad_norm": 1.7051681399590384,
|
|
"learning_rate": 0.00015997129321207747,
|
|
"loss": 1.1216,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.5421994884910486,
|
|
"grad_norm": 0.9137353240287979,
|
|
"learning_rate": 0.00015996733832378032,
|
|
"loss": 1.0845,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.5447570332480819,
|
|
"grad_norm": 1.3585376285654678,
|
|
"learning_rate": 0.00015996312835302593,
|
|
"loss": 1.1337,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.5473145780051151,
|
|
"grad_norm": 0.986649874454745,
|
|
"learning_rate": 0.00015995866331324334,
|
|
"loss": 1.0791,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.5498721227621484,
|
|
"grad_norm": 1.4872086766761456,
|
|
"learning_rate": 0.00015995394321867534,
|
|
"loss": 1.0898,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.5524296675191815,
|
|
"grad_norm": 1.3583123340693906,
|
|
"learning_rate": 0.0001599489680843782,
|
|
"loss": 1.1221,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.5549872122762148,
|
|
"grad_norm": 1.1209846232833984,
|
|
"learning_rate": 0.00015994373792622182,
|
|
"loss": 1.0914,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.5575447570332481,
|
|
"grad_norm": 1.1159100799958372,
|
|
"learning_rate": 0.0001599382527608895,
|
|
"loss": 1.0659,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.5601023017902813,
|
|
"grad_norm": 1.014792737157986,
|
|
"learning_rate": 0.00015993251260587796,
|
|
"loss": 1.0895,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.5626598465473146,
|
|
"grad_norm": 1.3514884114926682,
|
|
"learning_rate": 0.00015992651747949742,
|
|
"loss": 1.1447,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.5652173913043478,
|
|
"grad_norm": 1.3662814180004041,
|
|
"learning_rate": 0.00015992026740087125,
|
|
"loss": 1.082,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.5677749360613811,
|
|
"grad_norm": 1.1729073479593213,
|
|
"learning_rate": 0.00015991376238993623,
|
|
"loss": 1.0858,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.5703324808184144,
|
|
"grad_norm": 1.098894416827083,
|
|
"learning_rate": 0.0001599070024674422,
|
|
"loss": 1.0903,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.5728900255754475,
|
|
"grad_norm": 0.975594652798118,
|
|
"learning_rate": 0.0001598999876549522,
|
|
"loss": 1.1162,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.5754475703324808,
|
|
"grad_norm": 1.0143269006614197,
|
|
"learning_rate": 0.00015989271797484236,
|
|
"loss": 1.1131,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.578005115089514,
|
|
"grad_norm": 1.3483287924450105,
|
|
"learning_rate": 0.00015988519345030167,
|
|
"loss": 1.0896,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.5805626598465473,
|
|
"grad_norm": 0.7520971748388883,
|
|
"learning_rate": 0.00015987741410533217,
|
|
"loss": 1.0953,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.5831202046035806,
|
|
"grad_norm": 1.3201762056381772,
|
|
"learning_rate": 0.0001598693799647486,
|
|
"loss": 1.0837,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.5856777493606138,
|
|
"grad_norm": 1.2193125892583727,
|
|
"learning_rate": 0.00015986109105417862,
|
|
"loss": 1.1026,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 1.3892856581992825,
|
|
"learning_rate": 0.0001598525474000624,
|
|
"loss": 1.1069,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.5907928388746803,
|
|
"grad_norm": 0.8831793540357707,
|
|
"learning_rate": 0.00015984374902965284,
|
|
"loss": 1.1079,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.5933503836317136,
|
|
"grad_norm": 0.8405263869404558,
|
|
"learning_rate": 0.00015983469597101517,
|
|
"loss": 1.088,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.5959079283887468,
|
|
"grad_norm": 0.8048081062282874,
|
|
"learning_rate": 0.0001598253882530272,
|
|
"loss": 1.0947,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.59846547314578,
|
|
"grad_norm": 1.1026453527649267,
|
|
"learning_rate": 0.00015981582590537897,
|
|
"loss": 1.0527,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.6010230179028133,
|
|
"grad_norm": 1.945124480668707,
|
|
"learning_rate": 0.0001598060089585728,
|
|
"loss": 1.0747,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.6035805626598465,
|
|
"grad_norm": 0.6633926296437849,
|
|
"learning_rate": 0.00015979593744392312,
|
|
"loss": 1.1013,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.6061381074168798,
|
|
"grad_norm": 1.9149178380903846,
|
|
"learning_rate": 0.00015978561139355635,
|
|
"loss": 1.0967,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.6086956521739131,
|
|
"grad_norm": 1.3222885863625786,
|
|
"learning_rate": 0.00015977503084041087,
|
|
"loss": 1.0733,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.6112531969309463,
|
|
"grad_norm": 1.0130031801765467,
|
|
"learning_rate": 0.00015976419581823688,
|
|
"loss": 1.1196,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.6138107416879796,
|
|
"grad_norm": 1.5551163600364186,
|
|
"learning_rate": 0.00015975310636159632,
|
|
"loss": 1.088,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.6163682864450127,
|
|
"grad_norm": 1.2158294095692619,
|
|
"learning_rate": 0.00015974176250586265,
|
|
"loss": 1.0768,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.618925831202046,
|
|
"grad_norm": 1.0765542476008974,
|
|
"learning_rate": 0.00015973016428722094,
|
|
"loss": 1.106,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.6214833759590793,
|
|
"grad_norm": 1.1132699812581053,
|
|
"learning_rate": 0.0001597183117426675,
|
|
"loss": 1.1002,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.6240409207161125,
|
|
"grad_norm": 1.3600712766399181,
|
|
"learning_rate": 0.00015970620491001004,
|
|
"loss": 1.1445,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.6265984654731458,
|
|
"grad_norm": 1.0416236386170334,
|
|
"learning_rate": 0.00015969384382786729,
|
|
"loss": 1.1019,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.629156010230179,
|
|
"grad_norm": 1.3027622469497735,
|
|
"learning_rate": 0.00015968122853566905,
|
|
"loss": 1.1002,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.6317135549872123,
|
|
"grad_norm": 0.8037304289524585,
|
|
"learning_rate": 0.000159668359073656,
|
|
"loss": 1.0892,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.6342710997442456,
|
|
"grad_norm": 0.9188404876547497,
|
|
"learning_rate": 0.00015965523548287956,
|
|
"loss": 1.1395,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.6368286445012787,
|
|
"grad_norm": 1.1903100937742757,
|
|
"learning_rate": 0.0001596418578052018,
|
|
"loss": 1.1157,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.639386189258312,
|
|
"grad_norm": 1.134136870599723,
|
|
"learning_rate": 0.0001596282260832953,
|
|
"loss": 1.0961,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.6419437340153452,
|
|
"grad_norm": 1.1666299453160198,
|
|
"learning_rate": 0.00015961434036064294,
|
|
"loss": 1.1019,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.6445012787723785,
|
|
"grad_norm": 0.8723696508206527,
|
|
"learning_rate": 0.00015960020068153785,
|
|
"loss": 1.1053,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.6470588235294118,
|
|
"grad_norm": 0.9568431382175138,
|
|
"learning_rate": 0.00015958580709108332,
|
|
"loss": 1.0848,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.649616368286445,
|
|
"grad_norm": 1.1129808719393837,
|
|
"learning_rate": 0.00015957115963519244,
|
|
"loss": 1.136,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.6521739130434783,
|
|
"grad_norm": 1.3963252311082919,
|
|
"learning_rate": 0.00015955625836058815,
|
|
"loss": 1.0952,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.6547314578005116,
|
|
"grad_norm": 0.9298685363556572,
|
|
"learning_rate": 0.00015954110331480302,
|
|
"loss": 1.0809,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.6572890025575447,
|
|
"grad_norm": 0.7001103257159264,
|
|
"learning_rate": 0.00015952569454617916,
|
|
"loss": 1.116,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.659846547314578,
|
|
"grad_norm": 0.9441648189630093,
|
|
"learning_rate": 0.00015951003210386793,
|
|
"loss": 1.0784,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.6624040920716112,
|
|
"grad_norm": 1.4002615649377306,
|
|
"learning_rate": 0.0001594941160378299,
|
|
"loss": 1.1071,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.6649616368286445,
|
|
"grad_norm": 0.8178386113146091,
|
|
"learning_rate": 0.00015947794639883473,
|
|
"loss": 1.087,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.6675191815856778,
|
|
"grad_norm": 1.452979203118016,
|
|
"learning_rate": 0.0001594615232384608,
|
|
"loss": 1.0604,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.670076726342711,
|
|
"grad_norm": 0.6774046196617319,
|
|
"learning_rate": 0.00015944484660909523,
|
|
"loss": 1.076,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.6726342710997443,
|
|
"grad_norm": 0.7670969521082094,
|
|
"learning_rate": 0.00015942791656393376,
|
|
"loss": 1.1204,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.6751918158567775,
|
|
"grad_norm": 1.0850513811767653,
|
|
"learning_rate": 0.00015941073315698035,
|
|
"loss": 1.0986,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.6777493606138107,
|
|
"grad_norm": 1.472017968872445,
|
|
"learning_rate": 0.00015939329644304724,
|
|
"loss": 1.1274,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.680306905370844,
|
|
"grad_norm": 0.9702787550395545,
|
|
"learning_rate": 0.0001593756064777546,
|
|
"loss": 1.0934,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.6828644501278772,
|
|
"grad_norm": 1.0584827946044062,
|
|
"learning_rate": 0.00015935766331753049,
|
|
"loss": 1.0471,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.6854219948849105,
|
|
"grad_norm": 0.8089889110807604,
|
|
"learning_rate": 0.00015933946701961055,
|
|
"loss": 1.0887,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.6879795396419437,
|
|
"grad_norm": 1.0320882417148256,
|
|
"learning_rate": 0.000159321017642038,
|
|
"loss": 1.0667,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.690537084398977,
|
|
"grad_norm": 1.4674982303373638,
|
|
"learning_rate": 0.00015930231524366326,
|
|
"loss": 1.1073,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.6930946291560103,
|
|
"grad_norm": 0.7320918729382444,
|
|
"learning_rate": 0.0001592833598841438,
|
|
"loss": 1.1053,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.6956521739130435,
|
|
"grad_norm": 0.8289503109780553,
|
|
"learning_rate": 0.00015926415162394414,
|
|
"loss": 1.0707,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.6982097186700768,
|
|
"grad_norm": 1.130825151382903,
|
|
"learning_rate": 0.00015924469052433534,
|
|
"loss": 1.0878,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.7007672634271099,
|
|
"grad_norm": 0.9816938036576663,
|
|
"learning_rate": 0.00015922497664739508,
|
|
"loss": 1.1036,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.7033248081841432,
|
|
"grad_norm": 1.1744231549177595,
|
|
"learning_rate": 0.0001592050100560074,
|
|
"loss": 1.0826,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 1.1244228971801966,
|
|
"learning_rate": 0.0001591847908138623,
|
|
"loss": 1.0992,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.7084398976982097,
|
|
"grad_norm": 1.0273673884618308,
|
|
"learning_rate": 0.00015916431898545583,
|
|
"loss": 1.1122,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.710997442455243,
|
|
"grad_norm": 1.3019719478481941,
|
|
"learning_rate": 0.0001591435946360897,
|
|
"loss": 1.0797,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.7135549872122762,
|
|
"grad_norm": 0.9179007336169464,
|
|
"learning_rate": 0.00015912261783187113,
|
|
"loss": 1.1083,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.7161125319693095,
|
|
"grad_norm": 1.3938652199122237,
|
|
"learning_rate": 0.00015910138863971265,
|
|
"loss": 1.0768,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.7186700767263428,
|
|
"grad_norm": 0.8460589876687793,
|
|
"learning_rate": 0.00015907990712733176,
|
|
"loss": 1.0675,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.7212276214833759,
|
|
"grad_norm": 1.2311027949600852,
|
|
"learning_rate": 0.00015905817336325098,
|
|
"loss": 1.095,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.7237851662404092,
|
|
"grad_norm": 0.5637046057878358,
|
|
"learning_rate": 0.00015903618741679735,
|
|
"loss": 1.0227,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.7263427109974424,
|
|
"grad_norm": 0.8864195638565602,
|
|
"learning_rate": 0.00015901394935810236,
|
|
"loss": 1.0894,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.7289002557544757,
|
|
"grad_norm": 1.118154448385255,
|
|
"learning_rate": 0.00015899145925810172,
|
|
"loss": 1.0708,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.731457800511509,
|
|
"grad_norm": 0.8797417608904688,
|
|
"learning_rate": 0.0001589687171885351,
|
|
"loss": 1.0973,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.7340153452685422,
|
|
"grad_norm": 1.2417892204976435,
|
|
"learning_rate": 0.0001589457232219459,
|
|
"loss": 1.0959,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.7365728900255755,
|
|
"grad_norm": 1.3823792436001885,
|
|
"learning_rate": 0.000158922477431681,
|
|
"loss": 1.0588,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.7391304347826086,
|
|
"grad_norm": 0.5914973374896305,
|
|
"learning_rate": 0.00015889897989189065,
|
|
"loss": 1.0877,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.7416879795396419,
|
|
"grad_norm": 0.6894697219091279,
|
|
"learning_rate": 0.00015887523067752805,
|
|
"loss": 1.0987,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.7442455242966752,
|
|
"grad_norm": 0.9378104999898202,
|
|
"learning_rate": 0.0001588512298643492,
|
|
"loss": 1.0813,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.7468030690537084,
|
|
"grad_norm": 1.5924222953617497,
|
|
"learning_rate": 0.00015882697752891273,
|
|
"loss": 1.0493,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.7493606138107417,
|
|
"grad_norm": 0.8644236985398326,
|
|
"learning_rate": 0.0001588024737485795,
|
|
"loss": 1.0745,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.7519181585677749,
|
|
"grad_norm": 1.2617771174370838,
|
|
"learning_rate": 0.00015877771860151255,
|
|
"loss": 1.0756,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.7544757033248082,
|
|
"grad_norm": 0.6053221801377883,
|
|
"learning_rate": 0.00015875271216667658,
|
|
"loss": 1.0624,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.7570332480818415,
|
|
"grad_norm": 0.8733719684486176,
|
|
"learning_rate": 0.00015872745452383797,
|
|
"loss": 1.0713,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.7595907928388747,
|
|
"grad_norm": 1.0570673007983702,
|
|
"learning_rate": 0.00015870194575356444,
|
|
"loss": 1.1115,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.7621483375959079,
|
|
"grad_norm": 0.7325728255149376,
|
|
"learning_rate": 0.00015867618593722464,
|
|
"loss": 1.0871,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.7647058823529411,
|
|
"grad_norm": 0.7340524897043603,
|
|
"learning_rate": 0.00015865017515698807,
|
|
"loss": 1.0979,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.7672634271099744,
|
|
"grad_norm": 1.1656279626023016,
|
|
"learning_rate": 0.00015862391349582484,
|
|
"loss": 1.0597,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.7698209718670077,
|
|
"grad_norm": 0.9978239568565908,
|
|
"learning_rate": 0.00015859740103750522,
|
|
"loss": 1.0932,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.7723785166240409,
|
|
"grad_norm": 1.878442480743071,
|
|
"learning_rate": 0.00015857063786659954,
|
|
"loss": 1.0938,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.7749360613810742,
|
|
"grad_norm": 0.6117011045915516,
|
|
"learning_rate": 0.00015854362406847786,
|
|
"loss": 1.0623,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.7774936061381074,
|
|
"grad_norm": 1.8420720325784072,
|
|
"learning_rate": 0.00015851635972930967,
|
|
"loss": 1.0699,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.7800511508951407,
|
|
"grad_norm": 1.002131752478182,
|
|
"learning_rate": 0.00015848884493606367,
|
|
"loss": 1.0826,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.782608695652174,
|
|
"grad_norm": 1.2471718061674597,
|
|
"learning_rate": 0.00015846107977650743,
|
|
"loss": 1.0755,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.7851662404092071,
|
|
"grad_norm": 0.9634733361160541,
|
|
"learning_rate": 0.0001584330643392072,
|
|
"loss": 1.0416,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.7877237851662404,
|
|
"grad_norm": 1.790526532103535,
|
|
"learning_rate": 0.00015840479871352754,
|
|
"loss": 1.0754,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.7902813299232737,
|
|
"grad_norm": 0.8667875735812341,
|
|
"learning_rate": 0.00015837628298963105,
|
|
"loss": 1.0934,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.7928388746803069,
|
|
"grad_norm": 1.4536288271279978,
|
|
"learning_rate": 0.00015834751725847816,
|
|
"loss": 1.0632,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.7953964194373402,
|
|
"grad_norm": 1.3777516183353187,
|
|
"learning_rate": 0.00015831850161182677,
|
|
"loss": 1.0956,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.7979539641943734,
|
|
"grad_norm": 0.7721449298753891,
|
|
"learning_rate": 0.0001582892361422319,
|
|
"loss": 1.1069,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.8005115089514067,
|
|
"grad_norm": 1.174156872017157,
|
|
"learning_rate": 0.00015825972094304555,
|
|
"loss": 1.0728,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.80306905370844,
|
|
"grad_norm": 1.2588808228888746,
|
|
"learning_rate": 0.00015822995610841623,
|
|
"loss": 1.0772,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.8056265984654731,
|
|
"grad_norm": 0.8720000426242472,
|
|
"learning_rate": 0.00015819994173328885,
|
|
"loss": 1.0654,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.8081841432225064,
|
|
"grad_norm": 0.923631788770043,
|
|
"learning_rate": 0.00015816967791340417,
|
|
"loss": 1.0668,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.8107416879795396,
|
|
"grad_norm": 1.1357229877804957,
|
|
"learning_rate": 0.00015813916474529885,
|
|
"loss": 1.0911,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.8132992327365729,
|
|
"grad_norm": 0.8907121901474587,
|
|
"learning_rate": 0.0001581084023263047,
|
|
"loss": 1.0826,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.8158567774936062,
|
|
"grad_norm": 1.0350783431396418,
|
|
"learning_rate": 0.00015807739075454874,
|
|
"loss": 1.0426,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.8184143222506394,
|
|
"grad_norm": 1.2795269410097496,
|
|
"learning_rate": 0.00015804613012895268,
|
|
"loss": 1.0731,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.8209718670076727,
|
|
"grad_norm": 0.8440033467786482,
|
|
"learning_rate": 0.0001580146205492327,
|
|
"loss": 1.0491,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 0.9336906509179427,
|
|
"learning_rate": 0.00015798286211589916,
|
|
"loss": 1.0796,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.8260869565217391,
|
|
"grad_norm": 1.243210147279451,
|
|
"learning_rate": 0.00015795085493025608,
|
|
"loss": 1.0998,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.8286445012787724,
|
|
"grad_norm": 0.985781736568132,
|
|
"learning_rate": 0.00015791859909440107,
|
|
"loss": 1.097,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.8312020460358056,
|
|
"grad_norm": 1.115722030381177,
|
|
"learning_rate": 0.00015788609471122485,
|
|
"loss": 1.0594,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.8337595907928389,
|
|
"grad_norm": 0.6317177707367972,
|
|
"learning_rate": 0.000157853341884411,
|
|
"loss": 1.0672,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.8363171355498721,
|
|
"grad_norm": 0.7614994384747567,
|
|
"learning_rate": 0.00015782034071843557,
|
|
"loss": 1.1076,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.8388746803069054,
|
|
"grad_norm": 0.6788203373242645,
|
|
"learning_rate": 0.00015778709131856675,
|
|
"loss": 1.0794,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.8414322250639387,
|
|
"grad_norm": 0.6573621171258895,
|
|
"learning_rate": 0.00015775359379086455,
|
|
"loss": 1.1175,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.8439897698209718,
|
|
"grad_norm": 0.865009547315977,
|
|
"learning_rate": 0.00015771984824218053,
|
|
"loss": 1.0893,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.8465473145780051,
|
|
"grad_norm": 1.0982989183876286,
|
|
"learning_rate": 0.00015768585478015732,
|
|
"loss": 1.0628,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.8491048593350383,
|
|
"grad_norm": 1.5816845014682415,
|
|
"learning_rate": 0.00015765161351322845,
|
|
"loss": 1.0553,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.8516624040920716,
|
|
"grad_norm": 0.5583122236625028,
|
|
"learning_rate": 0.0001576171245506178,
|
|
"loss": 1.1007,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.8542199488491049,
|
|
"grad_norm": 1.4589646002026686,
|
|
"learning_rate": 0.00015758238800233937,
|
|
"loss": 1.0354,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.8567774936061381,
|
|
"grad_norm": 1.1988373358126654,
|
|
"learning_rate": 0.00015754740397919703,
|
|
"loss": 1.0609,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.8593350383631714,
|
|
"grad_norm": 0.7798431918437426,
|
|
"learning_rate": 0.0001575121725927839,
|
|
"loss": 1.0599,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.8618925831202046,
|
|
"grad_norm": 0.8001399476748517,
|
|
"learning_rate": 0.00015747669395548228,
|
|
"loss": 1.0825,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.8644501278772379,
|
|
"grad_norm": 0.9268381518772149,
|
|
"learning_rate": 0.00015744096818046306,
|
|
"loss": 1.0867,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.8670076726342711,
|
|
"grad_norm": 0.8482506857320948,
|
|
"learning_rate": 0.00015740499538168548,
|
|
"loss": 1.0519,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.8695652173913043,
|
|
"grad_norm": 1.1051027320167537,
|
|
"learning_rate": 0.00015736877567389682,
|
|
"loss": 1.0926,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.8721227621483376,
|
|
"grad_norm": 1.1295814345497992,
|
|
"learning_rate": 0.00015733230917263182,
|
|
"loss": 1.0485,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.8746803069053708,
|
|
"grad_norm": 0.8381578992561258,
|
|
"learning_rate": 0.00015729559599421262,
|
|
"loss": 1.0742,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.8772378516624041,
|
|
"grad_norm": 1.1355285501553987,
|
|
"learning_rate": 0.00015725863625574808,
|
|
"loss": 1.0731,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.8797953964194374,
|
|
"grad_norm": 1.2716344612482289,
|
|
"learning_rate": 0.0001572214300751336,
|
|
"loss": 1.0818,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 0.7977797928903454,
|
|
"learning_rate": 0.00015718397757105072,
|
|
"loss": 1.0592,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.8849104859335039,
|
|
"grad_norm": 0.5888833117266756,
|
|
"learning_rate": 0.0001571462788629666,
|
|
"loss": 1.124,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.887468030690537,
|
|
"grad_norm": 0.7277724084604381,
|
|
"learning_rate": 0.00015710833407113386,
|
|
"loss": 1.0076,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.8900255754475703,
|
|
"grad_norm": 0.7175876926395411,
|
|
"learning_rate": 0.00015707014331659008,
|
|
"loss": 1.0735,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.8925831202046036,
|
|
"grad_norm": 0.8127426786215441,
|
|
"learning_rate": 0.00015703170672115737,
|
|
"loss": 1.0582,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.8951406649616368,
|
|
"grad_norm": 1.0648976192629485,
|
|
"learning_rate": 0.00015699302440744202,
|
|
"loss": 1.0788,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.8976982097186701,
|
|
"grad_norm": 1.2133128800930093,
|
|
"learning_rate": 0.00015695409649883418,
|
|
"loss": 1.0986,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.9002557544757033,
|
|
"grad_norm": 0.946491692276404,
|
|
"learning_rate": 0.0001569149231195074,
|
|
"loss": 1.0522,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.9028132992327366,
|
|
"grad_norm": 1.2375939940771874,
|
|
"learning_rate": 0.0001568755043944182,
|
|
"loss": 1.077,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.9053708439897699,
|
|
"grad_norm": 0.7734830655451521,
|
|
"learning_rate": 0.00015683584044930572,
|
|
"loss": 1.0659,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.907928388746803,
|
|
"grad_norm": 0.6097683019560797,
|
|
"learning_rate": 0.00015679593141069132,
|
|
"loss": 1.0446,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.9104859335038363,
|
|
"grad_norm": 0.5759587093662797,
|
|
"learning_rate": 0.0001567557774058782,
|
|
"loss": 1.0577,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.9130434782608695,
|
|
"grad_norm": 0.5878753626840652,
|
|
"learning_rate": 0.0001567153785629509,
|
|
"loss": 1.0675,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.9156010230179028,
|
|
"grad_norm": 0.6653732754348032,
|
|
"learning_rate": 0.000156674735010775,
|
|
"loss": 1.0891,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.9181585677749361,
|
|
"grad_norm": 0.768263015413779,
|
|
"learning_rate": 0.00015663384687899663,
|
|
"loss": 1.0715,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.9207161125319693,
|
|
"grad_norm": 0.9765055577703315,
|
|
"learning_rate": 0.00015659271429804215,
|
|
"loss": 1.0396,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.9232736572890026,
|
|
"grad_norm": 1.4554265699809417,
|
|
"learning_rate": 0.00015655133739911757,
|
|
"loss": 1.0919,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.9258312020460358,
|
|
"grad_norm": 0.7208280463855818,
|
|
"learning_rate": 0.0001565097163142083,
|
|
"loss": 1.0151,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.928388746803069,
|
|
"grad_norm": 0.8611710190483517,
|
|
"learning_rate": 0.00015646785117607865,
|
|
"loss": 1.0796,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.9309462915601023,
|
|
"grad_norm": 1.1291766944081427,
|
|
"learning_rate": 0.00015642574211827142,
|
|
"loss": 1.0651,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.9335038363171355,
|
|
"grad_norm": 1.0023408896760695,
|
|
"learning_rate": 0.00015638338927510752,
|
|
"loss": 1.0785,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.9360613810741688,
|
|
"grad_norm": 1.2325468393537922,
|
|
"learning_rate": 0.00015634079278168542,
|
|
"loss": 1.1032,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.9386189258312021,
|
|
"grad_norm": 0.8116887550297889,
|
|
"learning_rate": 0.00015629795277388077,
|
|
"loss": 1.0784,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 0.8465793191190484,
|
|
"learning_rate": 0.00015625486938834613,
|
|
"loss": 1.0729,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.9437340153452686,
|
|
"grad_norm": 0.8630348039771475,
|
|
"learning_rate": 0.00015621154276251024,
|
|
"loss": 1.0676,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.9462915601023018,
|
|
"grad_norm": 0.8909789093135501,
|
|
"learning_rate": 0.00015616797303457782,
|
|
"loss": 1.0626,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.948849104859335,
|
|
"grad_norm": 1.3639686895279477,
|
|
"learning_rate": 0.00015612416034352906,
|
|
"loss": 1.0935,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.9514066496163683,
|
|
"grad_norm": 0.7547937680438821,
|
|
"learning_rate": 0.00015608010482911908,
|
|
"loss": 1.0714,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.9539641943734015,
|
|
"grad_norm": 0.6097577881338234,
|
|
"learning_rate": 0.00015603580663187765,
|
|
"loss": 1.0757,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.9565217391304348,
|
|
"grad_norm": 0.7408592240149442,
|
|
"learning_rate": 0.00015599126589310857,
|
|
"loss": 1.0762,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.959079283887468,
|
|
"grad_norm": 0.8123009573402776,
|
|
"learning_rate": 0.00015594648275488944,
|
|
"loss": 1.0991,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.9616368286445013,
|
|
"grad_norm": 0.8997010834862542,
|
|
"learning_rate": 0.00015590145736007091,
|
|
"loss": 1.0493,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.9641943734015346,
|
|
"grad_norm": 1.211365253216414,
|
|
"learning_rate": 0.00015585618985227657,
|
|
"loss": 1.0845,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.9667519181585678,
|
|
"grad_norm": 1.1546641796621098,
|
|
"learning_rate": 0.00015581068037590212,
|
|
"loss": 1.0851,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.969309462915601,
|
|
"grad_norm": 1.1673337321688009,
|
|
"learning_rate": 0.00015576492907611524,
|
|
"loss": 1.054,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.9718670076726342,
|
|
"grad_norm": 0.6737544031199463,
|
|
"learning_rate": 0.00015571893609885493,
|
|
"loss": 1.0377,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.9744245524296675,
|
|
"grad_norm": 0.8151328439701532,
|
|
"learning_rate": 0.00015567270159083107,
|
|
"loss": 1.0698,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.9769820971867008,
|
|
"grad_norm": 0.9445758081131683,
|
|
"learning_rate": 0.00015562622569952408,
|
|
"loss": 1.0723,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.979539641943734,
|
|
"grad_norm": 1.0143687259241263,
|
|
"learning_rate": 0.00015557950857318425,
|
|
"loss": 1.0753,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.9820971867007673,
|
|
"grad_norm": 1.0909144236610384,
|
|
"learning_rate": 0.00015553255036083145,
|
|
"loss": 1.0301,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.9846547314578005,
|
|
"grad_norm": 1.2562026829762518,
|
|
"learning_rate": 0.0001554853512122545,
|
|
"loss": 1.1103,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.9872122762148338,
|
|
"grad_norm": 0.7752538678352305,
|
|
"learning_rate": 0.00015543791127801084,
|
|
"loss": 1.0633,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.989769820971867,
|
|
"grad_norm": 0.6480828071883595,
|
|
"learning_rate": 0.0001553902307094259,
|
|
"loss": 1.0769,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.9923273657289002,
|
|
"grad_norm": 0.8764236095011647,
|
|
"learning_rate": 0.00015534230965859276,
|
|
"loss": 1.0905,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.9948849104859335,
|
|
"grad_norm": 1.1982183014384076,
|
|
"learning_rate": 0.00015529414827837156,
|
|
"loss": 1.0737,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.9974424552429667,
|
|
"grad_norm": 1.0015924584874194,
|
|
"learning_rate": 0.00015524574672238906,
|
|
"loss": 1.0539,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 1.3714997731388885,
|
|
"learning_rate": 0.00015519710514503814,
|
|
"loss": 1.0846,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 1.0025575447570332,
|
|
"grad_norm": 0.5566435857743947,
|
|
"learning_rate": 0.00015514822370147732,
|
|
"loss": 1.0432,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 1.0051150895140666,
|
|
"grad_norm": 0.7918387632633654,
|
|
"learning_rate": 0.00015509910254763023,
|
|
"loss": 1.0578,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 1.0076726342710998,
|
|
"grad_norm": 1.256938009132569,
|
|
"learning_rate": 0.0001550497418401852,
|
|
"loss": 1.0306,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 1.010230179028133,
|
|
"grad_norm": 1.2314520681198668,
|
|
"learning_rate": 0.00015500014173659457,
|
|
"loss": 1.0383,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 1.0127877237851663,
|
|
"grad_norm": 0.923069995672888,
|
|
"learning_rate": 0.00015495030239507442,
|
|
"loss": 1.0573,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 1.0153452685421995,
|
|
"grad_norm": 0.936236903889318,
|
|
"learning_rate": 0.00015490022397460392,
|
|
"loss": 1.0573,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 1.0179028132992327,
|
|
"grad_norm": 0.6628420746065794,
|
|
"learning_rate": 0.0001548499066349249,
|
|
"loss": 1.0474,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 1.020460358056266,
|
|
"grad_norm": 0.47759016557709666,
|
|
"learning_rate": 0.00015479935053654126,
|
|
"loss": 1.0175,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 1.0230179028132993,
|
|
"grad_norm": 0.61072929455943,
|
|
"learning_rate": 0.00015474855584071847,
|
|
"loss": 1.0724,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 1.0255754475703325,
|
|
"grad_norm": 0.607075351205747,
|
|
"learning_rate": 0.0001546975227094832,
|
|
"loss": 1.0527,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 1.0281329923273657,
|
|
"grad_norm": 0.5993295243529821,
|
|
"learning_rate": 0.00015464625130562256,
|
|
"loss": 1.0695,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 1.030690537084399,
|
|
"grad_norm": 0.9177173231285568,
|
|
"learning_rate": 0.0001545947417926838,
|
|
"loss": 1.0344,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 1.0332480818414322,
|
|
"grad_norm": 1.4911897806007488,
|
|
"learning_rate": 0.00015454299433497362,
|
|
"loss": 1.0443,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 1.0358056265984654,
|
|
"grad_norm": 0.6069008914687445,
|
|
"learning_rate": 0.00015449100909755784,
|
|
"loss": 1.0393,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 1.0383631713554988,
|
|
"grad_norm": 0.9163856494121054,
|
|
"learning_rate": 0.00015443878624626066,
|
|
"loss": 1.0737,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 1.040920716112532,
|
|
"grad_norm": 1.369010227838881,
|
|
"learning_rate": 0.0001543863259476642,
|
|
"loss": 1.0106,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 1.0434782608695652,
|
|
"grad_norm": 0.8651156065397383,
|
|
"learning_rate": 0.00015433362836910817,
|
|
"loss": 1.0399,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 1.0460358056265984,
|
|
"grad_norm": 0.8527058058258006,
|
|
"learning_rate": 0.00015428069367868892,
|
|
"loss": 1.0222,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 1.0485933503836318,
|
|
"grad_norm": 0.7680613356197566,
|
|
"learning_rate": 0.00015422752204525937,
|
|
"loss": 1.0161,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 1.051150895140665,
|
|
"grad_norm": 1.0745283772693792,
|
|
"learning_rate": 0.0001541741136384281,
|
|
"loss": 1.0446,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 1.0537084398976981,
|
|
"grad_norm": 1.0936408809378098,
|
|
"learning_rate": 0.00015412046862855902,
|
|
"loss": 1.0245,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 1.0562659846547315,
|
|
"grad_norm": 0.9926125079651018,
|
|
"learning_rate": 0.00015406658718677076,
|
|
"loss": 1.0308,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 1.0588235294117647,
|
|
"grad_norm": 1.1175953083121093,
|
|
"learning_rate": 0.00015401246948493612,
|
|
"loss": 1.0768,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 1.061381074168798,
|
|
"grad_norm": 0.8210085027845057,
|
|
"learning_rate": 0.00015395811569568154,
|
|
"loss": 1.0473,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 1.0639386189258313,
|
|
"grad_norm": 0.9226634652720442,
|
|
"learning_rate": 0.00015390352599238655,
|
|
"loss": 1.0299,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 1.0664961636828645,
|
|
"grad_norm": 1.2471786951586945,
|
|
"learning_rate": 0.00015384870054918314,
|
|
"loss": 1.0139,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 1.0690537084398977,
|
|
"grad_norm": 0.8806851237766041,
|
|
"learning_rate": 0.00015379363954095535,
|
|
"loss": 1.0237,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 1.0716112531969308,
|
|
"grad_norm": 0.727069173053958,
|
|
"learning_rate": 0.0001537383431433386,
|
|
"loss": 1.0786,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 1.0741687979539642,
|
|
"grad_norm": 0.6337579771769642,
|
|
"learning_rate": 0.00015368281153271918,
|
|
"loss": 1.0264,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 1.0767263427109974,
|
|
"grad_norm": 0.8868138217653037,
|
|
"learning_rate": 0.0001536270448862336,
|
|
"loss": 1.0413,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 1.0792838874680306,
|
|
"grad_norm": 0.8013668539540468,
|
|
"learning_rate": 0.00015357104338176823,
|
|
"loss": 1.0305,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 1.081841432225064,
|
|
"grad_norm": 1.0111414586274687,
|
|
"learning_rate": 0.00015351480719795845,
|
|
"loss": 1.0177,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 1.0843989769820972,
|
|
"grad_norm": 1.3128642093201517,
|
|
"learning_rate": 0.00015345833651418835,
|
|
"loss": 1.0663,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 1.0869565217391304,
|
|
"grad_norm": 0.7074818377117421,
|
|
"learning_rate": 0.00015340163151058997,
|
|
"loss": 1.0262,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 1.0895140664961638,
|
|
"grad_norm": 0.7476417982075203,
|
|
"learning_rate": 0.00015334469236804278,
|
|
"loss": 1.0166,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 1.092071611253197,
|
|
"grad_norm": 0.7163607115802371,
|
|
"learning_rate": 0.00015328751926817314,
|
|
"loss": 1.041,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 1.0946291560102301,
|
|
"grad_norm": 1.0614664295591614,
|
|
"learning_rate": 0.0001532301123933537,
|
|
"loss": 1.0236,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 1.0971867007672633,
|
|
"grad_norm": 1.265439568931787,
|
|
"learning_rate": 0.00015317247192670282,
|
|
"loss": 1.0528,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 1.0997442455242967,
|
|
"grad_norm": 0.7025263297795912,
|
|
"learning_rate": 0.00015311459805208397,
|
|
"loss": 1.0277,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 1.10230179028133,
|
|
"grad_norm": 0.8167641509021383,
|
|
"learning_rate": 0.0001530564909541051,
|
|
"loss": 1.0582,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 1.104859335038363,
|
|
"grad_norm": 0.8716549745993203,
|
|
"learning_rate": 0.0001529981508181182,
|
|
"loss": 1.077,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 1.1074168797953965,
|
|
"grad_norm": 0.7246028123611893,
|
|
"learning_rate": 0.00015293957783021854,
|
|
"loss": 1.0542,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 1.1099744245524297,
|
|
"grad_norm": 0.6784199036145839,
|
|
"learning_rate": 0.0001528807721772442,
|
|
"loss": 1.0418,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 1.1125319693094629,
|
|
"grad_norm": 0.8506075875171634,
|
|
"learning_rate": 0.00015282173404677533,
|
|
"loss": 1.0343,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 1.1150895140664963,
|
|
"grad_norm": 0.8375757880980345,
|
|
"learning_rate": 0.00015276246362713375,
|
|
"loss": 1.0341,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 1.1176470588235294,
|
|
"grad_norm": 0.7540319449850698,
|
|
"learning_rate": 0.00015270296110738221,
|
|
"loss": 1.014,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 1.1202046035805626,
|
|
"grad_norm": 0.9166441931706429,
|
|
"learning_rate": 0.0001526432266773238,
|
|
"loss": 1.0269,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 1.1227621483375958,
|
|
"grad_norm": 1.0822305273066126,
|
|
"learning_rate": 0.0001525832605275014,
|
|
"loss": 1.0472,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 1.1253196930946292,
|
|
"grad_norm": 0.9450917972251209,
|
|
"learning_rate": 0.000152523062849197,
|
|
"loss": 1.024,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 1.1278772378516624,
|
|
"grad_norm": 1.1333566165350994,
|
|
"learning_rate": 0.0001524626338344311,
|
|
"loss": 1.0448,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 1.1304347826086956,
|
|
"grad_norm": 1.177581998734778,
|
|
"learning_rate": 0.00015240197367596226,
|
|
"loss": 1.0244,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 1.132992327365729,
|
|
"grad_norm": 0.8866480092962395,
|
|
"learning_rate": 0.00015234108256728616,
|
|
"loss": 1.0499,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 1.1355498721227621,
|
|
"grad_norm": 0.6882160288370965,
|
|
"learning_rate": 0.00015227996070263535,
|
|
"loss": 1.0151,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 1.1381074168797953,
|
|
"grad_norm": 0.7419397568748587,
|
|
"learning_rate": 0.00015221860827697832,
|
|
"loss": 1.0345,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 1.1406649616368287,
|
|
"grad_norm": 0.854881931061872,
|
|
"learning_rate": 0.00015215702548601907,
|
|
"loss": 1.008,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 1.143222506393862,
|
|
"grad_norm": 0.8138274292487687,
|
|
"learning_rate": 0.00015209521252619644,
|
|
"loss": 0.9962,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 1.145780051150895,
|
|
"grad_norm": 0.7536271031473499,
|
|
"learning_rate": 0.00015203316959468344,
|
|
"loss": 1.0299,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 1.1483375959079285,
|
|
"grad_norm": 0.9110426205382722,
|
|
"learning_rate": 0.0001519708968893867,
|
|
"loss": 1.019,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 1.1508951406649617,
|
|
"grad_norm": 1.2088991550402766,
|
|
"learning_rate": 0.00015190839460894567,
|
|
"loss": 1.0708,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 1.1534526854219949,
|
|
"grad_norm": 0.8573913285400658,
|
|
"learning_rate": 0.00015184566295273227,
|
|
"loss": 1.0417,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 1.156010230179028,
|
|
"grad_norm": 0.6951469442919158,
|
|
"learning_rate": 0.00015178270212084995,
|
|
"loss": 1.0464,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 1.1585677749360614,
|
|
"grad_norm": 0.6419948195410027,
|
|
"learning_rate": 0.00015171951231413328,
|
|
"loss": 1.0612,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 1.1611253196930946,
|
|
"grad_norm": 0.6841619518854335,
|
|
"learning_rate": 0.00015165609373414722,
|
|
"loss": 1.0325,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 1.1636828644501278,
|
|
"grad_norm": 0.8037291566188051,
|
|
"learning_rate": 0.0001515924465831864,
|
|
"loss": 1.0295,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 1.1662404092071612,
|
|
"grad_norm": 1.1795212959071533,
|
|
"learning_rate": 0.00015152857106427462,
|
|
"loss": 1.0231,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 1.1687979539641944,
|
|
"grad_norm": 1.1007425485117117,
|
|
"learning_rate": 0.00015146446738116412,
|
|
"loss": 1.015,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 1.1713554987212276,
|
|
"grad_norm": 1.072656472389329,
|
|
"learning_rate": 0.00015140013573833498,
|
|
"loss": 1.0195,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 1.1739130434782608,
|
|
"grad_norm": 0.9339605123999745,
|
|
"learning_rate": 0.00015133557634099435,
|
|
"loss": 1.026,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 0.8580962355846978,
|
|
"learning_rate": 0.00015127078939507595,
|
|
"loss": 1.055,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 1.1790281329923273,
|
|
"grad_norm": 1.028703820245517,
|
|
"learning_rate": 0.00015120577510723934,
|
|
"loss": 1.0768,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 1.1815856777493605,
|
|
"grad_norm": 1.1535909770008528,
|
|
"learning_rate": 0.00015114053368486919,
|
|
"loss": 1.0227,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 1.184143222506394,
|
|
"grad_norm": 0.7549525724152655,
|
|
"learning_rate": 0.0001510750653360748,
|
|
"loss": 1.0101,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 1.186700767263427,
|
|
"grad_norm": 0.6560485854233202,
|
|
"learning_rate": 0.00015100937026968922,
|
|
"loss": 1.0372,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 1.1892583120204603,
|
|
"grad_norm": 0.5946694031246916,
|
|
"learning_rate": 0.0001509434486952688,
|
|
"loss": 1.0471,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 1.1918158567774937,
|
|
"grad_norm": 0.5311919492244818,
|
|
"learning_rate": 0.00015087730082309232,
|
|
"loss": 1.0431,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 1.1943734015345269,
|
|
"grad_norm": 0.5154174371307244,
|
|
"learning_rate": 0.00015081092686416043,
|
|
"loss": 1.0199,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 1.19693094629156,
|
|
"grad_norm": 0.505383670902881,
|
|
"learning_rate": 0.00015074432703019504,
|
|
"loss": 1.0706,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 1.1994884910485935,
|
|
"grad_norm": 0.4907682209551291,
|
|
"learning_rate": 0.00015067750153363845,
|
|
"loss": 1.0346,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 1.2020460358056266,
|
|
"grad_norm": 0.39066205442828883,
|
|
"learning_rate": 0.00015061045058765282,
|
|
"loss": 1.0554,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 1.2046035805626598,
|
|
"grad_norm": 0.34420579713251814,
|
|
"learning_rate": 0.0001505431744061195,
|
|
"loss": 1.0279,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 1.207161125319693,
|
|
"grad_norm": 0.43688810183174753,
|
|
"learning_rate": 0.0001504756732036383,
|
|
"loss": 0.9885,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 1.2097186700767264,
|
|
"grad_norm": 0.4751633909038584,
|
|
"learning_rate": 0.00015040794719552676,
|
|
"loss": 1.0432,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 1.2122762148337596,
|
|
"grad_norm": 0.5269656781598262,
|
|
"learning_rate": 0.00015033999659781953,
|
|
"loss": 1.027,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 1.2148337595907928,
|
|
"grad_norm": 0.5712060191776948,
|
|
"learning_rate": 0.00015027182162726769,
|
|
"loss": 1.0421,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 1.2173913043478262,
|
|
"grad_norm": 0.6411090148779058,
|
|
"learning_rate": 0.000150203422501338,
|
|
"loss": 1.013,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 1.2199488491048593,
|
|
"grad_norm": 0.922985318540642,
|
|
"learning_rate": 0.00015013479943821225,
|
|
"loss": 1.0671,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 1.2225063938618925,
|
|
"grad_norm": 1.411342942170953,
|
|
"learning_rate": 0.00015006595265678655,
|
|
"loss": 1.0506,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 1.2250639386189257,
|
|
"grad_norm": 0.7044934707287243,
|
|
"learning_rate": 0.00014999688237667065,
|
|
"loss": 1.058,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 1.227621483375959,
|
|
"grad_norm": 0.844446069080729,
|
|
"learning_rate": 0.00014992758881818722,
|
|
"loss": 1.0112,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 1.2301790281329923,
|
|
"grad_norm": 0.863795773273135,
|
|
"learning_rate": 0.00014985807220237112,
|
|
"loss": 1.0223,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 1.2327365728900257,
|
|
"grad_norm": 1.1955253111068895,
|
|
"learning_rate": 0.00014978833275096872,
|
|
"loss": 1.0437,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 1.2352941176470589,
|
|
"grad_norm": 0.9710436321082059,
|
|
"learning_rate": 0.00014971837068643732,
|
|
"loss": 1.0331,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 1.237851662404092,
|
|
"grad_norm": 0.9838152365395039,
|
|
"learning_rate": 0.00014964818623194412,
|
|
"loss": 1.0503,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 1.2404092071611252,
|
|
"grad_norm": 1.3111101164937617,
|
|
"learning_rate": 0.00014957777961136588,
|
|
"loss": 1.0536,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 1.2429667519181586,
|
|
"grad_norm": 0.9426881648292104,
|
|
"learning_rate": 0.00014950715104928794,
|
|
"loss": 1.0452,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 1.2455242966751918,
|
|
"grad_norm": 0.9708865131907598,
|
|
"learning_rate": 0.0001494363007710036,
|
|
"loss": 1.0205,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 1.248081841432225,
|
|
"grad_norm": 0.735118260321914,
|
|
"learning_rate": 0.00014936522900251348,
|
|
"loss": 1.0355,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 1.2506393861892584,
|
|
"grad_norm": 0.8962772386972064,
|
|
"learning_rate": 0.00014929393597052458,
|
|
"loss": 1.0455,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 1.2531969309462916,
|
|
"grad_norm": 0.6546912235303116,
|
|
"learning_rate": 0.00014922242190244981,
|
|
"loss": 1.0625,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.2557544757033248,
|
|
"grad_norm": 0.5383201135001036,
|
|
"learning_rate": 0.0001491506870264071,
|
|
"loss": 1.0346,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 1.258312020460358,
|
|
"grad_norm": 0.8097960021561659,
|
|
"learning_rate": 0.00014907873157121875,
|
|
"loss": 1.0605,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 1.2608695652173914,
|
|
"grad_norm": 0.670808763781411,
|
|
"learning_rate": 0.00014900655576641057,
|
|
"loss": 1.0282,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 1.2634271099744245,
|
|
"grad_norm": 0.7979394762122887,
|
|
"learning_rate": 0.00014893415984221141,
|
|
"loss": 1.0264,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 1.265984654731458,
|
|
"grad_norm": 1.026770422301297,
|
|
"learning_rate": 0.00014886154402955217,
|
|
"loss": 1.0514,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 1.2685421994884911,
|
|
"grad_norm": 1.032280976957703,
|
|
"learning_rate": 0.00014878870856006513,
|
|
"loss": 1.0408,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 1.2710997442455243,
|
|
"grad_norm": 1.1296018012465836,
|
|
"learning_rate": 0.00014871565366608329,
|
|
"loss": 1.0338,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 1.2736572890025575,
|
|
"grad_norm": 0.9749313409863054,
|
|
"learning_rate": 0.0001486423795806396,
|
|
"loss": 1.0193,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 1.2762148337595907,
|
|
"grad_norm": 0.8177048634676223,
|
|
"learning_rate": 0.00014856888653746607,
|
|
"loss": 1.0324,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 1.278772378516624,
|
|
"grad_norm": 0.7747012524305006,
|
|
"learning_rate": 0.00014849517477099334,
|
|
"loss": 1.0076,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.2813299232736572,
|
|
"grad_norm": 0.8429034680075405,
|
|
"learning_rate": 0.00014842124451634956,
|
|
"loss": 1.0266,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 1.2838874680306906,
|
|
"grad_norm": 1.0704964042478793,
|
|
"learning_rate": 0.00014834709600935995,
|
|
"loss": 1.033,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 1.2864450127877238,
|
|
"grad_norm": 1.1030823411998563,
|
|
"learning_rate": 0.00014827272948654584,
|
|
"loss": 1.0519,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 1.289002557544757,
|
|
"grad_norm": 0.7099638951621647,
|
|
"learning_rate": 0.00014819814518512403,
|
|
"loss": 1.0258,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 1.2915601023017902,
|
|
"grad_norm": 0.5286675820388321,
|
|
"learning_rate": 0.000148123343343006,
|
|
"loss": 1.0398,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 1.2941176470588236,
|
|
"grad_norm": 0.5306607233732565,
|
|
"learning_rate": 0.0001480483241987971,
|
|
"loss": 1.0155,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 1.2966751918158568,
|
|
"grad_norm": 0.6060078277369222,
|
|
"learning_rate": 0.0001479730879917959,
|
|
"loss": 1.0486,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 1.29923273657289,
|
|
"grad_norm": 0.8537119327365599,
|
|
"learning_rate": 0.00014789763496199335,
|
|
"loss": 1.0115,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 1.3017902813299234,
|
|
"grad_norm": 1.0701098672995177,
|
|
"learning_rate": 0.00014782196535007198,
|
|
"loss": 1.0449,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 1.3043478260869565,
|
|
"grad_norm": 1.0452113870678157,
|
|
"learning_rate": 0.00014774607939740524,
|
|
"loss": 1.0132,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 1.3069053708439897,
|
|
"grad_norm": 1.0085703377598065,
|
|
"learning_rate": 0.0001476699773460567,
|
|
"loss": 1.0229,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 1.309462915601023,
|
|
"grad_norm": 0.8918712650363909,
|
|
"learning_rate": 0.00014759365943877906,
|
|
"loss": 1.0509,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 1.3120204603580563,
|
|
"grad_norm": 0.839691736422046,
|
|
"learning_rate": 0.00014751712591901385,
|
|
"loss": 1.0078,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 1.3145780051150895,
|
|
"grad_norm": 0.7023292683764998,
|
|
"learning_rate": 0.00014744037703089014,
|
|
"loss": 1.0289,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 1.317135549872123,
|
|
"grad_norm": 0.686332323144994,
|
|
"learning_rate": 0.00014736341301922406,
|
|
"loss": 1.0213,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 1.319693094629156,
|
|
"grad_norm": 0.5991056794621004,
|
|
"learning_rate": 0.00014728623412951802,
|
|
"loss": 1.0164,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 1.3222506393861893,
|
|
"grad_norm": 0.7507696949786656,
|
|
"learning_rate": 0.00014720884060795975,
|
|
"loss": 1.0119,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 1.3248081841432224,
|
|
"grad_norm": 0.8658712614342154,
|
|
"learning_rate": 0.00014713123270142163,
|
|
"loss": 1.0295,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 1.3273657289002558,
|
|
"grad_norm": 0.6119299788578647,
|
|
"learning_rate": 0.00014705341065745999,
|
|
"loss": 1.0197,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 1.329923273657289,
|
|
"grad_norm": 0.4927851179899278,
|
|
"learning_rate": 0.00014697537472431411,
|
|
"loss": 1.0624,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.3324808184143222,
|
|
"grad_norm": 0.4167468121183674,
|
|
"learning_rate": 0.0001468971251509056,
|
|
"loss": 1.0647,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.3350383631713556,
|
|
"grad_norm": 0.47586787480372,
|
|
"learning_rate": 0.00014681866218683757,
|
|
"loss": 1.0402,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.3375959079283888,
|
|
"grad_norm": 0.5745122439927115,
|
|
"learning_rate": 0.0001467399860823937,
|
|
"loss": 1.0304,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.340153452685422,
|
|
"grad_norm": 0.7552655303578069,
|
|
"learning_rate": 0.00014666109708853767,
|
|
"loss": 1.0548,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.3427109974424551,
|
|
"grad_norm": 1.06908823148847,
|
|
"learning_rate": 0.00014658199545691222,
|
|
"loss": 1.0287,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.3452685421994885,
|
|
"grad_norm": 1.1444185918054413,
|
|
"learning_rate": 0.0001465026814398383,
|
|
"loss": 1.0539,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.3478260869565217,
|
|
"grad_norm": 0.7989998085879703,
|
|
"learning_rate": 0.00014642315529031442,
|
|
"loss": 1.0035,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.350383631713555,
|
|
"grad_norm": 0.6352155319789643,
|
|
"learning_rate": 0.00014634341726201572,
|
|
"loss": 1.0659,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.3529411764705883,
|
|
"grad_norm": 0.5614215368601074,
|
|
"learning_rate": 0.00014626346760929316,
|
|
"loss": 1.0282,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.3554987212276215,
|
|
"grad_norm": 0.5422618777488837,
|
|
"learning_rate": 0.00014618330658717278,
|
|
"loss": 1.0002,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.3580562659846547,
|
|
"grad_norm": 0.4783637133302247,
|
|
"learning_rate": 0.00014610293445135492,
|
|
"loss": 1.0377,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.3606138107416879,
|
|
"grad_norm": 0.4390483950197236,
|
|
"learning_rate": 0.00014602235145821322,
|
|
"loss": 1.023,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.3631713554987213,
|
|
"grad_norm": 0.4768466306371761,
|
|
"learning_rate": 0.00014594155786479398,
|
|
"loss": 1.0601,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.3657289002557544,
|
|
"grad_norm": 0.7582418871164014,
|
|
"learning_rate": 0.00014586055392881527,
|
|
"loss": 1.0292,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.3682864450127878,
|
|
"grad_norm": 1.0430189228296438,
|
|
"learning_rate": 0.00014577933990866617,
|
|
"loss": 1.0397,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.370843989769821,
|
|
"grad_norm": 1.2646327577842662,
|
|
"learning_rate": 0.00014569791606340577,
|
|
"loss": 1.0749,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.3734015345268542,
|
|
"grad_norm": 0.6922891659849906,
|
|
"learning_rate": 0.00014561628265276257,
|
|
"loss": 1.0293,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.3759590792838874,
|
|
"grad_norm": 0.44386889614919295,
|
|
"learning_rate": 0.00014553443993713355,
|
|
"loss": 1.0398,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.3785166240409208,
|
|
"grad_norm": 0.5439717030086442,
|
|
"learning_rate": 0.00014545238817758327,
|
|
"loss": 1.0268,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.381074168797954,
|
|
"grad_norm": 0.8373630963710572,
|
|
"learning_rate": 0.00014537012763584316,
|
|
"loss": 1.0354,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.3836317135549872,
|
|
"grad_norm": 1.3266757684220118,
|
|
"learning_rate": 0.0001452876585743106,
|
|
"loss": 1.0642,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.3861892583120206,
|
|
"grad_norm": 0.7488029622406787,
|
|
"learning_rate": 0.00014520498125604814,
|
|
"loss": 1.0534,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.3887468030690537,
|
|
"grad_norm": 0.7282698103684015,
|
|
"learning_rate": 0.00014512209594478263,
|
|
"loss": 1.01,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.391304347826087,
|
|
"grad_norm": 0.7969771518742094,
|
|
"learning_rate": 0.00014503900290490436,
|
|
"loss": 1.0307,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.39386189258312,
|
|
"grad_norm": 0.9263524028660353,
|
|
"learning_rate": 0.00014495570240146625,
|
|
"loss": 1.0211,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.3964194373401535,
|
|
"grad_norm": 1.1608361715103017,
|
|
"learning_rate": 0.000144872194700183,
|
|
"loss": 1.0005,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.3989769820971867,
|
|
"grad_norm": 0.836914057851843,
|
|
"learning_rate": 0.00014478848006743022,
|
|
"loss": 1.0387,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.40153452685422,
|
|
"grad_norm": 0.6826412525653701,
|
|
"learning_rate": 0.00014470455877024365,
|
|
"loss": 1.0292,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.4040920716112533,
|
|
"grad_norm": 0.48703773893723834,
|
|
"learning_rate": 0.00014462043107631818,
|
|
"loss": 1.0511,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.4066496163682864,
|
|
"grad_norm": 0.6223475644721191,
|
|
"learning_rate": 0.00014453609725400713,
|
|
"loss": 0.9925,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.4092071611253196,
|
|
"grad_norm": 0.8882232962821335,
|
|
"learning_rate": 0.0001444515575723213,
|
|
"loss": 1.0061,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.4117647058823528,
|
|
"grad_norm": 1.1304081971561695,
|
|
"learning_rate": 0.00014436681230092815,
|
|
"loss": 1.0488,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.4143222506393862,
|
|
"grad_norm": 0.8848381914341709,
|
|
"learning_rate": 0.00014428186171015097,
|
|
"loss": 1.0324,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.4168797953964194,
|
|
"grad_norm": 0.7483522323458203,
|
|
"learning_rate": 0.00014419670607096791,
|
|
"loss": 1.0422,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.4194373401534528,
|
|
"grad_norm": 0.7721209602826212,
|
|
"learning_rate": 0.00014411134565501133,
|
|
"loss": 1.056,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.421994884910486,
|
|
"grad_norm": 0.8535777213626637,
|
|
"learning_rate": 0.00014402578073456661,
|
|
"loss": 1.0408,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.4245524296675192,
|
|
"grad_norm": 0.6959036355749549,
|
|
"learning_rate": 0.00014394001158257163,
|
|
"loss": 1.0271,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.4271099744245523,
|
|
"grad_norm": 0.6014343484373971,
|
|
"learning_rate": 0.00014385403847261562,
|
|
"loss": 1.0193,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.4296675191815857,
|
|
"grad_norm": 0.7106873814775013,
|
|
"learning_rate": 0.00014376786167893846,
|
|
"loss": 1.0122,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.432225063938619,
|
|
"grad_norm": 0.8444210941994957,
|
|
"learning_rate": 0.00014368148147642974,
|
|
"loss": 1.0045,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.434782608695652,
|
|
"grad_norm": 0.8805969266684864,
|
|
"learning_rate": 0.00014359489814062788,
|
|
"loss": 1.0144,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.4373401534526855,
|
|
"grad_norm": 1.009450224204603,
|
|
"learning_rate": 0.00014350811194771928,
|
|
"loss": 1.0287,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.4398976982097187,
|
|
"grad_norm": 1.2351992837125931,
|
|
"learning_rate": 0.00014342112317453738,
|
|
"loss": 1.0566,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.4424552429667519,
|
|
"grad_norm": 0.6573457770192163,
|
|
"learning_rate": 0.00014333393209856182,
|
|
"loss": 1.052,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.445012787723785,
|
|
"grad_norm": 0.5070847718255479,
|
|
"learning_rate": 0.00014324653899791765,
|
|
"loss": 1.0608,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.4475703324808185,
|
|
"grad_norm": 0.6935855951791632,
|
|
"learning_rate": 0.00014315894415137416,
|
|
"loss": 1.0234,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.4501278772378516,
|
|
"grad_norm": 0.7956146938043426,
|
|
"learning_rate": 0.00014307114783834442,
|
|
"loss": 1.0048,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.452685421994885,
|
|
"grad_norm": 0.9003410836319078,
|
|
"learning_rate": 0.0001429831503388839,
|
|
"loss": 1.0363,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.4552429667519182,
|
|
"grad_norm": 1.0643618726104027,
|
|
"learning_rate": 0.00014289495193368996,
|
|
"loss": 1.0269,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.4578005115089514,
|
|
"grad_norm": 0.9080907950888324,
|
|
"learning_rate": 0.0001428065529041008,
|
|
"loss": 1.017,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.4603580562659846,
|
|
"grad_norm": 0.8536436997073572,
|
|
"learning_rate": 0.00014271795353209456,
|
|
"loss": 1.0375,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.4629156010230178,
|
|
"grad_norm": 0.9398461282489688,
|
|
"learning_rate": 0.00014262915410028848,
|
|
"loss": 1.0434,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.4654731457800512,
|
|
"grad_norm": 0.9631928132083718,
|
|
"learning_rate": 0.00014254015489193782,
|
|
"loss": 1.0292,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.4680306905370843,
|
|
"grad_norm": 0.9076791954370104,
|
|
"learning_rate": 0.00014245095619093532,
|
|
"loss": 1.0159,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 0.9587339014454659,
|
|
"learning_rate": 0.00014236155828180983,
|
|
"loss": 1.0484,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.473145780051151,
|
|
"grad_norm": 0.8891566782622077,
|
|
"learning_rate": 0.00014227196144972582,
|
|
"loss": 1.0508,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.4757033248081841,
|
|
"grad_norm": 0.6581614104684226,
|
|
"learning_rate": 0.0001421821659804822,
|
|
"loss": 1.0403,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.4782608695652173,
|
|
"grad_norm": 0.5861192400584929,
|
|
"learning_rate": 0.00014209217216051156,
|
|
"loss": 1.0304,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.4808184143222507,
|
|
"grad_norm": 0.5774127863656433,
|
|
"learning_rate": 0.00014200198027687912,
|
|
"loss": 1.0102,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.4833759590792839,
|
|
"grad_norm": 0.6502157171768282,
|
|
"learning_rate": 0.00014191159061728193,
|
|
"loss": 1.0253,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.485933503836317,
|
|
"grad_norm": 0.5386614139768452,
|
|
"learning_rate": 0.00014182100347004793,
|
|
"loss": 1.044,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.4884910485933505,
|
|
"grad_norm": 0.4786011997004328,
|
|
"learning_rate": 0.000141730219124135,
|
|
"loss": 1.0322,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.4910485933503836,
|
|
"grad_norm": 0.5755235187273994,
|
|
"learning_rate": 0.00014163923786913004,
|
|
"loss": 1.0572,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.4936061381074168,
|
|
"grad_norm": 0.641263771557679,
|
|
"learning_rate": 0.00014154805999524802,
|
|
"loss": 1.0627,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.49616368286445,
|
|
"grad_norm": 0.798665776000645,
|
|
"learning_rate": 0.0001414566857933312,
|
|
"loss": 1.0017,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.4987212276214834,
|
|
"grad_norm": 0.8759678129527348,
|
|
"learning_rate": 0.00014136511555484798,
|
|
"loss": 1.0168,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.5012787723785166,
|
|
"grad_norm": 0.7904395533793586,
|
|
"learning_rate": 0.00014127334957189219,
|
|
"loss": 1.0253,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.50383631713555,
|
|
"grad_norm": 0.6451046472087583,
|
|
"learning_rate": 0.00014118138813718192,
|
|
"loss": 1.0523,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.5063938618925832,
|
|
"grad_norm": 0.5705461372803496,
|
|
"learning_rate": 0.0001410892315440588,
|
|
"loss": 0.9921,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.5089514066496164,
|
|
"grad_norm": 0.6000400371240294,
|
|
"learning_rate": 0.00014099688008648703,
|
|
"loss": 1.0219,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.5115089514066495,
|
|
"grad_norm": 0.6112952152068515,
|
|
"learning_rate": 0.0001409043340590523,
|
|
"loss": 0.9963,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.5140664961636827,
|
|
"grad_norm": 0.5886324573188866,
|
|
"learning_rate": 0.00014081159375696102,
|
|
"loss": 1.0484,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.5166240409207161,
|
|
"grad_norm": 0.5048817308801855,
|
|
"learning_rate": 0.00014071865947603922,
|
|
"loss": 0.978,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.5191815856777495,
|
|
"grad_norm": 0.5000111304078102,
|
|
"learning_rate": 0.00014062553151273177,
|
|
"loss": 1.0431,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.5217391304347827,
|
|
"grad_norm": 0.47701322805085783,
|
|
"learning_rate": 0.0001405322101641013,
|
|
"loss": 1.0157,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.5242966751918159,
|
|
"grad_norm": 0.45047959305759844,
|
|
"learning_rate": 0.00014043869572782737,
|
|
"loss": 1.026,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.526854219948849,
|
|
"grad_norm": 0.37562193605886857,
|
|
"learning_rate": 0.00014034498850220537,
|
|
"loss": 1.0334,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.5294117647058822,
|
|
"grad_norm": 0.44055163797782626,
|
|
"learning_rate": 0.00014025108878614576,
|
|
"loss": 1.0353,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.5319693094629157,
|
|
"grad_norm": 0.39725606847915634,
|
|
"learning_rate": 0.0001401569968791729,
|
|
"loss": 1.0115,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.5345268542199488,
|
|
"grad_norm": 0.39650786805208904,
|
|
"learning_rate": 0.00014006271308142433,
|
|
"loss": 1.0604,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.5370843989769822,
|
|
"grad_norm": 0.32569926641458746,
|
|
"learning_rate": 0.0001399682376936495,
|
|
"loss": 1.0096,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.5396419437340154,
|
|
"grad_norm": 0.43543100187257516,
|
|
"learning_rate": 0.00013987357101720929,
|
|
"loss": 1.0059,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.5421994884910486,
|
|
"grad_norm": 0.458695174168892,
|
|
"learning_rate": 0.00013977871335407445,
|
|
"loss": 1.0197,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.5447570332480818,
|
|
"grad_norm": 0.43690410697330667,
|
|
"learning_rate": 0.00013968366500682514,
|
|
"loss": 1.0302,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.547314578005115,
|
|
"grad_norm": 0.4143725631119223,
|
|
"learning_rate": 0.00013958842627864975,
|
|
"loss": 1.0167,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.5498721227621484,
|
|
"grad_norm": 0.36509470245988934,
|
|
"learning_rate": 0.00013949299747334387,
|
|
"loss": 0.994,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.5524296675191815,
|
|
"grad_norm": 0.42997115738098735,
|
|
"learning_rate": 0.00013939737889530948,
|
|
"loss": 1.0182,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.554987212276215,
|
|
"grad_norm": 0.519737904298238,
|
|
"learning_rate": 0.00013930157084955387,
|
|
"loss": 1.0432,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.5575447570332481,
|
|
"grad_norm": 0.5413718715320616,
|
|
"learning_rate": 0.00013920557364168872,
|
|
"loss": 1.0392,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.5601023017902813,
|
|
"grad_norm": 0.4622784565390988,
|
|
"learning_rate": 0.00013910938757792911,
|
|
"loss": 1.0089,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.5626598465473145,
|
|
"grad_norm": 0.517572135003303,
|
|
"learning_rate": 0.00013901301296509247,
|
|
"loss": 1.0433,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.5652173913043477,
|
|
"grad_norm": 0.6472771877158792,
|
|
"learning_rate": 0.00013891645011059774,
|
|
"loss": 1.033,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.567774936061381,
|
|
"grad_norm": 0.73777975779115,
|
|
"learning_rate": 0.00013881969932246434,
|
|
"loss": 1.0233,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.5703324808184145,
|
|
"grad_norm": 0.6556752106938734,
|
|
"learning_rate": 0.00013872276090931112,
|
|
"loss": 1.0283,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.5728900255754477,
|
|
"grad_norm": 0.647001672639268,
|
|
"learning_rate": 0.0001386256351803554,
|
|
"loss": 1.0449,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.5754475703324808,
|
|
"grad_norm": 0.755466796600313,
|
|
"learning_rate": 0.00013852832244541207,
|
|
"loss": 1.0005,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.578005115089514,
|
|
"grad_norm": 0.9067726592525303,
|
|
"learning_rate": 0.00013843082301489247,
|
|
"loss": 1.034,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.5805626598465472,
|
|
"grad_norm": 1.205016289595881,
|
|
"learning_rate": 0.00013833313719980358,
|
|
"loss": 1.0292,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.5831202046035806,
|
|
"grad_norm": 0.8478168612376876,
|
|
"learning_rate": 0.00013823526531174675,
|
|
"loss": 1.0142,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.5856777493606138,
|
|
"grad_norm": 0.7403592560784086,
|
|
"learning_rate": 0.000138137207662917,
|
|
"loss": 1.0019,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.5882352941176472,
|
|
"grad_norm": 0.6403376151233803,
|
|
"learning_rate": 0.00013803896456610187,
|
|
"loss": 1.0308,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 1.5907928388746804,
|
|
"grad_norm": 0.712308710605845,
|
|
"learning_rate": 0.0001379405363346804,
|
|
"loss": 1.0455,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 1.5933503836317136,
|
|
"grad_norm": 0.6512025986675177,
|
|
"learning_rate": 0.00013784192328262227,
|
|
"loss": 1.018,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 1.5959079283887467,
|
|
"grad_norm": 0.6467882755688008,
|
|
"learning_rate": 0.00013774312572448658,
|
|
"loss": 1.0566,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 1.59846547314578,
|
|
"grad_norm": 0.7409770827879977,
|
|
"learning_rate": 0.00013764414397542113,
|
|
"loss": 1.0759,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 1.6010230179028133,
|
|
"grad_norm": 0.8147656835217053,
|
|
"learning_rate": 0.0001375449783511611,
|
|
"loss": 1.0041,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 1.6035805626598465,
|
|
"grad_norm": 0.9034624506464588,
|
|
"learning_rate": 0.0001374456291680283,
|
|
"loss": 1.0141,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 1.60613810741688,
|
|
"grad_norm": 1.0050570938199166,
|
|
"learning_rate": 0.00013734609674293001,
|
|
"loss": 1.0532,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 1.608695652173913,
|
|
"grad_norm": 0.9807521253903259,
|
|
"learning_rate": 0.00013724638139335808,
|
|
"loss": 1.0079,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 1.6112531969309463,
|
|
"grad_norm": 1.0251289878636651,
|
|
"learning_rate": 0.00013714648343738785,
|
|
"loss": 1.014,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.6138107416879794,
|
|
"grad_norm": 1.1145588268761022,
|
|
"learning_rate": 0.00013704640319367706,
|
|
"loss": 1.0217,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 1.6163682864450126,
|
|
"grad_norm": 0.9024588644594059,
|
|
"learning_rate": 0.000136946140981465,
|
|
"loss": 1.0151,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 1.618925831202046,
|
|
"grad_norm": 0.7164435145214515,
|
|
"learning_rate": 0.00013684569712057141,
|
|
"loss": 0.9972,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 1.6214833759590794,
|
|
"grad_norm": 0.40989603024156007,
|
|
"learning_rate": 0.0001367450719313954,
|
|
"loss": 1.0438,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 1.6240409207161126,
|
|
"grad_norm": 0.4621187072292993,
|
|
"learning_rate": 0.00013664426573491454,
|
|
"loss": 0.9964,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 1.6265984654731458,
|
|
"grad_norm": 0.7796243265332405,
|
|
"learning_rate": 0.0001365432788526838,
|
|
"loss": 1.0428,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 1.629156010230179,
|
|
"grad_norm": 0.9807118313427811,
|
|
"learning_rate": 0.0001364421116068344,
|
|
"loss": 1.0374,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 1.6317135549872122,
|
|
"grad_norm": 1.0521751456854462,
|
|
"learning_rate": 0.00013634076432007298,
|
|
"loss": 1.022,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 1.6342710997442456,
|
|
"grad_norm": 1.014819808376515,
|
|
"learning_rate": 0.00013623923731568053,
|
|
"loss": 1.0555,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 1.6368286445012787,
|
|
"grad_norm": 0.8908217824529507,
|
|
"learning_rate": 0.00013613753091751117,
|
|
"loss": 0.9896,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 1.6393861892583121,
|
|
"grad_norm": 0.7338590542416318,
|
|
"learning_rate": 0.00013603564544999134,
|
|
"loss": 1.0104,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 1.6419437340153453,
|
|
"grad_norm": 0.4947515917010355,
|
|
"learning_rate": 0.00013593358123811873,
|
|
"loss": 1.013,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 1.6445012787723785,
|
|
"grad_norm": 0.3613565103885808,
|
|
"learning_rate": 0.00013583133860746102,
|
|
"loss": 1.0285,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 1.6470588235294117,
|
|
"grad_norm": 0.44918465574622884,
|
|
"learning_rate": 0.00013572891788415526,
|
|
"loss": 1.0735,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 1.6496163682864449,
|
|
"grad_norm": 0.6919277753013154,
|
|
"learning_rate": 0.00013562631939490638,
|
|
"loss": 0.9838,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 1.6521739130434783,
|
|
"grad_norm": 0.998596135317296,
|
|
"learning_rate": 0.00013552354346698644,
|
|
"loss": 1.0407,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 1.6547314578005117,
|
|
"grad_norm": 1.1274200277350097,
|
|
"learning_rate": 0.0001354205904282335,
|
|
"loss": 0.9994,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 1.6572890025575449,
|
|
"grad_norm": 0.7298162047765786,
|
|
"learning_rate": 0.0001353174606070505,
|
|
"loss": 1.0158,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 1.659846547314578,
|
|
"grad_norm": 0.4959923867676345,
|
|
"learning_rate": 0.00013521415433240448,
|
|
"loss": 1.0223,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 1.6624040920716112,
|
|
"grad_norm": 0.4028073795408234,
|
|
"learning_rate": 0.0001351106719338251,
|
|
"loss": 1.0048,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 1.6649616368286444,
|
|
"grad_norm": 0.4151895967851957,
|
|
"learning_rate": 0.000135007013741404,
|
|
"loss": 1.031,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 1.6675191815856778,
|
|
"grad_norm": 0.493296338959119,
|
|
"learning_rate": 0.0001349031800857934,
|
|
"loss": 1.0551,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 1.670076726342711,
|
|
"grad_norm": 0.5474927271625798,
|
|
"learning_rate": 0.00013479917129820547,
|
|
"loss": 1.0296,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 1.6726342710997444,
|
|
"grad_norm": 0.6314250125042725,
|
|
"learning_rate": 0.00013469498771041078,
|
|
"loss": 1.0355,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 1.6751918158567776,
|
|
"grad_norm": 0.7183033795455095,
|
|
"learning_rate": 0.0001345906296547376,
|
|
"loss": 1.0239,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 1.6777493606138107,
|
|
"grad_norm": 0.6627049343116693,
|
|
"learning_rate": 0.00013448609746407076,
|
|
"loss": 1.0107,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 1.680306905370844,
|
|
"grad_norm": 0.8323267890128159,
|
|
"learning_rate": 0.0001343813914718504,
|
|
"loss": 1.0132,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 1.682864450127877,
|
|
"grad_norm": 1.0100396544553614,
|
|
"learning_rate": 0.0001342765120120712,
|
|
"loss": 1.034,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 1.6854219948849105,
|
|
"grad_norm": 0.9397586944756832,
|
|
"learning_rate": 0.0001341714594192811,
|
|
"loss": 1.0359,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 1.6879795396419437,
|
|
"grad_norm": 0.60948367814948,
|
|
"learning_rate": 0.00013406623402858038,
|
|
"loss": 1.0515,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 1.690537084398977,
|
|
"grad_norm": 0.4064851961480879,
|
|
"learning_rate": 0.00013396083617562041,
|
|
"loss": 1.0295,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 1.6930946291560103,
|
|
"grad_norm": 0.4835321670487211,
|
|
"learning_rate": 0.0001338552661966028,
|
|
"loss": 1.0218,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 1.6956521739130435,
|
|
"grad_norm": 0.5087590456762057,
|
|
"learning_rate": 0.00013374952442827813,
|
|
"loss": 1.0438,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 1.6982097186700766,
|
|
"grad_norm": 0.487251739240553,
|
|
"learning_rate": 0.00013364361120794495,
|
|
"loss": 1.0293,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 1.7007672634271098,
|
|
"grad_norm": 0.5712982739684782,
|
|
"learning_rate": 0.00013353752687344882,
|
|
"loss": 1.0332,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 1.7033248081841432,
|
|
"grad_norm": 0.7033661782388088,
|
|
"learning_rate": 0.000133431271763181,
|
|
"loss": 1.0053,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 1.7058823529411766,
|
|
"grad_norm": 0.6935444307133046,
|
|
"learning_rate": 0.00013332484621607758,
|
|
"loss": 1.0262,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 1.7084398976982098,
|
|
"grad_norm": 0.7341105705188075,
|
|
"learning_rate": 0.00013321825057161825,
|
|
"loss": 1.0156,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 1.710997442455243,
|
|
"grad_norm": 0.7907280681410083,
|
|
"learning_rate": 0.00013311148516982534,
|
|
"loss": 1.0413,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 1.7135549872122762,
|
|
"grad_norm": 0.7112672488330658,
|
|
"learning_rate": 0.00013300455035126268,
|
|
"loss": 1.0199,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 1.7161125319693094,
|
|
"grad_norm": 0.5766576717286938,
|
|
"learning_rate": 0.00013289744645703444,
|
|
"loss": 1.0361,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 1.7186700767263428,
|
|
"grad_norm": 0.5059688666618373,
|
|
"learning_rate": 0.0001327901738287842,
|
|
"loss": 1.0385,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 1.721227621483376,
|
|
"grad_norm": 0.45263501963427877,
|
|
"learning_rate": 0.0001326827328086937,
|
|
"loss": 1.0163,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 1.7237851662404093,
|
|
"grad_norm": 0.5156404930129397,
|
|
"learning_rate": 0.00013257512373948186,
|
|
"loss": 1.0592,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 1.7263427109974425,
|
|
"grad_norm": 0.6373966994332245,
|
|
"learning_rate": 0.00013246734696440368,
|
|
"loss": 1.0303,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 1.7289002557544757,
|
|
"grad_norm": 0.6497706378399105,
|
|
"learning_rate": 0.000132359402827249,
|
|
"loss": 0.9963,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 1.7314578005115089,
|
|
"grad_norm": 0.6649205635237081,
|
|
"learning_rate": 0.0001322512916723417,
|
|
"loss": 1.0133,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 1.734015345268542,
|
|
"grad_norm": 0.7302337459964975,
|
|
"learning_rate": 0.00013214301384453824,
|
|
"loss": 1.0143,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 1.7365728900255755,
|
|
"grad_norm": 0.7742690150052379,
|
|
"learning_rate": 0.00013203456968922684,
|
|
"loss": 1.0164,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 1.7391304347826086,
|
|
"grad_norm": 0.6798309822233196,
|
|
"learning_rate": 0.0001319259595523262,
|
|
"loss": 1.0172,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.741687979539642,
|
|
"grad_norm": 0.5208733748449712,
|
|
"learning_rate": 0.0001318171837802846,
|
|
"loss": 1.0048,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.7442455242966752,
|
|
"grad_norm": 0.41856841228081965,
|
|
"learning_rate": 0.00013170824272007854,
|
|
"loss": 1.0508,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 1.7468030690537084,
|
|
"grad_norm": 0.41744052183195546,
|
|
"learning_rate": 0.00013159913671921184,
|
|
"loss": 1.0433,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 1.7493606138107416,
|
|
"grad_norm": 0.45034351237029546,
|
|
"learning_rate": 0.00013148986612571438,
|
|
"loss": 1.0281,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 1.7519181585677748,
|
|
"grad_norm": 0.5021896906440644,
|
|
"learning_rate": 0.00013138043128814114,
|
|
"loss": 1.0207,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 1.7544757033248082,
|
|
"grad_norm": 0.6367316434278153,
|
|
"learning_rate": 0.000131270832555571,
|
|
"loss": 1.0509,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 1.7570332480818416,
|
|
"grad_norm": 0.9449450079946309,
|
|
"learning_rate": 0.00013116107027760557,
|
|
"loss": 1.0263,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 1.7595907928388748,
|
|
"grad_norm": 1.2671861813793404,
|
|
"learning_rate": 0.00013105114480436823,
|
|
"loss": 1.015,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 1.762148337595908,
|
|
"grad_norm": 0.6133472053088566,
|
|
"learning_rate": 0.00013094105648650285,
|
|
"loss": 0.9964,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 0.5563333895443464,
|
|
"learning_rate": 0.00013083080567517284,
|
|
"loss": 1.0221,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 1.7672634271099743,
|
|
"grad_norm": 0.8984060988722041,
|
|
"learning_rate": 0.0001307203927220598,
|
|
"loss": 1.0333,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 1.7698209718670077,
|
|
"grad_norm": 1.1600459077736829,
|
|
"learning_rate": 0.0001306098179793627,
|
|
"loss": 1.0281,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 1.772378516624041,
|
|
"grad_norm": 0.8749748158295617,
|
|
"learning_rate": 0.00013049908179979644,
|
|
"loss": 1.0414,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 1.7749360613810743,
|
|
"grad_norm": 0.6456013771393564,
|
|
"learning_rate": 0.00013038818453659098,
|
|
"loss": 0.9934,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 1.7774936061381075,
|
|
"grad_norm": 0.4834000513881869,
|
|
"learning_rate": 0.00013027712654349003,
|
|
"loss": 1.0077,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 1.7800511508951407,
|
|
"grad_norm": 0.46969762642929197,
|
|
"learning_rate": 0.0001301659081747501,
|
|
"loss": 1.0408,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 1.7826086956521738,
|
|
"grad_norm": 0.5147779689056563,
|
|
"learning_rate": 0.0001300545297851392,
|
|
"loss": 1.0186,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 1.785166240409207,
|
|
"grad_norm": 0.55729153001615,
|
|
"learning_rate": 0.0001299429917299358,
|
|
"loss": 1.0329,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 1.7877237851662404,
|
|
"grad_norm": 0.5260414108398854,
|
|
"learning_rate": 0.00012983129436492763,
|
|
"loss": 1.0233,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 1.7902813299232738,
|
|
"grad_norm": 0.5427361149590243,
|
|
"learning_rate": 0.00012971943804641068,
|
|
"loss": 1.0409,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 1.792838874680307,
|
|
"grad_norm": 0.5405520825559765,
|
|
"learning_rate": 0.0001296074231311879,
|
|
"loss": 1.0066,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 1.7953964194373402,
|
|
"grad_norm": 0.6297890907155308,
|
|
"learning_rate": 0.0001294952499765682,
|
|
"loss": 1.0254,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 1.7979539641943734,
|
|
"grad_norm": 0.6644546067252105,
|
|
"learning_rate": 0.00012938291894036522,
|
|
"loss": 1.0285,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 1.8005115089514065,
|
|
"grad_norm": 0.683427488866508,
|
|
"learning_rate": 0.00012927043038089616,
|
|
"loss": 1.0091,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 1.80306905370844,
|
|
"grad_norm": 0.6319295334248269,
|
|
"learning_rate": 0.00012915778465698077,
|
|
"loss": 1.0397,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 1.8056265984654731,
|
|
"grad_norm": 0.5438735087695892,
|
|
"learning_rate": 0.00012904498212794007,
|
|
"loss": 0.991,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 1.8081841432225065,
|
|
"grad_norm": 0.5047705166677889,
|
|
"learning_rate": 0.00012893202315359537,
|
|
"loss": 0.9944,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 1.8107416879795397,
|
|
"grad_norm": 0.5361496724146492,
|
|
"learning_rate": 0.00012881890809426688,
|
|
"loss": 1.0212,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 1.813299232736573,
|
|
"grad_norm": 0.4758891777297796,
|
|
"learning_rate": 0.00012870563731077277,
|
|
"loss": 0.9717,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 1.815856777493606,
|
|
"grad_norm": 0.41562952895729655,
|
|
"learning_rate": 0.0001285922111644279,
|
|
"loss": 1.0162,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 1.8184143222506393,
|
|
"grad_norm": 0.4923656957788762,
|
|
"learning_rate": 0.00012847863001704278,
|
|
"loss": 1.0685,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 1.8209718670076727,
|
|
"grad_norm": 0.43817036243213936,
|
|
"learning_rate": 0.00012836489423092225,
|
|
"loss": 1.0166,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 1.8235294117647058,
|
|
"grad_norm": 0.36194875273904087,
|
|
"learning_rate": 0.00012825100416886454,
|
|
"loss": 1.0255,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 1.8260869565217392,
|
|
"grad_norm": 0.5507986270387409,
|
|
"learning_rate": 0.0001281369601941599,
|
|
"loss": 1.0135,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 1.8286445012787724,
|
|
"grad_norm": 0.685338916623197,
|
|
"learning_rate": 0.00012802276267058957,
|
|
"loss": 0.999,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 1.8312020460358056,
|
|
"grad_norm": 0.5568312967518175,
|
|
"learning_rate": 0.00012790841196242458,
|
|
"loss": 1.0153,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 1.8337595907928388,
|
|
"grad_norm": 0.4401729278401454,
|
|
"learning_rate": 0.00012779390843442462,
|
|
"loss": 0.9855,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 1.836317135549872,
|
|
"grad_norm": 0.4249893778808539,
|
|
"learning_rate": 0.00012767925245183676,
|
|
"loss": 1.0351,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 1.8388746803069054,
|
|
"grad_norm": 0.47539299147834413,
|
|
"learning_rate": 0.00012756444438039453,
|
|
"loss": 1.035,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 1.8414322250639388,
|
|
"grad_norm": 0.5475741371560751,
|
|
"learning_rate": 0.00012744948458631646,
|
|
"loss": 1.0412,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 1.843989769820972,
|
|
"grad_norm": 0.5751955332609484,
|
|
"learning_rate": 0.0001273343734363051,
|
|
"loss": 1.0419,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 1.8465473145780051,
|
|
"grad_norm": 0.5673429560849089,
|
|
"learning_rate": 0.00012721911129754578,
|
|
"loss": 0.9993,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 1.8491048593350383,
|
|
"grad_norm": 0.475786389030356,
|
|
"learning_rate": 0.0001271036985377055,
|
|
"loss": 1.0255,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 1.8516624040920715,
|
|
"grad_norm": 0.4435215042959613,
|
|
"learning_rate": 0.00012698813552493174,
|
|
"loss": 1.0159,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 1.854219948849105,
|
|
"grad_norm": 0.6384652673350472,
|
|
"learning_rate": 0.00012687242262785116,
|
|
"loss": 1.0468,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 1.856777493606138,
|
|
"grad_norm": 0.660707948092585,
|
|
"learning_rate": 0.00012675656021556855,
|
|
"loss": 0.9702,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 1.8593350383631715,
|
|
"grad_norm": 0.5190779530078301,
|
|
"learning_rate": 0.00012664054865766573,
|
|
"loss": 0.9959,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 1.8618925831202047,
|
|
"grad_norm": 0.59002541889049,
|
|
"learning_rate": 0.00012652438832420017,
|
|
"loss": 1.0009,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 1.8644501278772379,
|
|
"grad_norm": 0.724406502768554,
|
|
"learning_rate": 0.00012640807958570394,
|
|
"loss": 1.0572,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 1.867007672634271,
|
|
"grad_norm": 0.606082979636232,
|
|
"learning_rate": 0.00012629162281318248,
|
|
"loss": 1.0123,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 1.8695652173913042,
|
|
"grad_norm": 0.3890444487309348,
|
|
"learning_rate": 0.00012617501837811347,
|
|
"loss": 0.9835,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 1.8721227621483376,
|
|
"grad_norm": 0.4748189131220067,
|
|
"learning_rate": 0.00012605826665244559,
|
|
"loss": 1.0206,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 1.8746803069053708,
|
|
"grad_norm": 0.5894024279814004,
|
|
"learning_rate": 0.00012594136800859733,
|
|
"loss": 1.0312,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 1.8772378516624042,
|
|
"grad_norm": 0.8812294314944346,
|
|
"learning_rate": 0.00012582432281945587,
|
|
"loss": 0.9929,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 1.8797953964194374,
|
|
"grad_norm": 1.2695722544281176,
|
|
"learning_rate": 0.0001257071314583758,
|
|
"loss": 1.0232,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 1.8823529411764706,
|
|
"grad_norm": 0.7877721338048511,
|
|
"learning_rate": 0.00012558979429917803,
|
|
"loss": 1.0528,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 1.8849104859335037,
|
|
"grad_norm": 0.6479567586178989,
|
|
"learning_rate": 0.00012547231171614845,
|
|
"loss": 1.0262,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 1.887468030690537,
|
|
"grad_norm": 0.6844520570754378,
|
|
"learning_rate": 0.00012535468408403697,
|
|
"loss": 1.0333,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 1.8900255754475703,
|
|
"grad_norm": 0.6085957966970293,
|
|
"learning_rate": 0.00012523691177805597,
|
|
"loss": 1.0168,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 1.8925831202046037,
|
|
"grad_norm": 0.5254572324853038,
|
|
"learning_rate": 0.00012511899517387955,
|
|
"loss": 0.9883,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 1.895140664961637,
|
|
"grad_norm": 0.6139364866532532,
|
|
"learning_rate": 0.00012500093464764197,
|
|
"loss": 0.9977,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 1.89769820971867,
|
|
"grad_norm": 0.6998963267481692,
|
|
"learning_rate": 0.00012488273057593654,
|
|
"loss": 1.0044,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 1.9002557544757033,
|
|
"grad_norm": 0.5270554785542413,
|
|
"learning_rate": 0.00012476438333581456,
|
|
"loss": 1.0412,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 1.9028132992327365,
|
|
"grad_norm": 0.5157043265448235,
|
|
"learning_rate": 0.00012464589330478398,
|
|
"loss": 0.9978,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 1.9053708439897699,
|
|
"grad_norm": 0.5631065206891138,
|
|
"learning_rate": 0.0001245272608608082,
|
|
"loss": 0.9944,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 1.907928388746803,
|
|
"grad_norm": 0.4807212257749526,
|
|
"learning_rate": 0.00012440848638230485,
|
|
"loss": 1.0184,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 1.9104859335038364,
|
|
"grad_norm": 0.42670701279562534,
|
|
"learning_rate": 0.00012428957024814477,
|
|
"loss": 1.0105,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 1.9130434782608696,
|
|
"grad_norm": 0.41188284810782877,
|
|
"learning_rate": 0.00012417051283765055,
|
|
"loss": 1.0256,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 1.9156010230179028,
|
|
"grad_norm": 0.39912216267661754,
|
|
"learning_rate": 0.0001240513145305954,
|
|
"loss": 1.0479,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 1.918158567774936,
|
|
"grad_norm": 0.40181896505552256,
|
|
"learning_rate": 0.00012393197570720208,
|
|
"loss": 1.0006,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 1.9207161125319692,
|
|
"grad_norm": 0.4686514718132313,
|
|
"learning_rate": 0.0001238124967481415,
|
|
"loss": 1.0527,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 1.9232736572890026,
|
|
"grad_norm": 0.4847458570755899,
|
|
"learning_rate": 0.00012369287803453156,
|
|
"loss": 1.0039,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 1.9258312020460358,
|
|
"grad_norm": 0.5873940841619928,
|
|
"learning_rate": 0.00012357311994793603,
|
|
"loss": 1.0191,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 1.9283887468030692,
|
|
"grad_norm": 0.6710549953392281,
|
|
"learning_rate": 0.00012345322287036315,
|
|
"loss": 1.014,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 1.9309462915601023,
|
|
"grad_norm": 0.7897611598340533,
|
|
"learning_rate": 0.0001233331871842646,
|
|
"loss": 0.9853,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 1.9335038363171355,
|
|
"grad_norm": 0.870069888372245,
|
|
"learning_rate": 0.0001232130132725342,
|
|
"loss": 1.022,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 1.9360613810741687,
|
|
"grad_norm": 1.0698935466826593,
|
|
"learning_rate": 0.00012309270151850666,
|
|
"loss": 1.0199,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 1.938618925831202,
|
|
"grad_norm": 1.0318153691478889,
|
|
"learning_rate": 0.00012297225230595637,
|
|
"loss": 1.0008,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 1.9411764705882353,
|
|
"grad_norm": 0.8031059628622865,
|
|
"learning_rate": 0.0001228516660190962,
|
|
"loss": 1.0464,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 1.9437340153452687,
|
|
"grad_norm": 0.4432470641559668,
|
|
"learning_rate": 0.00012273094304257633,
|
|
"loss": 1.0486,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 1.9462915601023019,
|
|
"grad_norm": 0.4413834236432169,
|
|
"learning_rate": 0.00012261008376148282,
|
|
"loss": 1.0483,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 1.948849104859335,
|
|
"grad_norm": 0.5753204802658383,
|
|
"learning_rate": 0.0001224890885613366,
|
|
"loss": 1.026,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 1.9514066496163682,
|
|
"grad_norm": 0.6330964706251369,
|
|
"learning_rate": 0.00012236795782809225,
|
|
"loss": 1.017,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 1.9539641943734014,
|
|
"grad_norm": 0.6869010778127252,
|
|
"learning_rate": 0.00012224669194813647,
|
|
"loss": 1.031,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 1.9565217391304348,
|
|
"grad_norm": 0.7455335150670086,
|
|
"learning_rate": 0.00012212529130828725,
|
|
"loss": 0.9639,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 1.959079283887468,
|
|
"grad_norm": 0.6598851148094896,
|
|
"learning_rate": 0.00012200375629579234,
|
|
"loss": 1.0298,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 1.9616368286445014,
|
|
"grad_norm": 0.44847708135640946,
|
|
"learning_rate": 0.0001218820872983281,
|
|
"loss": 0.9979,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 1.9641943734015346,
|
|
"grad_norm": 0.4421542384496395,
|
|
"learning_rate": 0.00012176028470399836,
|
|
"loss": 1.0219,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 1.9667519181585678,
|
|
"grad_norm": 0.5551681283301225,
|
|
"learning_rate": 0.00012163834890133303,
|
|
"loss": 1.0321,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 1.969309462915601,
|
|
"grad_norm": 0.5433680138372817,
|
|
"learning_rate": 0.000121516280279287,
|
|
"loss": 1.0152,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.9718670076726341,
|
|
"grad_norm": 0.3927534411279976,
|
|
"learning_rate": 0.00012139407922723875,
|
|
"loss": 1.0056,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.9744245524296675,
|
|
"grad_norm": 0.3504638375301521,
|
|
"learning_rate": 0.00012127174613498925,
|
|
"loss": 1.0211,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.976982097186701,
|
|
"grad_norm": 0.5235226714465111,
|
|
"learning_rate": 0.00012114928139276064,
|
|
"loss": 1.0298,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.979539641943734,
|
|
"grad_norm": 0.47218634270204046,
|
|
"learning_rate": 0.00012102668539119501,
|
|
"loss": 0.997,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.9820971867007673,
|
|
"grad_norm": 0.3909468495312419,
|
|
"learning_rate": 0.00012090395852135314,
|
|
"loss": 1.008,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.9846547314578005,
|
|
"grad_norm": 0.3354579546285365,
|
|
"learning_rate": 0.0001207811011747132,
|
|
"loss": 1.0247,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.9872122762148337,
|
|
"grad_norm": 0.3467079716757078,
|
|
"learning_rate": 0.00012065811374316966,
|
|
"loss": 1.0049,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.989769820971867,
|
|
"grad_norm": 0.3407603167118022,
|
|
"learning_rate": 0.0001205349966190319,
|
|
"loss": 1.0454,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.9923273657289002,
|
|
"grad_norm": 0.3172074392515775,
|
|
"learning_rate": 0.00012041175019502295,
|
|
"loss": 1.0269,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.9948849104859336,
|
|
"grad_norm": 0.38289682905322714,
|
|
"learning_rate": 0.00012028837486427837,
|
|
"loss": 1.0085,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.9974424552429668,
|
|
"grad_norm": 0.3409699287203162,
|
|
"learning_rate": 0.00012016487102034482,
|
|
"loss": 1.0151,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.4841721621140613,
|
|
"learning_rate": 0.00012004123905717898,
|
|
"loss": 0.9888,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 2.002557544757033,
|
|
"grad_norm": 0.5947034995797379,
|
|
"learning_rate": 0.00011991747936914614,
|
|
"loss": 0.98,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 2.0051150895140664,
|
|
"grad_norm": 0.5314717777356649,
|
|
"learning_rate": 0.00011979359235101906,
|
|
"loss": 0.966,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 2.0076726342710995,
|
|
"grad_norm": 0.4148615363763489,
|
|
"learning_rate": 0.00011966957839797664,
|
|
"loss": 0.9695,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 2.010230179028133,
|
|
"grad_norm": 0.4001599305252567,
|
|
"learning_rate": 0.00011954543790560267,
|
|
"loss": 1.0493,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 2.0127877237851663,
|
|
"grad_norm": 0.43752065357850173,
|
|
"learning_rate": 0.00011942117126988461,
|
|
"loss": 0.9883,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 2.0153452685421995,
|
|
"grad_norm": 0.5092717368916159,
|
|
"learning_rate": 0.00011929677888721227,
|
|
"loss": 0.9984,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 2.0179028132992327,
|
|
"grad_norm": 0.5840375290444557,
|
|
"learning_rate": 0.00011917226115437656,
|
|
"loss": 0.9833,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 2.020460358056266,
|
|
"grad_norm": 0.573138093028074,
|
|
"learning_rate": 0.00011904761846856831,
|
|
"loss": 0.9724,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 2.023017902813299,
|
|
"grad_norm": 0.5890770850578259,
|
|
"learning_rate": 0.00011892285122737683,
|
|
"loss": 0.9699,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 2.0255754475703327,
|
|
"grad_norm": 0.5692021165096304,
|
|
"learning_rate": 0.00011879795982878883,
|
|
"loss": 0.9741,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 2.028132992327366,
|
|
"grad_norm": 0.6399550167383995,
|
|
"learning_rate": 0.00011867294467118698,
|
|
"loss": 0.9682,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 2.030690537084399,
|
|
"grad_norm": 0.7338640869363395,
|
|
"learning_rate": 0.00011854780615334875,
|
|
"loss": 0.9683,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 2.0332480818414322,
|
|
"grad_norm": 0.806906500405086,
|
|
"learning_rate": 0.00011842254467444517,
|
|
"loss": 0.9756,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 2.0358056265984654,
|
|
"grad_norm": 0.7925351913713344,
|
|
"learning_rate": 0.0001182971606340394,
|
|
"loss": 0.9853,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 2.0383631713554986,
|
|
"grad_norm": 0.6258347835444797,
|
|
"learning_rate": 0.00011817165443208562,
|
|
"loss": 1.0054,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 2.040920716112532,
|
|
"grad_norm": 0.4512585898690294,
|
|
"learning_rate": 0.00011804602646892762,
|
|
"loss": 0.9792,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 2.0434782608695654,
|
|
"grad_norm": 0.3681772077619349,
|
|
"learning_rate": 0.00011792027714529767,
|
|
"loss": 0.9788,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 2.0460358056265986,
|
|
"grad_norm": 0.4769785686846811,
|
|
"learning_rate": 0.0001177944068623151,
|
|
"loss": 1.023,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 2.0485933503836318,
|
|
"grad_norm": 0.5513670753501893,
|
|
"learning_rate": 0.00011766841602148507,
|
|
"loss": 0.9758,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 2.051150895140665,
|
|
"grad_norm": 0.5343242524485008,
|
|
"learning_rate": 0.00011754230502469739,
|
|
"loss": 0.9828,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 2.053708439897698,
|
|
"grad_norm": 0.3790786798266737,
|
|
"learning_rate": 0.00011741607427422502,
|
|
"loss": 0.9891,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 2.0562659846547313,
|
|
"grad_norm": 0.3356594047836669,
|
|
"learning_rate": 0.000117289724172723,
|
|
"loss": 1.0182,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 0.4979916614188739,
|
|
"learning_rate": 0.00011716325512322707,
|
|
"loss": 0.9653,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 2.061381074168798,
|
|
"grad_norm": 0.5917115439040083,
|
|
"learning_rate": 0.00011703666752915235,
|
|
"loss": 0.9779,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 2.0639386189258313,
|
|
"grad_norm": 0.7711282568070231,
|
|
"learning_rate": 0.00011690996179429219,
|
|
"loss": 1.0192,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 2.0664961636828645,
|
|
"grad_norm": 0.9738458712850159,
|
|
"learning_rate": 0.00011678313832281664,
|
|
"loss": 0.9929,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 2.0690537084398977,
|
|
"grad_norm": 1.0543246508556696,
|
|
"learning_rate": 0.00011665619751927146,
|
|
"loss": 0.9711,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 2.071611253196931,
|
|
"grad_norm": 0.7273546848221022,
|
|
"learning_rate": 0.00011652913978857664,
|
|
"loss": 0.9732,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 2.074168797953964,
|
|
"grad_norm": 0.5119256334998138,
|
|
"learning_rate": 0.00011640196553602505,
|
|
"loss": 0.9955,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 2.0767263427109977,
|
|
"grad_norm": 0.36268273560962566,
|
|
"learning_rate": 0.00011627467516728138,
|
|
"loss": 0.9706,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 2.079283887468031,
|
|
"grad_norm": 0.40355937427082544,
|
|
"learning_rate": 0.00011614726908838063,
|
|
"loss": 0.9712,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 2.081841432225064,
|
|
"grad_norm": 0.5018343946579583,
|
|
"learning_rate": 0.00011601974770572692,
|
|
"loss": 1.0314,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 2.084398976982097,
|
|
"grad_norm": 0.49570234160885446,
|
|
"learning_rate": 0.0001158921114260922,
|
|
"loss": 0.961,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 2.0869565217391304,
|
|
"grad_norm": 0.5836483164644858,
|
|
"learning_rate": 0.00011576436065661484,
|
|
"loss": 0.9732,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 2.0895140664961636,
|
|
"grad_norm": 0.562651886144191,
|
|
"learning_rate": 0.00011563649580479848,
|
|
"loss": 0.9827,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 2.0920716112531967,
|
|
"grad_norm": 0.3634053027085326,
|
|
"learning_rate": 0.00011550851727851067,
|
|
"loss": 0.9634,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 2.0946291560102304,
|
|
"grad_norm": 0.35421206748470696,
|
|
"learning_rate": 0.00011538042548598154,
|
|
"loss": 0.9674,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 2.0971867007672635,
|
|
"grad_norm": 0.34410099266933664,
|
|
"learning_rate": 0.00011525222083580247,
|
|
"loss": 0.9682,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 2.0997442455242967,
|
|
"grad_norm": 0.36019738429870557,
|
|
"learning_rate": 0.00011512390373692495,
|
|
"loss": 0.98,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 2.10230179028133,
|
|
"grad_norm": 0.4497160405180852,
|
|
"learning_rate": 0.00011499547459865908,
|
|
"loss": 0.9658,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 2.104859335038363,
|
|
"grad_norm": 0.48924052145081715,
|
|
"learning_rate": 0.00011486693383067234,
|
|
"loss": 0.9961,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 2.1074168797953963,
|
|
"grad_norm": 0.51728675513698,
|
|
"learning_rate": 0.0001147382818429884,
|
|
"loss": 0.9886,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 2.10997442455243,
|
|
"grad_norm": 0.48298534091718054,
|
|
"learning_rate": 0.0001146095190459855,
|
|
"loss": 0.99,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 2.112531969309463,
|
|
"grad_norm": 0.3873329201691133,
|
|
"learning_rate": 0.00011448064585039555,
|
|
"loss": 0.9855,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 2.1150895140664963,
|
|
"grad_norm": 0.36617676835976043,
|
|
"learning_rate": 0.0001143516626673025,
|
|
"loss": 0.9784,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 2.1176470588235294,
|
|
"grad_norm": 0.39303542839485295,
|
|
"learning_rate": 0.00011422256990814115,
|
|
"loss": 0.9884,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 2.1202046035805626,
|
|
"grad_norm": 0.5159106405133932,
|
|
"learning_rate": 0.0001140933679846959,
|
|
"loss": 0.9926,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 2.122762148337596,
|
|
"grad_norm": 0.7469560811887815,
|
|
"learning_rate": 0.00011396405730909925,
|
|
"loss": 1.0183,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 2.125319693094629,
|
|
"grad_norm": 0.7327464479712988,
|
|
"learning_rate": 0.00011383463829383071,
|
|
"loss": 1.0098,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 2.1278772378516626,
|
|
"grad_norm": 0.5977082749289835,
|
|
"learning_rate": 0.00011370511135171532,
|
|
"loss": 1.0071,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 2.130434782608696,
|
|
"grad_norm": 0.4052295767189102,
|
|
"learning_rate": 0.00011357547689592237,
|
|
"loss": 1.0049,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 2.132992327365729,
|
|
"grad_norm": 0.5292207555015371,
|
|
"learning_rate": 0.00011344573533996417,
|
|
"loss": 0.9656,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 2.135549872122762,
|
|
"grad_norm": 0.4549224765225602,
|
|
"learning_rate": 0.0001133158870976946,
|
|
"loss": 0.9968,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 2.1381074168797953,
|
|
"grad_norm": 0.4460508304219039,
|
|
"learning_rate": 0.00011318593258330785,
|
|
"loss": 1.0134,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 2.1406649616368285,
|
|
"grad_norm": 0.46592246024671363,
|
|
"learning_rate": 0.00011305587221133718,
|
|
"loss": 0.9522,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 2.1432225063938617,
|
|
"grad_norm": 0.4489945484428353,
|
|
"learning_rate": 0.00011292570639665342,
|
|
"loss": 1.0104,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 2.1457800511508953,
|
|
"grad_norm": 0.46784938019320965,
|
|
"learning_rate": 0.00011279543555446379,
|
|
"loss": 0.988,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 2.1483375959079285,
|
|
"grad_norm": 0.4200222134898951,
|
|
"learning_rate": 0.00011266506010031052,
|
|
"loss": 1.0119,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 2.1508951406649617,
|
|
"grad_norm": 0.3655050664603677,
|
|
"learning_rate": 0.00011253458045006955,
|
|
"loss": 0.9895,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 2.153452685421995,
|
|
"grad_norm": 0.3022642865356664,
|
|
"learning_rate": 0.00011240399701994919,
|
|
"loss": 1.001,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 2.156010230179028,
|
|
"grad_norm": 0.3188747440198214,
|
|
"learning_rate": 0.00011227331022648877,
|
|
"loss": 0.9773,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 2.1585677749360612,
|
|
"grad_norm": 0.41190200456297044,
|
|
"learning_rate": 0.00011214252048655733,
|
|
"loss": 1.024,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 2.1611253196930944,
|
|
"grad_norm": 0.33803198230453474,
|
|
"learning_rate": 0.00011201162821735228,
|
|
"loss": 0.9843,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 2.163682864450128,
|
|
"grad_norm": 0.36583158073668925,
|
|
"learning_rate": 0.00011188063383639817,
|
|
"loss": 0.9809,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 2.166240409207161,
|
|
"grad_norm": 0.39675634848639996,
|
|
"learning_rate": 0.00011174953776154516,
|
|
"loss": 0.942,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 2.1687979539641944,
|
|
"grad_norm": 0.4164372273567332,
|
|
"learning_rate": 0.00011161834041096782,
|
|
"loss": 1.0337,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 2.1713554987212276,
|
|
"grad_norm": 0.42306948681428896,
|
|
"learning_rate": 0.00011148704220316387,
|
|
"loss": 0.9913,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 2.1739130434782608,
|
|
"grad_norm": 0.374454297267049,
|
|
"learning_rate": 0.0001113556435569526,
|
|
"loss": 0.9928,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 2.176470588235294,
|
|
"grad_norm": 0.31767286286037444,
|
|
"learning_rate": 0.00011122414489147376,
|
|
"loss": 0.9972,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 2.1790281329923276,
|
|
"grad_norm": 0.36673595005863613,
|
|
"learning_rate": 0.00011109254662618616,
|
|
"loss": 1.0105,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 2.1815856777493607,
|
|
"grad_norm": 0.5025085408193712,
|
|
"learning_rate": 0.00011096084918086626,
|
|
"loss": 0.9508,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 2.184143222506394,
|
|
"grad_norm": 0.5453118752197188,
|
|
"learning_rate": 0.00011082905297560697,
|
|
"loss": 0.9354,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 2.186700767263427,
|
|
"grad_norm": 0.535508310533172,
|
|
"learning_rate": 0.00011069715843081613,
|
|
"loss": 0.986,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 2.1892583120204603,
|
|
"grad_norm": 0.5550105153386212,
|
|
"learning_rate": 0.00011056516596721534,
|
|
"loss": 1.0047,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 2.1918158567774935,
|
|
"grad_norm": 0.5522958050937595,
|
|
"learning_rate": 0.00011043307600583854,
|
|
"loss": 1.0204,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 2.1943734015345266,
|
|
"grad_norm": 0.514732209947304,
|
|
"learning_rate": 0.0001103008889680306,
|
|
"loss": 1.0137,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 2.1969309462915603,
|
|
"grad_norm": 0.5281211410564769,
|
|
"learning_rate": 0.00011016860527544616,
|
|
"loss": 1.0085,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 2.1994884910485935,
|
|
"grad_norm": 0.46959816689384604,
|
|
"learning_rate": 0.00011003622535004806,
|
|
"loss": 1.0058,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 2.2020460358056266,
|
|
"grad_norm": 0.3407338275520536,
|
|
"learning_rate": 0.0001099037496141062,
|
|
"loss": 0.9986,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 2.20460358056266,
|
|
"grad_norm": 0.47884582066611536,
|
|
"learning_rate": 0.00010977117849019604,
|
|
"loss": 0.9707,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 2.207161125319693,
|
|
"grad_norm": 0.6169099163617163,
|
|
"learning_rate": 0.00010963851240119731,
|
|
"loss": 0.9957,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 2.209718670076726,
|
|
"grad_norm": 0.5842777084702644,
|
|
"learning_rate": 0.00010950575177029271,
|
|
"loss": 0.9971,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 2.21227621483376,
|
|
"grad_norm": 0.5415512252484223,
|
|
"learning_rate": 0.00010937289702096648,
|
|
"loss": 0.955,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 2.214833759590793,
|
|
"grad_norm": 0.5584987591506012,
|
|
"learning_rate": 0.00010923994857700308,
|
|
"loss": 0.9858,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 2.217391304347826,
|
|
"grad_norm": 0.5438681169787357,
|
|
"learning_rate": 0.00010910690686248587,
|
|
"loss": 1.0272,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 2.2199488491048593,
|
|
"grad_norm": 0.45923876211266634,
|
|
"learning_rate": 0.00010897377230179568,
|
|
"loss": 0.9689,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 2.2225063938618925,
|
|
"grad_norm": 0.344989298275585,
|
|
"learning_rate": 0.00010884054531960956,
|
|
"loss": 1.005,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 2.2250639386189257,
|
|
"grad_norm": 0.3203832886307522,
|
|
"learning_rate": 0.00010870722634089927,
|
|
"loss": 0.9904,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 2.227621483375959,
|
|
"grad_norm": 0.4050058894119621,
|
|
"learning_rate": 0.0001085738157909302,
|
|
"loss": 0.9716,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 2.2301790281329925,
|
|
"grad_norm": 0.5042105083367587,
|
|
"learning_rate": 0.00010844031409525962,
|
|
"loss": 0.9921,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 2.2327365728900257,
|
|
"grad_norm": 0.5771976233792036,
|
|
"learning_rate": 0.00010830672167973572,
|
|
"loss": 1.0081,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 2.235294117647059,
|
|
"grad_norm": 0.6444239077326948,
|
|
"learning_rate": 0.00010817303897049597,
|
|
"loss": 0.9961,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 2.237851662404092,
|
|
"grad_norm": 0.6303091061510789,
|
|
"learning_rate": 0.0001080392663939659,
|
|
"loss": 0.9648,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 2.2404092071611252,
|
|
"grad_norm": 0.5383211537711221,
|
|
"learning_rate": 0.00010790540437685771,
|
|
"loss": 0.9835,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 2.2429667519181584,
|
|
"grad_norm": 0.4021404516007495,
|
|
"learning_rate": 0.00010777145334616884,
|
|
"loss": 0.9732,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 2.2455242966751916,
|
|
"grad_norm": 0.31439318271272565,
|
|
"learning_rate": 0.00010763741372918076,
|
|
"loss": 0.9799,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 2.2480818414322252,
|
|
"grad_norm": 0.4404091457741591,
|
|
"learning_rate": 0.00010750328595345744,
|
|
"loss": 0.9798,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 2.2506393861892584,
|
|
"grad_norm": 0.5676899676174939,
|
|
"learning_rate": 0.00010736907044684409,
|
|
"loss": 0.956,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 2.2531969309462916,
|
|
"grad_norm": 0.6251515987816799,
|
|
"learning_rate": 0.00010723476763746578,
|
|
"loss": 0.9766,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 2.2557544757033248,
|
|
"grad_norm": 0.6188152066667294,
|
|
"learning_rate": 0.00010710037795372604,
|
|
"loss": 0.9436,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 2.258312020460358,
|
|
"grad_norm": 0.561619175816319,
|
|
"learning_rate": 0.00010696590182430552,
|
|
"loss": 0.9829,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 2.260869565217391,
|
|
"grad_norm": 0.42915411587906266,
|
|
"learning_rate": 0.00010683133967816062,
|
|
"loss": 0.9776,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 2.2634271099744243,
|
|
"grad_norm": 0.3524127037006637,
|
|
"learning_rate": 0.00010669669194452213,
|
|
"loss": 0.9966,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 2.265984654731458,
|
|
"grad_norm": 0.3537805903644639,
|
|
"learning_rate": 0.00010656195905289382,
|
|
"loss": 1.0042,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 2.268542199488491,
|
|
"grad_norm": 0.38907067845530163,
|
|
"learning_rate": 0.00010642714143305115,
|
|
"loss": 0.9591,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 2.2710997442455243,
|
|
"grad_norm": 0.4388187336605131,
|
|
"learning_rate": 0.00010629223951503975,
|
|
"loss": 0.9657,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 2.2736572890025575,
|
|
"grad_norm": 0.5259226887120563,
|
|
"learning_rate": 0.00010615725372917429,
|
|
"loss": 0.9902,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 2.2762148337595907,
|
|
"grad_norm": 0.5228861897572435,
|
|
"learning_rate": 0.00010602218450603687,
|
|
"loss": 1.0222,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 2.2787723785166243,
|
|
"grad_norm": 0.5036534202887699,
|
|
"learning_rate": 0.00010588703227647573,
|
|
"loss": 1.0003,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 2.2813299232736575,
|
|
"grad_norm": 0.3581923819862395,
|
|
"learning_rate": 0.00010575179747160391,
|
|
"loss": 0.9834,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 2.2838874680306906,
|
|
"grad_norm": 0.3410033765731837,
|
|
"learning_rate": 0.00010561648052279792,
|
|
"loss": 0.9893,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 2.286445012787724,
|
|
"grad_norm": 0.48497621648344247,
|
|
"learning_rate": 0.00010548108186169619,
|
|
"loss": 1.0097,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 2.289002557544757,
|
|
"grad_norm": 0.4811056602507645,
|
|
"learning_rate": 0.00010534560192019784,
|
|
"loss": 0.9987,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 2.29156010230179,
|
|
"grad_norm": 0.5430558900686754,
|
|
"learning_rate": 0.00010521004113046126,
|
|
"loss": 0.9863,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 2.2941176470588234,
|
|
"grad_norm": 0.5520225619306299,
|
|
"learning_rate": 0.00010507439992490274,
|
|
"loss": 0.9854,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 2.296675191815857,
|
|
"grad_norm": 0.5368891057768155,
|
|
"learning_rate": 0.00010493867873619509,
|
|
"loss": 0.962,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 2.29923273657289,
|
|
"grad_norm": 0.45785580350946786,
|
|
"learning_rate": 0.00010480287799726624,
|
|
"loss": 0.9951,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 2.3017902813299234,
|
|
"grad_norm": 0.3134044741551554,
|
|
"learning_rate": 0.00010466699814129784,
|
|
"loss": 0.9808,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 2.3043478260869565,
|
|
"grad_norm": 0.3718160522616458,
|
|
"learning_rate": 0.00010453103960172399,
|
|
"loss": 0.9722,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 2.3069053708439897,
|
|
"grad_norm": 0.42777708592376057,
|
|
"learning_rate": 0.0001043950028122297,
|
|
"loss": 0.9778,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 2.309462915601023,
|
|
"grad_norm": 0.5114598924445181,
|
|
"learning_rate": 0.00010425888820674964,
|
|
"loss": 0.9999,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 2.312020460358056,
|
|
"grad_norm": 0.42665599355653705,
|
|
"learning_rate": 0.00010412269621946664,
|
|
"loss": 0.9277,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 2.3145780051150897,
|
|
"grad_norm": 0.32425667546420855,
|
|
"learning_rate": 0.0001039864272848104,
|
|
"loss": 0.9623,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 2.317135549872123,
|
|
"grad_norm": 0.278767997134977,
|
|
"learning_rate": 0.00010385008183745614,
|
|
"loss": 0.9709,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 2.319693094629156,
|
|
"grad_norm": 0.2973268406415685,
|
|
"learning_rate": 0.00010371366031232298,
|
|
"loss": 0.9752,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 2.3222506393861893,
|
|
"grad_norm": 0.32805655210523665,
|
|
"learning_rate": 0.00010357716314457286,
|
|
"loss": 1.0151,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 2.3248081841432224,
|
|
"grad_norm": 0.3136457006720511,
|
|
"learning_rate": 0.00010344059076960893,
|
|
"loss": 0.9525,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 2.3273657289002556,
|
|
"grad_norm": 0.36706796314794027,
|
|
"learning_rate": 0.00010330394362307426,
|
|
"loss": 1.0263,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 2.329923273657289,
|
|
"grad_norm": 0.3628334304816528,
|
|
"learning_rate": 0.00010316722214085048,
|
|
"loss": 1.0032,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 2.3324808184143224,
|
|
"grad_norm": 0.4614008122870428,
|
|
"learning_rate": 0.00010303042675905623,
|
|
"loss": 0.9655,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 2.3350383631713556,
|
|
"grad_norm": 0.5091780040539386,
|
|
"learning_rate": 0.00010289355791404597,
|
|
"loss": 0.9963,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 2.337595907928389,
|
|
"grad_norm": 0.4886959522852251,
|
|
"learning_rate": 0.00010275661604240844,
|
|
"loss": 0.9959,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 2.340153452685422,
|
|
"grad_norm": 0.3477812096500851,
|
|
"learning_rate": 0.00010261960158096538,
|
|
"loss": 0.9923,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 2.342710997442455,
|
|
"grad_norm": 0.3003617995320152,
|
|
"learning_rate": 0.00010248251496677002,
|
|
"loss": 1.0133,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 2.3452685421994883,
|
|
"grad_norm": 0.3907656568645366,
|
|
"learning_rate": 0.00010234535663710578,
|
|
"loss": 0.9559,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 2.3478260869565215,
|
|
"grad_norm": 0.44450800877616453,
|
|
"learning_rate": 0.00010220812702948483,
|
|
"loss": 0.9839,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 2.350383631713555,
|
|
"grad_norm": 0.41444476133681435,
|
|
"learning_rate": 0.00010207082658164668,
|
|
"loss": 0.9695,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 0.3486015741078046,
|
|
"learning_rate": 0.00010193345573155686,
|
|
"loss": 0.9699,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 2.3554987212276215,
|
|
"grad_norm": 0.305313779906682,
|
|
"learning_rate": 0.00010179601491740546,
|
|
"loss": 0.9737,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 2.3580562659846547,
|
|
"grad_norm": 0.3210944860271877,
|
|
"learning_rate": 0.00010165850457760569,
|
|
"loss": 0.9734,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 2.360613810741688,
|
|
"grad_norm": 0.33354001864174027,
|
|
"learning_rate": 0.00010152092515079263,
|
|
"loss": 0.9758,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 2.363171355498721,
|
|
"grad_norm": 0.3630435985390137,
|
|
"learning_rate": 0.00010138327707582161,
|
|
"loss": 0.9843,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 2.3657289002557547,
|
|
"grad_norm": 0.3068154551503405,
|
|
"learning_rate": 0.00010124556079176705,
|
|
"loss": 0.9718,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 2.368286445012788,
|
|
"grad_norm": 0.3145375023118287,
|
|
"learning_rate": 0.0001011077767379209,
|
|
"loss": 0.9485,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 2.370843989769821,
|
|
"grad_norm": 0.4562062846091247,
|
|
"learning_rate": 0.00010096992535379125,
|
|
"loss": 1.0041,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 2.373401534526854,
|
|
"grad_norm": 0.4613854636034836,
|
|
"learning_rate": 0.00010083200707910109,
|
|
"loss": 1.0095,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 2.3759590792838874,
|
|
"grad_norm": 0.5020460478647006,
|
|
"learning_rate": 0.00010069402235378657,
|
|
"loss": 0.9793,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 2.3785166240409206,
|
|
"grad_norm": 0.47032502181209285,
|
|
"learning_rate": 0.000100555971617996,
|
|
"loss": 1.003,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 2.381074168797954,
|
|
"grad_norm": 0.37153265133623853,
|
|
"learning_rate": 0.00010041785531208813,
|
|
"loss": 0.9707,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 2.3836317135549874,
|
|
"grad_norm": 0.2954908430723523,
|
|
"learning_rate": 0.00010027967387663098,
|
|
"loss": 0.9943,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 2.3861892583120206,
|
|
"grad_norm": 0.2860326087524264,
|
|
"learning_rate": 0.00010014142775240018,
|
|
"loss": 0.978,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 2.3887468030690537,
|
|
"grad_norm": 0.36670864980970264,
|
|
"learning_rate": 0.00010000311738037786,
|
|
"loss": 0.9654,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 2.391304347826087,
|
|
"grad_norm": 0.39639852002586273,
|
|
"learning_rate": 9.986474320175097e-05,
|
|
"loss": 0.964,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 2.39386189258312,
|
|
"grad_norm": 0.3585981520256939,
|
|
"learning_rate": 9.972630565791003e-05,
|
|
"loss": 0.9825,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 2.3964194373401533,
|
|
"grad_norm": 0.3189834091257556,
|
|
"learning_rate": 9.958780519044772e-05,
|
|
"loss": 0.9851,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 2.398976982097187,
|
|
"grad_norm": 0.3049358905004256,
|
|
"learning_rate": 9.944924224115737e-05,
|
|
"loss": 0.9939,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 2.40153452685422,
|
|
"grad_norm": 0.2622458924767327,
|
|
"learning_rate": 9.931061725203167e-05,
|
|
"loss": 0.9781,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 2.4040920716112533,
|
|
"grad_norm": 0.2924257759631161,
|
|
"learning_rate": 9.917193066526122e-05,
|
|
"loss": 0.9868,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 2.4066496163682864,
|
|
"grad_norm": 0.3604978006726876,
|
|
"learning_rate": 9.903318292323301e-05,
|
|
"loss": 0.9754,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 2.4092071611253196,
|
|
"grad_norm": 0.29745498369836404,
|
|
"learning_rate": 9.889437446852923e-05,
|
|
"loss": 0.9859,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 2.411764705882353,
|
|
"grad_norm": 0.37371862497237623,
|
|
"learning_rate": 9.875550574392565e-05,
|
|
"loss": 0.9896,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 2.414322250639386,
|
|
"grad_norm": 0.38638295584959187,
|
|
"learning_rate": 9.86165771923903e-05,
|
|
"loss": 0.9881,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 2.4168797953964196,
|
|
"grad_norm": 0.4041126989806797,
|
|
"learning_rate": 9.84775892570821e-05,
|
|
"loss": 0.9428,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 2.419437340153453,
|
|
"grad_norm": 0.395096912214402,
|
|
"learning_rate": 9.833854238134931e-05,
|
|
"loss": 0.9622,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 2.421994884910486,
|
|
"grad_norm": 0.3464290247147215,
|
|
"learning_rate": 9.819943700872828e-05,
|
|
"loss": 1.0125,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 2.424552429667519,
|
|
"grad_norm": 0.28843985739584715,
|
|
"learning_rate": 9.806027358294195e-05,
|
|
"loss": 0.9712,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 2.4271099744245523,
|
|
"grad_norm": 0.38051542261971155,
|
|
"learning_rate": 9.792105254789834e-05,
|
|
"loss": 0.9851,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 2.4296675191815855,
|
|
"grad_norm": 0.4466310758086544,
|
|
"learning_rate": 9.778177434768935e-05,
|
|
"loss": 0.9683,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 2.4322250639386187,
|
|
"grad_norm": 0.4692147641165216,
|
|
"learning_rate": 9.764243942658919e-05,
|
|
"loss": 0.9841,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 2.4347826086956523,
|
|
"grad_norm": 0.35373867138680226,
|
|
"learning_rate": 9.750304822905297e-05,
|
|
"loss": 0.9492,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 2.4373401534526855,
|
|
"grad_norm": 0.28385300113252654,
|
|
"learning_rate": 9.736360119971537e-05,
|
|
"loss": 0.9996,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 2.4398976982097187,
|
|
"grad_norm": 0.2937003946020655,
|
|
"learning_rate": 9.722409878338908e-05,
|
|
"loss": 1.0015,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 2.442455242966752,
|
|
"grad_norm": 0.3969860787197417,
|
|
"learning_rate": 9.708454142506354e-05,
|
|
"loss": 0.9774,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 2.445012787723785,
|
|
"grad_norm": 0.5498839614052679,
|
|
"learning_rate": 9.694492956990345e-05,
|
|
"loss": 0.9847,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 2.4475703324808182,
|
|
"grad_norm": 0.5513989094448135,
|
|
"learning_rate": 9.680526366324726e-05,
|
|
"loss": 0.9565,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 2.4501278772378514,
|
|
"grad_norm": 0.506905247181652,
|
|
"learning_rate": 9.666554415060596e-05,
|
|
"loss": 0.9517,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 2.452685421994885,
|
|
"grad_norm": 0.44474310752723095,
|
|
"learning_rate": 9.652577147766142e-05,
|
|
"loss": 0.9743,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 2.455242966751918,
|
|
"grad_norm": 0.37097475676427244,
|
|
"learning_rate": 9.638594609026515e-05,
|
|
"loss": 0.9506,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 2.4578005115089514,
|
|
"grad_norm": 0.2734924283931777,
|
|
"learning_rate": 9.624606843443675e-05,
|
|
"loss": 1.0158,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 2.4603580562659846,
|
|
"grad_norm": 0.31804819233085263,
|
|
"learning_rate": 9.610613895636263e-05,
|
|
"loss": 0.992,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 2.4629156010230178,
|
|
"grad_norm": 0.41664714320663915,
|
|
"learning_rate": 9.596615810239445e-05,
|
|
"loss": 0.999,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 2.4654731457800514,
|
|
"grad_norm": 0.5523065515247985,
|
|
"learning_rate": 9.582612631904779e-05,
|
|
"loss": 1.0055,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 2.4680306905370846,
|
|
"grad_norm": 0.4671305490762141,
|
|
"learning_rate": 9.568604405300062e-05,
|
|
"loss": 0.9579,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 2.4705882352941178,
|
|
"grad_norm": 0.3279722497396409,
|
|
"learning_rate": 9.554591175109194e-05,
|
|
"loss": 0.9731,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 2.473145780051151,
|
|
"grad_norm": 0.25846610901040445,
|
|
"learning_rate": 9.54057298603205e-05,
|
|
"loss": 0.9817,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 2.475703324808184,
|
|
"grad_norm": 0.3730225408971352,
|
|
"learning_rate": 9.526549882784305e-05,
|
|
"loss": 0.9874,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 2.4782608695652173,
|
|
"grad_norm": 0.7271461728885226,
|
|
"learning_rate": 9.512521910097316e-05,
|
|
"loss": 1.0348,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 2.4808184143222505,
|
|
"grad_norm": 0.32875046425746846,
|
|
"learning_rate": 9.49848911271798e-05,
|
|
"loss": 0.9565,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 2.483375959079284,
|
|
"grad_norm": 0.3205410594330121,
|
|
"learning_rate": 9.484451535408572e-05,
|
|
"loss": 0.9784,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 2.4859335038363173,
|
|
"grad_norm": 0.26205949445440796,
|
|
"learning_rate": 9.470409222946623e-05,
|
|
"loss": 0.9983,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 2.4884910485933505,
|
|
"grad_norm": 0.3237027571460551,
|
|
"learning_rate": 9.456362220124766e-05,
|
|
"loss": 0.98,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 2.4910485933503836,
|
|
"grad_norm": 0.35272232039199597,
|
|
"learning_rate": 9.442310571750588e-05,
|
|
"loss": 0.9779,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 2.493606138107417,
|
|
"grad_norm": 0.305939353717968,
|
|
"learning_rate": 9.42825432264651e-05,
|
|
"loss": 0.9581,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 2.49616368286445,
|
|
"grad_norm": 0.2932577303248136,
|
|
"learning_rate": 9.414193517649614e-05,
|
|
"loss": 0.9855,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 2.498721227621483,
|
|
"grad_norm": 0.30059710492898495,
|
|
"learning_rate": 9.400128201611521e-05,
|
|
"loss": 0.9754,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 2.501278772378517,
|
|
"grad_norm": 0.2973031341519278,
|
|
"learning_rate": 9.386058419398243e-05,
|
|
"loss": 0.9909,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 2.50383631713555,
|
|
"grad_norm": 0.3722883437832787,
|
|
"learning_rate": 9.371984215890032e-05,
|
|
"loss": 0.9946,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 2.506393861892583,
|
|
"grad_norm": 0.3473263838445932,
|
|
"learning_rate": 9.357905635981251e-05,
|
|
"loss": 0.9543,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 2.5089514066496164,
|
|
"grad_norm": 0.2867570028047222,
|
|
"learning_rate": 9.34382272458022e-05,
|
|
"loss": 0.9638,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 2.5115089514066495,
|
|
"grad_norm": 0.30564756429493334,
|
|
"learning_rate": 9.329735526609071e-05,
|
|
"loss": 0.9464,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 2.5140664961636827,
|
|
"grad_norm": 0.277493802953859,
|
|
"learning_rate": 9.315644087003614e-05,
|
|
"loss": 0.9565,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 2.516624040920716,
|
|
"grad_norm": 0.32107200459340096,
|
|
"learning_rate": 9.301548450713193e-05,
|
|
"loss": 0.987,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 2.5191815856777495,
|
|
"grad_norm": 0.34282165398687586,
|
|
"learning_rate": 9.28744866270053e-05,
|
|
"loss": 0.985,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 2.5217391304347827,
|
|
"grad_norm": 0.32220988156237623,
|
|
"learning_rate": 9.273344767941595e-05,
|
|
"loss": 0.958,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 2.524296675191816,
|
|
"grad_norm": 0.2659763342921004,
|
|
"learning_rate": 9.259236811425458e-05,
|
|
"loss": 0.9693,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 2.526854219948849,
|
|
"grad_norm": 0.31738841820079255,
|
|
"learning_rate": 9.245124838154145e-05,
|
|
"loss": 0.9938,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 2.5294117647058822,
|
|
"grad_norm": 0.32830918791297703,
|
|
"learning_rate": 9.231008893142496e-05,
|
|
"loss": 0.9934,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 2.531969309462916,
|
|
"grad_norm": 0.3402708856013208,
|
|
"learning_rate": 9.216889021418015e-05,
|
|
"loss": 1.0013,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 2.5345268542199486,
|
|
"grad_norm": 0.4044102426145664,
|
|
"learning_rate": 9.202765268020734e-05,
|
|
"loss": 0.9831,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 2.5370843989769822,
|
|
"grad_norm": 0.42862262278596586,
|
|
"learning_rate": 9.188637678003078e-05,
|
|
"loss": 0.9997,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 2.5396419437340154,
|
|
"grad_norm": 0.4484266743548927,
|
|
"learning_rate": 9.17450629642969e-05,
|
|
"loss": 0.9828,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 2.5421994884910486,
|
|
"grad_norm": 0.3265912580211292,
|
|
"learning_rate": 9.160371168377322e-05,
|
|
"loss": 0.9643,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 2.544757033248082,
|
|
"grad_norm": 0.32534751123207517,
|
|
"learning_rate": 9.146232338934671e-05,
|
|
"loss": 0.9582,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 2.547314578005115,
|
|
"grad_norm": 0.38239024918470127,
|
|
"learning_rate": 9.132089853202243e-05,
|
|
"loss": 0.9744,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 2.5498721227621486,
|
|
"grad_norm": 0.46563347602108834,
|
|
"learning_rate": 9.117943756292208e-05,
|
|
"loss": 0.9792,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 2.5524296675191813,
|
|
"grad_norm": 0.39461054417861174,
|
|
"learning_rate": 9.103794093328248e-05,
|
|
"loss": 0.9755,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 2.554987212276215,
|
|
"grad_norm": 0.3125908044097884,
|
|
"learning_rate": 9.089640909445431e-05,
|
|
"loss": 0.9716,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 2.557544757033248,
|
|
"grad_norm": 0.2684368877044592,
|
|
"learning_rate": 9.075484249790048e-05,
|
|
"loss": 0.9747,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 2.5601023017902813,
|
|
"grad_norm": 0.28891578856074146,
|
|
"learning_rate": 9.061324159519476e-05,
|
|
"loss": 0.9762,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 2.5626598465473145,
|
|
"grad_norm": 0.3034677475712927,
|
|
"learning_rate": 9.047160683802046e-05,
|
|
"loss": 0.9674,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 2.5652173913043477,
|
|
"grad_norm": 0.31908253316340884,
|
|
"learning_rate": 9.032993867816876e-05,
|
|
"loss": 0.9942,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 2.5677749360613813,
|
|
"grad_norm": 0.2544491678916064,
|
|
"learning_rate": 9.018823756753746e-05,
|
|
"loss": 1.0001,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 2.5703324808184145,
|
|
"grad_norm": 0.2995352776229395,
|
|
"learning_rate": 9.00465039581294e-05,
|
|
"loss": 0.9929,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 2.5728900255754477,
|
|
"grad_norm": 0.35913882534331126,
|
|
"learning_rate": 8.990473830205118e-05,
|
|
"loss": 0.9318,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 2.575447570332481,
|
|
"grad_norm": 0.37010668314829087,
|
|
"learning_rate": 8.976294105151154e-05,
|
|
"loss": 1.0079,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 2.578005115089514,
|
|
"grad_norm": 0.2570784147501355,
|
|
"learning_rate": 8.962111265882006e-05,
|
|
"loss": 0.9952,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 2.580562659846547,
|
|
"grad_norm": 0.3149539278736431,
|
|
"learning_rate": 8.947925357638561e-05,
|
|
"loss": 0.9941,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 2.5831202046035804,
|
|
"grad_norm": 0.2855340149405739,
|
|
"learning_rate": 8.933736425671495e-05,
|
|
"loss": 0.9816,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 2.585677749360614,
|
|
"grad_norm": 0.25345884892793763,
|
|
"learning_rate": 8.91954451524114e-05,
|
|
"loss": 0.9818,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 2.588235294117647,
|
|
"grad_norm": 0.29694516426804485,
|
|
"learning_rate": 8.905349671617313e-05,
|
|
"loss": 0.9876,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 2.5907928388746804,
|
|
"grad_norm": 0.3052840810260173,
|
|
"learning_rate": 8.891151940079198e-05,
|
|
"loss": 0.9702,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 2.5933503836317136,
|
|
"grad_norm": 0.2661838830871243,
|
|
"learning_rate": 8.87695136591519e-05,
|
|
"loss": 0.9877,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 2.5959079283887467,
|
|
"grad_norm": 0.2986390559549456,
|
|
"learning_rate": 8.862747994422744e-05,
|
|
"loss": 0.9707,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 2.59846547314578,
|
|
"grad_norm": 0.3613476612681819,
|
|
"learning_rate": 8.848541870908248e-05,
|
|
"loss": 0.9703,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 2.601023017902813,
|
|
"grad_norm": 0.33024018130732985,
|
|
"learning_rate": 8.834333040686867e-05,
|
|
"loss": 0.979,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 2.6035805626598467,
|
|
"grad_norm": 0.31187166502347763,
|
|
"learning_rate": 8.820121549082389e-05,
|
|
"loss": 0.9829,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 2.60613810741688,
|
|
"grad_norm": 0.3469288630004611,
|
|
"learning_rate": 8.805907441427107e-05,
|
|
"loss": 0.9558,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.3134454892157028,
|
|
"learning_rate": 8.791690763061646e-05,
|
|
"loss": 0.9644,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 2.6112531969309463,
|
|
"grad_norm": 0.30922058220600745,
|
|
"learning_rate": 8.777471559334835e-05,
|
|
"loss": 0.9769,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 2.6138107416879794,
|
|
"grad_norm": 0.3164613704707754,
|
|
"learning_rate": 8.763249875603568e-05,
|
|
"loss": 0.9699,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 2.6163682864450126,
|
|
"grad_norm": 0.3937696035168064,
|
|
"learning_rate": 8.74902575723263e-05,
|
|
"loss": 0.9913,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 2.618925831202046,
|
|
"grad_norm": 0.3269757525342128,
|
|
"learning_rate": 8.734799249594593e-05,
|
|
"loss": 0.9714,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 2.6214833759590794,
|
|
"grad_norm": 0.3137372841061025,
|
|
"learning_rate": 8.720570398069639e-05,
|
|
"loss": 0.9667,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 2.6240409207161126,
|
|
"grad_norm": 0.296905098424126,
|
|
"learning_rate": 8.706339248045425e-05,
|
|
"loss": 0.9748,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 2.626598465473146,
|
|
"grad_norm": 0.3341447796223413,
|
|
"learning_rate": 8.692105844916946e-05,
|
|
"loss": 0.9813,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 2.629156010230179,
|
|
"grad_norm": 0.3756191138022281,
|
|
"learning_rate": 8.677870234086383e-05,
|
|
"loss": 0.9908,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 2.631713554987212,
|
|
"grad_norm": 0.3559465468948902,
|
|
"learning_rate": 8.663632460962956e-05,
|
|
"loss": 0.9936,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 2.634271099744246,
|
|
"grad_norm": 0.300711572823478,
|
|
"learning_rate": 8.649392570962781e-05,
|
|
"loss": 0.9795,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 2.6368286445012785,
|
|
"grad_norm": 0.3320572865051935,
|
|
"learning_rate": 8.635150609508733e-05,
|
|
"loss": 0.984,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 2.639386189258312,
|
|
"grad_norm": 0.3635828441982571,
|
|
"learning_rate": 8.620906622030292e-05,
|
|
"loss": 0.9536,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 2.6419437340153453,
|
|
"grad_norm": 0.3278411915419061,
|
|
"learning_rate": 8.6066606539634e-05,
|
|
"loss": 1.0088,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 2.6445012787723785,
|
|
"grad_norm": 0.32767767702958833,
|
|
"learning_rate": 8.592412750750312e-05,
|
|
"loss": 0.9876,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 0.35097964529502185,
|
|
"learning_rate": 8.578162957839462e-05,
|
|
"loss": 0.9915,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 2.649616368286445,
|
|
"grad_norm": 0.31991735732581283,
|
|
"learning_rate": 8.563911320685312e-05,
|
|
"loss": 0.9638,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 2.6521739130434785,
|
|
"grad_norm": 0.23787926653601094,
|
|
"learning_rate": 8.549657884748205e-05,
|
|
"loss": 0.9713,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 2.6547314578005117,
|
|
"grad_norm": 0.32244485030641373,
|
|
"learning_rate": 8.535402695494221e-05,
|
|
"loss": 0.9772,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 2.657289002557545,
|
|
"grad_norm": 0.312950136510117,
|
|
"learning_rate": 8.521145798395035e-05,
|
|
"loss": 0.9841,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 2.659846547314578,
|
|
"grad_norm": 0.26212781885375047,
|
|
"learning_rate": 8.506887238927764e-05,
|
|
"loss": 0.9955,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 2.662404092071611,
|
|
"grad_norm": 0.34105099182259796,
|
|
"learning_rate": 8.492627062574837e-05,
|
|
"loss": 0.9729,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 2.6649616368286444,
|
|
"grad_norm": 0.297943326170416,
|
|
"learning_rate": 8.478365314823831e-05,
|
|
"loss": 1.0041,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 2.6675191815856776,
|
|
"grad_norm": 0.23653735859455993,
|
|
"learning_rate": 8.464102041167343e-05,
|
|
"loss": 0.9385,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 2.670076726342711,
|
|
"grad_norm": 0.24103662980964566,
|
|
"learning_rate": 8.449837287102837e-05,
|
|
"loss": 0.9798,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 2.6726342710997444,
|
|
"grad_norm": 0.3266522540557997,
|
|
"learning_rate": 8.43557109813249e-05,
|
|
"loss": 0.9664,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 2.6751918158567776,
|
|
"grad_norm": 0.34157505937073707,
|
|
"learning_rate": 8.421303519763067e-05,
|
|
"loss": 0.9512,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 2.6777493606138107,
|
|
"grad_norm": 0.32745487240393034,
|
|
"learning_rate": 8.407034597505762e-05,
|
|
"loss": 0.9847,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 2.680306905370844,
|
|
"grad_norm": 0.30390244215100753,
|
|
"learning_rate": 8.392764376876049e-05,
|
|
"loss": 0.9847,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 2.682864450127877,
|
|
"grad_norm": 0.28021611753279574,
|
|
"learning_rate": 8.378492903393555e-05,
|
|
"loss": 0.9592,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 2.6854219948849103,
|
|
"grad_norm": 0.3320556275827844,
|
|
"learning_rate": 8.364220222581896e-05,
|
|
"loss": 0.9846,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 2.687979539641944,
|
|
"grad_norm": 0.3136101711766941,
|
|
"learning_rate": 8.34994637996854e-05,
|
|
"loss": 0.9811,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 2.690537084398977,
|
|
"grad_norm": 0.2618192450012102,
|
|
"learning_rate": 8.335671421084661e-05,
|
|
"loss": 0.9744,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 2.6930946291560103,
|
|
"grad_norm": 0.3220025314640929,
|
|
"learning_rate": 8.321395391464995e-05,
|
|
"loss": 0.9868,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 2.6956521739130435,
|
|
"grad_norm": 0.3598315892247714,
|
|
"learning_rate": 8.307118336647694e-05,
|
|
"loss": 0.951,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 2.6982097186700766,
|
|
"grad_norm": 0.4106007096012368,
|
|
"learning_rate": 8.292840302174178e-05,
|
|
"loss": 0.9643,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 2.70076726342711,
|
|
"grad_norm": 0.2548097195613678,
|
|
"learning_rate": 8.278561333588993e-05,
|
|
"loss": 0.9841,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 2.703324808184143,
|
|
"grad_norm": 0.3371557483370203,
|
|
"learning_rate": 8.264281476439662e-05,
|
|
"loss": 0.984,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 2.7058823529411766,
|
|
"grad_norm": 0.38976688577634183,
|
|
"learning_rate": 8.250000776276551e-05,
|
|
"loss": 0.9731,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 2.70843989769821,
|
|
"grad_norm": 0.2695308176694805,
|
|
"learning_rate": 8.235719278652704e-05,
|
|
"loss": 1.0008,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 2.710997442455243,
|
|
"grad_norm": 0.2799834287903197,
|
|
"learning_rate": 8.221437029123715e-05,
|
|
"loss": 0.96,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 2.713554987212276,
|
|
"grad_norm": 0.3887662531222578,
|
|
"learning_rate": 8.20715407324758e-05,
|
|
"loss": 1.0134,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 2.7161125319693094,
|
|
"grad_norm": 0.36475843384332224,
|
|
"learning_rate": 8.192870456584536e-05,
|
|
"loss": 0.9869,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 2.718670076726343,
|
|
"grad_norm": 0.3842950619442295,
|
|
"learning_rate": 8.178586224696938e-05,
|
|
"loss": 1.0191,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 2.7212276214833757,
|
|
"grad_norm": 0.29521526511075435,
|
|
"learning_rate": 8.164301423149104e-05,
|
|
"loss": 0.9847,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 2.7237851662404093,
|
|
"grad_norm": 0.2510688717518455,
|
|
"learning_rate": 8.150016097507161e-05,
|
|
"loss": 0.9537,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 2.7263427109974425,
|
|
"grad_norm": 0.31175386208986516,
|
|
"learning_rate": 8.135730293338918e-05,
|
|
"loss": 0.9715,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 2.7289002557544757,
|
|
"grad_norm": 0.2969969026627777,
|
|
"learning_rate": 8.121444056213698e-05,
|
|
"loss": 0.9778,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 2.731457800511509,
|
|
"grad_norm": 0.316196872282454,
|
|
"learning_rate": 8.107157431702219e-05,
|
|
"loss": 0.9979,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 2.734015345268542,
|
|
"grad_norm": 0.2677096371345643,
|
|
"learning_rate": 8.092870465376422e-05,
|
|
"loss": 0.972,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 2.7365728900255757,
|
|
"grad_norm": 0.25111395109245066,
|
|
"learning_rate": 8.078583202809347e-05,
|
|
"loss": 1.0173,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 2.7391304347826084,
|
|
"grad_norm": 0.23618007037740435,
|
|
"learning_rate": 8.064295689574979e-05,
|
|
"loss": 0.9681,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 2.741687979539642,
|
|
"grad_norm": 0.2462154966468633,
|
|
"learning_rate": 8.050007971248095e-05,
|
|
"loss": 0.9977,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 2.7442455242966752,
|
|
"grad_norm": 0.2396576027964869,
|
|
"learning_rate": 8.035720093404133e-05,
|
|
"loss": 0.9817,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 2.7468030690537084,
|
|
"grad_norm": 0.23288900252567163,
|
|
"learning_rate": 8.021432101619034e-05,
|
|
"loss": 0.9677,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 2.7493606138107416,
|
|
"grad_norm": 0.309943456329605,
|
|
"learning_rate": 8.007144041469111e-05,
|
|
"loss": 1.0198,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 2.7519181585677748,
|
|
"grad_norm": 0.2438257902275988,
|
|
"learning_rate": 7.992855958530893e-05,
|
|
"loss": 0.9774,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 2.7544757033248084,
|
|
"grad_norm": 0.24225939294568138,
|
|
"learning_rate": 7.978567898380968e-05,
|
|
"loss": 0.9975,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 2.7570332480818416,
|
|
"grad_norm": 0.2557453042666024,
|
|
"learning_rate": 7.96427990659587e-05,
|
|
"loss": 0.9601,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 2.7595907928388748,
|
|
"grad_norm": 0.25399744095479343,
|
|
"learning_rate": 7.949992028751908e-05,
|
|
"loss": 0.94,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 2.762148337595908,
|
|
"grad_norm": 0.25806395609838956,
|
|
"learning_rate": 7.935704310425022e-05,
|
|
"loss": 0.9856,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 2.764705882352941,
|
|
"grad_norm": 0.2778516319437345,
|
|
"learning_rate": 7.921416797190653e-05,
|
|
"loss": 0.9485,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 2.7672634271099743,
|
|
"grad_norm": 0.2652382709743763,
|
|
"learning_rate": 7.90712953462358e-05,
|
|
"loss": 0.9852,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 2.7698209718670075,
|
|
"grad_norm": 0.3078124836381294,
|
|
"learning_rate": 7.892842568297784e-05,
|
|
"loss": 0.9843,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 2.772378516624041,
|
|
"grad_norm": 0.2630029283693419,
|
|
"learning_rate": 7.878555943786304e-05,
|
|
"loss": 0.9866,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 2.7749360613810743,
|
|
"grad_norm": 0.3230772942242779,
|
|
"learning_rate": 7.864269706661084e-05,
|
|
"loss": 0.9617,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 2.7774936061381075,
|
|
"grad_norm": 0.33688102829350425,
|
|
"learning_rate": 7.84998390249284e-05,
|
|
"loss": 1.0151,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 2.7800511508951407,
|
|
"grad_norm": 0.27010473360932136,
|
|
"learning_rate": 7.8356985768509e-05,
|
|
"loss": 0.9416,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 2.782608695652174,
|
|
"grad_norm": 0.3216032949279463,
|
|
"learning_rate": 7.821413775303063e-05,
|
|
"loss": 0.9677,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 2.785166240409207,
|
|
"grad_norm": 0.3184797598775921,
|
|
"learning_rate": 7.807129543415467e-05,
|
|
"loss": 0.9878,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 2.78772378516624,
|
|
"grad_norm": 0.26980179286312655,
|
|
"learning_rate": 7.792845926752422e-05,
|
|
"loss": 0.9559,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 2.790281329923274,
|
|
"grad_norm": 0.2788560924053536,
|
|
"learning_rate": 7.778562970876285e-05,
|
|
"loss": 0.9315,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 2.792838874680307,
|
|
"grad_norm": 0.34225351537345716,
|
|
"learning_rate": 7.764280721347296e-05,
|
|
"loss": 0.9905,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 2.79539641943734,
|
|
"grad_norm": 0.3181751957801659,
|
|
"learning_rate": 7.749999223723451e-05,
|
|
"loss": 0.992,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 2.7979539641943734,
|
|
"grad_norm": 0.2617895154207013,
|
|
"learning_rate": 7.73571852356034e-05,
|
|
"loss": 0.976,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 2.8005115089514065,
|
|
"grad_norm": 0.26160435542511723,
|
|
"learning_rate": 7.72143866641101e-05,
|
|
"loss": 0.9717,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 2.80306905370844,
|
|
"grad_norm": 0.3005466825228635,
|
|
"learning_rate": 7.707159697825824e-05,
|
|
"loss": 1.019,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 2.805626598465473,
|
|
"grad_norm": 0.2737567544420114,
|
|
"learning_rate": 7.692881663352306e-05,
|
|
"loss": 0.9877,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 2.8081841432225065,
|
|
"grad_norm": 0.25383083364525466,
|
|
"learning_rate": 7.678604608535007e-05,
|
|
"loss": 1.0,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 2.8107416879795397,
|
|
"grad_norm": 0.24966621455789795,
|
|
"learning_rate": 7.664328578915341e-05,
|
|
"loss": 0.9913,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 2.813299232736573,
|
|
"grad_norm": 0.26731325577468995,
|
|
"learning_rate": 7.650053620031461e-05,
|
|
"loss": 0.9667,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 2.815856777493606,
|
|
"grad_norm": 0.24369512341274932,
|
|
"learning_rate": 7.635779777418105e-05,
|
|
"loss": 0.9941,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 2.8184143222506393,
|
|
"grad_norm": 0.22967457166848224,
|
|
"learning_rate": 7.621507096606445e-05,
|
|
"loss": 0.9755,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 2.820971867007673,
|
|
"grad_norm": 0.2571549233122558,
|
|
"learning_rate": 7.607235623123952e-05,
|
|
"loss": 0.9896,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 2.8235294117647056,
|
|
"grad_norm": 0.21308122874558627,
|
|
"learning_rate": 7.592965402494242e-05,
|
|
"loss": 0.9671,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 2.8260869565217392,
|
|
"grad_norm": 0.23965692093466115,
|
|
"learning_rate": 7.578696480236935e-05,
|
|
"loss": 0.9572,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 2.8286445012787724,
|
|
"grad_norm": 0.20206088609556147,
|
|
"learning_rate": 7.564428901867512e-05,
|
|
"loss": 0.9874,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 2.8312020460358056,
|
|
"grad_norm": 0.24456595967971878,
|
|
"learning_rate": 7.550162712897166e-05,
|
|
"loss": 0.9834,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 2.833759590792839,
|
|
"grad_norm": 0.2395628798306672,
|
|
"learning_rate": 7.535897958832657e-05,
|
|
"loss": 0.9932,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 2.836317135549872,
|
|
"grad_norm": 0.24488788117262922,
|
|
"learning_rate": 7.521634685176171e-05,
|
|
"loss": 0.9976,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 2.8388746803069056,
|
|
"grad_norm": 0.2475079536458042,
|
|
"learning_rate": 7.507372937425166e-05,
|
|
"loss": 0.979,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 2.8414322250639388,
|
|
"grad_norm": 0.25103418982918085,
|
|
"learning_rate": 7.493112761072238e-05,
|
|
"loss": 0.9784,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 2.843989769820972,
|
|
"grad_norm": 0.21080156526173952,
|
|
"learning_rate": 7.478854201604967e-05,
|
|
"loss": 0.9861,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 2.846547314578005,
|
|
"grad_norm": 0.2636072879534979,
|
|
"learning_rate": 7.464597304505779e-05,
|
|
"loss": 0.9767,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 2.8491048593350383,
|
|
"grad_norm": 0.3447559742850428,
|
|
"learning_rate": 7.450342115251793e-05,
|
|
"loss": 0.9763,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 2.8516624040920715,
|
|
"grad_norm": 0.3554201272513753,
|
|
"learning_rate": 7.436088679314689e-05,
|
|
"loss": 0.9814,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 2.8542199488491047,
|
|
"grad_norm": 0.2338897866384284,
|
|
"learning_rate": 7.42183704216054e-05,
|
|
"loss": 0.9737,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 2.8567774936061383,
|
|
"grad_norm": 0.3005337593534035,
|
|
"learning_rate": 7.407587249249691e-05,
|
|
"loss": 0.9593,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 2.8593350383631715,
|
|
"grad_norm": 0.28306065139483866,
|
|
"learning_rate": 7.393339346036604e-05,
|
|
"loss": 0.9912,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 2.8618925831202047,
|
|
"grad_norm": 0.32462258403513267,
|
|
"learning_rate": 7.379093377969708e-05,
|
|
"loss": 0.9636,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 2.864450127877238,
|
|
"grad_norm": 0.23458466619854929,
|
|
"learning_rate": 7.364849390491269e-05,
|
|
"loss": 1.0179,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 2.867007672634271,
|
|
"grad_norm": 0.26599173050846503,
|
|
"learning_rate": 7.350607429037222e-05,
|
|
"loss": 0.9865,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 2.869565217391304,
|
|
"grad_norm": 0.28672176422376533,
|
|
"learning_rate": 7.336367539037047e-05,
|
|
"loss": 0.9697,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 2.8721227621483374,
|
|
"grad_norm": 0.38174167324236646,
|
|
"learning_rate": 7.32212976591362e-05,
|
|
"loss": 0.9394,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 2.874680306905371,
|
|
"grad_norm": 0.3008937451500426,
|
|
"learning_rate": 7.307894155083054e-05,
|
|
"loss": 1.0193,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 2.877237851662404,
|
|
"grad_norm": 0.2647744376072329,
|
|
"learning_rate": 7.293660751954576e-05,
|
|
"loss": 0.9959,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 2.8797953964194374,
|
|
"grad_norm": 0.3361184185105208,
|
|
"learning_rate": 7.279429601930365e-05,
|
|
"loss": 0.9886,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 2.8823529411764706,
|
|
"grad_norm": 0.28703805124273124,
|
|
"learning_rate": 7.265200750405408e-05,
|
|
"loss": 0.9552,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 2.8849104859335037,
|
|
"grad_norm": 0.2282314607084684,
|
|
"learning_rate": 7.250974242767372e-05,
|
|
"loss": 0.9613,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 2.887468030690537,
|
|
"grad_norm": 0.2492748754541012,
|
|
"learning_rate": 7.236750124396435e-05,
|
|
"loss": 0.9668,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 2.89002557544757,
|
|
"grad_norm": 0.25888788395575085,
|
|
"learning_rate": 7.222528440665167e-05,
|
|
"loss": 0.9925,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 2.8925831202046037,
|
|
"grad_norm": 0.24496080625420605,
|
|
"learning_rate": 7.20830923693836e-05,
|
|
"loss": 1.0041,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 2.895140664961637,
|
|
"grad_norm": 0.23733176427430222,
|
|
"learning_rate": 7.194092558572897e-05,
|
|
"loss": 0.9425,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 2.89769820971867,
|
|
"grad_norm": 0.27037826071655174,
|
|
"learning_rate": 7.179878450917613e-05,
|
|
"loss": 0.9618,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 2.9002557544757033,
|
|
"grad_norm": 0.2110486047552461,
|
|
"learning_rate": 7.165666959313135e-05,
|
|
"loss": 0.9625,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 2.9028132992327365,
|
|
"grad_norm": 0.2356138250996952,
|
|
"learning_rate": 7.151458129091752e-05,
|
|
"loss": 0.9868,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 2.90537084398977,
|
|
"grad_norm": 0.2507648626394698,
|
|
"learning_rate": 7.137252005577256e-05,
|
|
"loss": 0.9579,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 2.907928388746803,
|
|
"grad_norm": 0.21729817798268314,
|
|
"learning_rate": 7.123048634084815e-05,
|
|
"loss": 1.0193,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 2.9104859335038364,
|
|
"grad_norm": 0.25511738825377567,
|
|
"learning_rate": 7.108848059920805e-05,
|
|
"loss": 0.9594,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 2.9130434782608696,
|
|
"grad_norm": 0.25447395942517514,
|
|
"learning_rate": 7.09465032838269e-05,
|
|
"loss": 0.9746,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 2.915601023017903,
|
|
"grad_norm": 0.24784365067022293,
|
|
"learning_rate": 7.080455484758863e-05,
|
|
"loss": 0.9659,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 2.918158567774936,
|
|
"grad_norm": 0.2730224277035152,
|
|
"learning_rate": 7.066263574328505e-05,
|
|
"loss": 0.9818,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 2.920716112531969,
|
|
"grad_norm": 0.30594100479026,
|
|
"learning_rate": 7.052074642361444e-05,
|
|
"loss": 0.9915,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 2.923273657289003,
|
|
"grad_norm": 0.32054932862442914,
|
|
"learning_rate": 7.037888734117998e-05,
|
|
"loss": 0.9882,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 2.9258312020460355,
|
|
"grad_norm": 0.23958919561701653,
|
|
"learning_rate": 7.023705894848848e-05,
|
|
"loss": 0.9666,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 2.928388746803069,
|
|
"grad_norm": 0.27076318118261017,
|
|
"learning_rate": 7.009526169794885e-05,
|
|
"loss": 0.9746,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 2.9309462915601023,
|
|
"grad_norm": 0.2729574133461879,
|
|
"learning_rate": 6.995349604187061e-05,
|
|
"loss": 0.9624,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 2.9335038363171355,
|
|
"grad_norm": 0.3259725455577868,
|
|
"learning_rate": 6.981176243246257e-05,
|
|
"loss": 0.9795,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 2.9360613810741687,
|
|
"grad_norm": 0.34256481150449963,
|
|
"learning_rate": 6.967006132183127e-05,
|
|
"loss": 0.977,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 2.938618925831202,
|
|
"grad_norm": 0.2828018012599345,
|
|
"learning_rate": 6.952839316197956e-05,
|
|
"loss": 0.9928,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 0.2397889702793678,
|
|
"learning_rate": 6.938675840480525e-05,
|
|
"loss": 0.9822,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 2.9437340153452687,
|
|
"grad_norm": 0.331164422112377,
|
|
"learning_rate": 6.924515750209954e-05,
|
|
"loss": 0.9973,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 2.946291560102302,
|
|
"grad_norm": 0.2704740780802998,
|
|
"learning_rate": 6.910359090554572e-05,
|
|
"loss": 0.9685,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 2.948849104859335,
|
|
"grad_norm": 0.2437699512495755,
|
|
"learning_rate": 6.896205906671755e-05,
|
|
"loss": 0.9896,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 2.9514066496163682,
|
|
"grad_norm": 0.24008371878492457,
|
|
"learning_rate": 6.882056243707796e-05,
|
|
"loss": 0.9948,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 2.9539641943734014,
|
|
"grad_norm": 0.2714718735118312,
|
|
"learning_rate": 6.86791014679776e-05,
|
|
"loss": 1.0107,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 2.9565217391304346,
|
|
"grad_norm": 0.2689100345729253,
|
|
"learning_rate": 6.85376766106533e-05,
|
|
"loss": 0.9844,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 2.959079283887468,
|
|
"grad_norm": 0.217002318039709,
|
|
"learning_rate": 6.839628831622681e-05,
|
|
"loss": 0.9748,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 2.9616368286445014,
|
|
"grad_norm": 0.2919920400101465,
|
|
"learning_rate": 6.825493703570311e-05,
|
|
"loss": 0.9699,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 2.9641943734015346,
|
|
"grad_norm": 0.3490734108048557,
|
|
"learning_rate": 6.811362321996926e-05,
|
|
"loss": 0.9694,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 2.9667519181585678,
|
|
"grad_norm": 0.3103643754348234,
|
|
"learning_rate": 6.797234731979267e-05,
|
|
"loss": 0.991,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 2.969309462915601,
|
|
"grad_norm": 0.1939069857875497,
|
|
"learning_rate": 6.783110978581989e-05,
|
|
"loss": 0.9614,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 2.971867007672634,
|
|
"grad_norm": 0.2495187824732926,
|
|
"learning_rate": 6.768991106857508e-05,
|
|
"loss": 0.9656,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 2.9744245524296673,
|
|
"grad_norm": 0.3034345894428266,
|
|
"learning_rate": 6.754875161845855e-05,
|
|
"loss": 1.0069,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 2.976982097186701,
|
|
"grad_norm": 0.3567922857742952,
|
|
"learning_rate": 6.740763188574546e-05,
|
|
"loss": 0.9612,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 2.979539641943734,
|
|
"grad_norm": 0.25891106467169334,
|
|
"learning_rate": 6.726655232058409e-05,
|
|
"loss": 0.9696,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 2.9820971867007673,
|
|
"grad_norm": 0.25153156564503487,
|
|
"learning_rate": 6.712551337299473e-05,
|
|
"loss": 1.0014,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 2.9846547314578005,
|
|
"grad_norm": 0.32964252932862226,
|
|
"learning_rate": 6.69845154928681e-05,
|
|
"loss": 0.9773,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 2.9872122762148337,
|
|
"grad_norm": 0.2917177962042733,
|
|
"learning_rate": 6.684355912996386e-05,
|
|
"loss": 0.9911,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 2.9897698209718673,
|
|
"grad_norm": 0.2002913243087303,
|
|
"learning_rate": 6.670264473390931e-05,
|
|
"loss": 0.9683,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 2.9923273657289,
|
|
"grad_norm": 0.26813771266232983,
|
|
"learning_rate": 6.656177275419785e-05,
|
|
"loss": 0.967,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 2.9948849104859336,
|
|
"grad_norm": 0.2590485360645914,
|
|
"learning_rate": 6.64209436401875e-05,
|
|
"loss": 0.9638,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 2.997442455242967,
|
|
"grad_norm": 0.26357426110685056,
|
|
"learning_rate": 6.62801578410997e-05,
|
|
"loss": 1.0056,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.22456837673610008,
|
|
"learning_rate": 6.61394158060176e-05,
|
|
"loss": 0.9933,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 3.002557544757033,
|
|
"grad_norm": 0.22123515970304183,
|
|
"learning_rate": 6.59987179838848e-05,
|
|
"loss": 0.9712,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 3.0051150895140664,
|
|
"grad_norm": 0.2497098271402969,
|
|
"learning_rate": 6.58580648235039e-05,
|
|
"loss": 0.9701,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 3.0076726342710995,
|
|
"grad_norm": 0.2264514281442564,
|
|
"learning_rate": 6.571745677353492e-05,
|
|
"loss": 0.9498,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 3.010230179028133,
|
|
"grad_norm": 0.24110920081950274,
|
|
"learning_rate": 6.557689428249414e-05,
|
|
"loss": 0.9841,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 3.0127877237851663,
|
|
"grad_norm": 0.28882150068726187,
|
|
"learning_rate": 6.543637779875237e-05,
|
|
"loss": 0.9728,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 3.0153452685421995,
|
|
"grad_norm": 0.22165888817736834,
|
|
"learning_rate": 6.529590777053378e-05,
|
|
"loss": 0.9263,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 3.0179028132992327,
|
|
"grad_norm": 0.2715939791147568,
|
|
"learning_rate": 6.515548464591428e-05,
|
|
"loss": 0.9353,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 3.020460358056266,
|
|
"grad_norm": 0.3321798212445876,
|
|
"learning_rate": 6.501510887282024e-05,
|
|
"loss": 0.948,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 3.023017902813299,
|
|
"grad_norm": 0.2852631687681614,
|
|
"learning_rate": 6.487478089902685e-05,
|
|
"loss": 0.9406,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 3.0255754475703327,
|
|
"grad_norm": 0.23938138232215803,
|
|
"learning_rate": 6.473450117215699e-05,
|
|
"loss": 0.9612,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 3.028132992327366,
|
|
"grad_norm": 0.2897634546793638,
|
|
"learning_rate": 6.459427013967953e-05,
|
|
"loss": 0.93,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 3.030690537084399,
|
|
"grad_norm": 0.28668995967161215,
|
|
"learning_rate": 6.445408824890805e-05,
|
|
"loss": 0.943,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 3.0332480818414322,
|
|
"grad_norm": 0.23250708905243717,
|
|
"learning_rate": 6.431395594699943e-05,
|
|
"loss": 0.9264,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 3.0358056265984654,
|
|
"grad_norm": 0.3127461016723165,
|
|
"learning_rate": 6.417387368095225e-05,
|
|
"loss": 0.9492,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 3.0383631713554986,
|
|
"grad_norm": 0.26702473205124055,
|
|
"learning_rate": 6.403384189760556e-05,
|
|
"loss": 0.9173,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 3.040920716112532,
|
|
"grad_norm": 0.2692197582092417,
|
|
"learning_rate": 6.389386104363738e-05,
|
|
"loss": 0.9483,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.29389458281034464,
|
|
"learning_rate": 6.375393156556325e-05,
|
|
"loss": 0.938,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 3.0460358056265986,
|
|
"grad_norm": 0.24003231343808254,
|
|
"learning_rate": 6.361405390973489e-05,
|
|
"loss": 0.9174,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 3.0485933503836318,
|
|
"grad_norm": 0.25208756985944336,
|
|
"learning_rate": 6.347422852233862e-05,
|
|
"loss": 0.9542,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 3.051150895140665,
|
|
"grad_norm": 0.24466794377181064,
|
|
"learning_rate": 6.333445584939407e-05,
|
|
"loss": 0.9617,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 3.053708439897698,
|
|
"grad_norm": 0.23317237737554486,
|
|
"learning_rate": 6.319473633675275e-05,
|
|
"loss": 0.9349,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 3.0562659846547313,
|
|
"grad_norm": 0.24590715837760968,
|
|
"learning_rate": 6.305507043009657e-05,
|
|
"loss": 0.9414,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 3.0588235294117645,
|
|
"grad_norm": 0.21035477411097228,
|
|
"learning_rate": 6.291545857493645e-05,
|
|
"loss": 0.9512,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 3.061381074168798,
|
|
"grad_norm": 0.2248505455887991,
|
|
"learning_rate": 6.277590121661098e-05,
|
|
"loss": 0.9522,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 3.0639386189258313,
|
|
"grad_norm": 0.2471462687532793,
|
|
"learning_rate": 6.263639880028468e-05,
|
|
"loss": 0.9493,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 3.0664961636828645,
|
|
"grad_norm": 0.22868376945738234,
|
|
"learning_rate": 6.249695177094707e-05,
|
|
"loss": 0.9668,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 3.0690537084398977,
|
|
"grad_norm": 0.23527194146680278,
|
|
"learning_rate": 6.235756057341084e-05,
|
|
"loss": 0.9279,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 3.071611253196931,
|
|
"grad_norm": 0.2513612868250463,
|
|
"learning_rate": 6.221822565231066e-05,
|
|
"loss": 0.9403,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 3.074168797953964,
|
|
"grad_norm": 0.22860913544864897,
|
|
"learning_rate": 6.207894745210168e-05,
|
|
"loss": 0.9616,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 3.0767263427109977,
|
|
"grad_norm": 0.24014291985565175,
|
|
"learning_rate": 6.193972641705809e-05,
|
|
"loss": 0.9664,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 3.079283887468031,
|
|
"grad_norm": 0.22572397342217615,
|
|
"learning_rate": 6.180056299127174e-05,
|
|
"loss": 0.9663,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 3.081841432225064,
|
|
"grad_norm": 0.25121933762619786,
|
|
"learning_rate": 6.16614576186507e-05,
|
|
"loss": 0.9676,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 3.084398976982097,
|
|
"grad_norm": 0.21264743561877053,
|
|
"learning_rate": 6.152241074291791e-05,
|
|
"loss": 0.9385,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 3.0869565217391304,
|
|
"grad_norm": 0.2110657205113156,
|
|
"learning_rate": 6.13834228076097e-05,
|
|
"loss": 0.9593,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 3.0895140664961636,
|
|
"grad_norm": 0.23064076505093895,
|
|
"learning_rate": 6.12444942560744e-05,
|
|
"loss": 0.9859,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 3.0920716112531967,
|
|
"grad_norm": 0.2327889001545048,
|
|
"learning_rate": 6.110562553147078e-05,
|
|
"loss": 0.9343,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 3.0946291560102304,
|
|
"grad_norm": 0.22081121627352496,
|
|
"learning_rate": 6.0966817076767e-05,
|
|
"loss": 0.9572,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 3.0971867007672635,
|
|
"grad_norm": 0.21410596357542921,
|
|
"learning_rate": 6.08280693347388e-05,
|
|
"loss": 0.9577,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 3.0997442455242967,
|
|
"grad_norm": 0.22670771449737367,
|
|
"learning_rate": 6.068938274796834e-05,
|
|
"loss": 0.9253,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 3.10230179028133,
|
|
"grad_norm": 0.205343189542066,
|
|
"learning_rate": 6.055075775884263e-05,
|
|
"loss": 0.9896,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 3.104859335038363,
|
|
"grad_norm": 0.22769741326879356,
|
|
"learning_rate": 6.0412194809552316e-05,
|
|
"loss": 0.9387,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 3.1074168797953963,
|
|
"grad_norm": 0.19822402152888394,
|
|
"learning_rate": 6.027369434208999e-05,
|
|
"loss": 0.9808,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 3.10997442455243,
|
|
"grad_norm": 0.23051970557462004,
|
|
"learning_rate": 6.0135256798249047e-05,
|
|
"loss": 0.933,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 3.112531969309463,
|
|
"grad_norm": 0.20329115598362008,
|
|
"learning_rate": 5.999688261962216e-05,
|
|
"loss": 0.9684,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 3.1150895140664963,
|
|
"grad_norm": 0.21036340816499827,
|
|
"learning_rate": 5.985857224759981e-05,
|
|
"loss": 0.944,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 3.1176470588235294,
|
|
"grad_norm": 0.20307590074585102,
|
|
"learning_rate": 5.972032612336906e-05,
|
|
"loss": 0.9598,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 3.1202046035805626,
|
|
"grad_norm": 0.2259792004822342,
|
|
"learning_rate": 5.958214468791189e-05,
|
|
"loss": 0.9483,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 3.122762148337596,
|
|
"grad_norm": 0.21243681629633632,
|
|
"learning_rate": 5.944402838200404e-05,
|
|
"loss": 0.9455,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 3.125319693094629,
|
|
"grad_norm": 0.21205256563770825,
|
|
"learning_rate": 5.930597764621347e-05,
|
|
"loss": 0.8963,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 3.1278772378516626,
|
|
"grad_norm": 0.19717448713959743,
|
|
"learning_rate": 5.916799292089895e-05,
|
|
"loss": 0.9564,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 3.130434782608696,
|
|
"grad_norm": 0.2244196417767959,
|
|
"learning_rate": 5.9030074646208745e-05,
|
|
"loss": 0.9272,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 3.132992327365729,
|
|
"grad_norm": 0.21563385011040548,
|
|
"learning_rate": 5.8892223262079144e-05,
|
|
"loss": 0.9316,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 3.135549872122762,
|
|
"grad_norm": 0.2350946628160643,
|
|
"learning_rate": 5.875443920823297e-05,
|
|
"loss": 0.9487,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 3.1381074168797953,
|
|
"grad_norm": 0.2865769039296874,
|
|
"learning_rate": 5.861672292417842e-05,
|
|
"loss": 0.9492,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 3.1406649616368285,
|
|
"grad_norm": 0.23430970345425967,
|
|
"learning_rate": 5.84790748492074e-05,
|
|
"loss": 0.966,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 3.1432225063938617,
|
|
"grad_norm": 0.2467472265535791,
|
|
"learning_rate": 5.834149542239431e-05,
|
|
"loss": 0.9708,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 3.1457800511508953,
|
|
"grad_norm": 0.26772393728125105,
|
|
"learning_rate": 5.8203985082594575e-05,
|
|
"loss": 0.9557,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 3.1483375959079285,
|
|
"grad_norm": 0.2338023529317996,
|
|
"learning_rate": 5.806654426844315e-05,
|
|
"loss": 0.9638,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 3.1508951406649617,
|
|
"grad_norm": 0.2523069016121197,
|
|
"learning_rate": 5.792917341835335e-05,
|
|
"loss": 0.9434,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 3.153452685421995,
|
|
"grad_norm": 0.2766552697496739,
|
|
"learning_rate": 5.77918729705152e-05,
|
|
"loss": 0.9809,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 3.156010230179028,
|
|
"grad_norm": 0.22646812781120942,
|
|
"learning_rate": 5.765464336289424e-05,
|
|
"loss": 0.9639,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 3.1585677749360612,
|
|
"grad_norm": 0.2205961359884855,
|
|
"learning_rate": 5.751748503322999e-05,
|
|
"loss": 0.954,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 3.1611253196930944,
|
|
"grad_norm": 0.2701811323136191,
|
|
"learning_rate": 5.7380398419034644e-05,
|
|
"loss": 0.9589,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 3.163682864450128,
|
|
"grad_norm": 0.2081039558632908,
|
|
"learning_rate": 5.7243383957591586e-05,
|
|
"loss": 0.9471,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 3.166240409207161,
|
|
"grad_norm": 0.19643865068397245,
|
|
"learning_rate": 5.7106442085954045e-05,
|
|
"loss": 0.9518,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 3.1687979539641944,
|
|
"grad_norm": 0.30921257471256036,
|
|
"learning_rate": 5.69695732409438e-05,
|
|
"loss": 0.9242,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 3.1713554987212276,
|
|
"grad_norm": 0.24583021366711547,
|
|
"learning_rate": 5.6832777859149536e-05,
|
|
"loss": 0.9423,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 3.1739130434782608,
|
|
"grad_norm": 0.18950822302407402,
|
|
"learning_rate": 5.669605637692575e-05,
|
|
"loss": 0.932,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 3.176470588235294,
|
|
"grad_norm": 0.25157456578331905,
|
|
"learning_rate": 5.655940923039111e-05,
|
|
"loss": 0.9379,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 3.1790281329923276,
|
|
"grad_norm": 0.18343916898513093,
|
|
"learning_rate": 5.642283685542717e-05,
|
|
"loss": 0.9456,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 3.1815856777493607,
|
|
"grad_norm": 0.19560349844702873,
|
|
"learning_rate": 5.6286339687677044e-05,
|
|
"loss": 0.9328,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 3.184143222506394,
|
|
"grad_norm": 0.189610936953741,
|
|
"learning_rate": 5.614991816254388e-05,
|
|
"loss": 0.9109,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 3.186700767263427,
|
|
"grad_norm": 0.18320058939508785,
|
|
"learning_rate": 5.601357271518959e-05,
|
|
"loss": 0.9584,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 3.1892583120204603,
|
|
"grad_norm": 0.17494234166851327,
|
|
"learning_rate": 5.587730378053339e-05,
|
|
"loss": 0.9656,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 3.1918158567774935,
|
|
"grad_norm": 0.19092078945148688,
|
|
"learning_rate": 5.574111179325039e-05,
|
|
"loss": 0.9487,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 3.1943734015345266,
|
|
"grad_norm": 0.1860857981568226,
|
|
"learning_rate": 5.560499718777031e-05,
|
|
"loss": 0.9372,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 3.1969309462915603,
|
|
"grad_norm": 0.18572653447801232,
|
|
"learning_rate": 5.5468960398276014e-05,
|
|
"loss": 0.9459,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 3.1994884910485935,
|
|
"grad_norm": 0.19107345846336404,
|
|
"learning_rate": 5.5333001858702164e-05,
|
|
"loss": 0.9255,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 3.2020460358056266,
|
|
"grad_norm": 0.20057541760798753,
|
|
"learning_rate": 5.519712200273381e-05,
|
|
"loss": 0.9615,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 3.20460358056266,
|
|
"grad_norm": 0.20198119736904155,
|
|
"learning_rate": 5.5061321263804933e-05,
|
|
"loss": 0.9204,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 3.207161125319693,
|
|
"grad_norm": 0.21942879387381486,
|
|
"learning_rate": 5.4925600075097285e-05,
|
|
"loss": 0.945,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 3.209718670076726,
|
|
"grad_norm": 0.19469068958831684,
|
|
"learning_rate": 5.4789958869538756e-05,
|
|
"loss": 0.9435,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 3.21227621483376,
|
|
"grad_norm": 0.20250937006123632,
|
|
"learning_rate": 5.4654398079802183e-05,
|
|
"loss": 0.9364,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 3.214833759590793,
|
|
"grad_norm": 0.19846072138477766,
|
|
"learning_rate": 5.451891813830382e-05,
|
|
"loss": 0.94,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 3.217391304347826,
|
|
"grad_norm": 0.20425114535656635,
|
|
"learning_rate": 5.4383519477202103e-05,
|
|
"loss": 0.9363,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 3.2199488491048593,
|
|
"grad_norm": 0.185008322081447,
|
|
"learning_rate": 5.42482025283961e-05,
|
|
"loss": 0.9815,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 3.2225063938618925,
|
|
"grad_norm": 0.2151529732841821,
|
|
"learning_rate": 5.41129677235243e-05,
|
|
"loss": 0.9498,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 3.2250639386189257,
|
|
"grad_norm": 0.1885448397273564,
|
|
"learning_rate": 5.397781549396316e-05,
|
|
"loss": 0.9337,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 3.227621483375959,
|
|
"grad_norm": 0.21418784649002942,
|
|
"learning_rate": 5.3842746270825705e-05,
|
|
"loss": 0.9171,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 3.2301790281329925,
|
|
"grad_norm": 0.20068889946827412,
|
|
"learning_rate": 5.370776048496026e-05,
|
|
"loss": 0.9376,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 3.2327365728900257,
|
|
"grad_norm": 0.24899426008654885,
|
|
"learning_rate": 5.357285856694891e-05,
|
|
"loss": 0.9429,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 3.235294117647059,
|
|
"grad_norm": 0.19686757692012147,
|
|
"learning_rate": 5.34380409471062e-05,
|
|
"loss": 0.9377,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 3.237851662404092,
|
|
"grad_norm": 0.24870949090788627,
|
|
"learning_rate": 5.33033080554779e-05,
|
|
"loss": 0.945,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 3.2404092071611252,
|
|
"grad_norm": 0.20621519140618658,
|
|
"learning_rate": 5.3168660321839386e-05,
|
|
"loss": 0.9379,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 3.2429667519181584,
|
|
"grad_norm": 0.21652792479122668,
|
|
"learning_rate": 5.303409817569449e-05,
|
|
"loss": 0.9021,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 3.2455242966751916,
|
|
"grad_norm": 0.19103019263904417,
|
|
"learning_rate": 5.2899622046274e-05,
|
|
"loss": 0.9613,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 3.2480818414322252,
|
|
"grad_norm": 0.21245341007957305,
|
|
"learning_rate": 5.276523236253425e-05,
|
|
"loss": 0.9387,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 3.2506393861892584,
|
|
"grad_norm": 0.2106216561170891,
|
|
"learning_rate": 5.263092955315595e-05,
|
|
"loss": 0.9546,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 3.2531969309462916,
|
|
"grad_norm": 0.197972453520414,
|
|
"learning_rate": 5.2496714046542583e-05,
|
|
"loss": 0.9391,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 3.2557544757033248,
|
|
"grad_norm": 0.199650022114146,
|
|
"learning_rate": 5.2362586270819256e-05,
|
|
"loss": 0.9386,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 3.258312020460358,
|
|
"grad_norm": 0.18979777369555925,
|
|
"learning_rate": 5.222854665383116e-05,
|
|
"loss": 0.9495,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 3.260869565217391,
|
|
"grad_norm": 0.2173804109344821,
|
|
"learning_rate": 5.2094595623142326e-05,
|
|
"loss": 0.9588,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 3.2634271099744243,
|
|
"grad_norm": 0.2016383197459456,
|
|
"learning_rate": 5.1960733606034126e-05,
|
|
"loss": 0.9151,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 3.265984654731458,
|
|
"grad_norm": 0.2047292724222713,
|
|
"learning_rate": 5.182696102950404e-05,
|
|
"loss": 0.9686,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 3.268542199488491,
|
|
"grad_norm": 0.2065833579125683,
|
|
"learning_rate": 5.1693278320264304e-05,
|
|
"loss": 0.9384,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 3.2710997442455243,
|
|
"grad_norm": 0.20569255957459082,
|
|
"learning_rate": 5.1559685904740386e-05,
|
|
"loss": 0.9869,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 3.2736572890025575,
|
|
"grad_norm": 0.19840584494069785,
|
|
"learning_rate": 5.142618420906985e-05,
|
|
"loss": 0.9557,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 3.2762148337595907,
|
|
"grad_norm": 0.20387885459079644,
|
|
"learning_rate": 5.1292773659100755e-05,
|
|
"loss": 0.9642,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 3.2787723785166243,
|
|
"grad_norm": 0.2101778694530114,
|
|
"learning_rate": 5.115945468039048e-05,
|
|
"loss": 0.9509,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 3.2813299232736575,
|
|
"grad_norm": 0.2155780933816927,
|
|
"learning_rate": 5.1026227698204335e-05,
|
|
"loss": 0.9499,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 3.2838874680306906,
|
|
"grad_norm": 0.24104255752130535,
|
|
"learning_rate": 5.089309313751415e-05,
|
|
"loss": 0.9458,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 3.286445012787724,
|
|
"grad_norm": 0.2121724580915078,
|
|
"learning_rate": 5.0760051422996925e-05,
|
|
"loss": 0.9499,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 3.289002557544757,
|
|
"grad_norm": 0.20440164305922942,
|
|
"learning_rate": 5.0627102979033546e-05,
|
|
"loss": 0.9458,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 3.29156010230179,
|
|
"grad_norm": 0.21910653895674295,
|
|
"learning_rate": 5.049424822970731e-05,
|
|
"loss": 0.9379,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 3.2941176470588234,
|
|
"grad_norm": 0.17657372919405595,
|
|
"learning_rate": 5.036148759880272e-05,
|
|
"loss": 0.9249,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 3.296675191815857,
|
|
"grad_norm": 0.22994935624931387,
|
|
"learning_rate": 5.0228821509803984e-05,
|
|
"loss": 0.9247,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 3.29923273657289,
|
|
"grad_norm": 0.18809716520389427,
|
|
"learning_rate": 5.0096250385893825e-05,
|
|
"loss": 0.9236,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 3.3017902813299234,
|
|
"grad_norm": 0.20395108123985592,
|
|
"learning_rate": 4.9963774649951975e-05,
|
|
"loss": 0.9351,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 3.3043478260869565,
|
|
"grad_norm": 0.21017478598124728,
|
|
"learning_rate": 4.983139472455387e-05,
|
|
"loss": 0.9603,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 3.3069053708439897,
|
|
"grad_norm": 0.21877137266724161,
|
|
"learning_rate": 4.969911103196942e-05,
|
|
"loss": 0.9067,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 3.309462915601023,
|
|
"grad_norm": 0.18726348177523444,
|
|
"learning_rate": 4.956692399416149e-05,
|
|
"loss": 0.9368,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 3.312020460358056,
|
|
"grad_norm": 0.2241750270363803,
|
|
"learning_rate": 4.943483403278468e-05,
|
|
"loss": 0.947,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 3.3145780051150897,
|
|
"grad_norm": 0.20581443285806397,
|
|
"learning_rate": 4.9302841569183884e-05,
|
|
"loss": 0.9575,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 3.317135549872123,
|
|
"grad_norm": 0.17452182993008977,
|
|
"learning_rate": 4.9170947024393074e-05,
|
|
"loss": 0.9156,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 3.319693094629156,
|
|
"grad_norm": 0.198949333785195,
|
|
"learning_rate": 4.9039150819133775e-05,
|
|
"loss": 0.9348,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 3.3222506393861893,
|
|
"grad_norm": 0.16601657169918604,
|
|
"learning_rate": 4.890745337381388e-05,
|
|
"loss": 0.9587,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 3.3248081841432224,
|
|
"grad_norm": 0.23036877304791145,
|
|
"learning_rate": 4.877585510852627e-05,
|
|
"loss": 0.9792,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 3.3273657289002556,
|
|
"grad_norm": 0.18765197640496664,
|
|
"learning_rate": 4.864435644304742e-05,
|
|
"loss": 0.9253,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 3.329923273657289,
|
|
"grad_norm": 0.19041731553942576,
|
|
"learning_rate": 4.851295779683616e-05,
|
|
"loss": 0.9535,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 3.3324808184143224,
|
|
"grad_norm": 0.2087435808060436,
|
|
"learning_rate": 4.8381659589032186e-05,
|
|
"loss": 0.9338,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 3.3350383631713556,
|
|
"grad_norm": 0.1903448069067344,
|
|
"learning_rate": 4.825046223845486e-05,
|
|
"loss": 0.9499,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 3.337595907928389,
|
|
"grad_norm": 0.21308090181205586,
|
|
"learning_rate": 4.811936616360186e-05,
|
|
"loss": 0.9256,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 3.340153452685422,
|
|
"grad_norm": 0.2023342708755437,
|
|
"learning_rate": 4.798837178264772e-05,
|
|
"loss": 0.9582,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 3.342710997442455,
|
|
"grad_norm": 0.21619791962247753,
|
|
"learning_rate": 4.78574795134427e-05,
|
|
"loss": 0.9125,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 3.3452685421994883,
|
|
"grad_norm": 0.2487539660815107,
|
|
"learning_rate": 4.772668977351128e-05,
|
|
"loss": 0.9537,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 3.3478260869565215,
|
|
"grad_norm": 0.2240156883350933,
|
|
"learning_rate": 4.7596002980050834e-05,
|
|
"loss": 0.9401,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 3.350383631713555,
|
|
"grad_norm": 0.2251746608186689,
|
|
"learning_rate": 4.7465419549930476e-05,
|
|
"loss": 0.9782,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 3.3529411764705883,
|
|
"grad_norm": 0.22881310384597994,
|
|
"learning_rate": 4.733493989968949e-05,
|
|
"loss": 0.9458,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 3.3554987212276215,
|
|
"grad_norm": 0.2141099007638843,
|
|
"learning_rate": 4.7204564445536234e-05,
|
|
"loss": 0.9396,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 3.3580562659846547,
|
|
"grad_norm": 0.1882802550926345,
|
|
"learning_rate": 4.707429360334662e-05,
|
|
"loss": 0.942,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 3.360613810741688,
|
|
"grad_norm": 0.2179119833942681,
|
|
"learning_rate": 4.694412778866285e-05,
|
|
"loss": 0.9504,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 3.363171355498721,
|
|
"grad_norm": 0.16843886415285414,
|
|
"learning_rate": 4.681406741669216e-05,
|
|
"loss": 0.9221,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 3.3657289002557547,
|
|
"grad_norm": 0.21980007814521796,
|
|
"learning_rate": 4.668411290230543e-05,
|
|
"loss": 0.944,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 3.368286445012788,
|
|
"grad_norm": 0.1510130725197139,
|
|
"learning_rate": 4.655426466003586e-05,
|
|
"loss": 0.9563,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 3.370843989769821,
|
|
"grad_norm": 0.19586517189701522,
|
|
"learning_rate": 4.6424523104077654e-05,
|
|
"loss": 0.9508,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 3.373401534526854,
|
|
"grad_norm": 0.1995467600478656,
|
|
"learning_rate": 4.629488864828472e-05,
|
|
"loss": 0.9502,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 3.3759590792838874,
|
|
"grad_norm": 0.1742993616386661,
|
|
"learning_rate": 4.6165361706169325e-05,
|
|
"loss": 0.9268,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 3.3785166240409206,
|
|
"grad_norm": 0.2067544794585532,
|
|
"learning_rate": 4.603594269090078e-05,
|
|
"loss": 0.9268,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 3.381074168797954,
|
|
"grad_norm": 0.2227068577818483,
|
|
"learning_rate": 4.5906632015304116e-05,
|
|
"loss": 0.9358,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 3.3836317135549874,
|
|
"grad_norm": 0.2034466989052333,
|
|
"learning_rate": 4.5777430091858855e-05,
|
|
"loss": 0.9302,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 3.3861892583120206,
|
|
"grad_norm": 0.20709571806774676,
|
|
"learning_rate": 4.564833733269755e-05,
|
|
"loss": 0.9427,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 3.3887468030690537,
|
|
"grad_norm": 0.22013092566675613,
|
|
"learning_rate": 4.5519354149604474e-05,
|
|
"loss": 0.9437,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 3.391304347826087,
|
|
"grad_norm": 0.18450541197105383,
|
|
"learning_rate": 4.539048095401452e-05,
|
|
"loss": 0.9466,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 3.39386189258312,
|
|
"grad_norm": 0.22548387813850762,
|
|
"learning_rate": 4.526171815701165e-05,
|
|
"loss": 0.9336,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 3.3964194373401533,
|
|
"grad_norm": 0.1820733823905873,
|
|
"learning_rate": 4.513306616932764e-05,
|
|
"loss": 0.9215,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 3.398976982097187,
|
|
"grad_norm": 0.21404349632115405,
|
|
"learning_rate": 4.5004525401340915e-05,
|
|
"loss": 0.9801,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 3.40153452685422,
|
|
"grad_norm": 0.18377817821243256,
|
|
"learning_rate": 4.487609626307508e-05,
|
|
"loss": 0.9655,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 3.4040920716112533,
|
|
"grad_norm": 0.1923893878636668,
|
|
"learning_rate": 4.4747779164197535e-05,
|
|
"loss": 0.9382,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 3.4066496163682864,
|
|
"grad_norm": 0.19516009680845245,
|
|
"learning_rate": 4.4619574514018486e-05,
|
|
"loss": 0.9557,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 3.4092071611253196,
|
|
"grad_norm": 0.19144644869283248,
|
|
"learning_rate": 4.449148272148934e-05,
|
|
"loss": 0.9345,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 3.411764705882353,
|
|
"grad_norm": 0.1817955488888704,
|
|
"learning_rate": 4.436350419520154e-05,
|
|
"loss": 0.9608,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 3.414322250639386,
|
|
"grad_norm": 0.2056911128568184,
|
|
"learning_rate": 4.423563934338519e-05,
|
|
"loss": 0.9458,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 3.4168797953964196,
|
|
"grad_norm": 0.1693771378014072,
|
|
"learning_rate": 4.410788857390785e-05,
|
|
"loss": 0.9466,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 3.419437340153453,
|
|
"grad_norm": 0.20830311663566495,
|
|
"learning_rate": 4.39802522942731e-05,
|
|
"loss": 0.9408,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 3.421994884910486,
|
|
"grad_norm": 0.1698790309922409,
|
|
"learning_rate": 4.385273091161937e-05,
|
|
"loss": 0.9305,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 3.424552429667519,
|
|
"grad_norm": 0.19474240897387077,
|
|
"learning_rate": 4.372532483271863e-05,
|
|
"loss": 0.9375,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 3.4271099744245523,
|
|
"grad_norm": 0.2059429092680418,
|
|
"learning_rate": 4.3598034463974966e-05,
|
|
"loss": 0.9869,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 3.4296675191815855,
|
|
"grad_norm": 0.19031026060303782,
|
|
"learning_rate": 4.347086021142339e-05,
|
|
"loss": 0.9765,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 3.4322250639386187,
|
|
"grad_norm": 0.19960933133782244,
|
|
"learning_rate": 4.3343802480728544e-05,
|
|
"loss": 0.9431,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 3.4347826086956523,
|
|
"grad_norm": 0.1924073308227482,
|
|
"learning_rate": 4.321686167718337e-05,
|
|
"loss": 0.9545,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 3.4373401534526855,
|
|
"grad_norm": 0.2028658725938022,
|
|
"learning_rate": 4.309003820570785e-05,
|
|
"loss": 0.9377,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 3.4398976982097187,
|
|
"grad_norm": 0.2106823975486889,
|
|
"learning_rate": 4.296333247084764e-05,
|
|
"loss": 0.9283,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 3.442455242966752,
|
|
"grad_norm": 0.21370019365379003,
|
|
"learning_rate": 4.283674487677297e-05,
|
|
"loss": 0.9663,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 3.445012787723785,
|
|
"grad_norm": 0.20381679039668288,
|
|
"learning_rate": 4.271027582727703e-05,
|
|
"loss": 0.9425,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 3.4475703324808182,
|
|
"grad_norm": 0.2465303759456818,
|
|
"learning_rate": 4.2583925725774996e-05,
|
|
"loss": 0.963,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 3.4501278772378514,
|
|
"grad_norm": 0.2017710128697274,
|
|
"learning_rate": 4.2457694975302625e-05,
|
|
"loss": 0.969,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 3.452685421994885,
|
|
"grad_norm": 0.2599485575517086,
|
|
"learning_rate": 4.233158397851494e-05,
|
|
"loss": 0.9578,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 3.455242966751918,
|
|
"grad_norm": 0.20994916380961168,
|
|
"learning_rate": 4.220559313768492e-05,
|
|
"loss": 0.9517,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 3.4578005115089514,
|
|
"grad_norm": 0.25562334357376887,
|
|
"learning_rate": 4.207972285470236e-05,
|
|
"loss": 0.9593,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 3.4603580562659846,
|
|
"grad_norm": 0.2018942765243476,
|
|
"learning_rate": 4.1953973531072403e-05,
|
|
"loss": 0.9238,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 3.4629156010230178,
|
|
"grad_norm": 0.23893893502461097,
|
|
"learning_rate": 4.1828345567914426e-05,
|
|
"loss": 0.9463,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 3.4654731457800514,
|
|
"grad_norm": 0.2377570507765394,
|
|
"learning_rate": 4.17028393659606e-05,
|
|
"loss": 0.9379,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 3.4680306905370846,
|
|
"grad_norm": 0.21617110584103066,
|
|
"learning_rate": 4.157745532555484e-05,
|
|
"loss": 0.9445,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 3.4705882352941178,
|
|
"grad_norm": 0.20973373939841763,
|
|
"learning_rate": 4.145219384665128e-05,
|
|
"loss": 0.9471,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 3.473145780051151,
|
|
"grad_norm": 0.19248666440528944,
|
|
"learning_rate": 4.1327055328813036e-05,
|
|
"loss": 0.9492,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 3.475703324808184,
|
|
"grad_norm": 0.19782620860430303,
|
|
"learning_rate": 4.1202040171211195e-05,
|
|
"loss": 0.9677,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.18288110899297144,
|
|
"learning_rate": 4.107714877262318e-05,
|
|
"loss": 0.9574,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 3.4808184143222505,
|
|
"grad_norm": 0.18982354052970898,
|
|
"learning_rate": 4.0952381531431716e-05,
|
|
"loss": 0.9411,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 3.483375959079284,
|
|
"grad_norm": 0.19047078322563796,
|
|
"learning_rate": 4.082773884562342e-05,
|
|
"loss": 0.9465,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 3.4859335038363173,
|
|
"grad_norm": 0.20024490556690386,
|
|
"learning_rate": 4.0703221112787774e-05,
|
|
"loss": 0.9631,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 3.4884910485933505,
|
|
"grad_norm": 0.18855297057246742,
|
|
"learning_rate": 4.057882873011543e-05,
|
|
"loss": 0.9333,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 3.4910485933503836,
|
|
"grad_norm": 0.18121257314529818,
|
|
"learning_rate": 4.045456209439734e-05,
|
|
"loss": 0.9683,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 3.493606138107417,
|
|
"grad_norm": 0.19866185503250056,
|
|
"learning_rate": 4.033042160202337e-05,
|
|
"loss": 0.9872,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 3.49616368286445,
|
|
"grad_norm": 0.17010036933663283,
|
|
"learning_rate": 4.020640764898096e-05,
|
|
"loss": 0.9685,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 3.498721227621483,
|
|
"grad_norm": 0.18176622769606524,
|
|
"learning_rate": 4.0082520630853865e-05,
|
|
"loss": 0.9112,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 3.501278772378517,
|
|
"grad_norm": 0.1861883153790341,
|
|
"learning_rate": 3.995876094282104e-05,
|
|
"loss": 0.9585,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 3.50383631713555,
|
|
"grad_norm": 0.19579755858911602,
|
|
"learning_rate": 3.983512897965519e-05,
|
|
"loss": 0.959,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 3.506393861892583,
|
|
"grad_norm": 0.18488711544490097,
|
|
"learning_rate": 3.9711625135721664e-05,
|
|
"loss": 0.9555,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 3.5089514066496164,
|
|
"grad_norm": 0.2073614939639127,
|
|
"learning_rate": 3.958824980497704e-05,
|
|
"loss": 0.9744,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 3.5115089514066495,
|
|
"grad_norm": 0.17154095562950622,
|
|
"learning_rate": 3.946500338096811e-05,
|
|
"loss": 0.9353,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 3.5140664961636827,
|
|
"grad_norm": 0.20478213377969626,
|
|
"learning_rate": 3.934188625683037e-05,
|
|
"loss": 0.9568,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 3.516624040920716,
|
|
"grad_norm": 0.18373687324276738,
|
|
"learning_rate": 3.9218898825286806e-05,
|
|
"loss": 0.9279,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 3.5191815856777495,
|
|
"grad_norm": 0.1716453870437831,
|
|
"learning_rate": 3.9096041478646885e-05,
|
|
"loss": 0.9342,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 3.5217391304347827,
|
|
"grad_norm": 0.18268819201544698,
|
|
"learning_rate": 3.8973314608805e-05,
|
|
"loss": 0.962,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 3.524296675191816,
|
|
"grad_norm": 0.16258821810908097,
|
|
"learning_rate": 3.885071860723937e-05,
|
|
"loss": 0.9293,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 3.526854219948849,
|
|
"grad_norm": 0.165376063640211,
|
|
"learning_rate": 3.8728253865010765e-05,
|
|
"loss": 0.9895,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 3.5294117647058822,
|
|
"grad_norm": 0.16721193942916188,
|
|
"learning_rate": 3.8605920772761274e-05,
|
|
"loss": 0.9328,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 3.531969309462916,
|
|
"grad_norm": 0.16130857457103082,
|
|
"learning_rate": 3.848371972071304e-05,
|
|
"loss": 0.9859,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 3.5345268542199486,
|
|
"grad_norm": 0.16278759213568428,
|
|
"learning_rate": 3.8361651098666967e-05,
|
|
"loss": 0.9569,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 3.5370843989769822,
|
|
"grad_norm": 0.17183294163130294,
|
|
"learning_rate": 3.8239715296001654e-05,
|
|
"loss": 0.9418,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 3.5396419437340154,
|
|
"grad_norm": 0.155240959003008,
|
|
"learning_rate": 3.8117912701671905e-05,
|
|
"loss": 0.9696,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 3.5421994884910486,
|
|
"grad_norm": 0.17273359598041008,
|
|
"learning_rate": 3.7996243704207686e-05,
|
|
"loss": 0.9502,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 3.544757033248082,
|
|
"grad_norm": 0.1703572907276737,
|
|
"learning_rate": 3.787470869171277e-05,
|
|
"loss": 0.9673,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 3.547314578005115,
|
|
"grad_norm": 0.163047329660931,
|
|
"learning_rate": 3.7753308051863534e-05,
|
|
"loss": 0.9244,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 3.5498721227621486,
|
|
"grad_norm": 0.16125670043718637,
|
|
"learning_rate": 3.763204217190778e-05,
|
|
"loss": 0.9414,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 3.5524296675191813,
|
|
"grad_norm": 0.17450887360011574,
|
|
"learning_rate": 3.751091143866338e-05,
|
|
"loss": 0.9677,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 3.554987212276215,
|
|
"grad_norm": 0.15580595508138104,
|
|
"learning_rate": 3.7389916238517224e-05,
|
|
"loss": 0.9758,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 3.557544757033248,
|
|
"grad_norm": 0.17069367779408143,
|
|
"learning_rate": 3.726905695742372e-05,
|
|
"loss": 0.9142,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 3.5601023017902813,
|
|
"grad_norm": 0.16910211167776398,
|
|
"learning_rate": 3.7148333980903796e-05,
|
|
"loss": 0.9389,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 3.5626598465473145,
|
|
"grad_norm": 0.1663225487056752,
|
|
"learning_rate": 3.7027747694043645e-05,
|
|
"loss": 0.9557,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 3.5652173913043477,
|
|
"grad_norm": 0.16804185773204355,
|
|
"learning_rate": 3.690729848149335e-05,
|
|
"loss": 0.9588,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 3.5677749360613813,
|
|
"grad_norm": 0.16402784688128466,
|
|
"learning_rate": 3.678698672746581e-05,
|
|
"loss": 0.964,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 3.5703324808184145,
|
|
"grad_norm": 0.18174268933477528,
|
|
"learning_rate": 3.6666812815735424e-05,
|
|
"loss": 0.9433,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 3.5728900255754477,
|
|
"grad_norm": 0.15614453400715234,
|
|
"learning_rate": 3.6546777129636886e-05,
|
|
"loss": 0.9252,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 3.575447570332481,
|
|
"grad_norm": 0.16700607138470522,
|
|
"learning_rate": 3.6426880052064026e-05,
|
|
"loss": 0.9636,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 3.578005115089514,
|
|
"grad_norm": 0.20568461367374485,
|
|
"learning_rate": 3.630712196546844e-05,
|
|
"loss": 0.9649,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 3.580562659846547,
|
|
"grad_norm": 0.14660657078481024,
|
|
"learning_rate": 3.6187503251858505e-05,
|
|
"loss": 0.9267,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 3.5831202046035804,
|
|
"grad_norm": 0.16935747703951526,
|
|
"learning_rate": 3.6068024292797945e-05,
|
|
"loss": 0.9356,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 3.585677749360614,
|
|
"grad_norm": 0.15782075450424704,
|
|
"learning_rate": 3.59486854694046e-05,
|
|
"loss": 0.9548,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 3.588235294117647,
|
|
"grad_norm": 0.17132410907270623,
|
|
"learning_rate": 3.582948716234948e-05,
|
|
"loss": 0.9493,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 3.5907928388746804,
|
|
"grad_norm": 0.16858095077712948,
|
|
"learning_rate": 3.571042975185524e-05,
|
|
"loss": 0.9552,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 3.5933503836317136,
|
|
"grad_norm": 0.1634251285228488,
|
|
"learning_rate": 3.559151361769517e-05,
|
|
"loss": 0.9466,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 3.5959079283887467,
|
|
"grad_norm": 0.1729430282795056,
|
|
"learning_rate": 3.547273913919182e-05,
|
|
"loss": 0.95,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 3.59846547314578,
|
|
"grad_norm": 0.1821907434145911,
|
|
"learning_rate": 3.535410669521605e-05,
|
|
"loss": 0.9588,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 3.601023017902813,
|
|
"grad_norm": 0.15781654283531932,
|
|
"learning_rate": 3.5235616664185465e-05,
|
|
"loss": 0.9591,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 3.6035805626598467,
|
|
"grad_norm": 0.1677674098580371,
|
|
"learning_rate": 3.5117269424063466e-05,
|
|
"loss": 0.9372,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 3.60613810741688,
|
|
"grad_norm": 0.1668467714604029,
|
|
"learning_rate": 3.4999065352358055e-05,
|
|
"loss": 0.9128,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 3.608695652173913,
|
|
"grad_norm": 0.16023804099695482,
|
|
"learning_rate": 3.488100482612046e-05,
|
|
"loss": 0.9533,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 3.6112531969309463,
|
|
"grad_norm": 0.17448057130149636,
|
|
"learning_rate": 3.476308822194404e-05,
|
|
"loss": 0.9696,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 3.6138107416879794,
|
|
"grad_norm": 0.17176757036978785,
|
|
"learning_rate": 3.4645315915963085e-05,
|
|
"loss": 0.9295,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 3.6163682864450126,
|
|
"grad_norm": 0.16582442582314796,
|
|
"learning_rate": 3.452768828385156e-05,
|
|
"loss": 0.9478,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 3.618925831202046,
|
|
"grad_norm": 0.16508960150611576,
|
|
"learning_rate": 3.4410205700822e-05,
|
|
"loss": 0.9267,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 3.6214833759590794,
|
|
"grad_norm": 0.15842544276922507,
|
|
"learning_rate": 3.42928685416242e-05,
|
|
"loss": 0.9487,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 3.6240409207161126,
|
|
"grad_norm": 0.16737847990453103,
|
|
"learning_rate": 3.417567718054413e-05,
|
|
"loss": 0.9257,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 3.626598465473146,
|
|
"grad_norm": 0.16179442819088455,
|
|
"learning_rate": 3.405863199140271e-05,
|
|
"loss": 0.9594,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 3.629156010230179,
|
|
"grad_norm": 0.17740705653386357,
|
|
"learning_rate": 3.3941733347554434e-05,
|
|
"loss": 0.954,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 3.631713554987212,
|
|
"grad_norm": 0.1745105989485467,
|
|
"learning_rate": 3.3824981621886545e-05,
|
|
"loss": 0.9536,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 3.634271099744246,
|
|
"grad_norm": 0.1927262004385616,
|
|
"learning_rate": 3.370837718681754e-05,
|
|
"loss": 0.9685,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 3.6368286445012785,
|
|
"grad_norm": 0.15752590578867717,
|
|
"learning_rate": 3.3591920414296094e-05,
|
|
"loss": 0.9248,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 3.639386189258312,
|
|
"grad_norm": 0.21240595387549532,
|
|
"learning_rate": 3.347561167579986e-05,
|
|
"loss": 0.9521,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 3.6419437340153453,
|
|
"grad_norm": 0.17508530317965004,
|
|
"learning_rate": 3.3359451342334306e-05,
|
|
"loss": 0.9431,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 3.6445012787723785,
|
|
"grad_norm": 0.21738581132916354,
|
|
"learning_rate": 3.324343978443148e-05,
|
|
"loss": 0.9716,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 3.6470588235294117,
|
|
"grad_norm": 0.16746773638107448,
|
|
"learning_rate": 3.3127577372148874e-05,
|
|
"loss": 0.9322,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 3.649616368286445,
|
|
"grad_norm": 0.2122059201301744,
|
|
"learning_rate": 3.301186447506827e-05,
|
|
"loss": 0.9422,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 3.6521739130434785,
|
|
"grad_norm": 0.15741451467355758,
|
|
"learning_rate": 3.289630146229449e-05,
|
|
"loss": 0.9366,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 3.6547314578005117,
|
|
"grad_norm": 0.19813994445803942,
|
|
"learning_rate": 3.278088870245423e-05,
|
|
"loss": 0.9286,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 3.657289002557545,
|
|
"grad_norm": 0.16851843081939155,
|
|
"learning_rate": 3.2665626563694937e-05,
|
|
"loss": 0.9572,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 3.659846547314578,
|
|
"grad_norm": 0.20717471275600138,
|
|
"learning_rate": 3.2550515413683574e-05,
|
|
"loss": 0.9512,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 3.662404092071611,
|
|
"grad_norm": 0.16245953402744545,
|
|
"learning_rate": 3.2435555619605504e-05,
|
|
"loss": 0.9542,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 3.6649616368286444,
|
|
"grad_norm": 0.19641538640030912,
|
|
"learning_rate": 3.232074754816323e-05,
|
|
"loss": 0.9306,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 3.6675191815856776,
|
|
"grad_norm": 0.1594631052144963,
|
|
"learning_rate": 3.220609156557544e-05,
|
|
"loss": 0.9363,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 3.670076726342711,
|
|
"grad_norm": 0.18455147659478868,
|
|
"learning_rate": 3.209158803757546e-05,
|
|
"loss": 0.9321,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 3.6726342710997444,
|
|
"grad_norm": 0.1790498881096886,
|
|
"learning_rate": 3.1977237329410446e-05,
|
|
"loss": 0.9608,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 3.6751918158567776,
|
|
"grad_norm": 0.1870454897435218,
|
|
"learning_rate": 3.186303980584012e-05,
|
|
"loss": 0.9389,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 3.6777493606138107,
|
|
"grad_norm": 0.20530561810770268,
|
|
"learning_rate": 3.174899583113548e-05,
|
|
"loss": 0.9945,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 3.680306905370844,
|
|
"grad_norm": 0.18019213638281067,
|
|
"learning_rate": 3.1635105769077766e-05,
|
|
"loss": 0.9307,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 3.682864450127877,
|
|
"grad_norm": 0.20610761052130405,
|
|
"learning_rate": 3.152136998295727e-05,
|
|
"loss": 0.9321,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 3.6854219948849103,
|
|
"grad_norm": 0.17985929842660886,
|
|
"learning_rate": 3.140778883557213e-05,
|
|
"loss": 0.932,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 3.687979539641944,
|
|
"grad_norm": 0.20013068677532989,
|
|
"learning_rate": 3.129436268922728e-05,
|
|
"loss": 0.9324,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 3.690537084398977,
|
|
"grad_norm": 0.17562501633026537,
|
|
"learning_rate": 3.118109190573313e-05,
|
|
"loss": 0.9145,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 3.6930946291560103,
|
|
"grad_norm": 0.18827294282018908,
|
|
"learning_rate": 3.106797684640464e-05,
|
|
"loss": 0.9402,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 3.6956521739130435,
|
|
"grad_norm": 0.20170283801470837,
|
|
"learning_rate": 3.0955017872059956e-05,
|
|
"loss": 0.9591,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 3.6982097186700766,
|
|
"grad_norm": 0.15387225427234089,
|
|
"learning_rate": 3.084221534301926e-05,
|
|
"loss": 0.9253,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 3.70076726342711,
|
|
"grad_norm": 0.24032338349831264,
|
|
"learning_rate": 3.0729569619103876e-05,
|
|
"loss": 0.9501,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 3.703324808184143,
|
|
"grad_norm": 0.1613801252077293,
|
|
"learning_rate": 3.061708105963481e-05,
|
|
"loss": 0.9706,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 3.7058823529411766,
|
|
"grad_norm": 0.18342909310635377,
|
|
"learning_rate": 3.0504750023431787e-05,
|
|
"loss": 0.9268,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 3.70843989769821,
|
|
"grad_norm": 0.1656531219879725,
|
|
"learning_rate": 3.039257686881209e-05,
|
|
"loss": 0.9385,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 3.710997442455243,
|
|
"grad_norm": 0.1781080191407481,
|
|
"learning_rate": 3.028056195358936e-05,
|
|
"loss": 0.9201,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 3.713554987212276,
|
|
"grad_norm": 0.1682926250161123,
|
|
"learning_rate": 3.016870563507241e-05,
|
|
"loss": 0.9486,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 3.7161125319693094,
|
|
"grad_norm": 0.17403568022524737,
|
|
"learning_rate": 3.0057008270064226e-05,
|
|
"loss": 0.9326,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 3.718670076726343,
|
|
"grad_norm": 0.17412534323602966,
|
|
"learning_rate": 2.9945470214860815e-05,
|
|
"loss": 0.9737,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 3.7212276214833757,
|
|
"grad_norm": 0.2012938530305388,
|
|
"learning_rate": 2.9834091825249908e-05,
|
|
"loss": 0.9319,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 3.7237851662404093,
|
|
"grad_norm": 0.15521247782508635,
|
|
"learning_rate": 2.9722873456509985e-05,
|
|
"loss": 0.9289,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 3.7263427109974425,
|
|
"grad_norm": 0.15552821509875525,
|
|
"learning_rate": 2.961181546340906e-05,
|
|
"loss": 0.9707,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 3.7289002557544757,
|
|
"grad_norm": 0.19037886779641314,
|
|
"learning_rate": 2.95009182002036e-05,
|
|
"loss": 0.9313,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 3.731457800511509,
|
|
"grad_norm": 0.16615970202045902,
|
|
"learning_rate": 2.939018202063732e-05,
|
|
"loss": 0.9647,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 3.734015345268542,
|
|
"grad_norm": 0.17646317393385902,
|
|
"learning_rate": 2.9279607277940196e-05,
|
|
"loss": 0.9474,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 3.7365728900255757,
|
|
"grad_norm": 0.16080135640987508,
|
|
"learning_rate": 2.9169194324827183e-05,
|
|
"loss": 0.926,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 3.7391304347826084,
|
|
"grad_norm": 0.17325852442311754,
|
|
"learning_rate": 2.9058943513497158e-05,
|
|
"loss": 0.9312,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 3.741687979539642,
|
|
"grad_norm": 0.2657172615999172,
|
|
"learning_rate": 2.8948855195631797e-05,
|
|
"loss": 0.9417,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 3.7442455242966752,
|
|
"grad_norm": 0.18232454995244132,
|
|
"learning_rate": 2.883892972239445e-05,
|
|
"loss": 0.9596,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 3.7468030690537084,
|
|
"grad_norm": 0.15153887237658853,
|
|
"learning_rate": 2.8729167444429042e-05,
|
|
"loss": 0.9476,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 3.7493606138107416,
|
|
"grad_norm": 0.17675913819692224,
|
|
"learning_rate": 2.8619568711858858e-05,
|
|
"loss": 0.945,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 3.7519181585677748,
|
|
"grad_norm": 0.16206615280321732,
|
|
"learning_rate": 2.8510133874285633e-05,
|
|
"loss": 0.9462,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 3.7544757033248084,
|
|
"grad_norm": 0.1553778010776279,
|
|
"learning_rate": 2.8400863280788207e-05,
|
|
"loss": 0.9407,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 3.7570332480818416,
|
|
"grad_norm": 0.16829547679009138,
|
|
"learning_rate": 2.829175727992147e-05,
|
|
"loss": 0.963,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 3.7595907928388748,
|
|
"grad_norm": 0.13746655170307476,
|
|
"learning_rate": 2.818281621971541e-05,
|
|
"loss": 0.9221,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 3.762148337595908,
|
|
"grad_norm": 0.16271667131621254,
|
|
"learning_rate": 2.8074040447673794e-05,
|
|
"loss": 0.9535,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 3.764705882352941,
|
|
"grad_norm": 0.16318435465235073,
|
|
"learning_rate": 2.7965430310773184e-05,
|
|
"loss": 0.9475,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 3.7672634271099743,
|
|
"grad_norm": 0.16520541373584413,
|
|
"learning_rate": 2.7856986155461777e-05,
|
|
"loss": 0.9315,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 3.7698209718670075,
|
|
"grad_norm": 0.32117889861607873,
|
|
"learning_rate": 2.7748708327658317e-05,
|
|
"loss": 0.9455,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 3.772378516624041,
|
|
"grad_norm": 0.17314463246020131,
|
|
"learning_rate": 2.7640597172751004e-05,
|
|
"loss": 0.9525,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 3.7749360613810743,
|
|
"grad_norm": 0.15225032038812816,
|
|
"learning_rate": 2.7532653035596336e-05,
|
|
"loss": 0.9453,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 3.7774936061381075,
|
|
"grad_norm": 0.17247417052786013,
|
|
"learning_rate": 2.7424876260518146e-05,
|
|
"loss": 0.9152,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 3.7800511508951407,
|
|
"grad_norm": 0.15503112719134568,
|
|
"learning_rate": 2.7317267191306318e-05,
|
|
"loss": 0.9398,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 3.782608695652174,
|
|
"grad_norm": 0.1631084235061464,
|
|
"learning_rate": 2.7209826171215827e-05,
|
|
"loss": 0.9246,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 3.785166240409207,
|
|
"grad_norm": 0.15506280568530903,
|
|
"learning_rate": 2.7102553542965577e-05,
|
|
"loss": 0.936,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 3.78772378516624,
|
|
"grad_norm": 0.1404687271754989,
|
|
"learning_rate": 2.6995449648737343e-05,
|
|
"loss": 0.9359,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 3.790281329923274,
|
|
"grad_norm": 0.1557007128341937,
|
|
"learning_rate": 2.6888514830174678e-05,
|
|
"loss": 0.954,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 3.792838874680307,
|
|
"grad_norm": 0.16612555940333462,
|
|
"learning_rate": 2.6781749428381752e-05,
|
|
"loss": 1.0034,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 3.79539641943734,
|
|
"grad_norm": 0.1733496961568388,
|
|
"learning_rate": 2.6675153783922457e-05,
|
|
"loss": 0.9518,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 3.7979539641943734,
|
|
"grad_norm": 0.15940418283478483,
|
|
"learning_rate": 2.6568728236819023e-05,
|
|
"loss": 0.9817,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 3.8005115089514065,
|
|
"grad_norm": 0.19079011728203774,
|
|
"learning_rate": 2.6462473126551187e-05,
|
|
"loss": 0.9735,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 3.80306905370844,
|
|
"grad_norm": 0.16130729906636684,
|
|
"learning_rate": 2.635638879205504e-05,
|
|
"loss": 0.9579,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 3.805626598465473,
|
|
"grad_norm": 0.1745866503183891,
|
|
"learning_rate": 2.625047557172189e-05,
|
|
"loss": 0.9402,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 3.8081841432225065,
|
|
"grad_norm": 0.18057372768582713,
|
|
"learning_rate": 2.6144733803397212e-05,
|
|
"loss": 0.9474,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 3.8107416879795397,
|
|
"grad_norm": 0.1560777993171654,
|
|
"learning_rate": 2.6039163824379588e-05,
|
|
"loss": 0.9506,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 3.813299232736573,
|
|
"grad_norm": 0.1674616567029557,
|
|
"learning_rate": 2.5933765971419647e-05,
|
|
"loss": 0.9488,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 3.815856777493606,
|
|
"grad_norm": 0.15672982172497663,
|
|
"learning_rate": 2.582854058071892e-05,
|
|
"loss": 0.9458,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 3.8184143222506393,
|
|
"grad_norm": 0.1558200464104945,
|
|
"learning_rate": 2.5723487987928817e-05,
|
|
"loss": 0.9518,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 3.820971867007673,
|
|
"grad_norm": 0.14208299213871128,
|
|
"learning_rate": 2.5618608528149614e-05,
|
|
"loss": 0.93,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 3.8235294117647056,
|
|
"grad_norm": 0.16087610572734629,
|
|
"learning_rate": 2.5513902535929288e-05,
|
|
"loss": 0.9763,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 3.8260869565217392,
|
|
"grad_norm": 0.1493299114392072,
|
|
"learning_rate": 2.5409370345262385e-05,
|
|
"loss": 0.9471,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 3.8286445012787724,
|
|
"grad_norm": 0.15214002644065255,
|
|
"learning_rate": 2.5305012289589223e-05,
|
|
"loss": 0.9588,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 3.8312020460358056,
|
|
"grad_norm": 0.15727057443971326,
|
|
"learning_rate": 2.5200828701794543e-05,
|
|
"loss": 0.9294,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 3.833759590792839,
|
|
"grad_norm": 0.14966978310373255,
|
|
"learning_rate": 2.5096819914206592e-05,
|
|
"loss": 0.9372,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 3.836317135549872,
|
|
"grad_norm": 0.160200304381001,
|
|
"learning_rate": 2.4992986258596023e-05,
|
|
"loss": 0.9648,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 3.8388746803069056,
|
|
"grad_norm": 0.1364407301299318,
|
|
"learning_rate": 2.4889328066174932e-05,
|
|
"loss": 0.9458,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 3.8414322250639388,
|
|
"grad_norm": 0.15554384512550426,
|
|
"learning_rate": 2.4785845667595565e-05,
|
|
"loss": 0.9532,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 3.843989769820972,
|
|
"grad_norm": 0.14270917443883158,
|
|
"learning_rate": 2.4682539392949494e-05,
|
|
"loss": 0.9194,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 3.846547314578005,
|
|
"grad_norm": 0.15315949958673647,
|
|
"learning_rate": 2.4579409571766543e-05,
|
|
"loss": 0.9619,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 3.8491048593350383,
|
|
"grad_norm": 0.14236120859618645,
|
|
"learning_rate": 2.4476456533013597e-05,
|
|
"loss": 0.9637,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 3.8516624040920715,
|
|
"grad_norm": 0.14065482492078218,
|
|
"learning_rate": 2.437368060509365e-05,
|
|
"loss": 0.9406,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 3.8542199488491047,
|
|
"grad_norm": 0.13361767868605823,
|
|
"learning_rate": 2.427108211584476e-05,
|
|
"loss": 0.9595,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 3.8567774936061383,
|
|
"grad_norm": 0.13594955260031957,
|
|
"learning_rate": 2.4168661392538982e-05,
|
|
"loss": 0.9421,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 3.8593350383631715,
|
|
"grad_norm": 0.13851801316117543,
|
|
"learning_rate": 2.4066418761881308e-05,
|
|
"loss": 0.9687,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 3.8618925831202047,
|
|
"grad_norm": 0.13380711931983305,
|
|
"learning_rate": 2.396435455000864e-05,
|
|
"loss": 0.9468,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 3.864450127877238,
|
|
"grad_norm": 0.13649849585417867,
|
|
"learning_rate": 2.386246908248883e-05,
|
|
"loss": 0.9228,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 3.867007672634271,
|
|
"grad_norm": 0.13210578639270845,
|
|
"learning_rate": 2.3760762684319508e-05,
|
|
"loss": 0.9094,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 3.869565217391304,
|
|
"grad_norm": 0.14259288669579517,
|
|
"learning_rate": 2.3659235679927016e-05,
|
|
"loss": 0.9351,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 3.8721227621483374,
|
|
"grad_norm": 0.1388101682540646,
|
|
"learning_rate": 2.3557888393165627e-05,
|
|
"loss": 0.9454,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 3.874680306905371,
|
|
"grad_norm": 0.12901592134412895,
|
|
"learning_rate": 2.345672114731624e-05,
|
|
"loss": 0.9481,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 3.877237851662404,
|
|
"grad_norm": 0.13894304934030247,
|
|
"learning_rate": 2.335573426508547e-05,
|
|
"loss": 0.9583,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 3.8797953964194374,
|
|
"grad_norm": 0.1370325882290817,
|
|
"learning_rate": 2.325492806860462e-05,
|
|
"loss": 0.9799,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 3.8823529411764706,
|
|
"grad_norm": 0.13421409804749201,
|
|
"learning_rate": 2.315430287942862e-05,
|
|
"loss": 0.9533,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 3.8849104859335037,
|
|
"grad_norm": 0.13298313283238028,
|
|
"learning_rate": 2.3053859018535026e-05,
|
|
"loss": 0.9709,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 3.887468030690537,
|
|
"grad_norm": 0.1361450777437208,
|
|
"learning_rate": 2.295359680632295e-05,
|
|
"loss": 0.9615,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 3.89002557544757,
|
|
"grad_norm": 0.1486100399377403,
|
|
"learning_rate": 2.2853516562612173e-05,
|
|
"loss": 0.9376,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 3.8925831202046037,
|
|
"grad_norm": 0.13690524401965368,
|
|
"learning_rate": 2.2753618606641928e-05,
|
|
"loss": 0.9092,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 3.895140664961637,
|
|
"grad_norm": 0.15669583951357616,
|
|
"learning_rate": 2.2653903257070012e-05,
|
|
"loss": 0.9443,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 3.89769820971867,
|
|
"grad_norm": 0.12931778250099024,
|
|
"learning_rate": 2.2554370831971743e-05,
|
|
"loss": 0.9406,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 3.9002557544757033,
|
|
"grad_norm": 0.17258200785982056,
|
|
"learning_rate": 2.2455021648838935e-05,
|
|
"loss": 0.9614,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 3.9028132992327365,
|
|
"grad_norm": 0.1521157336174598,
|
|
"learning_rate": 2.235585602457891e-05,
|
|
"loss": 0.9487,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 3.90537084398977,
|
|
"grad_norm": 0.14390268768179504,
|
|
"learning_rate": 2.225687427551341e-05,
|
|
"loss": 0.9401,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 3.907928388746803,
|
|
"grad_norm": 0.16337966447000044,
|
|
"learning_rate": 2.2158076717377765e-05,
|
|
"loss": 0.9536,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 3.9104859335038364,
|
|
"grad_norm": 0.15324748802477992,
|
|
"learning_rate": 2.2059463665319623e-05,
|
|
"loss": 0.9198,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.14907378875032545,
|
|
"learning_rate": 2.196103543389815e-05,
|
|
"loss": 0.9481,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 3.915601023017903,
|
|
"grad_norm": 0.14207939797213323,
|
|
"learning_rate": 2.1862792337083017e-05,
|
|
"loss": 0.9387,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 3.918158567774936,
|
|
"grad_norm": 0.13959510597089575,
|
|
"learning_rate": 2.176473468825328e-05,
|
|
"loss": 0.9536,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 3.920716112531969,
|
|
"grad_norm": 0.14016454333503284,
|
|
"learning_rate": 2.1666862800196454e-05,
|
|
"loss": 0.9491,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 3.923273657289003,
|
|
"grad_norm": 0.14885818803453518,
|
|
"learning_rate": 2.1569176985107535e-05,
|
|
"loss": 0.9612,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 3.9258312020460355,
|
|
"grad_norm": 0.14403866973582788,
|
|
"learning_rate": 2.1471677554587958e-05,
|
|
"loss": 0.9511,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 3.928388746803069,
|
|
"grad_norm": 0.13223516573639468,
|
|
"learning_rate": 2.1374364819644623e-05,
|
|
"loss": 0.9373,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 3.9309462915601023,
|
|
"grad_norm": 0.14036184466315108,
|
|
"learning_rate": 2.1277239090688894e-05,
|
|
"loss": 0.9353,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 3.9335038363171355,
|
|
"grad_norm": 0.1396968491520172,
|
|
"learning_rate": 2.1180300677535655e-05,
|
|
"loss": 0.9531,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 3.9360613810741687,
|
|
"grad_norm": 0.13659743962984422,
|
|
"learning_rate": 2.108354988940228e-05,
|
|
"loss": 0.936,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 3.938618925831202,
|
|
"grad_norm": 0.1508626854215839,
|
|
"learning_rate": 2.0986987034907554e-05,
|
|
"loss": 0.9452,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 3.9411764705882355,
|
|
"grad_norm": 0.14129695624224084,
|
|
"learning_rate": 2.089061242207092e-05,
|
|
"loss": 0.9369,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 3.9437340153452687,
|
|
"grad_norm": 0.1428765331179949,
|
|
"learning_rate": 2.0794426358311294e-05,
|
|
"loss": 0.9142,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 3.946291560102302,
|
|
"grad_norm": 0.1330347524331098,
|
|
"learning_rate": 2.069842915044614e-05,
|
|
"loss": 0.9381,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 3.948849104859335,
|
|
"grad_norm": 0.14069953111767788,
|
|
"learning_rate": 2.0602621104690517e-05,
|
|
"loss": 0.921,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 3.9514066496163682,
|
|
"grad_norm": 0.1456949051715094,
|
|
"learning_rate": 2.050700252665615e-05,
|
|
"loss": 0.9549,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 3.9539641943734014,
|
|
"grad_norm": 0.13746866783044756,
|
|
"learning_rate": 2.041157372135028e-05,
|
|
"loss": 0.9287,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 3.9565217391304346,
|
|
"grad_norm": 0.15606889468360874,
|
|
"learning_rate": 2.0316334993174856e-05,
|
|
"loss": 0.9555,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 3.959079283887468,
|
|
"grad_norm": 0.14118323164397703,
|
|
"learning_rate": 2.0221286645925558e-05,
|
|
"loss": 0.9343,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 3.9616368286445014,
|
|
"grad_norm": 0.1363380304979579,
|
|
"learning_rate": 2.012642898279074e-05,
|
|
"loss": 0.9961,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 3.9641943734015346,
|
|
"grad_norm": 0.14317404024733354,
|
|
"learning_rate": 2.003176230635049e-05,
|
|
"loss": 0.9647,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 3.9667519181585678,
|
|
"grad_norm": 0.14674699824614082,
|
|
"learning_rate": 1.9937286918575713e-05,
|
|
"loss": 0.9541,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 3.969309462915601,
|
|
"grad_norm": 0.1392728526341487,
|
|
"learning_rate": 1.984300312082711e-05,
|
|
"loss": 0.9549,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 3.971867007672634,
|
|
"grad_norm": 0.1388687318173855,
|
|
"learning_rate": 1.9748911213854267e-05,
|
|
"loss": 0.9538,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 3.9744245524296673,
|
|
"grad_norm": 0.13901730161036177,
|
|
"learning_rate": 1.9655011497794616e-05,
|
|
"loss": 0.9426,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 3.976982097186701,
|
|
"grad_norm": 0.13747089636524243,
|
|
"learning_rate": 1.9561304272172644e-05,
|
|
"loss": 0.9639,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 3.979539641943734,
|
|
"grad_norm": 0.1395863657318075,
|
|
"learning_rate": 1.946778983589873e-05,
|
|
"loss": 0.9733,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 3.9820971867007673,
|
|
"grad_norm": 0.1388892460599247,
|
|
"learning_rate": 1.9374468487268254e-05,
|
|
"loss": 0.944,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 3.9846547314578005,
|
|
"grad_norm": 0.1542426182338673,
|
|
"learning_rate": 1.9281340523960806e-05,
|
|
"loss": 0.9575,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 3.9872122762148337,
|
|
"grad_norm": 0.14702194394411322,
|
|
"learning_rate": 1.9188406243039015e-05,
|
|
"loss": 0.939,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 3.9897698209718673,
|
|
"grad_norm": 0.15088719580788107,
|
|
"learning_rate": 1.9095665940947717e-05,
|
|
"loss": 0.9523,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 3.9923273657289,
|
|
"grad_norm": 0.13979637370531914,
|
|
"learning_rate": 1.9003119913512992e-05,
|
|
"loss": 0.9518,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 3.9948849104859336,
|
|
"grad_norm": 0.13293457854923818,
|
|
"learning_rate": 1.891076845594122e-05,
|
|
"loss": 0.966,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 3.997442455242967,
|
|
"grad_norm": 0.1330659091048459,
|
|
"learning_rate": 1.881861186281813e-05,
|
|
"loss": 0.9425,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.15532958865697588,
|
|
"learning_rate": 1.872665042810784e-05,
|
|
"loss": 0.9491,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 4.002557544757034,
|
|
"grad_norm": 0.172134213325208,
|
|
"learning_rate": 1.863488444515203e-05,
|
|
"loss": 0.9131,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 4.005115089514066,
|
|
"grad_norm": 0.15705142364202992,
|
|
"learning_rate": 1.854331420666882e-05,
|
|
"loss": 0.9254,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 4.0076726342711,
|
|
"grad_norm": 0.16319791463669756,
|
|
"learning_rate": 1.845194000475199e-05,
|
|
"loss": 0.9005,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 4.010230179028133,
|
|
"grad_norm": 0.16550445546270565,
|
|
"learning_rate": 1.836076213087e-05,
|
|
"loss": 0.9177,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 4.012787723785166,
|
|
"grad_norm": 0.17000604940332,
|
|
"learning_rate": 1.826978087586502e-05,
|
|
"loss": 0.9288,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 4.015345268542199,
|
|
"grad_norm": 0.17439370178321326,
|
|
"learning_rate": 1.8178996529952088e-05,
|
|
"loss": 0.9302,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 4.017902813299233,
|
|
"grad_norm": 0.16621808084873166,
|
|
"learning_rate": 1.808840938271807e-05,
|
|
"loss": 0.9277,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 4.020460358056266,
|
|
"grad_norm": 0.1502855048809297,
|
|
"learning_rate": 1.799801972312092e-05,
|
|
"loss": 0.9146,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 4.023017902813299,
|
|
"grad_norm": 0.15792591947199125,
|
|
"learning_rate": 1.7907827839488474e-05,
|
|
"loss": 0.9175,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 4.025575447570333,
|
|
"grad_norm": 0.1563775392864349,
|
|
"learning_rate": 1.7817834019517805e-05,
|
|
"loss": 0.9128,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 4.028132992327365,
|
|
"grad_norm": 0.14597718440990778,
|
|
"learning_rate": 1.7728038550274193e-05,
|
|
"loss": 0.9185,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 4.030690537084399,
|
|
"grad_norm": 0.1569564550463153,
|
|
"learning_rate": 1.7638441718190192e-05,
|
|
"loss": 0.9296,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 4.033248081841432,
|
|
"grad_norm": 0.15089755959303894,
|
|
"learning_rate": 1.7549043809064697e-05,
|
|
"loss": 0.9011,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 4.035805626598465,
|
|
"grad_norm": 0.14320940233490406,
|
|
"learning_rate": 1.74598451080622e-05,
|
|
"loss": 0.9301,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 4.038363171355499,
|
|
"grad_norm": 0.1640364740345872,
|
|
"learning_rate": 1.737084589971157e-05,
|
|
"loss": 0.9294,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 4.040920716112532,
|
|
"grad_norm": 0.15372462860199906,
|
|
"learning_rate": 1.728204646790544e-05,
|
|
"loss": 0.9464,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 4.043478260869565,
|
|
"grad_norm": 0.14792763942080298,
|
|
"learning_rate": 1.7193447095899206e-05,
|
|
"loss": 0.9224,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 4.046035805626598,
|
|
"grad_norm": 0.13951058738523123,
|
|
"learning_rate": 1.710504806631005e-05,
|
|
"loss": 0.9087,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 4.048593350383632,
|
|
"grad_norm": 0.13260882878617228,
|
|
"learning_rate": 1.701684966111615e-05,
|
|
"loss": 0.9036,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 4.051150895140665,
|
|
"grad_norm": 0.14125256658288957,
|
|
"learning_rate": 1.6928852161655616e-05,
|
|
"loss": 0.92,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 4.053708439897698,
|
|
"grad_norm": 0.13237438231494236,
|
|
"learning_rate": 1.684105584862584e-05,
|
|
"loss": 0.9156,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 4.056265984654732,
|
|
"grad_norm": 0.1359119819403516,
|
|
"learning_rate": 1.6753461002082395e-05,
|
|
"loss": 0.9554,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 4.0588235294117645,
|
|
"grad_norm": 0.136943228077222,
|
|
"learning_rate": 1.6666067901438178e-05,
|
|
"loss": 0.8844,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 4.061381074168798,
|
|
"grad_norm": 0.14746043096646916,
|
|
"learning_rate": 1.657887682546264e-05,
|
|
"loss": 0.9091,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 4.063938618925831,
|
|
"grad_norm": 0.13289891251117492,
|
|
"learning_rate": 1.649188805228076e-05,
|
|
"loss": 0.9462,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 4.0664961636828645,
|
|
"grad_norm": 0.14117852752538673,
|
|
"learning_rate": 1.6405101859372123e-05,
|
|
"loss": 0.9153,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 4.069053708439898,
|
|
"grad_norm": 0.12613455462183037,
|
|
"learning_rate": 1.631851852357026e-05,
|
|
"loss": 0.9519,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 4.071611253196931,
|
|
"grad_norm": 0.1396860703236042,
|
|
"learning_rate": 1.6232138321061544e-05,
|
|
"loss": 0.9412,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 4.0741687979539645,
|
|
"grad_norm": 0.1360638603818121,
|
|
"learning_rate": 1.6145961527384395e-05,
|
|
"loss": 0.9517,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 4.076726342710997,
|
|
"grad_norm": 0.1324923155606263,
|
|
"learning_rate": 1.6059988417428396e-05,
|
|
"loss": 0.9513,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 4.079283887468031,
|
|
"grad_norm": 0.14265745538296148,
|
|
"learning_rate": 1.5974219265433406e-05,
|
|
"loss": 0.9154,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 4.081841432225064,
|
|
"grad_norm": 0.14492559140570338,
|
|
"learning_rate": 1.58886543449887e-05,
|
|
"loss": 0.9394,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 4.084398976982097,
|
|
"grad_norm": 0.12579546842676975,
|
|
"learning_rate": 1.5803293929032078e-05,
|
|
"loss": 0.9281,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 4.086956521739131,
|
|
"grad_norm": 0.14549537683931857,
|
|
"learning_rate": 1.5718138289849055e-05,
|
|
"loss": 0.8957,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 4.089514066496164,
|
|
"grad_norm": 0.14813650458162753,
|
|
"learning_rate": 1.563318769907187e-05,
|
|
"loss": 0.9004,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 4.092071611253197,
|
|
"grad_norm": 0.12523568970989923,
|
|
"learning_rate": 1.554844242767872e-05,
|
|
"loss": 0.9311,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 4.09462915601023,
|
|
"grad_norm": 0.13296174952051867,
|
|
"learning_rate": 1.546390274599289e-05,
|
|
"loss": 0.9256,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 4.0971867007672635,
|
|
"grad_norm": 0.12809367590620266,
|
|
"learning_rate": 1.5379568923681833e-05,
|
|
"loss": 0.9136,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 4.099744245524296,
|
|
"grad_norm": 0.13109260024902633,
|
|
"learning_rate": 1.5295441229756364e-05,
|
|
"loss": 0.9007,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 4.10230179028133,
|
|
"grad_norm": 0.12407094954940708,
|
|
"learning_rate": 1.521151993256977e-05,
|
|
"loss": 0.9406,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 4.1048593350383635,
|
|
"grad_norm": 0.1298161922376652,
|
|
"learning_rate": 1.5127805299817025e-05,
|
|
"loss": 0.9264,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 4.107416879795396,
|
|
"grad_norm": 0.1481163518427539,
|
|
"learning_rate": 1.5044297598533777e-05,
|
|
"loss": 0.9285,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 4.10997442455243,
|
|
"grad_norm": 0.12078740228639545,
|
|
"learning_rate": 1.496099709509565e-05,
|
|
"loss": 0.9078,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 4.112531969309463,
|
|
"grad_norm": 0.13027908099413282,
|
|
"learning_rate": 1.4877904055217376e-05,
|
|
"loss": 0.9149,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 4.115089514066496,
|
|
"grad_norm": 0.1468019204651356,
|
|
"learning_rate": 1.4795018743951857e-05,
|
|
"loss": 0.9304,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 4.117647058823529,
|
|
"grad_norm": 0.1349316946630024,
|
|
"learning_rate": 1.4712341425689406e-05,
|
|
"loss": 0.926,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 4.120204603580563,
|
|
"grad_norm": 0.1228754724620514,
|
|
"learning_rate": 1.4629872364156854e-05,
|
|
"loss": 0.9185,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 4.122762148337596,
|
|
"grad_norm": 0.14313419206388078,
|
|
"learning_rate": 1.4547611822416748e-05,
|
|
"loss": 0.9126,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 4.125319693094629,
|
|
"grad_norm": 0.14531581013669995,
|
|
"learning_rate": 1.446556006286648e-05,
|
|
"loss": 0.9372,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 4.127877237851663,
|
|
"grad_norm": 0.12636103579388067,
|
|
"learning_rate": 1.4383717347237425e-05,
|
|
"loss": 0.9255,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 4.130434782608695,
|
|
"grad_norm": 0.13484501378576969,
|
|
"learning_rate": 1.4302083936594247e-05,
|
|
"loss": 0.9267,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 4.132992327365729,
|
|
"grad_norm": 0.1306495047012211,
|
|
"learning_rate": 1.4220660091333875e-05,
|
|
"loss": 0.9237,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 4.135549872122763,
|
|
"grad_norm": 0.12979097348457122,
|
|
"learning_rate": 1.4139446071184737e-05,
|
|
"loss": 0.9197,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 4.138107416879795,
|
|
"grad_norm": 0.13739201337062779,
|
|
"learning_rate": 1.405844213520604e-05,
|
|
"loss": 0.9197,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 4.140664961636829,
|
|
"grad_norm": 0.1294644982423319,
|
|
"learning_rate": 1.3977648541786804e-05,
|
|
"loss": 0.896,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 4.143222506393862,
|
|
"grad_norm": 0.12588348274914363,
|
|
"learning_rate": 1.3897065548645104e-05,
|
|
"loss": 0.9453,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 4.145780051150895,
|
|
"grad_norm": 0.15398362387202247,
|
|
"learning_rate": 1.381669341282721e-05,
|
|
"loss": 0.9317,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 4.148337595907928,
|
|
"grad_norm": 0.13197721364304257,
|
|
"learning_rate": 1.3736532390706878e-05,
|
|
"loss": 0.9279,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 4.150895140664962,
|
|
"grad_norm": 0.12322044737512756,
|
|
"learning_rate": 1.3656582737984318e-05,
|
|
"loss": 0.9439,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 4.153452685421995,
|
|
"grad_norm": 0.12440470950789576,
|
|
"learning_rate": 1.3576844709685583e-05,
|
|
"loss": 0.9088,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 4.156010230179028,
|
|
"grad_norm": 0.12465116010990127,
|
|
"learning_rate": 1.3497318560161704e-05,
|
|
"loss": 0.9211,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 4.158567774936062,
|
|
"grad_norm": 0.13358086347052778,
|
|
"learning_rate": 1.3418004543087792e-05,
|
|
"loss": 0.9312,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 4.161125319693094,
|
|
"grad_norm": 0.1224560124714394,
|
|
"learning_rate": 1.3338902911462336e-05,
|
|
"loss": 0.9253,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 4.163682864450128,
|
|
"grad_norm": 0.12240140914681184,
|
|
"learning_rate": 1.3260013917606319e-05,
|
|
"loss": 0.9383,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 4.166240409207161,
|
|
"grad_norm": 0.12945740752464988,
|
|
"learning_rate": 1.318133781316247e-05,
|
|
"loss": 0.9416,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 4.168797953964194,
|
|
"grad_norm": 0.13087100044291045,
|
|
"learning_rate": 1.3102874849094414e-05,
|
|
"loss": 0.9316,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 4.171355498721228,
|
|
"grad_norm": 0.14189296661844325,
|
|
"learning_rate": 1.3024625275685891e-05,
|
|
"loss": 0.9465,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 4.173913043478261,
|
|
"grad_norm": 0.1297951759919457,
|
|
"learning_rate": 1.2946589342540023e-05,
|
|
"loss": 0.9275,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 4.176470588235294,
|
|
"grad_norm": 0.11911786087772278,
|
|
"learning_rate": 1.2868767298578395e-05,
|
|
"loss": 0.9225,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 4.179028132992327,
|
|
"grad_norm": 0.12225398214034955,
|
|
"learning_rate": 1.2791159392040275e-05,
|
|
"loss": 0.9196,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 4.181585677749361,
|
|
"grad_norm": 0.1310216078232746,
|
|
"learning_rate": 1.2713765870481995e-05,
|
|
"loss": 0.9353,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 4.1841432225063935,
|
|
"grad_norm": 0.12742055135018454,
|
|
"learning_rate": 1.2636586980775945e-05,
|
|
"loss": 0.9666,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 4.186700767263427,
|
|
"grad_norm": 0.12384487664186089,
|
|
"learning_rate": 1.2559622969109886e-05,
|
|
"loss": 0.9209,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 4.189258312020461,
|
|
"grad_norm": 0.1340544434519516,
|
|
"learning_rate": 1.2482874080986176e-05,
|
|
"loss": 0.9377,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 4.1918158567774935,
|
|
"grad_norm": 0.13746772119236356,
|
|
"learning_rate": 1.2406340561220947e-05,
|
|
"loss": 0.9207,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 4.194373401534527,
|
|
"grad_norm": 0.1280603990954687,
|
|
"learning_rate": 1.2330022653943358e-05,
|
|
"loss": 0.914,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 4.19693094629156,
|
|
"grad_norm": 0.12374468420399631,
|
|
"learning_rate": 1.2253920602594759e-05,
|
|
"loss": 0.8923,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 4.1994884910485935,
|
|
"grad_norm": 0.12384342114389504,
|
|
"learning_rate": 1.2178034649928034e-05,
|
|
"loss": 0.9396,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 4.202046035805626,
|
|
"grad_norm": 0.1230247461338335,
|
|
"learning_rate": 1.2102365038006672e-05,
|
|
"loss": 0.8981,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 4.20460358056266,
|
|
"grad_norm": 0.12441020446608941,
|
|
"learning_rate": 1.2026912008204117e-05,
|
|
"loss": 0.9395,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 4.207161125319693,
|
|
"grad_norm": 0.1207928603043833,
|
|
"learning_rate": 1.195167580120292e-05,
|
|
"loss": 0.9257,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 4.209718670076726,
|
|
"grad_norm": 0.12168214916803673,
|
|
"learning_rate": 1.1876656656994032e-05,
|
|
"loss": 0.907,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 4.21227621483376,
|
|
"grad_norm": 0.12409121363381591,
|
|
"learning_rate": 1.180185481487599e-05,
|
|
"loss": 0.9082,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 4.2148337595907925,
|
|
"grad_norm": 0.12218546237016087,
|
|
"learning_rate": 1.1727270513454161e-05,
|
|
"loss": 0.9207,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 4.217391304347826,
|
|
"grad_norm": 0.1373741099688316,
|
|
"learning_rate": 1.1652903990640075e-05,
|
|
"loss": 0.9041,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 4.21994884910486,
|
|
"grad_norm": 0.126043833861761,
|
|
"learning_rate": 1.1578755483650465e-05,
|
|
"loss": 0.9071,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 4.2225063938618925,
|
|
"grad_norm": 0.12907468546494064,
|
|
"learning_rate": 1.150482522900668e-05,
|
|
"loss": 0.9267,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 4.225063938618926,
|
|
"grad_norm": 0.11696490881508001,
|
|
"learning_rate": 1.1431113462533942e-05,
|
|
"loss": 0.9188,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 4.227621483375959,
|
|
"grad_norm": 0.1219772936698238,
|
|
"learning_rate": 1.1357620419360438e-05,
|
|
"loss": 0.93,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 4.2301790281329925,
|
|
"grad_norm": 0.12317189729882781,
|
|
"learning_rate": 1.128434633391673e-05,
|
|
"loss": 0.9248,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 4.232736572890025,
|
|
"grad_norm": 0.12135967777000363,
|
|
"learning_rate": 1.121129143993489e-05,
|
|
"loss": 0.9482,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 4.235294117647059,
|
|
"grad_norm": 0.12569146595438008,
|
|
"learning_rate": 1.1138455970447857e-05,
|
|
"loss": 0.9237,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 4.2378516624040925,
|
|
"grad_norm": 0.12009749843054457,
|
|
"learning_rate": 1.1065840157788599e-05,
|
|
"loss": 0.9117,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 4.240409207161125,
|
|
"grad_norm": 0.12262206120182582,
|
|
"learning_rate": 1.099344423358943e-05,
|
|
"loss": 0.944,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 4.242966751918159,
|
|
"grad_norm": 0.12739673009436395,
|
|
"learning_rate": 1.0921268428781277e-05,
|
|
"loss": 0.928,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 4.245524296675192,
|
|
"grad_norm": 0.12049563257356445,
|
|
"learning_rate": 1.084931297359293e-05,
|
|
"loss": 0.9307,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 4.248081841432225,
|
|
"grad_norm": 0.1268732696430339,
|
|
"learning_rate": 1.0777578097550206e-05,
|
|
"loss": 0.938,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 4.250639386189258,
|
|
"grad_norm": 0.1302689278877736,
|
|
"learning_rate": 1.0706064029475436e-05,
|
|
"loss": 0.9339,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 4.253196930946292,
|
|
"grad_norm": 0.1207622169109695,
|
|
"learning_rate": 1.0634770997486546e-05,
|
|
"loss": 0.9153,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 4.255754475703325,
|
|
"grad_norm": 0.11706181174774555,
|
|
"learning_rate": 1.0563699228996405e-05,
|
|
"loss": 0.9129,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 4.258312020460358,
|
|
"grad_norm": 0.11849875702011481,
|
|
"learning_rate": 1.0492848950712067e-05,
|
|
"loss": 0.9183,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 4.260869565217392,
|
|
"grad_norm": 0.12286048694545573,
|
|
"learning_rate": 1.0422220388634145e-05,
|
|
"loss": 0.9194,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 4.263427109974424,
|
|
"grad_norm": 0.12106155524848677,
|
|
"learning_rate": 1.03518137680559e-05,
|
|
"loss": 0.93,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 4.265984654731458,
|
|
"grad_norm": 0.11931612070623257,
|
|
"learning_rate": 1.0281629313562704e-05,
|
|
"loss": 0.8812,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 4.268542199488491,
|
|
"grad_norm": 0.12412002218869622,
|
|
"learning_rate": 1.0211667249031278e-05,
|
|
"loss": 0.9211,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 4.271099744245524,
|
|
"grad_norm": 0.11050129272365039,
|
|
"learning_rate": 1.0141927797628913e-05,
|
|
"loss": 0.9346,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 4.273657289002558,
|
|
"grad_norm": 0.11696142916514798,
|
|
"learning_rate": 1.0072411181812805e-05,
|
|
"loss": 0.9103,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 4.276214833759591,
|
|
"grad_norm": 0.12523114611535077,
|
|
"learning_rate": 1.0003117623329373e-05,
|
|
"loss": 0.9188,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 4.278772378516624,
|
|
"grad_norm": 0.1211246626009557,
|
|
"learning_rate": 9.934047343213468e-06,
|
|
"loss": 0.8779,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 4.281329923273657,
|
|
"grad_norm": 0.11896385138151676,
|
|
"learning_rate": 9.865200561787779e-06,
|
|
"loss": 0.916,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 4.283887468030691,
|
|
"grad_norm": 0.12907351319734606,
|
|
"learning_rate": 9.796577498662017e-06,
|
|
"loss": 0.9316,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 4.286445012787723,
|
|
"grad_norm": 0.1175024733129538,
|
|
"learning_rate": 9.728178372732323e-06,
|
|
"loss": 0.9175,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 4.289002557544757,
|
|
"grad_norm": 0.11765409328640529,
|
|
"learning_rate": 9.660003402180495e-06,
|
|
"loss": 0.9322,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 4.291560102301791,
|
|
"grad_norm": 0.11606048414482627,
|
|
"learning_rate": 9.592052804473248e-06,
|
|
"loss": 0.9338,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 4.294117647058823,
|
|
"grad_norm": 0.12217997194310143,
|
|
"learning_rate": 9.524326796361704e-06,
|
|
"loss": 0.9198,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 4.296675191815857,
|
|
"grad_norm": 0.13681552209998984,
|
|
"learning_rate": 9.456825593880502e-06,
|
|
"loss": 0.9381,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 4.29923273657289,
|
|
"grad_norm": 0.11707040245774833,
|
|
"learning_rate": 9.389549412347204e-06,
|
|
"loss": 0.9114,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 4.301790281329923,
|
|
"grad_norm": 0.11739134713610266,
|
|
"learning_rate": 9.322498466361574e-06,
|
|
"loss": 0.9564,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 4.304347826086957,
|
|
"grad_norm": 0.11490889884017837,
|
|
"learning_rate": 9.25567296980499e-06,
|
|
"loss": 0.9372,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 4.30690537084399,
|
|
"grad_norm": 0.13548343430667473,
|
|
"learning_rate": 9.18907313583958e-06,
|
|
"loss": 0.9571,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 4.309462915601023,
|
|
"grad_norm": 0.1169879093609689,
|
|
"learning_rate": 9.122699176907699e-06,
|
|
"loss": 0.91,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 4.312020460358056,
|
|
"grad_norm": 0.12181883918771313,
|
|
"learning_rate": 9.056551304731216e-06,
|
|
"loss": 0.9403,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 4.31457800511509,
|
|
"grad_norm": 0.11516301601447926,
|
|
"learning_rate": 8.990629730310787e-06,
|
|
"loss": 0.9045,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 4.3171355498721224,
|
|
"grad_norm": 0.1130886469711019,
|
|
"learning_rate": 8.924934663925228e-06,
|
|
"loss": 0.9005,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 4.319693094629156,
|
|
"grad_norm": 0.12056683149234801,
|
|
"learning_rate": 8.859466315130833e-06,
|
|
"loss": 0.905,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 4.322250639386189,
|
|
"grad_norm": 0.12131053610936289,
|
|
"learning_rate": 8.794224892760694e-06,
|
|
"loss": 0.964,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 4.324808184143222,
|
|
"grad_norm": 0.11072666373506544,
|
|
"learning_rate": 8.729210604924075e-06,
|
|
"loss": 0.9168,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 4.327365728900256,
|
|
"grad_norm": 0.11419375138008123,
|
|
"learning_rate": 8.66442365900566e-06,
|
|
"loss": 0.9155,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 4.329923273657289,
|
|
"grad_norm": 0.11067325544749756,
|
|
"learning_rate": 8.599864261665032e-06,
|
|
"loss": 0.929,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 4.332480818414322,
|
|
"grad_norm": 0.13119769270640452,
|
|
"learning_rate": 8.535532618835894e-06,
|
|
"loss": 0.9196,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 4.335038363171355,
|
|
"grad_norm": 0.12122259309350006,
|
|
"learning_rate": 8.471428935725394e-06,
|
|
"loss": 0.9097,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 4.337595907928389,
|
|
"grad_norm": 0.1186567073290791,
|
|
"learning_rate": 8.407553416813621e-06,
|
|
"loss": 0.9486,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 4.340153452685422,
|
|
"grad_norm": 0.13863787273855152,
|
|
"learning_rate": 8.343906265852806e-06,
|
|
"loss": 0.9194,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 4.342710997442455,
|
|
"grad_norm": 0.11736813648606277,
|
|
"learning_rate": 8.280487685866707e-06,
|
|
"loss": 0.8964,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 4.345268542199489,
|
|
"grad_norm": 0.11874382513666652,
|
|
"learning_rate": 8.217297879150065e-06,
|
|
"loss": 0.9305,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.12096917615982158,
|
|
"learning_rate": 8.154337047267763e-06,
|
|
"loss": 0.926,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 4.350383631713555,
|
|
"grad_norm": 0.12459874607610563,
|
|
"learning_rate": 8.091605391054354e-06,
|
|
"loss": 0.8922,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 4.352941176470588,
|
|
"grad_norm": 0.12221739613538536,
|
|
"learning_rate": 8.02910311061333e-06,
|
|
"loss": 0.9401,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 4.3554987212276215,
|
|
"grad_norm": 0.12254645629749011,
|
|
"learning_rate": 7.966830405316561e-06,
|
|
"loss": 0.9547,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 4.358056265984655,
|
|
"grad_norm": 0.12001133797508247,
|
|
"learning_rate": 7.90478747380357e-06,
|
|
"loss": 0.9103,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 4.360613810741688,
|
|
"grad_norm": 0.12199519070925526,
|
|
"learning_rate": 7.842974513980946e-06,
|
|
"loss": 0.9271,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 4.3631713554987215,
|
|
"grad_norm": 0.11295241635294967,
|
|
"learning_rate": 7.781391723021711e-06,
|
|
"loss": 0.9363,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 4.365728900255754,
|
|
"grad_norm": 0.12686526411244078,
|
|
"learning_rate": 7.720039297364681e-06,
|
|
"loss": 0.9274,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 4.368286445012788,
|
|
"grad_norm": 0.1333081116381865,
|
|
"learning_rate": 7.658917432713839e-06,
|
|
"loss": 0.9172,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 4.370843989769821,
|
|
"grad_norm": 0.12577470275328256,
|
|
"learning_rate": 7.598026324037762e-06,
|
|
"loss": 0.939,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 4.373401534526854,
|
|
"grad_norm": 0.12345544691397578,
|
|
"learning_rate": 7.537366165568909e-06,
|
|
"loss": 0.9288,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 4.375959079283888,
|
|
"grad_norm": 0.11948532376497799,
|
|
"learning_rate": 7.476937150803025e-06,
|
|
"loss": 0.9497,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 4.378516624040921,
|
|
"grad_norm": 0.12876903997603817,
|
|
"learning_rate": 7.416739472498613e-06,
|
|
"loss": 0.9479,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 4.381074168797954,
|
|
"grad_norm": 0.11529385831506739,
|
|
"learning_rate": 7.356773322676205e-06,
|
|
"loss": 0.9158,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 4.383631713554987,
|
|
"grad_norm": 0.11078825541988917,
|
|
"learning_rate": 7.2970388926178045e-06,
|
|
"loss": 0.937,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 4.3861892583120206,
|
|
"grad_norm": 0.11173435690628004,
|
|
"learning_rate": 7.237536372866247e-06,
|
|
"loss": 0.9327,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 4.388746803069053,
|
|
"grad_norm": 0.1223612229123131,
|
|
"learning_rate": 7.178265953224701e-06,
|
|
"loss": 0.9227,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 4.391304347826087,
|
|
"grad_norm": 0.12507251852936713,
|
|
"learning_rate": 7.119227822755843e-06,
|
|
"loss": 0.9571,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 4.3938618925831205,
|
|
"grad_norm": 0.11397092222799754,
|
|
"learning_rate": 7.060422169781467e-06,
|
|
"loss": 0.9041,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 4.396419437340153,
|
|
"grad_norm": 0.10753667090584995,
|
|
"learning_rate": 7.001849181881808e-06,
|
|
"loss": 0.9166,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 4.398976982097187,
|
|
"grad_norm": 0.12054572854799732,
|
|
"learning_rate": 6.943509045894905e-06,
|
|
"loss": 0.9341,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 4.40153452685422,
|
|
"grad_norm": 0.11185867845020742,
|
|
"learning_rate": 6.885401947916048e-06,
|
|
"loss": 0.9514,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 4.404092071611253,
|
|
"grad_norm": 0.11085335077105966,
|
|
"learning_rate": 6.827528073297185e-06,
|
|
"loss": 0.9382,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 4.406649616368286,
|
|
"grad_norm": 0.11479224410155166,
|
|
"learning_rate": 6.769887606646306e-06,
|
|
"loss": 0.9414,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 4.40920716112532,
|
|
"grad_norm": 0.11417555802279347,
|
|
"learning_rate": 6.712480731826878e-06,
|
|
"loss": 0.912,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 4.411764705882353,
|
|
"grad_norm": 0.11413292812828428,
|
|
"learning_rate": 6.6553076319572394e-06,
|
|
"loss": 0.9268,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 4.414322250639386,
|
|
"grad_norm": 0.10996848327532169,
|
|
"learning_rate": 6.59836848941005e-06,
|
|
"loss": 0.9253,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 4.41687979539642,
|
|
"grad_norm": 0.12150368369219573,
|
|
"learning_rate": 6.541663485811667e-06,
|
|
"loss": 0.915,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 4.419437340153452,
|
|
"grad_norm": 0.11980533715997778,
|
|
"learning_rate": 6.485192802041553e-06,
|
|
"loss": 0.9156,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 4.421994884910486,
|
|
"grad_norm": 0.11392894414591724,
|
|
"learning_rate": 6.428956618231788e-06,
|
|
"loss": 0.9197,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 4.42455242966752,
|
|
"grad_norm": 0.11760332661995491,
|
|
"learning_rate": 6.3729551137664055e-06,
|
|
"loss": 0.9545,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 4.427109974424552,
|
|
"grad_norm": 0.10904085632244291,
|
|
"learning_rate": 6.3171884672808524e-06,
|
|
"loss": 0.9103,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 4.429667519181586,
|
|
"grad_norm": 0.10863502669554059,
|
|
"learning_rate": 6.26165685666142e-06,
|
|
"loss": 0.9016,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 4.432225063938619,
|
|
"grad_norm": 0.11509438949225145,
|
|
"learning_rate": 6.206360459044671e-06,
|
|
"loss": 0.931,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 4.434782608695652,
|
|
"grad_norm": 0.11748690634314717,
|
|
"learning_rate": 6.15129945081689e-06,
|
|
"loss": 0.9151,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 4.437340153452685,
|
|
"grad_norm": 0.11639698873895774,
|
|
"learning_rate": 6.096474007613476e-06,
|
|
"loss": 0.9365,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 4.439897698209719,
|
|
"grad_norm": 0.11159987657775047,
|
|
"learning_rate": 6.0418843043184636e-06,
|
|
"loss": 0.9552,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 4.442455242966752,
|
|
"grad_norm": 0.10952923402441073,
|
|
"learning_rate": 5.987530515063889e-06,
|
|
"loss": 0.9194,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 4.445012787723785,
|
|
"grad_norm": 0.11072771958857656,
|
|
"learning_rate": 5.933412813229256e-06,
|
|
"loss": 0.9189,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 4.447570332480819,
|
|
"grad_norm": 0.11775592911375234,
|
|
"learning_rate": 5.879531371440994e-06,
|
|
"loss": 0.9388,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 4.450127877237851,
|
|
"grad_norm": 0.11460729784468633,
|
|
"learning_rate": 5.825886361571922e-06,
|
|
"loss": 0.8945,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 4.452685421994885,
|
|
"grad_norm": 0.11581761610879335,
|
|
"learning_rate": 5.772477954740652e-06,
|
|
"loss": 0.9126,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 4.455242966751918,
|
|
"grad_norm": 0.11118413455302595,
|
|
"learning_rate": 5.719306321311075e-06,
|
|
"loss": 0.9565,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 4.457800511508951,
|
|
"grad_norm": 0.10749836975161339,
|
|
"learning_rate": 5.666371630891858e-06,
|
|
"loss": 0.9127,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 4.460358056265985,
|
|
"grad_norm": 0.10944652966346073,
|
|
"learning_rate": 5.613674052335798e-06,
|
|
"loss": 0.9184,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 4.462915601023018,
|
|
"grad_norm": 0.11540805854208941,
|
|
"learning_rate": 5.561213753739356e-06,
|
|
"loss": 0.9281,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 4.465473145780051,
|
|
"grad_norm": 0.11318814770450754,
|
|
"learning_rate": 5.5089909024421685e-06,
|
|
"loss": 0.9327,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 4.468030690537084,
|
|
"grad_norm": 0.11689654113549015,
|
|
"learning_rate": 5.4570056650263784e-06,
|
|
"loss": 0.9196,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 4.470588235294118,
|
|
"grad_norm": 0.11410697533075874,
|
|
"learning_rate": 5.405258207316228e-06,
|
|
"loss": 0.9248,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 4.4731457800511505,
|
|
"grad_norm": 0.11032997359153394,
|
|
"learning_rate": 5.3537486943774674e-06,
|
|
"loss": 0.9278,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 4.475703324808184,
|
|
"grad_norm": 0.11362254544830364,
|
|
"learning_rate": 5.302477290516832e-06,
|
|
"loss": 0.9508,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 4.478260869565218,
|
|
"grad_norm": 0.114903272001298,
|
|
"learning_rate": 5.251444159281551e-06,
|
|
"loss": 0.9177,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 4.4808184143222505,
|
|
"grad_norm": 0.11311594662750116,
|
|
"learning_rate": 5.200649463458769e-06,
|
|
"loss": 0.9315,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 4.483375959079284,
|
|
"grad_norm": 0.1080019715192445,
|
|
"learning_rate": 5.150093365075117e-06,
|
|
"loss": 0.9423,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 4.485933503836317,
|
|
"grad_norm": 0.11099521632078349,
|
|
"learning_rate": 5.0997760253961036e-06,
|
|
"loss": 0.9432,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 4.4884910485933505,
|
|
"grad_norm": 0.1115281668793938,
|
|
"learning_rate": 5.049697604925605e-06,
|
|
"loss": 0.9201,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 4.491048593350383,
|
|
"grad_norm": 0.11559474894332394,
|
|
"learning_rate": 4.999858263405468e-06,
|
|
"loss": 0.9335,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 4.493606138107417,
|
|
"grad_norm": 0.10752469888696953,
|
|
"learning_rate": 4.9502581598148425e-06,
|
|
"loss": 0.9326,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 4.4961636828644505,
|
|
"grad_norm": 0.11823364858584975,
|
|
"learning_rate": 4.900897452369782e-06,
|
|
"loss": 0.9085,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 4.498721227621483,
|
|
"grad_norm": 0.12367303838985884,
|
|
"learning_rate": 4.851776298522692e-06,
|
|
"loss": 0.8962,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 4.501278772378517,
|
|
"grad_norm": 0.11649199224229981,
|
|
"learning_rate": 4.802894854961882e-06,
|
|
"loss": 0.945,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 4.5038363171355495,
|
|
"grad_norm": 0.10951836253938066,
|
|
"learning_rate": 4.754253277610969e-06,
|
|
"loss": 0.9362,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 4.506393861892583,
|
|
"grad_norm": 0.11824940633958814,
|
|
"learning_rate": 4.705851721628465e-06,
|
|
"loss": 0.9489,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 4.508951406649617,
|
|
"grad_norm": 0.11623129349141179,
|
|
"learning_rate": 4.6576903414072576e-06,
|
|
"loss": 0.9345,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 4.5115089514066495,
|
|
"grad_norm": 0.10609179613886349,
|
|
"learning_rate": 4.6097692905741194e-06,
|
|
"loss": 0.912,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 4.514066496163683,
|
|
"grad_norm": 0.1110236313063869,
|
|
"learning_rate": 4.562088721989178e-06,
|
|
"loss": 0.9263,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 4.516624040920716,
|
|
"grad_norm": 0.10545968825146992,
|
|
"learning_rate": 4.514648787745506e-06,
|
|
"loss": 0.9132,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 4.5191815856777495,
|
|
"grad_norm": 0.11497860724139544,
|
|
"learning_rate": 4.467449639168564e-06,
|
|
"loss": 0.9435,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 4.521739130434782,
|
|
"grad_norm": 0.11514110122345275,
|
|
"learning_rate": 4.420491426815758e-06,
|
|
"loss": 0.9405,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 4.524296675191816,
|
|
"grad_norm": 0.1123546579246865,
|
|
"learning_rate": 4.373774300475928e-06,
|
|
"loss": 0.9013,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 4.526854219948849,
|
|
"grad_norm": 0.10434900776877028,
|
|
"learning_rate": 4.327298409168928e-06,
|
|
"loss": 0.9234,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 4.529411764705882,
|
|
"grad_norm": 0.10753377323226707,
|
|
"learning_rate": 4.281063901145102e-06,
|
|
"loss": 0.9191,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 4.531969309462916,
|
|
"grad_norm": 0.10990039699899636,
|
|
"learning_rate": 4.235070923884772e-06,
|
|
"loss": 0.9218,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 4.534526854219949,
|
|
"grad_norm": 0.10914742733757979,
|
|
"learning_rate": 4.18931962409789e-06,
|
|
"loss": 0.9109,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 4.537084398976982,
|
|
"grad_norm": 0.10959258250347798,
|
|
"learning_rate": 4.143810147723448e-06,
|
|
"loss": 0.9152,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 4.539641943734015,
|
|
"grad_norm": 0.11106116826490182,
|
|
"learning_rate": 4.098542639929086e-06,
|
|
"loss": 0.9046,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 4.542199488491049,
|
|
"grad_norm": 0.10748546841476085,
|
|
"learning_rate": 4.0535172451105785e-06,
|
|
"loss": 0.9128,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 4.544757033248082,
|
|
"grad_norm": 0.11225561412585737,
|
|
"learning_rate": 4.008734106891439e-06,
|
|
"loss": 0.929,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 4.547314578005115,
|
|
"grad_norm": 0.10831404168834766,
|
|
"learning_rate": 3.964193368122384e-06,
|
|
"loss": 0.9397,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 4.549872122762149,
|
|
"grad_norm": 0.11033594472176086,
|
|
"learning_rate": 3.919895170880938e-06,
|
|
"loss": 0.9252,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 4.552429667519181,
|
|
"grad_norm": 0.10441833953450541,
|
|
"learning_rate": 3.875839656470959e-06,
|
|
"loss": 0.9182,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 4.554987212276215,
|
|
"grad_norm": 0.11080119595164395,
|
|
"learning_rate": 3.832026965422184e-06,
|
|
"loss": 0.949,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 4.557544757033249,
|
|
"grad_norm": 0.11022335632664775,
|
|
"learning_rate": 3.788457237489773e-06,
|
|
"loss": 0.9238,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 4.560102301790281,
|
|
"grad_norm": 0.11308201432747443,
|
|
"learning_rate": 3.7451306116538867e-06,
|
|
"loss": 0.9711,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 4.562659846547315,
|
|
"grad_norm": 0.1028220418076954,
|
|
"learning_rate": 3.7020472261192253e-06,
|
|
"loss": 0.9005,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 4.565217391304348,
|
|
"grad_norm": 0.10528950924867539,
|
|
"learning_rate": 3.6592072183146043e-06,
|
|
"loss": 0.9014,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 4.567774936061381,
|
|
"grad_norm": 0.10885389205625104,
|
|
"learning_rate": 3.616610724892473e-06,
|
|
"loss": 0.9105,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 4.570332480818414,
|
|
"grad_norm": 0.10574673017545647,
|
|
"learning_rate": 3.5742578817285777e-06,
|
|
"loss": 0.9193,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 4.572890025575448,
|
|
"grad_norm": 0.1117883112559058,
|
|
"learning_rate": 3.532148823921375e-06,
|
|
"loss": 0.91,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 4.57544757033248,
|
|
"grad_norm": 0.1096961353796292,
|
|
"learning_rate": 3.490283685791722e-06,
|
|
"loss": 0.9594,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 4.578005115089514,
|
|
"grad_norm": 0.11161221492802147,
|
|
"learning_rate": 3.4486626008824575e-06,
|
|
"loss": 0.9327,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 4.580562659846548,
|
|
"grad_norm": 0.10744759992585007,
|
|
"learning_rate": 3.4072857019578787e-06,
|
|
"loss": 0.9219,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 4.58312020460358,
|
|
"grad_norm": 0.10620450789029019,
|
|
"learning_rate": 3.3661531210033684e-06,
|
|
"loss": 0.9256,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 4.585677749360614,
|
|
"grad_norm": 0.11017512262461532,
|
|
"learning_rate": 3.3252649892250123e-06,
|
|
"loss": 0.9188,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 4.588235294117647,
|
|
"grad_norm": 0.10649203584062787,
|
|
"learning_rate": 3.2846214370491114e-06,
|
|
"loss": 0.9286,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 4.59079283887468,
|
|
"grad_norm": 0.10775649571843056,
|
|
"learning_rate": 3.2442225941218175e-06,
|
|
"loss": 0.91,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 4.593350383631714,
|
|
"grad_norm": 0.10474409566182012,
|
|
"learning_rate": 3.20406858930868e-06,
|
|
"loss": 0.9187,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 4.595907928388747,
|
|
"grad_norm": 0.10901379780591824,
|
|
"learning_rate": 3.164159550694299e-06,
|
|
"loss": 0.9268,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 4.59846547314578,
|
|
"grad_norm": 0.10466246579829651,
|
|
"learning_rate": 3.12449560558183e-06,
|
|
"loss": 0.9045,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 4.601023017902813,
|
|
"grad_norm": 0.10734422633494305,
|
|
"learning_rate": 3.085076880492608e-06,
|
|
"loss": 0.9131,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 4.603580562659847,
|
|
"grad_norm": 0.1102245685075459,
|
|
"learning_rate": 3.045903501165821e-06,
|
|
"loss": 0.9456,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 4.6061381074168795,
|
|
"grad_norm": 0.10268613459994491,
|
|
"learning_rate": 3.0069755925579945e-06,
|
|
"loss": 0.9068,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 4.608695652173913,
|
|
"grad_norm": 0.1041191008417218,
|
|
"learning_rate": 2.9682932788426622e-06,
|
|
"loss": 0.8961,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 4.611253196930946,
|
|
"grad_norm": 0.10864214050559602,
|
|
"learning_rate": 2.9298566834099307e-06,
|
|
"loss": 0.9196,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 4.6138107416879794,
|
|
"grad_norm": 0.10289987799334356,
|
|
"learning_rate": 2.891665928866152e-06,
|
|
"loss": 0.8891,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 4.616368286445013,
|
|
"grad_norm": 0.10627932552480018,
|
|
"learning_rate": 2.853721137033425e-06,
|
|
"loss": 0.9309,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 4.618925831202046,
|
|
"grad_norm": 0.10976448315029629,
|
|
"learning_rate": 2.816022428949303e-06,
|
|
"loss": 0.8956,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 4.621483375959079,
|
|
"grad_norm": 0.10383428088111558,
|
|
"learning_rate": 2.7785699248663946e-06,
|
|
"loss": 0.9245,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 4.624040920716112,
|
|
"grad_norm": 0.10746935820829795,
|
|
"learning_rate": 2.741363744251917e-06,
|
|
"loss": 0.9641,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 4.626598465473146,
|
|
"grad_norm": 0.1077084422715649,
|
|
"learning_rate": 2.70440400578738e-06,
|
|
"loss": 0.936,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 4.629156010230179,
|
|
"grad_norm": 0.10619050887196295,
|
|
"learning_rate": 2.6676908273681745e-06,
|
|
"loss": 0.9236,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 4.631713554987212,
|
|
"grad_norm": 0.09868786010783248,
|
|
"learning_rate": 2.63122432610321e-06,
|
|
"loss": 0.9235,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 4.634271099744246,
|
|
"grad_norm": 0.10946907000550939,
|
|
"learning_rate": 2.5950046183145315e-06,
|
|
"loss": 0.9477,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 4.6368286445012785,
|
|
"grad_norm": 0.10911271296863308,
|
|
"learning_rate": 2.559031819536966e-06,
|
|
"loss": 0.8923,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 4.639386189258312,
|
|
"grad_norm": 0.1057852003057491,
|
|
"learning_rate": 2.523306044517737e-06,
|
|
"loss": 0.9575,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 4.641943734015345,
|
|
"grad_norm": 0.10597129201414962,
|
|
"learning_rate": 2.4878274072161147e-06,
|
|
"loss": 0.9478,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 4.6445012787723785,
|
|
"grad_norm": 0.10530345780753828,
|
|
"learning_rate": 2.4525960208029843e-06,
|
|
"loss": 0.9468,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 4.647058823529412,
|
|
"grad_norm": 0.11128520568838593,
|
|
"learning_rate": 2.417611997660636e-06,
|
|
"loss": 0.9441,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 4.649616368286445,
|
|
"grad_norm": 0.10763480468498407,
|
|
"learning_rate": 2.3828754493822315e-06,
|
|
"loss": 0.9342,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 4.6521739130434785,
|
|
"grad_norm": 0.10157629367738297,
|
|
"learning_rate": 2.348386486771572e-06,
|
|
"loss": 0.9121,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 4.654731457800511,
|
|
"grad_norm": 0.10471609831813257,
|
|
"learning_rate": 2.314145219842683e-06,
|
|
"loss": 0.8991,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 4.657289002557545,
|
|
"grad_norm": 0.10785688490272143,
|
|
"learning_rate": 2.2801517578194997e-06,
|
|
"loss": 0.9023,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 4.659846547314578,
|
|
"grad_norm": 0.10437430915631776,
|
|
"learning_rate": 2.246406209135481e-06,
|
|
"loss": 0.9526,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 4.662404092071611,
|
|
"grad_norm": 0.09976754454013415,
|
|
"learning_rate": 2.212908681433286e-06,
|
|
"loss": 0.9032,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 4.664961636828645,
|
|
"grad_norm": 0.10687421431181417,
|
|
"learning_rate": 2.179659281564446e-06,
|
|
"loss": 0.9164,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 4.667519181585678,
|
|
"grad_norm": 0.10095706529924005,
|
|
"learning_rate": 2.146658115589002e-06,
|
|
"loss": 0.9191,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 4.670076726342711,
|
|
"grad_norm": 0.10132269971777201,
|
|
"learning_rate": 2.113905288775149e-06,
|
|
"loss": 0.9155,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 4.672634271099744,
|
|
"grad_norm": 0.10307251320208077,
|
|
"learning_rate": 2.0814009055989403e-06,
|
|
"loss": 0.9165,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 4.675191815856778,
|
|
"grad_norm": 0.10286096825987698,
|
|
"learning_rate": 2.0491450697439362e-06,
|
|
"loss": 0.9101,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 4.677749360613811,
|
|
"grad_norm": 0.11262366728295894,
|
|
"learning_rate": 2.017137884100855e-06,
|
|
"loss": 0.914,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 4.680306905370844,
|
|
"grad_norm": 0.11116962011162274,
|
|
"learning_rate": 1.9853794507672885e-06,
|
|
"loss": 0.9376,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 4.6828644501278776,
|
|
"grad_norm": 0.1040833044448223,
|
|
"learning_rate": 1.9538698710473404e-06,
|
|
"loss": 0.9236,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 4.68542199488491,
|
|
"grad_norm": 0.10541970140434043,
|
|
"learning_rate": 1.9226092454512945e-06,
|
|
"loss": 0.9449,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 4.687979539641944,
|
|
"grad_norm": 0.10066677117893352,
|
|
"learning_rate": 1.8915976736953157e-06,
|
|
"loss": 0.9138,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 4.690537084398977,
|
|
"grad_norm": 0.10836258727940289,
|
|
"learning_rate": 1.8608352547011722e-06,
|
|
"loss": 0.9687,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 4.69309462915601,
|
|
"grad_norm": 0.11074221672096896,
|
|
"learning_rate": 1.8303220865958194e-06,
|
|
"loss": 0.9331,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 4.695652173913043,
|
|
"grad_norm": 0.10768331106543749,
|
|
"learning_rate": 1.8000582667111777e-06,
|
|
"loss": 0.945,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 4.698209718670077,
|
|
"grad_norm": 0.11098771435258944,
|
|
"learning_rate": 1.7700438915837858e-06,
|
|
"loss": 0.9284,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 4.70076726342711,
|
|
"grad_norm": 0.10799063090442731,
|
|
"learning_rate": 1.7402790569544813e-06,
|
|
"loss": 0.9,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 4.703324808184143,
|
|
"grad_norm": 0.1063256441527157,
|
|
"learning_rate": 1.7107638577681073e-06,
|
|
"loss": 0.8962,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 4.705882352941177,
|
|
"grad_norm": 0.1040346093959911,
|
|
"learning_rate": 1.681498388173246e-06,
|
|
"loss": 0.9516,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 4.708439897698209,
|
|
"grad_norm": 0.10335093559260676,
|
|
"learning_rate": 1.652482741521837e-06,
|
|
"loss": 0.9131,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 4.710997442455243,
|
|
"grad_norm": 0.10497266871186595,
|
|
"learning_rate": 1.6237170103689547e-06,
|
|
"loss": 0.9119,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 4.713554987212277,
|
|
"grad_norm": 0.09874397507531227,
|
|
"learning_rate": 1.5952012864724898e-06,
|
|
"loss": 0.9141,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 4.716112531969309,
|
|
"grad_norm": 0.10588059236614217,
|
|
"learning_rate": 1.5669356607928188e-06,
|
|
"loss": 0.9331,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 4.718670076726343,
|
|
"grad_norm": 0.10070088788493103,
|
|
"learning_rate": 1.5389202234925837e-06,
|
|
"loss": 0.929,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 4.721227621483376,
|
|
"grad_norm": 0.10575607673396381,
|
|
"learning_rate": 1.5111550639363447e-06,
|
|
"loss": 0.9195,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 4.723785166240409,
|
|
"grad_norm": 0.1052143724728097,
|
|
"learning_rate": 1.483640270690332e-06,
|
|
"loss": 0.9236,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 4.726342710997442,
|
|
"grad_norm": 0.10525748489261051,
|
|
"learning_rate": 1.4563759315221515e-06,
|
|
"loss": 0.9515,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 4.728900255754476,
|
|
"grad_norm": 0.10259868287875906,
|
|
"learning_rate": 1.4293621334004581e-06,
|
|
"loss": 0.9522,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 4.731457800511509,
|
|
"grad_norm": 0.10136041128342929,
|
|
"learning_rate": 1.4025989624947856e-06,
|
|
"loss": 0.9207,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 4.734015345268542,
|
|
"grad_norm": 0.09781638687367422,
|
|
"learning_rate": 1.3760865041751736e-06,
|
|
"loss": 0.9226,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 4.736572890025576,
|
|
"grad_norm": 0.10175570288516775,
|
|
"learning_rate": 1.3498248430119465e-06,
|
|
"loss": 0.9141,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 4.739130434782608,
|
|
"grad_norm": 0.10920419786681472,
|
|
"learning_rate": 1.3238140627754014e-06,
|
|
"loss": 0.9544,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 4.741687979539642,
|
|
"grad_norm": 0.10426566657693524,
|
|
"learning_rate": 1.2980542464355962e-06,
|
|
"loss": 0.9492,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 4.744245524296675,
|
|
"grad_norm": 0.10161986714655702,
|
|
"learning_rate": 1.272545476162037e-06,
|
|
"loss": 0.9253,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 4.746803069053708,
|
|
"grad_norm": 0.10568474804520346,
|
|
"learning_rate": 1.2472878333234407e-06,
|
|
"loss": 0.895,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 4.749360613810742,
|
|
"grad_norm": 0.10079844884131213,
|
|
"learning_rate": 1.2222813984874749e-06,
|
|
"loss": 0.9146,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 4.751918158567775,
|
|
"grad_norm": 0.09772653572503225,
|
|
"learning_rate": 1.197526251420502e-06,
|
|
"loss": 0.9434,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 4.754475703324808,
|
|
"grad_norm": 0.10521061309223152,
|
|
"learning_rate": 1.1730224710872862e-06,
|
|
"loss": 0.917,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 4.757033248081841,
|
|
"grad_norm": 0.10102811382690155,
|
|
"learning_rate": 1.148770135650814e-06,
|
|
"loss": 0.9402,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 4.759590792838875,
|
|
"grad_norm": 0.10184925109076563,
|
|
"learning_rate": 1.1247693224719768e-06,
|
|
"loss": 0.9341,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 4.762148337595908,
|
|
"grad_norm": 0.10416605640976224,
|
|
"learning_rate": 1.1010201081093653e-06,
|
|
"loss": 0.9258,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 4.764705882352941,
|
|
"grad_norm": 0.10242702305319981,
|
|
"learning_rate": 1.0775225683190027e-06,
|
|
"loss": 0.9401,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 4.767263427109975,
|
|
"grad_norm": 0.1054355472195325,
|
|
"learning_rate": 1.0542767780541242e-06,
|
|
"loss": 0.9452,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 4.7698209718670075,
|
|
"grad_norm": 0.09850748287302327,
|
|
"learning_rate": 1.0312828114649175e-06,
|
|
"loss": 0.9147,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 4.772378516624041,
|
|
"grad_norm": 0.10426914175715249,
|
|
"learning_rate": 1.008540741898285e-06,
|
|
"loss": 0.9364,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 4.774936061381074,
|
|
"grad_norm": 0.10421190980413071,
|
|
"learning_rate": 9.860506418976556e-07,
|
|
"loss": 0.9155,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 4.7774936061381075,
|
|
"grad_norm": 0.09974968560728949,
|
|
"learning_rate": 9.638125832026658e-07,
|
|
"loss": 0.9164,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 4.78005115089514,
|
|
"grad_norm": 0.10323506252287525,
|
|
"learning_rate": 9.418266367490347e-07,
|
|
"loss": 0.9294,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.10057988567304277,
|
|
"learning_rate": 9.200928726682456e-07,
|
|
"loss": 0.9198,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 4.7851662404092075,
|
|
"grad_norm": 0.10109533674227822,
|
|
"learning_rate": 8.986113602873758e-07,
|
|
"loss": 0.9696,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 4.78772378516624,
|
|
"grad_norm": 0.10248654252247842,
|
|
"learning_rate": 8.773821681288752e-07,
|
|
"loss": 0.9059,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 4.790281329923274,
|
|
"grad_norm": 0.10623698814695832,
|
|
"learning_rate": 8.564053639103087e-07,
|
|
"loss": 0.9104,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 4.792838874680307,
|
|
"grad_norm": 0.10184589368398628,
|
|
"learning_rate": 8.356810145441874e-07,
|
|
"loss": 0.8999,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 4.79539641943734,
|
|
"grad_norm": 0.09973933906653507,
|
|
"learning_rate": 8.152091861377198e-07,
|
|
"loss": 0.9281,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 4.797953964194374,
|
|
"grad_norm": 0.0965602895068992,
|
|
"learning_rate": 7.949899439926345e-07,
|
|
"loss": 0.8972,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 4.8005115089514065,
|
|
"grad_norm": 0.09817984542309073,
|
|
"learning_rate": 7.750233526049222e-07,
|
|
"loss": 0.9374,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 4.80306905370844,
|
|
"grad_norm": 0.10767556941660049,
|
|
"learning_rate": 7.553094756646761e-07,
|
|
"loss": 0.922,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 4.805626598465473,
|
|
"grad_norm": 0.09968854723854502,
|
|
"learning_rate": 7.358483760558877e-07,
|
|
"loss": 0.9092,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 4.8081841432225065,
|
|
"grad_norm": 0.10013368895859236,
|
|
"learning_rate": 7.166401158561886e-07,
|
|
"loss": 0.9053,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 4.810741687979539,
|
|
"grad_norm": 0.10050188953527933,
|
|
"learning_rate": 6.976847563367539e-07,
|
|
"loss": 0.9342,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 4.813299232736573,
|
|
"grad_norm": 0.10572001540704473,
|
|
"learning_rate": 6.789823579619992e-07,
|
|
"loss": 0.9055,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 4.8158567774936065,
|
|
"grad_norm": 0.0958884248641111,
|
|
"learning_rate": 6.605329803894389e-07,
|
|
"loss": 0.8971,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 4.818414322250639,
|
|
"grad_norm": 0.10042711105691594,
|
|
"learning_rate": 6.423366824695265e-07,
|
|
"loss": 0.9176,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 4.820971867007673,
|
|
"grad_norm": 0.10511225981510647,
|
|
"learning_rate": 6.243935222454145e-07,
|
|
"loss": 0.9176,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 4.823529411764706,
|
|
"grad_norm": 0.09696941259664335,
|
|
"learning_rate": 6.067035569527768e-07,
|
|
"loss": 0.9336,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 4.826086956521739,
|
|
"grad_norm": 0.09743670957958701,
|
|
"learning_rate": 5.89266843019658e-07,
|
|
"loss": 0.9335,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 4.828644501278772,
|
|
"grad_norm": 0.10334868098940422,
|
|
"learning_rate": 5.720834360662597e-07,
|
|
"loss": 0.9302,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 4.831202046035806,
|
|
"grad_norm": 0.10567530011947436,
|
|
"learning_rate": 5.551533909047812e-07,
|
|
"loss": 0.9173,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 4.833759590792839,
|
|
"grad_norm": 0.10109569243664909,
|
|
"learning_rate": 5.384767615392328e-07,
|
|
"loss": 0.8973,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 4.836317135549872,
|
|
"grad_norm": 0.10107099176370515,
|
|
"learning_rate": 5.220536011652933e-07,
|
|
"loss": 0.9327,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 4.838874680306906,
|
|
"grad_norm": 0.09592817542499839,
|
|
"learning_rate": 5.058839621700973e-07,
|
|
"loss": 0.8986,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 4.841432225063938,
|
|
"grad_norm": 0.10402134439975212,
|
|
"learning_rate": 4.899678961320842e-07,
|
|
"loss": 0.8783,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 4.843989769820972,
|
|
"grad_norm": 0.09879349396951775,
|
|
"learning_rate": 4.743054538208558e-07,
|
|
"loss": 0.9265,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 4.846547314578006,
|
|
"grad_norm": 0.10801219003494308,
|
|
"learning_rate": 4.5889668519698117e-07,
|
|
"loss": 0.917,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 4.849104859335038,
|
|
"grad_norm": 0.10336628048777474,
|
|
"learning_rate": 4.437416394118721e-07,
|
|
"loss": 0.9475,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 4.851662404092072,
|
|
"grad_norm": 0.09915519846574018,
|
|
"learning_rate": 4.2884036480757896e-07,
|
|
"loss": 0.9136,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 4.854219948849105,
|
|
"grad_norm": 0.10488853611936978,
|
|
"learning_rate": 4.1419290891669293e-07,
|
|
"loss": 0.9276,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 4.856777493606138,
|
|
"grad_norm": 0.10257283710076046,
|
|
"learning_rate": 3.997993184621418e-07,
|
|
"loss": 0.9584,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 4.859335038363171,
|
|
"grad_norm": 0.10288770850501508,
|
|
"learning_rate": 3.856596393570744e-07,
|
|
"loss": 0.9128,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 4.861892583120205,
|
|
"grad_norm": 0.09729119851077626,
|
|
"learning_rate": 3.717739167047185e-07,
|
|
"loss": 0.912,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 4.864450127877237,
|
|
"grad_norm": 0.1024901619430387,
|
|
"learning_rate": 3.581421947982122e-07,
|
|
"loss": 0.9166,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 4.867007672634271,
|
|
"grad_norm": 0.10281823220549692,
|
|
"learning_rate": 3.447645171204528e-07,
|
|
"loss": 0.9308,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 4.869565217391305,
|
|
"grad_norm": 0.1014220238267167,
|
|
"learning_rate": 3.316409263440168e-07,
|
|
"loss": 0.9401,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 4.872122762148337,
|
|
"grad_norm": 0.10082233886495114,
|
|
"learning_rate": 3.1877146433095584e-07,
|
|
"loss": 0.9349,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 4.874680306905371,
|
|
"grad_norm": 0.09966232794121334,
|
|
"learning_rate": 3.0615617213271664e-07,
|
|
"loss": 0.9218,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 4.877237851662404,
|
|
"grad_norm": 0.09941244859685047,
|
|
"learning_rate": 2.937950899899633e-07,
|
|
"loss": 0.9278,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 4.879795396419437,
|
|
"grad_norm": 0.09951897237383148,
|
|
"learning_rate": 2.816882573324886e-07,
|
|
"loss": 0.949,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 4.882352941176471,
|
|
"grad_norm": 0.10401741016384587,
|
|
"learning_rate": 2.6983571277907184e-07,
|
|
"loss": 0.9563,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 4.884910485933504,
|
|
"grad_norm": 0.09725714975876674,
|
|
"learning_rate": 2.582374941373456e-07,
|
|
"loss": 0.9211,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 4.887468030690537,
|
|
"grad_norm": 0.10133318561817573,
|
|
"learning_rate": 2.468936384036891e-07,
|
|
"loss": 0.9013,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 4.89002557544757,
|
|
"grad_norm": 0.10119524228199774,
|
|
"learning_rate": 2.3580418176311293e-07,
|
|
"loss": 0.9417,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 4.892583120204604,
|
|
"grad_norm": 0.09951712783614965,
|
|
"learning_rate": 2.2496915958913458e-07,
|
|
"loss": 0.9253,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 4.8951406649616365,
|
|
"grad_norm": 0.0988058097334845,
|
|
"learning_rate": 2.143886064436629e-07,
|
|
"loss": 0.9344,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 4.89769820971867,
|
|
"grad_norm": 0.0988533205503812,
|
|
"learning_rate": 2.0406255607688274e-07,
|
|
"loss": 0.9258,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 4.900255754475703,
|
|
"grad_norm": 0.09899535759420186,
|
|
"learning_rate": 1.9399104142719283e-07,
|
|
"loss": 0.9484,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 4.9028132992327365,
|
|
"grad_norm": 0.10153569163687459,
|
|
"learning_rate": 1.8417409462102798e-07,
|
|
"loss": 0.9073,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 4.90537084398977,
|
|
"grad_norm": 0.09957601677253938,
|
|
"learning_rate": 1.746117469728148e-07,
|
|
"loss": 0.8841,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 4.907928388746803,
|
|
"grad_norm": 0.10184723073884586,
|
|
"learning_rate": 1.6530402898484733e-07,
|
|
"loss": 0.9525,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 4.910485933503836,
|
|
"grad_norm": 0.09694091907819868,
|
|
"learning_rate": 1.5625097034719815e-07,
|
|
"loss": 0.9193,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 4.913043478260869,
|
|
"grad_norm": 0.10383046531826044,
|
|
"learning_rate": 1.474525999375942e-07,
|
|
"loss": 0.9339,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 4.915601023017903,
|
|
"grad_norm": 0.09727962611523398,
|
|
"learning_rate": 1.3890894582138103e-07,
|
|
"loss": 0.9271,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 4.918158567774936,
|
|
"grad_norm": 0.10045856203495888,
|
|
"learning_rate": 1.3062003525138089e-07,
|
|
"loss": 0.9129,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 4.920716112531969,
|
|
"grad_norm": 0.09953247096750498,
|
|
"learning_rate": 1.225858946678393e-07,
|
|
"loss": 0.9149,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 4.923273657289003,
|
|
"grad_norm": 0.10381806462155738,
|
|
"learning_rate": 1.1480654969833638e-07,
|
|
"loss": 0.9473,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 4.9258312020460355,
|
|
"grad_norm": 0.09951540982333777,
|
|
"learning_rate": 1.0728202515766228e-07,
|
|
"loss": 0.9452,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 4.928388746803069,
|
|
"grad_norm": 0.09714908717583805,
|
|
"learning_rate": 1.0001234504779966e-07,
|
|
"loss": 0.9478,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 4.930946291560103,
|
|
"grad_norm": 0.10355673013634514,
|
|
"learning_rate": 9.299753255781696e-08,
|
|
"loss": 0.9113,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 4.9335038363171355,
|
|
"grad_norm": 0.1010600576834511,
|
|
"learning_rate": 8.623761006379738e-08,
|
|
"loss": 0.9322,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 4.936061381074169,
|
|
"grad_norm": 0.09937740112494577,
|
|
"learning_rate": 7.973259912875897e-08,
|
|
"loss": 0.9529,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 4.938618925831202,
|
|
"grad_norm": 0.10172138015837517,
|
|
"learning_rate": 7.348252050261018e-08,
|
|
"loss": 0.9516,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 4.9411764705882355,
|
|
"grad_norm": 0.10153203845561144,
|
|
"learning_rate": 6.748739412205218e-08,
|
|
"loss": 0.9327,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 4.943734015345268,
|
|
"grad_norm": 0.09630467933849142,
|
|
"learning_rate": 6.174723911053449e-08,
|
|
"loss": 0.9033,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 4.946291560102302,
|
|
"grad_norm": 0.09792982830145779,
|
|
"learning_rate": 5.6262073778192705e-08,
|
|
"loss": 0.9289,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 4.948849104859335,
|
|
"grad_norm": 0.10137971801200332,
|
|
"learning_rate": 5.1031915621795325e-08,
|
|
"loss": 0.9127,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 4.951406649616368,
|
|
"grad_norm": 0.09867014858433792,
|
|
"learning_rate": 4.605678132467262e-08,
|
|
"loss": 0.9195,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 4.953964194373402,
|
|
"grad_norm": 0.09945447399480298,
|
|
"learning_rate": 4.133668675666336e-08,
|
|
"loss": 0.9235,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 4.956521739130435,
|
|
"grad_norm": 0.09740241154451518,
|
|
"learning_rate": 3.687164697408818e-08,
|
|
"loss": 0.8983,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 4.959079283887468,
|
|
"grad_norm": 0.10216904139394242,
|
|
"learning_rate": 3.266167621967853e-08,
|
|
"loss": 0.9333,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 4.961636828644501,
|
|
"grad_norm": 1.7447830173428402,
|
|
"learning_rate": 2.8706787922541112e-08,
|
|
"loss": 0.9677,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 4.964194373401535,
|
|
"grad_norm": 0.10248140850999501,
|
|
"learning_rate": 2.5006994698095754e-08,
|
|
"loss": 0.9205,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 4.966751918158568,
|
|
"grad_norm": 0.10291780599089813,
|
|
"learning_rate": 2.156230834808426e-08,
|
|
"loss": 0.9314,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 4.969309462915601,
|
|
"grad_norm": 0.09792527077121264,
|
|
"learning_rate": 1.837273986046384e-08,
|
|
"loss": 0.9289,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 4.971867007672635,
|
|
"grad_norm": 0.0960164691356107,
|
|
"learning_rate": 1.5438299409433755e-08,
|
|
"loss": 0.9013,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 4.974424552429667,
|
|
"grad_norm": 0.09979959822032446,
|
|
"learning_rate": 1.2758996355373144e-08,
|
|
"loss": 0.9203,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 4.976982097186701,
|
|
"grad_norm": 0.10827315260460384,
|
|
"learning_rate": 1.0334839244805495e-08,
|
|
"loss": 0.9541,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 4.979539641943734,
|
|
"grad_norm": 0.0988359933652592,
|
|
"learning_rate": 8.165835810389766e-09,
|
|
"loss": 0.9064,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 4.982097186700767,
|
|
"grad_norm": 0.09820054319763678,
|
|
"learning_rate": 6.251992970875975e-09,
|
|
"loss": 0.9214,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 4.9846547314578,
|
|
"grad_norm": 0.10015641951197356,
|
|
"learning_rate": 4.5933168311140805e-09,
|
|
"loss": 0.9461,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 4.987212276214834,
|
|
"grad_norm": 0.10040227257081992,
|
|
"learning_rate": 3.1898126820006924e-09,
|
|
"loss": 0.9465,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 4.989769820971867,
|
|
"grad_norm": 0.09609050872126598,
|
|
"learning_rate": 2.041485000479071e-09,
|
|
"loss": 0.9108,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 4.9923273657289,
|
|
"grad_norm": 0.09913441529294063,
|
|
"learning_rate": 1.148337449521364e-09,
|
|
"loss": 0.9356,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 4.994884910485934,
|
|
"grad_norm": 0.09800757849537761,
|
|
"learning_rate": 5.103728781197248e-10,
|
|
"loss": 0.9002,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 4.997442455242966,
|
|
"grad_norm": 0.09827002033578132,
|
|
"learning_rate": 1.275933212774305e-10,
|
|
"loss": 0.9081,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.10411131044626397,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.9254,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"step": 1955,
|
|
"total_flos": 7122204608430080.0,
|
|
"train_loss": 1.0036099467436066,
|
|
"train_runtime": 36219.8634,
|
|
"train_samples_per_second": 13.805,
|
|
"train_steps_per_second": 0.054
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 1955,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 7122204608430080.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|