Model: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz256_lr2e5_epochs7 Source: Original Platform
19202 lines
466 KiB
JSON
19202 lines
466 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 2737,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0025575447570332483,
|
|
"grad_norm": 2.9635716191319874,
|
|
"learning_rate": 7.299270072992701e-08,
|
|
"loss": 1.5218,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.005115089514066497,
|
|
"grad_norm": 2.9570039035815743,
|
|
"learning_rate": 1.4598540145985402e-07,
|
|
"loss": 1.4755,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0076726342710997444,
|
|
"grad_norm": 3.017937072257941,
|
|
"learning_rate": 2.1897810218978106e-07,
|
|
"loss": 1.4935,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.010230179028132993,
|
|
"grad_norm": 2.960891290072101,
|
|
"learning_rate": 2.9197080291970804e-07,
|
|
"loss": 1.4813,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.01278772378516624,
|
|
"grad_norm": 2.976019939455323,
|
|
"learning_rate": 3.6496350364963505e-07,
|
|
"loss": 1.4941,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.015345268542199489,
|
|
"grad_norm": 3.0149006457959886,
|
|
"learning_rate": 4.379562043795621e-07,
|
|
"loss": 1.5066,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.017902813299232736,
|
|
"grad_norm": 2.9237260833122214,
|
|
"learning_rate": 5.109489051094891e-07,
|
|
"loss": 1.478,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.020460358056265986,
|
|
"grad_norm": 2.9640674426484077,
|
|
"learning_rate": 5.839416058394161e-07,
|
|
"loss": 1.4882,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.023017902813299233,
|
|
"grad_norm": 2.883080870578686,
|
|
"learning_rate": 6.569343065693432e-07,
|
|
"loss": 1.5219,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.02557544757033248,
|
|
"grad_norm": 2.8912016510708844,
|
|
"learning_rate": 7.299270072992701e-07,
|
|
"loss": 1.5149,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.028132992327365727,
|
|
"grad_norm": 2.8525137837011734,
|
|
"learning_rate": 8.029197080291971e-07,
|
|
"loss": 1.5065,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.030690537084398978,
|
|
"grad_norm": 2.6980401328828734,
|
|
"learning_rate": 8.759124087591242e-07,
|
|
"loss": 1.47,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.03324808184143223,
|
|
"grad_norm": 2.6499759522230795,
|
|
"learning_rate": 9.489051094890511e-07,
|
|
"loss": 1.5126,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.03580562659846547,
|
|
"grad_norm": 2.646192888612826,
|
|
"learning_rate": 1.0218978102189781e-06,
|
|
"loss": 1.4605,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.03836317135549872,
|
|
"grad_norm": 2.584050631976731,
|
|
"learning_rate": 1.0948905109489052e-06,
|
|
"loss": 1.4985,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.04092071611253197,
|
|
"grad_norm": 2.3627571129305425,
|
|
"learning_rate": 1.1678832116788322e-06,
|
|
"loss": 1.4523,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.043478260869565216,
|
|
"grad_norm": 2.052553239445229,
|
|
"learning_rate": 1.2408759124087592e-06,
|
|
"loss": 1.4734,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.04603580562659847,
|
|
"grad_norm": 2.0014770644457442,
|
|
"learning_rate": 1.3138686131386864e-06,
|
|
"loss": 1.479,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.04859335038363171,
|
|
"grad_norm": 1.9847838678835794,
|
|
"learning_rate": 1.3868613138686132e-06,
|
|
"loss": 1.4702,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.05115089514066496,
|
|
"grad_norm": 1.9111274600693329,
|
|
"learning_rate": 1.4598540145985402e-06,
|
|
"loss": 1.4617,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.05370843989769821,
|
|
"grad_norm": 1.870897574722989,
|
|
"learning_rate": 1.5328467153284674e-06,
|
|
"loss": 1.4463,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.056265984654731455,
|
|
"grad_norm": 1.4296640142109796,
|
|
"learning_rate": 1.6058394160583942e-06,
|
|
"loss": 1.4599,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.058823529411764705,
|
|
"grad_norm": 1.4790607914654283,
|
|
"learning_rate": 1.6788321167883212e-06,
|
|
"loss": 1.4157,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.061381074168797956,
|
|
"grad_norm": 1.6141927865863235,
|
|
"learning_rate": 1.7518248175182485e-06,
|
|
"loss": 1.4439,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.0639386189258312,
|
|
"grad_norm": 1.599753856171314,
|
|
"learning_rate": 1.8248175182481753e-06,
|
|
"loss": 1.4218,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.06649616368286446,
|
|
"grad_norm": 1.4847704184111228,
|
|
"learning_rate": 1.8978102189781023e-06,
|
|
"loss": 1.4269,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.06905370843989769,
|
|
"grad_norm": 1.3521166489305316,
|
|
"learning_rate": 1.9708029197080293e-06,
|
|
"loss": 1.4158,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.07161125319693094,
|
|
"grad_norm": 1.2579545228076663,
|
|
"learning_rate": 2.0437956204379563e-06,
|
|
"loss": 1.4405,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.0741687979539642,
|
|
"grad_norm": 1.009619956209423,
|
|
"learning_rate": 2.1167883211678833e-06,
|
|
"loss": 1.4151,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.07672634271099744,
|
|
"grad_norm": 1.1838282966029092,
|
|
"learning_rate": 2.1897810218978103e-06,
|
|
"loss": 1.419,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0792838874680307,
|
|
"grad_norm": 1.2384598412642265,
|
|
"learning_rate": 2.2627737226277373e-06,
|
|
"loss": 1.412,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.08184143222506395,
|
|
"grad_norm": 1.1754182466507677,
|
|
"learning_rate": 2.3357664233576643e-06,
|
|
"loss": 1.3866,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.08439897698209718,
|
|
"grad_norm": 1.0614055850869524,
|
|
"learning_rate": 2.4087591240875918e-06,
|
|
"loss": 1.4127,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.08695652173913043,
|
|
"grad_norm": 1.0576160445761484,
|
|
"learning_rate": 2.4817518248175183e-06,
|
|
"loss": 1.4281,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.08951406649616368,
|
|
"grad_norm": 1.0117252925259892,
|
|
"learning_rate": 2.5547445255474458e-06,
|
|
"loss": 1.3731,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.09207161125319693,
|
|
"grad_norm": 0.9022593000895403,
|
|
"learning_rate": 2.627737226277373e-06,
|
|
"loss": 1.3866,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.09462915601023018,
|
|
"grad_norm": 0.8340755212001483,
|
|
"learning_rate": 2.7007299270072994e-06,
|
|
"loss": 1.4026,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.09718670076726342,
|
|
"grad_norm": 0.7261384916519003,
|
|
"learning_rate": 2.7737226277372264e-06,
|
|
"loss": 1.372,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.09974424552429667,
|
|
"grad_norm": 0.6484685338282444,
|
|
"learning_rate": 2.8467153284671534e-06,
|
|
"loss": 1.3914,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.10230179028132992,
|
|
"grad_norm": 0.5852202685330168,
|
|
"learning_rate": 2.9197080291970804e-06,
|
|
"loss": 1.328,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.10485933503836317,
|
|
"grad_norm": 0.7534890308070339,
|
|
"learning_rate": 2.992700729927008e-06,
|
|
"loss": 1.3525,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.10741687979539642,
|
|
"grad_norm": 0.851146761403294,
|
|
"learning_rate": 3.065693430656935e-06,
|
|
"loss": 1.3478,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.10997442455242967,
|
|
"grad_norm": 0.7827817647570426,
|
|
"learning_rate": 3.1386861313868614e-06,
|
|
"loss": 1.3191,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.11253196930946291,
|
|
"grad_norm": 0.664689408470926,
|
|
"learning_rate": 3.2116788321167884e-06,
|
|
"loss": 1.3222,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.11508951406649616,
|
|
"grad_norm": 0.5490554622557167,
|
|
"learning_rate": 3.2846715328467155e-06,
|
|
"loss": 1.3238,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 0.5108750790400686,
|
|
"learning_rate": 3.3576642335766425e-06,
|
|
"loss": 1.3436,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.12020460358056266,
|
|
"grad_norm": 0.5445952611951665,
|
|
"learning_rate": 3.43065693430657e-06,
|
|
"loss": 1.3458,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.12276214833759591,
|
|
"grad_norm": 0.5697581064671751,
|
|
"learning_rate": 3.503649635036497e-06,
|
|
"loss": 1.3132,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.12531969309462915,
|
|
"grad_norm": 0.578411430323597,
|
|
"learning_rate": 3.576642335766424e-06,
|
|
"loss": 1.3268,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.1278772378516624,
|
|
"grad_norm": 0.5601792557806415,
|
|
"learning_rate": 3.6496350364963505e-06,
|
|
"loss": 1.2966,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.13043478260869565,
|
|
"grad_norm": 0.5306373264311374,
|
|
"learning_rate": 3.7226277372262775e-06,
|
|
"loss": 1.3004,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.1329923273657289,
|
|
"grad_norm": 0.4661660429983145,
|
|
"learning_rate": 3.7956204379562045e-06,
|
|
"loss": 1.2812,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.13554987212276215,
|
|
"grad_norm": 0.42244352277225405,
|
|
"learning_rate": 3.868613138686132e-06,
|
|
"loss": 1.2774,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.13810741687979539,
|
|
"grad_norm": 0.39129018686480066,
|
|
"learning_rate": 3.9416058394160585e-06,
|
|
"loss": 1.3168,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.14066496163682865,
|
|
"grad_norm": 0.3485115346190062,
|
|
"learning_rate": 4.014598540145986e-06,
|
|
"loss": 1.3283,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.1432225063938619,
|
|
"grad_norm": 0.3976730412907507,
|
|
"learning_rate": 4.0875912408759126e-06,
|
|
"loss": 1.3135,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.14578005115089515,
|
|
"grad_norm": 0.4153119646875293,
|
|
"learning_rate": 4.16058394160584e-06,
|
|
"loss": 1.2989,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.1483375959079284,
|
|
"grad_norm": 0.42065859451204163,
|
|
"learning_rate": 4.233576642335767e-06,
|
|
"loss": 1.3137,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.15089514066496162,
|
|
"grad_norm": 0.35014086468112804,
|
|
"learning_rate": 4.306569343065693e-06,
|
|
"loss": 1.2743,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.1534526854219949,
|
|
"grad_norm": 0.32228235531527744,
|
|
"learning_rate": 4.379562043795621e-06,
|
|
"loss": 1.2987,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.15601023017902813,
|
|
"grad_norm": 0.33710245284823415,
|
|
"learning_rate": 4.452554744525548e-06,
|
|
"loss": 1.2869,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.1585677749360614,
|
|
"grad_norm": 0.34426470471374965,
|
|
"learning_rate": 4.525547445255475e-06,
|
|
"loss": 1.3199,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.16112531969309463,
|
|
"grad_norm": 0.334431341569014,
|
|
"learning_rate": 4.598540145985402e-06,
|
|
"loss": 1.2972,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.1636828644501279,
|
|
"grad_norm": 0.33024914298061436,
|
|
"learning_rate": 4.671532846715329e-06,
|
|
"loss": 1.2928,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.16624040920716113,
|
|
"grad_norm": 0.3058316278280544,
|
|
"learning_rate": 4.744525547445255e-06,
|
|
"loss": 1.2861,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.16879795396419436,
|
|
"grad_norm": 0.292869194083437,
|
|
"learning_rate": 4.8175182481751835e-06,
|
|
"loss": 1.2461,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.17135549872122763,
|
|
"grad_norm": 0.24971695111221698,
|
|
"learning_rate": 4.89051094890511e-06,
|
|
"loss": 1.2661,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.17391304347826086,
|
|
"grad_norm": 0.26954765363549843,
|
|
"learning_rate": 4.963503649635037e-06,
|
|
"loss": 1.2467,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.17647058823529413,
|
|
"grad_norm": 0.25356010222488795,
|
|
"learning_rate": 5.036496350364964e-06,
|
|
"loss": 1.2303,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.17902813299232737,
|
|
"grad_norm": 0.2339589024717998,
|
|
"learning_rate": 5.1094890510948916e-06,
|
|
"loss": 1.2399,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1815856777493606,
|
|
"grad_norm": 0.22823462929167784,
|
|
"learning_rate": 5.182481751824818e-06,
|
|
"loss": 1.2498,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.18414322250639387,
|
|
"grad_norm": 0.24948571250389207,
|
|
"learning_rate": 5.255474452554746e-06,
|
|
"loss": 1.2643,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.1867007672634271,
|
|
"grad_norm": 0.2298632960982471,
|
|
"learning_rate": 5.328467153284672e-06,
|
|
"loss": 1.2958,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.18925831202046037,
|
|
"grad_norm": 0.22223759951095107,
|
|
"learning_rate": 5.401459854014599e-06,
|
|
"loss": 1.2422,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.1918158567774936,
|
|
"grad_norm": 0.23124679789968172,
|
|
"learning_rate": 5.474452554744526e-06,
|
|
"loss": 1.2407,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.19437340153452684,
|
|
"grad_norm": 0.2221181062125986,
|
|
"learning_rate": 5.547445255474453e-06,
|
|
"loss": 1.2456,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1969309462915601,
|
|
"grad_norm": 0.1998449044080008,
|
|
"learning_rate": 5.62043795620438e-06,
|
|
"loss": 1.2514,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.19948849104859334,
|
|
"grad_norm": 0.19727362882566524,
|
|
"learning_rate": 5.693430656934307e-06,
|
|
"loss": 1.2335,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.2020460358056266,
|
|
"grad_norm": 0.20659124094509168,
|
|
"learning_rate": 5.766423357664233e-06,
|
|
"loss": 1.2276,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.20460358056265984,
|
|
"grad_norm": 0.22959713985782182,
|
|
"learning_rate": 5.839416058394161e-06,
|
|
"loss": 1.2435,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.2071611253196931,
|
|
"grad_norm": 0.19904222253631854,
|
|
"learning_rate": 5.912408759124088e-06,
|
|
"loss": 1.2266,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.20971867007672634,
|
|
"grad_norm": 0.19344151897086864,
|
|
"learning_rate": 5.985401459854016e-06,
|
|
"loss": 1.2261,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.21227621483375958,
|
|
"grad_norm": 0.19302417663791685,
|
|
"learning_rate": 6.058394160583942e-06,
|
|
"loss": 1.2384,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.21483375959079284,
|
|
"grad_norm": 0.21396454463521547,
|
|
"learning_rate": 6.13138686131387e-06,
|
|
"loss": 1.235,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.21739130434782608,
|
|
"grad_norm": 0.1913859035516872,
|
|
"learning_rate": 6.204379562043796e-06,
|
|
"loss": 1.2838,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.21994884910485935,
|
|
"grad_norm": 0.17510278677847252,
|
|
"learning_rate": 6.277372262773723e-06,
|
|
"loss": 1.2358,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.22250639386189258,
|
|
"grad_norm": 0.19863525132725016,
|
|
"learning_rate": 6.35036496350365e-06,
|
|
"loss": 1.2419,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.22506393861892582,
|
|
"grad_norm": 0.19478563516185365,
|
|
"learning_rate": 6.423357664233577e-06,
|
|
"loss": 1.2641,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.22762148337595908,
|
|
"grad_norm": 0.17875499154062388,
|
|
"learning_rate": 6.496350364963504e-06,
|
|
"loss": 1.2239,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.23017902813299232,
|
|
"grad_norm": 0.1751251099110654,
|
|
"learning_rate": 6.569343065693431e-06,
|
|
"loss": 1.2524,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.23273657289002558,
|
|
"grad_norm": 0.1869390091762672,
|
|
"learning_rate": 6.6423357664233575e-06,
|
|
"loss": 1.2494,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 0.17676974553290642,
|
|
"learning_rate": 6.715328467153285e-06,
|
|
"loss": 1.2537,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.23785166240409208,
|
|
"grad_norm": 0.1806189007928041,
|
|
"learning_rate": 6.7883211678832115e-06,
|
|
"loss": 1.2349,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.24040920716112532,
|
|
"grad_norm": 0.18193990233718968,
|
|
"learning_rate": 6.86131386861314e-06,
|
|
"loss": 1.2583,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.24296675191815856,
|
|
"grad_norm": 0.19012671201766562,
|
|
"learning_rate": 6.934306569343066e-06,
|
|
"loss": 1.2029,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.24552429667519182,
|
|
"grad_norm": 0.16857838785815454,
|
|
"learning_rate": 7.007299270072994e-06,
|
|
"loss": 1.2423,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.24808184143222506,
|
|
"grad_norm": 0.18952785901605423,
|
|
"learning_rate": 7.08029197080292e-06,
|
|
"loss": 1.2394,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.2506393861892583,
|
|
"grad_norm": 0.18078294692872968,
|
|
"learning_rate": 7.153284671532848e-06,
|
|
"loss": 1.2122,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.2531969309462916,
|
|
"grad_norm": 0.17487368586515217,
|
|
"learning_rate": 7.2262773722627744e-06,
|
|
"loss": 1.2117,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.2557544757033248,
|
|
"grad_norm": 0.17732077203789362,
|
|
"learning_rate": 7.299270072992701e-06,
|
|
"loss": 1.2041,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.25831202046035806,
|
|
"grad_norm": 0.18421840800752218,
|
|
"learning_rate": 7.3722627737226285e-06,
|
|
"loss": 1.2231,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.2608695652173913,
|
|
"grad_norm": 0.1768000076239069,
|
|
"learning_rate": 7.445255474452555e-06,
|
|
"loss": 1.2325,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.26342710997442453,
|
|
"grad_norm": 0.16984854034130697,
|
|
"learning_rate": 7.5182481751824825e-06,
|
|
"loss": 1.2026,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.2659846547314578,
|
|
"grad_norm": 0.16277787684968492,
|
|
"learning_rate": 7.591240875912409e-06,
|
|
"loss": 1.193,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.26854219948849106,
|
|
"grad_norm": 0.17357111549131551,
|
|
"learning_rate": 7.664233576642336e-06,
|
|
"loss": 1.2009,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.2710997442455243,
|
|
"grad_norm": 0.1800163972852127,
|
|
"learning_rate": 7.737226277372264e-06,
|
|
"loss": 1.1909,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.27365728900255754,
|
|
"grad_norm": 0.1681574320113801,
|
|
"learning_rate": 7.810218978102191e-06,
|
|
"loss": 1.2194,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.27621483375959077,
|
|
"grad_norm": 0.16885285400717157,
|
|
"learning_rate": 7.883211678832117e-06,
|
|
"loss": 1.1985,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.27877237851662406,
|
|
"grad_norm": 0.17914067468814437,
|
|
"learning_rate": 7.956204379562045e-06,
|
|
"loss": 1.2218,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.2813299232736573,
|
|
"grad_norm": 0.16706925568533235,
|
|
"learning_rate": 8.029197080291972e-06,
|
|
"loss": 1.222,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.28388746803069054,
|
|
"grad_norm": 0.1641264132835115,
|
|
"learning_rate": 8.1021897810219e-06,
|
|
"loss": 1.2242,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.2864450127877238,
|
|
"grad_norm": 0.18443514799994437,
|
|
"learning_rate": 8.175182481751825e-06,
|
|
"loss": 1.2118,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.289002557544757,
|
|
"grad_norm": 0.17675822272527503,
|
|
"learning_rate": 8.248175182481753e-06,
|
|
"loss": 1.1849,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.2915601023017903,
|
|
"grad_norm": 0.1880451995042565,
|
|
"learning_rate": 8.32116788321168e-06,
|
|
"loss": 1.2103,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 0.16598375442205784,
|
|
"learning_rate": 8.394160583941606e-06,
|
|
"loss": 1.1937,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.2966751918158568,
|
|
"grad_norm": 0.190898911263414,
|
|
"learning_rate": 8.467153284671533e-06,
|
|
"loss": 1.2028,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.29923273657289,
|
|
"grad_norm": 0.18881369445042054,
|
|
"learning_rate": 8.54014598540146e-06,
|
|
"loss": 1.1976,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.30179028132992325,
|
|
"grad_norm": 0.20907040258575316,
|
|
"learning_rate": 8.613138686131386e-06,
|
|
"loss": 1.2476,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.30434782608695654,
|
|
"grad_norm": 0.1704000017989476,
|
|
"learning_rate": 8.686131386861315e-06,
|
|
"loss": 1.2087,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.3069053708439898,
|
|
"grad_norm": 0.19455649517228424,
|
|
"learning_rate": 8.759124087591241e-06,
|
|
"loss": 1.214,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.309462915601023,
|
|
"grad_norm": 0.18574238206663096,
|
|
"learning_rate": 8.832116788321169e-06,
|
|
"loss": 1.2276,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.31202046035805625,
|
|
"grad_norm": 0.19290426166252228,
|
|
"learning_rate": 8.905109489051096e-06,
|
|
"loss": 1.1805,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.3145780051150895,
|
|
"grad_norm": 0.1995598501375803,
|
|
"learning_rate": 8.978102189781024e-06,
|
|
"loss": 1.2007,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.3171355498721228,
|
|
"grad_norm": 0.17673439222358,
|
|
"learning_rate": 9.05109489051095e-06,
|
|
"loss": 1.1966,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.319693094629156,
|
|
"grad_norm": 0.1966681987874607,
|
|
"learning_rate": 9.124087591240877e-06,
|
|
"loss": 1.1739,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.32225063938618925,
|
|
"grad_norm": 0.20745524723498263,
|
|
"learning_rate": 9.197080291970804e-06,
|
|
"loss": 1.2309,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.3248081841432225,
|
|
"grad_norm": 0.20371417264487574,
|
|
"learning_rate": 9.27007299270073e-06,
|
|
"loss": 1.1718,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.3273657289002558,
|
|
"grad_norm": 0.20142192992356361,
|
|
"learning_rate": 9.343065693430657e-06,
|
|
"loss": 1.1981,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.329923273657289,
|
|
"grad_norm": 0.18157695452516256,
|
|
"learning_rate": 9.416058394160585e-06,
|
|
"loss": 1.187,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.33248081841432225,
|
|
"grad_norm": 0.18405529622418393,
|
|
"learning_rate": 9.48905109489051e-06,
|
|
"loss": 1.2154,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.3350383631713555,
|
|
"grad_norm": 0.18826966568044085,
|
|
"learning_rate": 9.56204379562044e-06,
|
|
"loss": 1.1823,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.3375959079283887,
|
|
"grad_norm": 0.17870276101242044,
|
|
"learning_rate": 9.635036496350367e-06,
|
|
"loss": 1.2399,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.340153452685422,
|
|
"grad_norm": 0.18386831261657108,
|
|
"learning_rate": 9.708029197080293e-06,
|
|
"loss": 1.2114,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.34271099744245526,
|
|
"grad_norm": 0.1795896309939293,
|
|
"learning_rate": 9.78102189781022e-06,
|
|
"loss": 1.1832,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.3452685421994885,
|
|
"grad_norm": 0.21827425129513728,
|
|
"learning_rate": 9.854014598540148e-06,
|
|
"loss": 1.2389,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.34782608695652173,
|
|
"grad_norm": 0.1768309026825683,
|
|
"learning_rate": 9.927007299270073e-06,
|
|
"loss": 1.1965,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.35038363171355497,
|
|
"grad_norm": 0.20302569863881262,
|
|
"learning_rate": 1e-05,
|
|
"loss": 1.2094,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 0.19427846203063504,
|
|
"learning_rate": 1.0072992700729928e-05,
|
|
"loss": 1.1974,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.3554987212276215,
|
|
"grad_norm": 0.17339331224519358,
|
|
"learning_rate": 1.0145985401459854e-05,
|
|
"loss": 1.1736,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.35805626598465473,
|
|
"grad_norm": 0.2466539718194467,
|
|
"learning_rate": 1.0218978102189783e-05,
|
|
"loss": 1.2279,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.36061381074168797,
|
|
"grad_norm": 0.21241110450455392,
|
|
"learning_rate": 1.0291970802919709e-05,
|
|
"loss": 1.1409,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.3631713554987212,
|
|
"grad_norm": 0.18293508498426997,
|
|
"learning_rate": 1.0364963503649636e-05,
|
|
"loss": 1.1957,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.3657289002557545,
|
|
"grad_norm": 0.19790775478208397,
|
|
"learning_rate": 1.0437956204379562e-05,
|
|
"loss": 1.2193,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.36828644501278773,
|
|
"grad_norm": 0.20929660856991877,
|
|
"learning_rate": 1.0510948905109491e-05,
|
|
"loss": 1.1866,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.37084398976982097,
|
|
"grad_norm": 0.1926018989518869,
|
|
"learning_rate": 1.0583941605839417e-05,
|
|
"loss": 1.2015,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.3734015345268542,
|
|
"grad_norm": 0.19192914492955238,
|
|
"learning_rate": 1.0656934306569344e-05,
|
|
"loss": 1.1886,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.37595907928388744,
|
|
"grad_norm": 0.20322534422512073,
|
|
"learning_rate": 1.072992700729927e-05,
|
|
"loss": 1.2199,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.37851662404092073,
|
|
"grad_norm": 0.18947938981971202,
|
|
"learning_rate": 1.0802919708029198e-05,
|
|
"loss": 1.1829,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.38107416879795397,
|
|
"grad_norm": 0.2154696847726249,
|
|
"learning_rate": 1.0875912408759123e-05,
|
|
"loss": 1.1655,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.3836317135549872,
|
|
"grad_norm": 0.20859256059256231,
|
|
"learning_rate": 1.0948905109489052e-05,
|
|
"loss": 1.1815,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.38618925831202044,
|
|
"grad_norm": 0.20565139563521717,
|
|
"learning_rate": 1.102189781021898e-05,
|
|
"loss": 1.1848,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.3887468030690537,
|
|
"grad_norm": 0.21340531513272162,
|
|
"learning_rate": 1.1094890510948906e-05,
|
|
"loss": 1.188,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.391304347826087,
|
|
"grad_norm": 0.22952365545919354,
|
|
"learning_rate": 1.1167883211678833e-05,
|
|
"loss": 1.1772,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.3938618925831202,
|
|
"grad_norm": 0.21489457470648385,
|
|
"learning_rate": 1.124087591240876e-05,
|
|
"loss": 1.1807,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.39641943734015345,
|
|
"grad_norm": 0.22932079381688553,
|
|
"learning_rate": 1.1313868613138688e-05,
|
|
"loss": 1.1949,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.3989769820971867,
|
|
"grad_norm": 0.23209900752946952,
|
|
"learning_rate": 1.1386861313868614e-05,
|
|
"loss": 1.1996,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.40153452685422,
|
|
"grad_norm": 0.22388173844283388,
|
|
"learning_rate": 1.1459854014598541e-05,
|
|
"loss": 1.2097,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.4040920716112532,
|
|
"grad_norm": 0.21380373488801446,
|
|
"learning_rate": 1.1532846715328467e-05,
|
|
"loss": 1.2082,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.40664961636828645,
|
|
"grad_norm": 0.21817873889647327,
|
|
"learning_rate": 1.1605839416058396e-05,
|
|
"loss": 1.1586,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.4092071611253197,
|
|
"grad_norm": 0.2450535248536084,
|
|
"learning_rate": 1.1678832116788322e-05,
|
|
"loss": 1.1765,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.4117647058823529,
|
|
"grad_norm": 0.24576894425899287,
|
|
"learning_rate": 1.1751824817518249e-05,
|
|
"loss": 1.1701,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.4143222506393862,
|
|
"grad_norm": 0.2781533359151788,
|
|
"learning_rate": 1.1824817518248176e-05,
|
|
"loss": 1.1686,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.41687979539641945,
|
|
"grad_norm": 0.23249844406377174,
|
|
"learning_rate": 1.1897810218978102e-05,
|
|
"loss": 1.169,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.4194373401534527,
|
|
"grad_norm": 0.2425823032194627,
|
|
"learning_rate": 1.1970802919708031e-05,
|
|
"loss": 1.1821,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.4219948849104859,
|
|
"grad_norm": 0.18932993548929591,
|
|
"learning_rate": 1.2043795620437957e-05,
|
|
"loss": 1.1538,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.42455242966751916,
|
|
"grad_norm": 0.2884159065917926,
|
|
"learning_rate": 1.2116788321167885e-05,
|
|
"loss": 1.1787,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.42710997442455245,
|
|
"grad_norm": 0.2667378207082784,
|
|
"learning_rate": 1.218978102189781e-05,
|
|
"loss": 1.1774,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.4296675191815857,
|
|
"grad_norm": 0.24644746723371008,
|
|
"learning_rate": 1.226277372262774e-05,
|
|
"loss": 1.1823,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.4322250639386189,
|
|
"grad_norm": 0.3049603900188157,
|
|
"learning_rate": 1.2335766423357665e-05,
|
|
"loss": 1.1808,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.43478260869565216,
|
|
"grad_norm": 0.24091240924103605,
|
|
"learning_rate": 1.2408759124087593e-05,
|
|
"loss": 1.1646,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.4373401534526854,
|
|
"grad_norm": 0.31462619972433453,
|
|
"learning_rate": 1.2481751824817518e-05,
|
|
"loss": 1.1742,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.4398976982097187,
|
|
"grad_norm": 0.25976500149808457,
|
|
"learning_rate": 1.2554744525547446e-05,
|
|
"loss": 1.1741,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.4424552429667519,
|
|
"grad_norm": 0.22869248627416927,
|
|
"learning_rate": 1.2627737226277371e-05,
|
|
"loss": 1.1927,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.44501278772378516,
|
|
"grad_norm": 0.27204853892769404,
|
|
"learning_rate": 1.27007299270073e-05,
|
|
"loss": 1.199,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.4475703324808184,
|
|
"grad_norm": 0.22922656795364751,
|
|
"learning_rate": 1.2773722627737228e-05,
|
|
"loss": 1.1742,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.45012787723785164,
|
|
"grad_norm": 0.3018012418428905,
|
|
"learning_rate": 1.2846715328467154e-05,
|
|
"loss": 1.2027,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.45268542199488493,
|
|
"grad_norm": 0.2578612414340434,
|
|
"learning_rate": 1.2919708029197083e-05,
|
|
"loss": 1.1757,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.45524296675191817,
|
|
"grad_norm": 0.25636745613132944,
|
|
"learning_rate": 1.2992700729927009e-05,
|
|
"loss": 1.1716,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.4578005115089514,
|
|
"grad_norm": 0.2715386790093217,
|
|
"learning_rate": 1.3065693430656936e-05,
|
|
"loss": 1.1583,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.46035805626598464,
|
|
"grad_norm": 0.2891675384844315,
|
|
"learning_rate": 1.3138686131386862e-05,
|
|
"loss": 1.1657,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.4629156010230179,
|
|
"grad_norm": 0.23385863111978508,
|
|
"learning_rate": 1.321167883211679e-05,
|
|
"loss": 1.1922,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.46547314578005117,
|
|
"grad_norm": 0.22994123507129197,
|
|
"learning_rate": 1.3284671532846715e-05,
|
|
"loss": 1.1717,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.4680306905370844,
|
|
"grad_norm": 0.23612727422353394,
|
|
"learning_rate": 1.3357664233576644e-05,
|
|
"loss": 1.1801,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 0.2069010463077349,
|
|
"learning_rate": 1.343065693430657e-05,
|
|
"loss": 1.177,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.4731457800511509,
|
|
"grad_norm": 0.2588170825534718,
|
|
"learning_rate": 1.3503649635036497e-05,
|
|
"loss": 1.1808,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.47570332480818417,
|
|
"grad_norm": 0.2157790774775731,
|
|
"learning_rate": 1.3576642335766423e-05,
|
|
"loss": 1.1821,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.4782608695652174,
|
|
"grad_norm": 0.23223470081294634,
|
|
"learning_rate": 1.3649635036496352e-05,
|
|
"loss": 1.1615,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.48081841432225064,
|
|
"grad_norm": 0.21725466354040374,
|
|
"learning_rate": 1.372262773722628e-05,
|
|
"loss": 1.1912,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.4833759590792839,
|
|
"grad_norm": 0.211538836700456,
|
|
"learning_rate": 1.3795620437956205e-05,
|
|
"loss": 1.1678,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.4859335038363171,
|
|
"grad_norm": 0.25537726955126566,
|
|
"learning_rate": 1.3868613138686133e-05,
|
|
"loss": 1.1745,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.4884910485933504,
|
|
"grad_norm": 0.28371208474889603,
|
|
"learning_rate": 1.3941605839416059e-05,
|
|
"loss": 1.1193,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.49104859335038364,
|
|
"grad_norm": 0.26303907455029885,
|
|
"learning_rate": 1.4014598540145988e-05,
|
|
"loss": 1.1622,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.4936061381074169,
|
|
"grad_norm": 0.2799114044156544,
|
|
"learning_rate": 1.4087591240875913e-05,
|
|
"loss": 1.136,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.4961636828644501,
|
|
"grad_norm": 0.24139333187754325,
|
|
"learning_rate": 1.416058394160584e-05,
|
|
"loss": 1.1306,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.49872122762148335,
|
|
"grad_norm": 0.2793729959544077,
|
|
"learning_rate": 1.4233576642335767e-05,
|
|
"loss": 1.2086,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.5012787723785166,
|
|
"grad_norm": 0.27570376951402886,
|
|
"learning_rate": 1.4306569343065696e-05,
|
|
"loss": 1.1628,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.5038363171355499,
|
|
"grad_norm": 0.32786685913286884,
|
|
"learning_rate": 1.4379562043795621e-05,
|
|
"loss": 1.1518,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.5063938618925832,
|
|
"grad_norm": 0.45385237120867455,
|
|
"learning_rate": 1.4452554744525549e-05,
|
|
"loss": 1.1856,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.5089514066496164,
|
|
"grad_norm": 0.41272427110721904,
|
|
"learning_rate": 1.4525547445255475e-05,
|
|
"loss": 1.1483,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.5115089514066496,
|
|
"grad_norm": 0.2841480764999212,
|
|
"learning_rate": 1.4598540145985402e-05,
|
|
"loss": 1.1629,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.5140664961636828,
|
|
"grad_norm": 0.27714909479279093,
|
|
"learning_rate": 1.4671532846715331e-05,
|
|
"loss": 1.1442,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.5166240409207161,
|
|
"grad_norm": 0.403242161588326,
|
|
"learning_rate": 1.4744525547445257e-05,
|
|
"loss": 1.1385,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.5191815856777494,
|
|
"grad_norm": 0.337013121025594,
|
|
"learning_rate": 1.4817518248175184e-05,
|
|
"loss": 1.171,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.5217391304347826,
|
|
"grad_norm": 0.4040109170859878,
|
|
"learning_rate": 1.489051094890511e-05,
|
|
"loss": 1.1418,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.5242966751918159,
|
|
"grad_norm": 0.48665453956547733,
|
|
"learning_rate": 1.4963503649635038e-05,
|
|
"loss": 1.164,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.5268542199488491,
|
|
"grad_norm": 0.24722444184837292,
|
|
"learning_rate": 1.5036496350364965e-05,
|
|
"loss": 1.1535,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.5294117647058824,
|
|
"grad_norm": 0.329077822667812,
|
|
"learning_rate": 1.5109489051094892e-05,
|
|
"loss": 1.1704,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.5319693094629157,
|
|
"grad_norm": 0.41651469422399784,
|
|
"learning_rate": 1.5182481751824818e-05,
|
|
"loss": 1.1559,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.5345268542199488,
|
|
"grad_norm": 0.32960667919190284,
|
|
"learning_rate": 1.5255474452554746e-05,
|
|
"loss": 1.1495,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.5370843989769821,
|
|
"grad_norm": 0.4781321369544006,
|
|
"learning_rate": 1.5328467153284673e-05,
|
|
"loss": 1.1387,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.5396419437340153,
|
|
"grad_norm": 0.43671817015361414,
|
|
"learning_rate": 1.54014598540146e-05,
|
|
"loss": 1.1607,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.5421994884910486,
|
|
"grad_norm": 0.32190848339790007,
|
|
"learning_rate": 1.5474452554744528e-05,
|
|
"loss": 1.1286,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.5447570332480819,
|
|
"grad_norm": 0.28497016967310845,
|
|
"learning_rate": 1.5547445255474454e-05,
|
|
"loss": 1.1701,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.5473145780051151,
|
|
"grad_norm": 0.30316718930544045,
|
|
"learning_rate": 1.5620437956204383e-05,
|
|
"loss": 1.1236,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.5498721227621484,
|
|
"grad_norm": 0.26835985072996216,
|
|
"learning_rate": 1.569343065693431e-05,
|
|
"loss": 1.1289,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.5524296675191815,
|
|
"grad_norm": 0.3009095238514411,
|
|
"learning_rate": 1.5766423357664234e-05,
|
|
"loss": 1.1636,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.5549872122762148,
|
|
"grad_norm": 0.3065933942839116,
|
|
"learning_rate": 1.583941605839416e-05,
|
|
"loss": 1.1368,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.5575447570332481,
|
|
"grad_norm": 0.26109719009183135,
|
|
"learning_rate": 1.591240875912409e-05,
|
|
"loss": 1.1077,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.5601023017902813,
|
|
"grad_norm": 0.3164778738084223,
|
|
"learning_rate": 1.5985401459854015e-05,
|
|
"loss": 1.1333,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.5626598465473146,
|
|
"grad_norm": 0.35400248747839075,
|
|
"learning_rate": 1.6058394160583944e-05,
|
|
"loss": 1.1865,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.5652173913043478,
|
|
"grad_norm": 0.28805686893200677,
|
|
"learning_rate": 1.613138686131387e-05,
|
|
"loss": 1.1293,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.5677749360613811,
|
|
"grad_norm": 0.30523736515745126,
|
|
"learning_rate": 1.62043795620438e-05,
|
|
"loss": 1.1296,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.5703324808184144,
|
|
"grad_norm": 0.4190076909483638,
|
|
"learning_rate": 1.6277372262773725e-05,
|
|
"loss": 1.1344,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.5728900255754475,
|
|
"grad_norm": 0.42243425644304494,
|
|
"learning_rate": 1.635036496350365e-05,
|
|
"loss": 1.1665,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.5754475703324808,
|
|
"grad_norm": 0.33398927440080145,
|
|
"learning_rate": 1.642335766423358e-05,
|
|
"loss": 1.1616,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.578005115089514,
|
|
"grad_norm": 0.31042126932738984,
|
|
"learning_rate": 1.6496350364963505e-05,
|
|
"loss": 1.1346,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.5805626598465473,
|
|
"grad_norm": 0.4022933069927679,
|
|
"learning_rate": 1.6569343065693434e-05,
|
|
"loss": 1.1474,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.5831202046035806,
|
|
"grad_norm": 0.34778708873533665,
|
|
"learning_rate": 1.664233576642336e-05,
|
|
"loss": 1.1328,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.5856777493606138,
|
|
"grad_norm": 0.35235801712692716,
|
|
"learning_rate": 1.6715328467153286e-05,
|
|
"loss": 1.1507,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.3378264430318775,
|
|
"learning_rate": 1.678832116788321e-05,
|
|
"loss": 1.1556,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.5907928388746803,
|
|
"grad_norm": 0.3260621828817585,
|
|
"learning_rate": 1.686131386861314e-05,
|
|
"loss": 1.152,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.5933503836317136,
|
|
"grad_norm": 0.39226471807556507,
|
|
"learning_rate": 1.6934306569343066e-05,
|
|
"loss": 1.1398,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.5959079283887468,
|
|
"grad_norm": 0.4562478952465355,
|
|
"learning_rate": 1.7007299270072995e-05,
|
|
"loss": 1.1447,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.59846547314578,
|
|
"grad_norm": 0.3451241092677777,
|
|
"learning_rate": 1.708029197080292e-05,
|
|
"loss": 1.1005,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.6010230179028133,
|
|
"grad_norm": 0.35647792283371854,
|
|
"learning_rate": 1.7153284671532847e-05,
|
|
"loss": 1.1227,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.6035805626598465,
|
|
"grad_norm": 0.4594520420622475,
|
|
"learning_rate": 1.7226277372262773e-05,
|
|
"loss": 1.1505,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.6061381074168798,
|
|
"grad_norm": 0.45224289985329424,
|
|
"learning_rate": 1.7299270072992702e-05,
|
|
"loss": 1.1308,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.6086956521739131,
|
|
"grad_norm": 0.40418344343634116,
|
|
"learning_rate": 1.737226277372263e-05,
|
|
"loss": 1.1181,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.6112531969309463,
|
|
"grad_norm": 0.3386408460236669,
|
|
"learning_rate": 1.7445255474452557e-05,
|
|
"loss": 1.1584,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.6138107416879796,
|
|
"grad_norm": 0.26946506842987866,
|
|
"learning_rate": 1.7518248175182482e-05,
|
|
"loss": 1.1264,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.6163682864450127,
|
|
"grad_norm": 0.36854128324837004,
|
|
"learning_rate": 1.7591240875912408e-05,
|
|
"loss": 1.1234,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.618925831202046,
|
|
"grad_norm": 0.40766745885420824,
|
|
"learning_rate": 1.7664233576642337e-05,
|
|
"loss": 1.1473,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.6214833759590793,
|
|
"grad_norm": 0.34418627419066,
|
|
"learning_rate": 1.7737226277372263e-05,
|
|
"loss": 1.1443,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.6240409207161125,
|
|
"grad_norm": 0.3132419041181749,
|
|
"learning_rate": 1.7810218978102192e-05,
|
|
"loss": 1.1898,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.6265984654731458,
|
|
"grad_norm": 0.3133703026128217,
|
|
"learning_rate": 1.7883211678832118e-05,
|
|
"loss": 1.1501,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.629156010230179,
|
|
"grad_norm": 0.3441898164827929,
|
|
"learning_rate": 1.7956204379562047e-05,
|
|
"loss": 1.1452,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.6317135549872123,
|
|
"grad_norm": 0.33750686928448953,
|
|
"learning_rate": 1.8029197080291973e-05,
|
|
"loss": 1.1359,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.6342710997442456,
|
|
"grad_norm": 0.374020584176404,
|
|
"learning_rate": 1.81021897810219e-05,
|
|
"loss": 1.1823,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.6368286445012787,
|
|
"grad_norm": 0.3514782831462071,
|
|
"learning_rate": 1.8175182481751824e-05,
|
|
"loss": 1.1632,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.639386189258312,
|
|
"grad_norm": 0.3606450922286876,
|
|
"learning_rate": 1.8248175182481753e-05,
|
|
"loss": 1.1409,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.6419437340153452,
|
|
"grad_norm": 0.261265823171208,
|
|
"learning_rate": 1.8321167883211683e-05,
|
|
"loss": 1.1499,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.6445012787723785,
|
|
"grad_norm": 0.42167995133388064,
|
|
"learning_rate": 1.8394160583941608e-05,
|
|
"loss": 1.154,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.6470588235294118,
|
|
"grad_norm": 0.3940819685714755,
|
|
"learning_rate": 1.8467153284671534e-05,
|
|
"loss": 1.1355,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.649616368286445,
|
|
"grad_norm": 0.3265578920410715,
|
|
"learning_rate": 1.854014598540146e-05,
|
|
"loss": 1.1874,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.6521739130434783,
|
|
"grad_norm": 0.39035015686633145,
|
|
"learning_rate": 1.861313868613139e-05,
|
|
"loss": 1.1374,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.6547314578005116,
|
|
"grad_norm": 0.41589276832005634,
|
|
"learning_rate": 1.8686131386861315e-05,
|
|
"loss": 1.1289,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.6572890025575447,
|
|
"grad_norm": 0.45228952583155346,
|
|
"learning_rate": 1.8759124087591244e-05,
|
|
"loss": 1.1646,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.659846547314578,
|
|
"grad_norm": 0.5348752543777668,
|
|
"learning_rate": 1.883211678832117e-05,
|
|
"loss": 1.1268,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.6624040920716112,
|
|
"grad_norm": 0.6021227056854751,
|
|
"learning_rate": 1.8905109489051095e-05,
|
|
"loss": 1.1593,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.6649616368286445,
|
|
"grad_norm": 0.5171238656799629,
|
|
"learning_rate": 1.897810218978102e-05,
|
|
"loss": 1.1374,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.6675191815856778,
|
|
"grad_norm": 0.4416261577215247,
|
|
"learning_rate": 1.905109489051095e-05,
|
|
"loss": 1.1093,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.670076726342711,
|
|
"grad_norm": 0.569218554097933,
|
|
"learning_rate": 1.912408759124088e-05,
|
|
"loss": 1.1232,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.6726342710997443,
|
|
"grad_norm": 0.6811617127901143,
|
|
"learning_rate": 1.9197080291970805e-05,
|
|
"loss": 1.1682,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.6751918158567775,
|
|
"grad_norm": 0.749600012327492,
|
|
"learning_rate": 1.9270072992700734e-05,
|
|
"loss": 1.1484,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.6777493606138107,
|
|
"grad_norm": 0.5547245978393044,
|
|
"learning_rate": 1.934306569343066e-05,
|
|
"loss": 1.1746,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.680306905370844,
|
|
"grad_norm": 0.29516123217758117,
|
|
"learning_rate": 1.9416058394160586e-05,
|
|
"loss": 1.1414,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.6828644501278772,
|
|
"grad_norm": 0.5616443320978407,
|
|
"learning_rate": 1.948905109489051e-05,
|
|
"loss": 1.096,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.6854219948849105,
|
|
"grad_norm": 0.7110950485565922,
|
|
"learning_rate": 1.956204379562044e-05,
|
|
"loss": 1.1383,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.6879795396419437,
|
|
"grad_norm": 0.5747864084575326,
|
|
"learning_rate": 1.9635036496350366e-05,
|
|
"loss": 1.1157,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.690537084398977,
|
|
"grad_norm": 0.5551996423553552,
|
|
"learning_rate": 1.9708029197080295e-05,
|
|
"loss": 1.1569,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.6930946291560103,
|
|
"grad_norm": 0.7165225607672224,
|
|
"learning_rate": 1.978102189781022e-05,
|
|
"loss": 1.1551,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.6956521739130435,
|
|
"grad_norm": 0.7036255091082283,
|
|
"learning_rate": 1.9854014598540147e-05,
|
|
"loss": 1.1155,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.6982097186700768,
|
|
"grad_norm": 0.37416829306334026,
|
|
"learning_rate": 1.9927007299270073e-05,
|
|
"loss": 1.1293,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.7007672634271099,
|
|
"grad_norm": 0.5000491272477234,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.1495,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.7033248081841432,
|
|
"grad_norm": 0.7162752485719868,
|
|
"learning_rate": 1.9999991865312627e-05,
|
|
"loss": 1.1267,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 0.6356341002049819,
|
|
"learning_rate": 1.9999967461263736e-05,
|
|
"loss": 1.1469,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.7084398976982097,
|
|
"grad_norm": 0.46429306768513406,
|
|
"learning_rate": 1.9999926787893038e-05,
|
|
"loss": 1.1605,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.710997442455243,
|
|
"grad_norm": 0.42193730725900314,
|
|
"learning_rate": 1.99998698452667e-05,
|
|
"loss": 1.1291,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.7135549872122762,
|
|
"grad_norm": 0.45111683276082,
|
|
"learning_rate": 1.999979663347736e-05,
|
|
"loss": 1.1594,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.7161125319693095,
|
|
"grad_norm": 0.48963964069881194,
|
|
"learning_rate": 1.9999707152644143e-05,
|
|
"loss": 1.1245,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.7186700767263428,
|
|
"grad_norm": 0.4979629650586617,
|
|
"learning_rate": 1.999960140291262e-05,
|
|
"loss": 1.119,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.7212276214833759,
|
|
"grad_norm": 0.4664713962878264,
|
|
"learning_rate": 1.9999479384454838e-05,
|
|
"loss": 1.1468,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.7237851662404092,
|
|
"grad_norm": 0.3844942432737082,
|
|
"learning_rate": 1.9999341097469313e-05,
|
|
"loss": 1.075,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.7263427109974424,
|
|
"grad_norm": 0.3748435881073205,
|
|
"learning_rate": 1.9999186542181038e-05,
|
|
"loss": 1.1388,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.7289002557544757,
|
|
"grad_norm": 0.37537611839818713,
|
|
"learning_rate": 1.9999015718841453e-05,
|
|
"loss": 1.1204,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.731457800511509,
|
|
"grad_norm": 0.2604152551489964,
|
|
"learning_rate": 1.9998828627728483e-05,
|
|
"loss": 1.1441,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.7340153452685422,
|
|
"grad_norm": 0.3500229133794647,
|
|
"learning_rate": 1.9998625269146515e-05,
|
|
"loss": 1.1418,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.7365728900255755,
|
|
"grad_norm": 0.40870411685426555,
|
|
"learning_rate": 1.9998405643426398e-05,
|
|
"loss": 1.107,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.7391304347826086,
|
|
"grad_norm": 0.4142193267583776,
|
|
"learning_rate": 1.999816975092545e-05,
|
|
"loss": 1.1386,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.7416879795396419,
|
|
"grad_norm": 0.3984615533147621,
|
|
"learning_rate": 1.9997917592027455e-05,
|
|
"loss": 1.1478,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.7442455242966752,
|
|
"grad_norm": 0.33486990703650343,
|
|
"learning_rate": 1.9997649167142654e-05,
|
|
"loss": 1.1322,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.7468030690537084,
|
|
"grad_norm": 0.34307927675012156,
|
|
"learning_rate": 1.9997364476707765e-05,
|
|
"loss": 1.0975,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.7493606138107417,
|
|
"grad_norm": 0.32862273663424796,
|
|
"learning_rate": 1.9997063521185956e-05,
|
|
"loss": 1.1234,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.7519181585677749,
|
|
"grad_norm": 0.3832334389775187,
|
|
"learning_rate": 1.9996746301066867e-05,
|
|
"loss": 1.1204,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.7544757033248082,
|
|
"grad_norm": 0.37651748057590684,
|
|
"learning_rate": 1.999641281686659e-05,
|
|
"loss": 1.1101,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.7570332480818415,
|
|
"grad_norm": 0.3987512509485477,
|
|
"learning_rate": 1.999606306912769e-05,
|
|
"loss": 1.1182,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.7595907928388747,
|
|
"grad_norm": 0.3135294282014092,
|
|
"learning_rate": 1.999569705841918e-05,
|
|
"loss": 1.1576,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.7621483375959079,
|
|
"grad_norm": 0.310570536991235,
|
|
"learning_rate": 1.9995314785336534e-05,
|
|
"loss": 1.1329,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.7647058823529411,
|
|
"grad_norm": 0.28886285275015344,
|
|
"learning_rate": 1.999491625050169e-05,
|
|
"loss": 1.1486,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.7672634271099744,
|
|
"grad_norm": 0.2810916108747155,
|
|
"learning_rate": 1.9994501454563046e-05,
|
|
"loss": 1.1067,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.7698209718670077,
|
|
"grad_norm": 0.2641826394714093,
|
|
"learning_rate": 1.9994070398195437e-05,
|
|
"loss": 1.1391,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.7723785166240409,
|
|
"grad_norm": 0.23992392919351505,
|
|
"learning_rate": 1.999362308210017e-05,
|
|
"loss": 1.1387,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.7749360613810742,
|
|
"grad_norm": 0.24856265925820004,
|
|
"learning_rate": 1.9993159507005e-05,
|
|
"loss": 1.1084,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.7774936061381074,
|
|
"grad_norm": 0.22572823705824116,
|
|
"learning_rate": 1.9992679673664136e-05,
|
|
"loss": 1.1134,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.7800511508951407,
|
|
"grad_norm": 0.27595626439843796,
|
|
"learning_rate": 1.9992183582858233e-05,
|
|
"loss": 1.1269,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.782608695652174,
|
|
"grad_norm": 0.33828817219220914,
|
|
"learning_rate": 1.9991671235394404e-05,
|
|
"loss": 1.1211,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.7851662404092071,
|
|
"grad_norm": 0.23908198593915184,
|
|
"learning_rate": 1.9991142632106205e-05,
|
|
"loss": 1.0874,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.7877237851662404,
|
|
"grad_norm": 0.32916775113793656,
|
|
"learning_rate": 1.999059777385364e-05,
|
|
"loss": 1.1189,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.7902813299232737,
|
|
"grad_norm": 0.4164086722930908,
|
|
"learning_rate": 1.9990036661523162e-05,
|
|
"loss": 1.1368,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.7928388746803069,
|
|
"grad_norm": 0.4356985530787425,
|
|
"learning_rate": 1.998945929602766e-05,
|
|
"loss": 1.1041,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.7953964194373402,
|
|
"grad_norm": 0.32329800121359825,
|
|
"learning_rate": 1.9988865678306476e-05,
|
|
"loss": 1.1381,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.7979539641943734,
|
|
"grad_norm": 0.28030048685966436,
|
|
"learning_rate": 1.998825580932539e-05,
|
|
"loss": 1.1505,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.8005115089514067,
|
|
"grad_norm": 0.3736128210505236,
|
|
"learning_rate": 1.9987629690076615e-05,
|
|
"loss": 1.116,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.80306905370844,
|
|
"grad_norm": 0.3711938440381308,
|
|
"learning_rate": 1.998698732157881e-05,
|
|
"loss": 1.1233,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.8056265984654731,
|
|
"grad_norm": 0.283799635820317,
|
|
"learning_rate": 1.998632870487707e-05,
|
|
"loss": 1.1112,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.8081841432225064,
|
|
"grad_norm": 0.29982174151777125,
|
|
"learning_rate": 1.9985653841042926e-05,
|
|
"loss": 1.1089,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.8107416879795396,
|
|
"grad_norm": 0.33144242270715973,
|
|
"learning_rate": 1.9984962731174336e-05,
|
|
"loss": 1.1387,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.8132992327365729,
|
|
"grad_norm": 0.33991853938376265,
|
|
"learning_rate": 1.998425537639569e-05,
|
|
"loss": 1.1292,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.8158567774936062,
|
|
"grad_norm": 0.342802086067408,
|
|
"learning_rate": 1.9983531777857817e-05,
|
|
"loss": 1.0907,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.8184143222506394,
|
|
"grad_norm": 0.3083367366680541,
|
|
"learning_rate": 1.998279193673796e-05,
|
|
"loss": 1.1157,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.8209718670076727,
|
|
"grad_norm": 0.32536985414256364,
|
|
"learning_rate": 1.9982035854239793e-05,
|
|
"loss": 1.0971,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 0.3810630836606236,
|
|
"learning_rate": 1.9981263531593422e-05,
|
|
"loss": 1.1236,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.8260869565217391,
|
|
"grad_norm": 0.36452300722278047,
|
|
"learning_rate": 1.9980474970055367e-05,
|
|
"loss": 1.1438,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.8286445012787724,
|
|
"grad_norm": 0.2795921565060519,
|
|
"learning_rate": 1.997967017090856e-05,
|
|
"loss": 1.1465,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.8312020460358056,
|
|
"grad_norm": 0.2986081929713523,
|
|
"learning_rate": 1.9978849135462367e-05,
|
|
"loss": 1.1061,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.8337595907928389,
|
|
"grad_norm": 0.3054440401423343,
|
|
"learning_rate": 1.9978011865052554e-05,
|
|
"loss": 1.1146,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.8363171355498721,
|
|
"grad_norm": 0.32318950453837997,
|
|
"learning_rate": 1.9977158361041317e-05,
|
|
"loss": 1.1554,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.8388746803069054,
|
|
"grad_norm": 0.30472902927491496,
|
|
"learning_rate": 1.997628862481725e-05,
|
|
"loss": 1.1274,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.8414322250639387,
|
|
"grad_norm": 0.4042829285862421,
|
|
"learning_rate": 1.9975402657795355e-05,
|
|
"loss": 1.1669,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.8439897698209718,
|
|
"grad_norm": 0.2804285578799784,
|
|
"learning_rate": 1.997450046141705e-05,
|
|
"loss": 1.1361,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.8465473145780051,
|
|
"grad_norm": 0.3569177728816469,
|
|
"learning_rate": 1.997358203715015e-05,
|
|
"loss": 1.1095,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.8491048593350383,
|
|
"grad_norm": 0.4230090216431553,
|
|
"learning_rate": 1.9972647386488873e-05,
|
|
"loss": 1.1016,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.8516624040920716,
|
|
"grad_norm": 0.37021286913388013,
|
|
"learning_rate": 1.997169651095384e-05,
|
|
"loss": 1.1475,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.8542199488491049,
|
|
"grad_norm": 0.3317123580055209,
|
|
"learning_rate": 1.9970729412092064e-05,
|
|
"loss": 1.0813,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.8567774936061381,
|
|
"grad_norm": 0.273842287695835,
|
|
"learning_rate": 1.9969746091476955e-05,
|
|
"loss": 1.1067,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.8593350383631714,
|
|
"grad_norm": 0.2673820670815786,
|
|
"learning_rate": 1.9968746550708313e-05,
|
|
"loss": 1.1069,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.8618925831202046,
|
|
"grad_norm": 0.2979937548082758,
|
|
"learning_rate": 1.996773079141233e-05,
|
|
"loss": 1.1279,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.8644501278772379,
|
|
"grad_norm": 0.37172355657034833,
|
|
"learning_rate": 1.9966698815241583e-05,
|
|
"loss": 1.1339,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.8670076726342711,
|
|
"grad_norm": 0.506903869952954,
|
|
"learning_rate": 1.9965650623875034e-05,
|
|
"loss": 1.1039,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.8695652173913043,
|
|
"grad_norm": 0.4279498776163848,
|
|
"learning_rate": 1.9964586219018018e-05,
|
|
"loss": 1.1425,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.8721227621483376,
|
|
"grad_norm": 0.36753587770795587,
|
|
"learning_rate": 1.9963505602402263e-05,
|
|
"loss": 1.0978,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.8746803069053708,
|
|
"grad_norm": 0.3648609772451092,
|
|
"learning_rate": 1.996240877578586e-05,
|
|
"loss": 1.1242,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.8772378516624041,
|
|
"grad_norm": 0.37366918011434086,
|
|
"learning_rate": 1.996129574095328e-05,
|
|
"loss": 1.1191,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.8797953964194374,
|
|
"grad_norm": 0.3879756302273747,
|
|
"learning_rate": 1.996016649971536e-05,
|
|
"loss": 1.1253,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 0.35306903326209926,
|
|
"learning_rate": 1.9959021053909304e-05,
|
|
"loss": 1.1097,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.8849104859335039,
|
|
"grad_norm": 0.3497813112371213,
|
|
"learning_rate": 1.995785940539868e-05,
|
|
"loss": 1.1751,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.887468030690537,
|
|
"grad_norm": 0.31991011885719,
|
|
"learning_rate": 1.995668155607342e-05,
|
|
"loss": 1.06,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.8900255754475703,
|
|
"grad_norm": 0.33100033800466955,
|
|
"learning_rate": 1.9955487507849815e-05,
|
|
"loss": 1.1217,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.8925831202046036,
|
|
"grad_norm": 0.3302462532169077,
|
|
"learning_rate": 1.9954277262670497e-05,
|
|
"loss": 1.1016,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.8951406649616368,
|
|
"grad_norm": 0.2988617813500731,
|
|
"learning_rate": 1.9953050822504466e-05,
|
|
"loss": 1.1259,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.8976982097186701,
|
|
"grad_norm": 0.2467443109516983,
|
|
"learning_rate": 1.995180818934706e-05,
|
|
"loss": 1.1449,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.9002557544757033,
|
|
"grad_norm": 0.2862819186333417,
|
|
"learning_rate": 1.995054936521997e-05,
|
|
"loss": 1.1,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.9028132992327366,
|
|
"grad_norm": 0.3386935579478213,
|
|
"learning_rate": 1.9949274352171218e-05,
|
|
"loss": 1.1215,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.9053708439897699,
|
|
"grad_norm": 0.377267345773294,
|
|
"learning_rate": 1.9947983152275175e-05,
|
|
"loss": 1.1151,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.907928388746803,
|
|
"grad_norm": 0.26418004315541993,
|
|
"learning_rate": 1.9946675767632545e-05,
|
|
"loss": 1.0909,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.9104859335038363,
|
|
"grad_norm": 0.3036950602266219,
|
|
"learning_rate": 1.9945352200370352e-05,
|
|
"loss": 1.1065,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.9130434782608695,
|
|
"grad_norm": 0.2847990677396293,
|
|
"learning_rate": 1.9944012452641966e-05,
|
|
"loss": 1.1187,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.9156010230179028,
|
|
"grad_norm": 0.3155239647410138,
|
|
"learning_rate": 1.994265652662707e-05,
|
|
"loss": 1.1402,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.9181585677749361,
|
|
"grad_norm": 0.3011564965680371,
|
|
"learning_rate": 1.9941284424531668e-05,
|
|
"loss": 1.1232,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.9207161125319693,
|
|
"grad_norm": 0.3119452115804441,
|
|
"learning_rate": 1.9939896148588086e-05,
|
|
"loss": 1.0879,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.9232736572890026,
|
|
"grad_norm": 0.33133352515569403,
|
|
"learning_rate": 1.9938491701054965e-05,
|
|
"loss": 1.1384,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.9258312020460358,
|
|
"grad_norm": 0.2085194934877816,
|
|
"learning_rate": 1.9937071084217254e-05,
|
|
"loss": 1.0616,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.928388746803069,
|
|
"grad_norm": 0.27348539950003964,
|
|
"learning_rate": 1.99356343003862e-05,
|
|
"loss": 1.127,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.9309462915601023,
|
|
"grad_norm": 0.314231043083254,
|
|
"learning_rate": 1.9934181351899365e-05,
|
|
"loss": 1.1075,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.9335038363171355,
|
|
"grad_norm": 0.3354380584507947,
|
|
"learning_rate": 1.9932712241120606e-05,
|
|
"loss": 1.1272,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.9360613810741688,
|
|
"grad_norm": 0.28703321632472045,
|
|
"learning_rate": 1.9931226970440075e-05,
|
|
"loss": 1.1469,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.9386189258312021,
|
|
"grad_norm": 0.3426859912220677,
|
|
"learning_rate": 1.9929725542274215e-05,
|
|
"loss": 1.1278,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 0.29299540193881474,
|
|
"learning_rate": 1.992820795906575e-05,
|
|
"loss": 1.1187,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.9437340153452686,
|
|
"grad_norm": 0.39295341923846966,
|
|
"learning_rate": 1.99266742232837e-05,
|
|
"loss": 1.1126,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.9462915601023018,
|
|
"grad_norm": 0.35353202277391543,
|
|
"learning_rate": 1.9925124337423356e-05,
|
|
"loss": 1.1139,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.948849104859335,
|
|
"grad_norm": 0.3311467211582019,
|
|
"learning_rate": 1.9923558304006283e-05,
|
|
"loss": 1.138,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.9514066496163683,
|
|
"grad_norm": 0.3816152174441759,
|
|
"learning_rate": 1.992197612558032e-05,
|
|
"loss": 1.1176,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.9539641943734015,
|
|
"grad_norm": 0.36605913254516786,
|
|
"learning_rate": 1.9920377804719573e-05,
|
|
"loss": 1.1221,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.9565217391304348,
|
|
"grad_norm": 0.36097755733897396,
|
|
"learning_rate": 1.991876334402441e-05,
|
|
"loss": 1.1198,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.959079283887468,
|
|
"grad_norm": 0.34895670740815254,
|
|
"learning_rate": 1.9917132746121454e-05,
|
|
"loss": 1.1438,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.9616368286445013,
|
|
"grad_norm": 0.2817987248252719,
|
|
"learning_rate": 1.9915486013663595e-05,
|
|
"loss": 1.0946,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.9641943734015346,
|
|
"grad_norm": 0.2440543185648296,
|
|
"learning_rate": 1.9913823149329952e-05,
|
|
"loss": 1.1257,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.9667519181585678,
|
|
"grad_norm": 0.29938424755141774,
|
|
"learning_rate": 1.9912144155825913e-05,
|
|
"loss": 1.1315,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.969309462915601,
|
|
"grad_norm": 0.3042211939245891,
|
|
"learning_rate": 1.9910449035883086e-05,
|
|
"loss": 1.1005,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.9718670076726342,
|
|
"grad_norm": 0.3662935173068649,
|
|
"learning_rate": 1.990873779225933e-05,
|
|
"loss": 1.0831,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.9744245524296675,
|
|
"grad_norm": 0.34290782200372855,
|
|
"learning_rate": 1.990701042773873e-05,
|
|
"loss": 1.1116,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.9769820971867008,
|
|
"grad_norm": 0.2659876511429978,
|
|
"learning_rate": 1.99052669451316e-05,
|
|
"loss": 1.1172,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.979539641943734,
|
|
"grad_norm": 0.2656583663382276,
|
|
"learning_rate": 1.9903507347274478e-05,
|
|
"loss": 1.1243,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.9820971867007673,
|
|
"grad_norm": 0.35197356004646674,
|
|
"learning_rate": 1.9901731637030123e-05,
|
|
"loss": 1.0751,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.9846547314578005,
|
|
"grad_norm": 0.4123186710230891,
|
|
"learning_rate": 1.9899939817287494e-05,
|
|
"loss": 1.1572,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.9872122762148338,
|
|
"grad_norm": 0.48886837110572706,
|
|
"learning_rate": 1.989813189096178e-05,
|
|
"loss": 1.1109,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.989769820971867,
|
|
"grad_norm": 0.4200898181195607,
|
|
"learning_rate": 1.989630786099436e-05,
|
|
"loss": 1.1243,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.9923273657289002,
|
|
"grad_norm": 0.36473186521348727,
|
|
"learning_rate": 1.9894467730352817e-05,
|
|
"loss": 1.1379,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.9948849104859335,
|
|
"grad_norm": 0.33106729200219565,
|
|
"learning_rate": 1.9892611502030932e-05,
|
|
"loss": 1.1183,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.9974424552429667,
|
|
"grad_norm": 0.28859949847448485,
|
|
"learning_rate": 1.9890739179048666e-05,
|
|
"loss": 1.1019,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.32343067044443596,
|
|
"learning_rate": 1.9888850764452177e-05,
|
|
"loss": 1.1315,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 1.0025575447570332,
|
|
"grad_norm": 0.2946752191785302,
|
|
"learning_rate": 1.988694626131379e-05,
|
|
"loss": 1.1027,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 1.0051150895140666,
|
|
"grad_norm": 0.2840956310037306,
|
|
"learning_rate": 1.9885025672732024e-05,
|
|
"loss": 1.1255,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 1.0076726342710998,
|
|
"grad_norm": 0.3834929641779387,
|
|
"learning_rate": 1.9883089001831545e-05,
|
|
"loss": 1.0926,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 1.010230179028133,
|
|
"grad_norm": 0.37119046465058125,
|
|
"learning_rate": 1.9881136251763203e-05,
|
|
"loss": 1.1024,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 1.0127877237851663,
|
|
"grad_norm": 0.3481999615848297,
|
|
"learning_rate": 1.9879167425703998e-05,
|
|
"loss": 1.1177,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 1.0153452685421995,
|
|
"grad_norm": 0.4174534154279672,
|
|
"learning_rate": 1.9877182526857086e-05,
|
|
"loss": 1.1194,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 1.0179028132992327,
|
|
"grad_norm": 0.428283352237624,
|
|
"learning_rate": 1.9875181558451774e-05,
|
|
"loss": 1.1126,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 1.020460358056266,
|
|
"grad_norm": 0.34788898984052513,
|
|
"learning_rate": 1.9873164523743517e-05,
|
|
"loss": 1.0826,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 1.0230179028132993,
|
|
"grad_norm": 0.3235948349939345,
|
|
"learning_rate": 1.9871131426013894e-05,
|
|
"loss": 1.137,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 1.0255754475703325,
|
|
"grad_norm": 0.3661886910233816,
|
|
"learning_rate": 1.9869082268570637e-05,
|
|
"loss": 1.1135,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 1.0281329923273657,
|
|
"grad_norm": 0.3844357019706309,
|
|
"learning_rate": 1.9867017054747593e-05,
|
|
"loss": 1.1316,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 1.030690537084399,
|
|
"grad_norm": 0.3351625771872402,
|
|
"learning_rate": 1.9864935787904734e-05,
|
|
"loss": 1.1009,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 1.0332480818414322,
|
|
"grad_norm": 0.34602161255624664,
|
|
"learning_rate": 1.986283847142816e-05,
|
|
"loss": 1.1047,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 1.0358056265984654,
|
|
"grad_norm": 0.3709821493330784,
|
|
"learning_rate": 1.9860725108730065e-05,
|
|
"loss": 1.1031,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 1.0383631713554988,
|
|
"grad_norm": 0.37774483264562303,
|
|
"learning_rate": 1.9858595703248755e-05,
|
|
"loss": 1.137,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 1.040920716112532,
|
|
"grad_norm": 0.3599825369273542,
|
|
"learning_rate": 1.985645025844865e-05,
|
|
"loss": 1.0707,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 1.0434782608695652,
|
|
"grad_norm": 0.39966584857588405,
|
|
"learning_rate": 1.9854288777820246e-05,
|
|
"loss": 1.1033,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 1.0460358056265984,
|
|
"grad_norm": 0.40289071310305824,
|
|
"learning_rate": 1.9852111264880145e-05,
|
|
"loss": 1.0806,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 1.0485933503836318,
|
|
"grad_norm": 0.47128238325065436,
|
|
"learning_rate": 1.984991772317102e-05,
|
|
"loss": 1.0756,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 1.051150895140665,
|
|
"grad_norm": 0.5298917118212448,
|
|
"learning_rate": 1.9847708156261622e-05,
|
|
"loss": 1.1055,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 1.0537084398976981,
|
|
"grad_norm": 0.47297356768421134,
|
|
"learning_rate": 1.9845482567746783e-05,
|
|
"loss": 1.0836,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 1.0562659846547315,
|
|
"grad_norm": 0.38344561089251955,
|
|
"learning_rate": 1.9843240961247398e-05,
|
|
"loss": 1.0904,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 1.0588235294117647,
|
|
"grad_norm": 0.27676602705991193,
|
|
"learning_rate": 1.9840983340410414e-05,
|
|
"loss": 1.1402,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 1.061381074168798,
|
|
"grad_norm": 0.4125473070163219,
|
|
"learning_rate": 1.9838709708908848e-05,
|
|
"loss": 1.1108,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 1.0639386189258313,
|
|
"grad_norm": 0.39100913652365626,
|
|
"learning_rate": 1.983642007044175e-05,
|
|
"loss": 1.0894,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 1.0664961636828645,
|
|
"grad_norm": 0.3635147529725554,
|
|
"learning_rate": 1.983411442873422e-05,
|
|
"loss": 1.0751,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 1.0690537084398977,
|
|
"grad_norm": 0.3157457311508148,
|
|
"learning_rate": 1.983179278753739e-05,
|
|
"loss": 1.0867,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 1.0716112531969308,
|
|
"grad_norm": 0.3380507668468239,
|
|
"learning_rate": 1.9829455150628432e-05,
|
|
"loss": 1.1428,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 1.0741687979539642,
|
|
"grad_norm": 0.3531121689418475,
|
|
"learning_rate": 1.982710152181053e-05,
|
|
"loss": 1.0877,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 1.0767263427109974,
|
|
"grad_norm": 0.2800940522052926,
|
|
"learning_rate": 1.982473190491289e-05,
|
|
"loss": 1.1025,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 1.0792838874680306,
|
|
"grad_norm": 0.3045440051536889,
|
|
"learning_rate": 1.9822346303790732e-05,
|
|
"loss": 1.0954,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 1.081841432225064,
|
|
"grad_norm": 0.2875179180998631,
|
|
"learning_rate": 1.9819944722325283e-05,
|
|
"loss": 1.0799,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 1.0843989769820972,
|
|
"grad_norm": 0.3671466904640979,
|
|
"learning_rate": 1.981752716442376e-05,
|
|
"loss": 1.1239,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 1.0869565217391304,
|
|
"grad_norm": 0.310905332933887,
|
|
"learning_rate": 1.9815093634019384e-05,
|
|
"loss": 1.0885,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 1.0895140664961638,
|
|
"grad_norm": 0.34866191023824383,
|
|
"learning_rate": 1.9812644135071358e-05,
|
|
"loss": 1.0789,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 1.092071611253197,
|
|
"grad_norm": 0.3670206738107968,
|
|
"learning_rate": 1.9810178671564853e-05,
|
|
"loss": 1.1051,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 1.0946291560102301,
|
|
"grad_norm": 0.46475258100798056,
|
|
"learning_rate": 1.980769724751104e-05,
|
|
"loss": 1.0838,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 1.0971867007672633,
|
|
"grad_norm": 0.3157024370545657,
|
|
"learning_rate": 1.9805199866947026e-05,
|
|
"loss": 1.114,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 1.0997442455242967,
|
|
"grad_norm": 0.29958992335623563,
|
|
"learning_rate": 1.9802686533935903e-05,
|
|
"loss": 1.0909,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 1.10230179028133,
|
|
"grad_norm": 0.3045539331442299,
|
|
"learning_rate": 1.9800157252566698e-05,
|
|
"loss": 1.119,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 1.104859335038363,
|
|
"grad_norm": 0.35388881893166907,
|
|
"learning_rate": 1.97976120269544e-05,
|
|
"loss": 1.1357,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 1.1074168797953965,
|
|
"grad_norm": 0.4072658855507119,
|
|
"learning_rate": 1.9795050861239932e-05,
|
|
"loss": 1.1153,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 1.1099744245524297,
|
|
"grad_norm": 0.3515081652084557,
|
|
"learning_rate": 1.9792473759590148e-05,
|
|
"loss": 1.1051,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 1.1125319693094629,
|
|
"grad_norm": 0.30513537117496636,
|
|
"learning_rate": 1.978988072619783e-05,
|
|
"loss": 1.0943,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 1.1150895140664963,
|
|
"grad_norm": 0.5088746516427844,
|
|
"learning_rate": 1.9787271765281684e-05,
|
|
"loss": 1.0947,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 1.1176470588235294,
|
|
"grad_norm": 0.6682126794134292,
|
|
"learning_rate": 1.9784646881086327e-05,
|
|
"loss": 1.0743,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 1.1202046035805626,
|
|
"grad_norm": 0.5551640593749172,
|
|
"learning_rate": 1.9782006077882282e-05,
|
|
"loss": 1.0861,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 1.1227621483375958,
|
|
"grad_norm": 0.3278866812808205,
|
|
"learning_rate": 1.9779349359965966e-05,
|
|
"loss": 1.1069,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 1.1253196930946292,
|
|
"grad_norm": 0.38591224008325814,
|
|
"learning_rate": 1.9776676731659695e-05,
|
|
"loss": 1.0849,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 1.1278772378516624,
|
|
"grad_norm": 0.35719651550677206,
|
|
"learning_rate": 1.977398819731167e-05,
|
|
"loss": 1.1053,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 1.1304347826086956,
|
|
"grad_norm": 0.4232965403621678,
|
|
"learning_rate": 1.9771283761295966e-05,
|
|
"loss": 1.0848,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 1.132992327365729,
|
|
"grad_norm": 0.2697343671368354,
|
|
"learning_rate": 1.9768563428012536e-05,
|
|
"loss": 1.1091,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 1.1355498721227621,
|
|
"grad_norm": 0.3193367309932036,
|
|
"learning_rate": 1.9765827201887183e-05,
|
|
"loss": 1.0767,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 1.1381074168797953,
|
|
"grad_norm": 0.36846576847881124,
|
|
"learning_rate": 1.9763075087371583e-05,
|
|
"loss": 1.0996,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 1.1406649616368287,
|
|
"grad_norm": 0.31668666427159936,
|
|
"learning_rate": 1.9760307088943254e-05,
|
|
"loss": 1.0713,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 1.143222506393862,
|
|
"grad_norm": 0.35150116619841826,
|
|
"learning_rate": 1.9757523211105555e-05,
|
|
"loss": 1.0564,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 1.145780051150895,
|
|
"grad_norm": 0.429831549745095,
|
|
"learning_rate": 1.975472345838768e-05,
|
|
"loss": 1.0907,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 1.1483375959079285,
|
|
"grad_norm": 0.44872565734771747,
|
|
"learning_rate": 1.9751907835344654e-05,
|
|
"loss": 1.0817,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 1.1508951406649617,
|
|
"grad_norm": 0.33913236381932554,
|
|
"learning_rate": 1.9749076346557318e-05,
|
|
"loss": 1.129,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 1.1534526854219949,
|
|
"grad_norm": 0.33115586128973074,
|
|
"learning_rate": 1.9746228996632326e-05,
|
|
"loss": 1.1034,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 1.156010230179028,
|
|
"grad_norm": 0.3057185791661933,
|
|
"learning_rate": 1.974336579020214e-05,
|
|
"loss": 1.1076,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 1.1585677749360614,
|
|
"grad_norm": 0.43316526036175457,
|
|
"learning_rate": 1.9740486731925022e-05,
|
|
"loss": 1.1224,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 1.1611253196930946,
|
|
"grad_norm": 0.5066112837446138,
|
|
"learning_rate": 1.9737591826485013e-05,
|
|
"loss": 1.0962,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 1.1636828644501278,
|
|
"grad_norm": 0.4014502906289108,
|
|
"learning_rate": 1.9734681078591943e-05,
|
|
"loss": 1.0905,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 1.1662404092071612,
|
|
"grad_norm": 0.30247128311625804,
|
|
"learning_rate": 1.9731754492981423e-05,
|
|
"loss": 1.0812,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 1.1687979539641944,
|
|
"grad_norm": 0.31145252945008656,
|
|
"learning_rate": 1.9728812074414822e-05,
|
|
"loss": 1.0729,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 1.1713554987212276,
|
|
"grad_norm": 0.33968915375934183,
|
|
"learning_rate": 1.9725853827679266e-05,
|
|
"loss": 1.078,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 1.1739130434782608,
|
|
"grad_norm": 0.27618072861680876,
|
|
"learning_rate": 1.9722879757587647e-05,
|
|
"loss": 1.0864,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 0.28234315821124384,
|
|
"learning_rate": 1.9719889868978582e-05,
|
|
"loss": 1.1135,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 1.1790281329923273,
|
|
"grad_norm": 0.29884726287169866,
|
|
"learning_rate": 1.971688416671644e-05,
|
|
"loss": 1.1363,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 1.1815856777493605,
|
|
"grad_norm": 0.27600448666706423,
|
|
"learning_rate": 1.9713862655691302e-05,
|
|
"loss": 1.0791,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 1.184143222506394,
|
|
"grad_norm": 0.2803813788615088,
|
|
"learning_rate": 1.971082534081899e-05,
|
|
"loss": 1.0718,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 1.186700767263427,
|
|
"grad_norm": 0.2696501099289663,
|
|
"learning_rate": 1.970777222704101e-05,
|
|
"loss": 1.0961,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 1.1892583120204603,
|
|
"grad_norm": 0.3010556872116562,
|
|
"learning_rate": 1.97047033193246e-05,
|
|
"loss": 1.1038,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 1.1918158567774937,
|
|
"grad_norm": 0.28235325514025905,
|
|
"learning_rate": 1.970161862266268e-05,
|
|
"loss": 1.1054,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 1.1943734015345269,
|
|
"grad_norm": 0.28808186970685423,
|
|
"learning_rate": 1.969851814207385e-05,
|
|
"loss": 1.0807,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 1.19693094629156,
|
|
"grad_norm": 0.33258411208957883,
|
|
"learning_rate": 1.9695401882602406e-05,
|
|
"loss": 1.1296,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 1.1994884910485935,
|
|
"grad_norm": 0.3318703383183081,
|
|
"learning_rate": 1.9692269849318303e-05,
|
|
"loss": 1.0936,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 1.2020460358056266,
|
|
"grad_norm": 0.30178464518160203,
|
|
"learning_rate": 1.9689122047317166e-05,
|
|
"loss": 1.1155,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 1.2046035805626598,
|
|
"grad_norm": 0.30521273043475255,
|
|
"learning_rate": 1.968595848172027e-05,
|
|
"loss": 1.0896,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 1.207161125319693,
|
|
"grad_norm": 0.34614634138914463,
|
|
"learning_rate": 1.968277915767454e-05,
|
|
"loss": 1.0452,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 1.2097186700767264,
|
|
"grad_norm": 0.32741746531886684,
|
|
"learning_rate": 1.9679584080352537e-05,
|
|
"loss": 1.1045,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 1.2122762148337596,
|
|
"grad_norm": 0.2615489309131341,
|
|
"learning_rate": 1.967637325495245e-05,
|
|
"loss": 1.0855,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 1.2148337595907928,
|
|
"grad_norm": 0.27476592859150684,
|
|
"learning_rate": 1.9673146686698093e-05,
|
|
"loss": 1.1001,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 1.2173913043478262,
|
|
"grad_norm": 0.3421071933190777,
|
|
"learning_rate": 1.9669904380838892e-05,
|
|
"loss": 1.0729,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 1.2199488491048593,
|
|
"grad_norm": 0.3598257915245131,
|
|
"learning_rate": 1.966664634264987e-05,
|
|
"loss": 1.1242,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 1.2225063938618925,
|
|
"grad_norm": 0.32107570559715254,
|
|
"learning_rate": 1.9663372577431663e-05,
|
|
"loss": 1.1087,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 1.2250639386189257,
|
|
"grad_norm": 0.341209086018264,
|
|
"learning_rate": 1.966008309051047e-05,
|
|
"loss": 1.1167,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 1.227621483375959,
|
|
"grad_norm": 0.29733249941263845,
|
|
"learning_rate": 1.965677788723809e-05,
|
|
"loss": 1.07,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 1.2301790281329923,
|
|
"grad_norm": 0.26502862394927407,
|
|
"learning_rate": 1.9653456972991877e-05,
|
|
"loss": 1.0775,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 1.2327365728900257,
|
|
"grad_norm": 0.28986896788872485,
|
|
"learning_rate": 1.965012035317475e-05,
|
|
"loss": 1.0967,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 1.2352941176470589,
|
|
"grad_norm": 0.33295845795202056,
|
|
"learning_rate": 1.9646768033215183e-05,
|
|
"loss": 1.0879,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 1.237851662404092,
|
|
"grad_norm": 0.3705619524001342,
|
|
"learning_rate": 1.9643400018567195e-05,
|
|
"loss": 1.1019,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 1.2404092071611252,
|
|
"grad_norm": 0.3266347911273673,
|
|
"learning_rate": 1.9640016314710323e-05,
|
|
"loss": 1.1084,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 1.2429667519181586,
|
|
"grad_norm": 0.3761069051897771,
|
|
"learning_rate": 1.9636616927149655e-05,
|
|
"loss": 1.1029,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 1.2455242966751918,
|
|
"grad_norm": 0.2621662577070755,
|
|
"learning_rate": 1.9633201861415773e-05,
|
|
"loss": 1.0735,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 1.248081841432225,
|
|
"grad_norm": 0.266376960810325,
|
|
"learning_rate": 1.9629771123064784e-05,
|
|
"loss": 1.0948,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 1.2506393861892584,
|
|
"grad_norm": 0.3408438115021644,
|
|
"learning_rate": 1.9626324717678275e-05,
|
|
"loss": 1.0984,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 1.2531969309462916,
|
|
"grad_norm": 0.3255066954002719,
|
|
"learning_rate": 1.962286265086334e-05,
|
|
"loss": 1.1213,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.2557544757033248,
|
|
"grad_norm": 0.3765758476751633,
|
|
"learning_rate": 1.961938492825254e-05,
|
|
"loss": 1.0909,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 1.258312020460358,
|
|
"grad_norm": 0.3109670040308706,
|
|
"learning_rate": 1.9615891555503914e-05,
|
|
"loss": 1.1164,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 1.2608695652173914,
|
|
"grad_norm": 0.28523527744811616,
|
|
"learning_rate": 1.961238253830096e-05,
|
|
"loss": 1.0834,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 1.2634271099744245,
|
|
"grad_norm": 0.3472113617037474,
|
|
"learning_rate": 1.9608857882352636e-05,
|
|
"loss": 1.0823,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 1.265984654731458,
|
|
"grad_norm": 0.45214384592951995,
|
|
"learning_rate": 1.9605317593393326e-05,
|
|
"loss": 1.1084,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 1.2685421994884911,
|
|
"grad_norm": 0.3401855972965097,
|
|
"learning_rate": 1.9601761677182868e-05,
|
|
"loss": 1.0978,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 1.2710997442455243,
|
|
"grad_norm": 0.3025957486994177,
|
|
"learning_rate": 1.959819013950651e-05,
|
|
"loss": 1.0889,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 1.2736572890025575,
|
|
"grad_norm": 0.29140422941812544,
|
|
"learning_rate": 1.9594602986174923e-05,
|
|
"loss": 1.0792,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 1.2762148337595907,
|
|
"grad_norm": 0.3620688439157377,
|
|
"learning_rate": 1.959100022302418e-05,
|
|
"loss": 1.092,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 1.278772378516624,
|
|
"grad_norm": 0.3498507983384518,
|
|
"learning_rate": 1.9587381855915754e-05,
|
|
"loss": 1.0652,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.2813299232736572,
|
|
"grad_norm": 0.34633148833870603,
|
|
"learning_rate": 1.95837478907365e-05,
|
|
"loss": 1.0859,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 1.2838874680306906,
|
|
"grad_norm": 0.28466962730903933,
|
|
"learning_rate": 1.958009833339865e-05,
|
|
"loss": 1.0912,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 1.2864450127877238,
|
|
"grad_norm": 0.26890207030009217,
|
|
"learning_rate": 1.9576433189839807e-05,
|
|
"loss": 1.1088,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 1.289002557544757,
|
|
"grad_norm": 0.273263645379487,
|
|
"learning_rate": 1.957275246602293e-05,
|
|
"loss": 1.0837,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 1.2915601023017902,
|
|
"grad_norm": 0.2716148540613851,
|
|
"learning_rate": 1.9569056167936332e-05,
|
|
"loss": 1.105,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 1.2941176470588236,
|
|
"grad_norm": 0.24370260465489227,
|
|
"learning_rate": 1.956534430159365e-05,
|
|
"loss": 1.0726,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 1.2966751918158568,
|
|
"grad_norm": 0.2620730046771573,
|
|
"learning_rate": 1.9561616873033867e-05,
|
|
"loss": 1.1079,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 1.29923273657289,
|
|
"grad_norm": 0.3135544306790673,
|
|
"learning_rate": 1.955787388832127e-05,
|
|
"loss": 1.0697,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 1.3017902813299234,
|
|
"grad_norm": 0.26135639483849105,
|
|
"learning_rate": 1.9554115353545464e-05,
|
|
"loss": 1.1016,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 1.3043478260869565,
|
|
"grad_norm": 0.25771344651987327,
|
|
"learning_rate": 1.9550341274821348e-05,
|
|
"loss": 1.0727,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 1.3069053708439897,
|
|
"grad_norm": 0.3167223084832456,
|
|
"learning_rate": 1.9546551658289113e-05,
|
|
"loss": 1.0792,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 1.309462915601023,
|
|
"grad_norm": 0.37857845074967256,
|
|
"learning_rate": 1.954274651011423e-05,
|
|
"loss": 1.1143,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 1.3120204603580563,
|
|
"grad_norm": 0.2580494189739856,
|
|
"learning_rate": 1.9538925836487436e-05,
|
|
"loss": 1.0674,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 1.3145780051150895,
|
|
"grad_norm": 0.39297270925108346,
|
|
"learning_rate": 1.953508964362473e-05,
|
|
"loss": 1.0885,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 1.317135549872123,
|
|
"grad_norm": 0.4568937813346712,
|
|
"learning_rate": 1.9531237937767352e-05,
|
|
"loss": 1.0807,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 1.319693094629156,
|
|
"grad_norm": 0.4182414922758871,
|
|
"learning_rate": 1.9527370725181793e-05,
|
|
"loss": 1.0766,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 1.3222506393861893,
|
|
"grad_norm": 0.4402863172879326,
|
|
"learning_rate": 1.9523488012159762e-05,
|
|
"loss": 1.0712,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 1.3248081841432224,
|
|
"grad_norm": 0.3810424193074309,
|
|
"learning_rate": 1.9519589805018187e-05,
|
|
"loss": 1.0888,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 1.3273657289002558,
|
|
"grad_norm": 0.4051938816832732,
|
|
"learning_rate": 1.951567611009922e-05,
|
|
"loss": 1.0801,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 1.329923273657289,
|
|
"grad_norm": 0.3260440045944625,
|
|
"learning_rate": 1.9511746933770186e-05,
|
|
"loss": 1.1149,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.3324808184143222,
|
|
"grad_norm": 0.31554258651135036,
|
|
"learning_rate": 1.9507802282423612e-05,
|
|
"loss": 1.1202,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.3350383631713556,
|
|
"grad_norm": 0.2622342243824476,
|
|
"learning_rate": 1.9503842162477205e-05,
|
|
"loss": 1.1006,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.3375959079283888,
|
|
"grad_norm": 0.3015423536266443,
|
|
"learning_rate": 1.9499866580373826e-05,
|
|
"loss": 1.0873,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.340153452685422,
|
|
"grad_norm": 0.3920165036339574,
|
|
"learning_rate": 1.94958755425815e-05,
|
|
"loss": 1.1154,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.3427109974424551,
|
|
"grad_norm": 0.2769409471650046,
|
|
"learning_rate": 1.9491869055593392e-05,
|
|
"loss": 1.0867,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.3452685421994885,
|
|
"grad_norm": 0.30161940340621723,
|
|
"learning_rate": 1.9487847125927814e-05,
|
|
"loss": 1.1126,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.3478260869565217,
|
|
"grad_norm": 0.41990580701086677,
|
|
"learning_rate": 1.948380976012819e-05,
|
|
"loss": 1.0625,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.350383631713555,
|
|
"grad_norm": 0.3940286196901995,
|
|
"learning_rate": 1.9479756964763062e-05,
|
|
"loss": 1.1262,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.3529411764705883,
|
|
"grad_norm": 0.3683443524857737,
|
|
"learning_rate": 1.9475688746426075e-05,
|
|
"loss": 1.0865,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.3554987212276215,
|
|
"grad_norm": 0.2675607272032647,
|
|
"learning_rate": 1.9471605111735964e-05,
|
|
"loss": 1.0594,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.3580562659846547,
|
|
"grad_norm": 0.30194225210114733,
|
|
"learning_rate": 1.9467506067336554e-05,
|
|
"loss": 1.0955,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.3606138107416879,
|
|
"grad_norm": 0.32576735510414695,
|
|
"learning_rate": 1.946339161989672e-05,
|
|
"loss": 1.0824,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.3631713554987213,
|
|
"grad_norm": 0.3598150497292756,
|
|
"learning_rate": 1.9459261776110426e-05,
|
|
"loss": 1.1215,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.3657289002557544,
|
|
"grad_norm": 0.30585802865605916,
|
|
"learning_rate": 1.945511654269666e-05,
|
|
"loss": 1.086,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.3682864450127878,
|
|
"grad_norm": 0.2832294529242309,
|
|
"learning_rate": 1.945095592639946e-05,
|
|
"loss": 1.0992,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.370843989769821,
|
|
"grad_norm": 0.29056128095513195,
|
|
"learning_rate": 1.944677993398789e-05,
|
|
"loss": 1.1311,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.3734015345268542,
|
|
"grad_norm": 0.2598885076655647,
|
|
"learning_rate": 1.944258857225603e-05,
|
|
"loss": 1.0869,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.3759590792838874,
|
|
"grad_norm": 0.29819735030908995,
|
|
"learning_rate": 1.943838184802296e-05,
|
|
"loss": 1.1034,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.3785166240409208,
|
|
"grad_norm": 0.27354562935410204,
|
|
"learning_rate": 1.9434159768132762e-05,
|
|
"loss": 1.0834,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.381074168797954,
|
|
"grad_norm": 0.3164865864885613,
|
|
"learning_rate": 1.9429922339454486e-05,
|
|
"loss": 1.0952,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.3836317135549872,
|
|
"grad_norm": 0.34458030079305596,
|
|
"learning_rate": 1.9425669568882175e-05,
|
|
"loss": 1.1195,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.3861892583120206,
|
|
"grad_norm": 0.2973996932273863,
|
|
"learning_rate": 1.942140146333481e-05,
|
|
"loss": 1.1082,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.3887468030690537,
|
|
"grad_norm": 0.41583952226086746,
|
|
"learning_rate": 1.9417118029756342e-05,
|
|
"loss": 1.0664,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.391304347826087,
|
|
"grad_norm": 0.33101469656406096,
|
|
"learning_rate": 1.9412819275115648e-05,
|
|
"loss": 1.087,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.39386189258312,
|
|
"grad_norm": 0.2709972180594455,
|
|
"learning_rate": 1.9408505206406526e-05,
|
|
"loss": 1.078,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.3964194373401535,
|
|
"grad_norm": 0.3358832525629651,
|
|
"learning_rate": 1.9404175830647703e-05,
|
|
"loss": 1.0549,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.3989769820971867,
|
|
"grad_norm": 0.2987798463061033,
|
|
"learning_rate": 1.93998311548828e-05,
|
|
"loss": 1.0946,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.40153452685422,
|
|
"grad_norm": 0.3337061384486843,
|
|
"learning_rate": 1.939547118618033e-05,
|
|
"loss": 1.0898,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.4040920716112533,
|
|
"grad_norm": 0.3217064113312768,
|
|
"learning_rate": 1.9391095931633694e-05,
|
|
"loss": 1.1098,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.4066496163682864,
|
|
"grad_norm": 0.2752108304141071,
|
|
"learning_rate": 1.9386705398361156e-05,
|
|
"loss": 1.0469,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.4092071611253196,
|
|
"grad_norm": 0.25580623137423647,
|
|
"learning_rate": 1.938229959350584e-05,
|
|
"loss": 1.0616,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.4117647058823528,
|
|
"grad_norm": 0.3326332518112022,
|
|
"learning_rate": 1.937787852423571e-05,
|
|
"loss": 1.1083,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.4143222506393862,
|
|
"grad_norm": 0.28662569595039195,
|
|
"learning_rate": 1.937344219774358e-05,
|
|
"loss": 1.0908,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.4168797953964194,
|
|
"grad_norm": 0.27173135593182157,
|
|
"learning_rate": 1.9368990621247062e-05,
|
|
"loss": 1.102,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.4194373401534528,
|
|
"grad_norm": 0.2468084134139675,
|
|
"learning_rate": 1.9364523801988606e-05,
|
|
"loss": 1.1147,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.421994884910486,
|
|
"grad_norm": 0.2709546209917836,
|
|
"learning_rate": 1.9360041747235437e-05,
|
|
"loss": 1.0962,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.4245524296675192,
|
|
"grad_norm": 0.2653203619472685,
|
|
"learning_rate": 1.9355544464279587e-05,
|
|
"loss": 1.0864,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.4271099744245523,
|
|
"grad_norm": 0.28467968268797966,
|
|
"learning_rate": 1.9351031960437848e-05,
|
|
"loss": 1.0747,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.4296675191815857,
|
|
"grad_norm": 0.31847968792917525,
|
|
"learning_rate": 1.934650424305178e-05,
|
|
"loss": 1.0731,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.432225063938619,
|
|
"grad_norm": 0.3091639351747145,
|
|
"learning_rate": 1.9341961319487704e-05,
|
|
"loss": 1.0598,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.434782608695652,
|
|
"grad_norm": 0.26120102379692217,
|
|
"learning_rate": 1.9337403197136663e-05,
|
|
"loss": 1.0712,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.4373401534526855,
|
|
"grad_norm": 0.283165316308832,
|
|
"learning_rate": 1.9332829883414444e-05,
|
|
"loss": 1.0883,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.4398976982097187,
|
|
"grad_norm": 0.2767794060421261,
|
|
"learning_rate": 1.932824138576154e-05,
|
|
"loss": 1.1141,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.4424552429667519,
|
|
"grad_norm": 0.3027787955580307,
|
|
"learning_rate": 1.9323637711643147e-05,
|
|
"loss": 1.1109,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.445012787723785,
|
|
"grad_norm": 0.32071961002527666,
|
|
"learning_rate": 1.9319018868549165e-05,
|
|
"loss": 1.1192,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.4475703324808185,
|
|
"grad_norm": 0.33467873672280385,
|
|
"learning_rate": 1.931438486399415e-05,
|
|
"loss": 1.0817,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.4501278772378516,
|
|
"grad_norm": 0.30569240173237483,
|
|
"learning_rate": 1.930973570551735e-05,
|
|
"loss": 1.0607,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.452685421994885,
|
|
"grad_norm": 0.298726423982734,
|
|
"learning_rate": 1.9305071400682644e-05,
|
|
"loss": 1.0914,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.4552429667519182,
|
|
"grad_norm": 0.3038529339878212,
|
|
"learning_rate": 1.9300391957078564e-05,
|
|
"loss": 1.0834,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.4578005115089514,
|
|
"grad_norm": 0.30563450154931243,
|
|
"learning_rate": 1.9295697382318286e-05,
|
|
"loss": 1.0733,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.4603580562659846,
|
|
"grad_norm": 0.3808106030288731,
|
|
"learning_rate": 1.9290987684039576e-05,
|
|
"loss": 1.0955,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.4629156010230178,
|
|
"grad_norm": 0.32964679230942334,
|
|
"learning_rate": 1.9286262869904827e-05,
|
|
"loss": 1.0977,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.4654731457800512,
|
|
"grad_norm": 0.3576744350781661,
|
|
"learning_rate": 1.928152294760101e-05,
|
|
"loss": 1.0826,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.4680306905370843,
|
|
"grad_norm": 0.3442477800849191,
|
|
"learning_rate": 1.9276767924839687e-05,
|
|
"loss": 1.0693,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 0.4177409226360097,
|
|
"learning_rate": 1.927199780935698e-05,
|
|
"loss": 1.1031,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.473145780051151,
|
|
"grad_norm": 0.5022744214347684,
|
|
"learning_rate": 1.926721260891357e-05,
|
|
"loss": 1.1081,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.4757033248081841,
|
|
"grad_norm": 0.5089458782552098,
|
|
"learning_rate": 1.9262412331294677e-05,
|
|
"loss": 1.0984,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.4782608695652173,
|
|
"grad_norm": 0.28913442828013464,
|
|
"learning_rate": 1.9257596984310055e-05,
|
|
"loss": 1.0907,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.4808184143222507,
|
|
"grad_norm": 0.36385701502207274,
|
|
"learning_rate": 1.925276657579397e-05,
|
|
"loss": 1.0667,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.4833759590792839,
|
|
"grad_norm": 0.39854637256040343,
|
|
"learning_rate": 1.9247921113605197e-05,
|
|
"loss": 1.0814,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.485933503836317,
|
|
"grad_norm": 0.3421920326108303,
|
|
"learning_rate": 1.9243060605626995e-05,
|
|
"loss": 1.0984,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.4884910485933505,
|
|
"grad_norm": 0.2806970145004491,
|
|
"learning_rate": 1.9238185059767116e-05,
|
|
"loss": 1.0903,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.4910485933503836,
|
|
"grad_norm": 0.458875989536999,
|
|
"learning_rate": 1.9233294483957758e-05,
|
|
"loss": 1.1135,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.4936061381074168,
|
|
"grad_norm": 0.5204446417118193,
|
|
"learning_rate": 1.922838888615559e-05,
|
|
"loss": 1.1228,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.49616368286445,
|
|
"grad_norm": 0.4574878580551403,
|
|
"learning_rate": 1.922346827434171e-05,
|
|
"loss": 1.0595,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.4987212276214834,
|
|
"grad_norm": 0.26814443608722427,
|
|
"learning_rate": 1.921853265652164e-05,
|
|
"loss": 1.0742,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.5012787723785166,
|
|
"grad_norm": 0.4321843380909753,
|
|
"learning_rate": 1.9213582040725333e-05,
|
|
"loss": 1.0823,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.50383631713555,
|
|
"grad_norm": 0.3998584041466985,
|
|
"learning_rate": 1.9208616435007124e-05,
|
|
"loss": 1.1113,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.5063938618925832,
|
|
"grad_norm": 0.36340166424292447,
|
|
"learning_rate": 1.9203635847445743e-05,
|
|
"loss": 1.0495,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.5089514066496164,
|
|
"grad_norm": 0.30341924814307153,
|
|
"learning_rate": 1.9198640286144296e-05,
|
|
"loss": 1.0778,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.5115089514066495,
|
|
"grad_norm": 0.3549252043532506,
|
|
"learning_rate": 1.9193629759230252e-05,
|
|
"loss": 1.0526,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.5140664961636827,
|
|
"grad_norm": 0.3706707482911529,
|
|
"learning_rate": 1.9188604274855417e-05,
|
|
"loss": 1.1082,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.5166240409207161,
|
|
"grad_norm": 0.3221161365565599,
|
|
"learning_rate": 1.9183563841195948e-05,
|
|
"loss": 1.0358,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.5191815856777495,
|
|
"grad_norm": 0.35561020647213454,
|
|
"learning_rate": 1.917850846645231e-05,
|
|
"loss": 1.1016,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.5217391304347827,
|
|
"grad_norm": 0.3891453948051964,
|
|
"learning_rate": 1.917343815884929e-05,
|
|
"loss": 1.0723,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.5242966751918159,
|
|
"grad_norm": 0.293218650160261,
|
|
"learning_rate": 1.9168352926635948e-05,
|
|
"loss": 1.0842,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.526854219948849,
|
|
"grad_norm": 0.331624086856979,
|
|
"learning_rate": 1.9163252778085646e-05,
|
|
"loss": 1.0928,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.5294117647058822,
|
|
"grad_norm": 0.36005628746389595,
|
|
"learning_rate": 1.9158137721496014e-05,
|
|
"loss": 1.0954,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.5319693094629157,
|
|
"grad_norm": 0.25854576697363735,
|
|
"learning_rate": 1.9153007765188918e-05,
|
|
"loss": 1.0703,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.5345268542199488,
|
|
"grad_norm": 0.3178892680337157,
|
|
"learning_rate": 1.914786291751048e-05,
|
|
"loss": 1.1178,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.5370843989769822,
|
|
"grad_norm": 0.3276728285320476,
|
|
"learning_rate": 1.9142703186831044e-05,
|
|
"loss": 1.0711,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.5396419437340154,
|
|
"grad_norm": 0.34402306746609335,
|
|
"learning_rate": 1.9137528581545172e-05,
|
|
"loss": 1.0669,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.5421994884910486,
|
|
"grad_norm": 0.3658697294408855,
|
|
"learning_rate": 1.9132339110071623e-05,
|
|
"loss": 1.0738,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.5447570332480818,
|
|
"grad_norm": 0.33272997926321957,
|
|
"learning_rate": 1.9127134780853343e-05,
|
|
"loss": 1.0891,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.547314578005115,
|
|
"grad_norm": 0.26256059097959605,
|
|
"learning_rate": 1.9121915602357447e-05,
|
|
"loss": 1.0752,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.5498721227621484,
|
|
"grad_norm": 0.29698212652722755,
|
|
"learning_rate": 1.9116681583075215e-05,
|
|
"loss": 1.0531,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.5524296675191815,
|
|
"grad_norm": 0.3308461220455405,
|
|
"learning_rate": 1.9111432731522067e-05,
|
|
"loss": 1.0775,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.554987212276215,
|
|
"grad_norm": 0.28434303668023103,
|
|
"learning_rate": 1.910616905623756e-05,
|
|
"loss": 1.0989,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.5575447570332481,
|
|
"grad_norm": 0.2949610693246568,
|
|
"learning_rate": 1.910089056578536e-05,
|
|
"loss": 1.0942,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.5601023017902813,
|
|
"grad_norm": 0.26028511630293355,
|
|
"learning_rate": 1.9095597268753243e-05,
|
|
"loss": 1.0639,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.5626598465473145,
|
|
"grad_norm": 0.2736816450940113,
|
|
"learning_rate": 1.9090289173753077e-05,
|
|
"loss": 1.1013,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.5652173913043477,
|
|
"grad_norm": 0.24169212652369965,
|
|
"learning_rate": 1.908496628942079e-05,
|
|
"loss": 1.0904,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.567774936061381,
|
|
"grad_norm": 0.2790060046832418,
|
|
"learning_rate": 1.907962862441639e-05,
|
|
"loss": 1.0789,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.5703324808184145,
|
|
"grad_norm": 0.25148763709880523,
|
|
"learning_rate": 1.9074276187423925e-05,
|
|
"loss": 1.083,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.5728900255754477,
|
|
"grad_norm": 0.260089635225582,
|
|
"learning_rate": 1.906890898715147e-05,
|
|
"loss": 1.1052,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.5754475703324808,
|
|
"grad_norm": 0.24239290344853867,
|
|
"learning_rate": 1.9063527032331128e-05,
|
|
"loss": 1.0587,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.578005115089514,
|
|
"grad_norm": 0.31033949728422483,
|
|
"learning_rate": 1.9058130331719002e-05,
|
|
"loss": 1.0906,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.5805626598465472,
|
|
"grad_norm": 0.29694640873919886,
|
|
"learning_rate": 1.9052718894095183e-05,
|
|
"loss": 1.0828,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.5831202046035806,
|
|
"grad_norm": 0.268458744450183,
|
|
"learning_rate": 1.904729272826375e-05,
|
|
"loss": 1.0697,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.5856777493606138,
|
|
"grad_norm": 0.3328538025026265,
|
|
"learning_rate": 1.9041851843052727e-05,
|
|
"loss": 1.0556,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.5882352941176472,
|
|
"grad_norm": 0.4354576423430095,
|
|
"learning_rate": 1.90363962473141e-05,
|
|
"loss": 1.0888,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 1.5907928388746804,
|
|
"grad_norm": 0.4488970201166202,
|
|
"learning_rate": 1.9030925949923777e-05,
|
|
"loss": 1.0991,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 1.5933503836317136,
|
|
"grad_norm": 0.30850235477610843,
|
|
"learning_rate": 1.9025440959781593e-05,
|
|
"loss": 1.0721,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 1.5959079283887467,
|
|
"grad_norm": 0.24306011770668454,
|
|
"learning_rate": 1.9019941285811284e-05,
|
|
"loss": 1.1146,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 1.59846547314578,
|
|
"grad_norm": 0.31927732953474425,
|
|
"learning_rate": 1.9014426936960477e-05,
|
|
"loss": 1.1386,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 1.6010230179028133,
|
|
"grad_norm": 0.30395309199867215,
|
|
"learning_rate": 1.900889792220067e-05,
|
|
"loss": 1.0651,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 1.6035805626598465,
|
|
"grad_norm": 0.2641664347228699,
|
|
"learning_rate": 1.9003354250527225e-05,
|
|
"loss": 1.0737,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 1.60613810741688,
|
|
"grad_norm": 0.2541673904415416,
|
|
"learning_rate": 1.899779593095935e-05,
|
|
"loss": 1.1093,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 1.608695652173913,
|
|
"grad_norm": 0.248114384702292,
|
|
"learning_rate": 1.8992222972540083e-05,
|
|
"loss": 1.0631,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 1.6112531969309463,
|
|
"grad_norm": 0.27098670487834897,
|
|
"learning_rate": 1.8986635384336275e-05,
|
|
"loss": 1.0684,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.6138107416879794,
|
|
"grad_norm": 0.2707047290641469,
|
|
"learning_rate": 1.8981033175438593e-05,
|
|
"loss": 1.0793,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 1.6163682864450126,
|
|
"grad_norm": 0.2248022175811438,
|
|
"learning_rate": 1.897541635496147e-05,
|
|
"loss": 1.0741,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 1.618925831202046,
|
|
"grad_norm": 0.33046089699268805,
|
|
"learning_rate": 1.896978493204313e-05,
|
|
"loss": 1.0536,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 1.6214833759590794,
|
|
"grad_norm": 0.2897890506100947,
|
|
"learning_rate": 1.896413891584554e-05,
|
|
"loss": 1.1041,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 1.6240409207161126,
|
|
"grad_norm": 0.24423929651462964,
|
|
"learning_rate": 1.8958478315554414e-05,
|
|
"loss": 1.0554,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 1.6265984654731458,
|
|
"grad_norm": 0.2824637389915044,
|
|
"learning_rate": 1.8952803140379198e-05,
|
|
"loss": 1.105,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 1.629156010230179,
|
|
"grad_norm": 0.34172319194434536,
|
|
"learning_rate": 1.894711339955305e-05,
|
|
"loss": 1.0966,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 1.6317135549872122,
|
|
"grad_norm": 0.2986624598202099,
|
|
"learning_rate": 1.8941409102332818e-05,
|
|
"loss": 1.0801,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 1.6342710997442456,
|
|
"grad_norm": 0.35330551163337126,
|
|
"learning_rate": 1.893569025799904e-05,
|
|
"loss": 1.1168,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 1.6368286445012787,
|
|
"grad_norm": 0.37997527154753075,
|
|
"learning_rate": 1.8929956875855913e-05,
|
|
"loss": 1.044,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 1.6393861892583121,
|
|
"grad_norm": 0.3987670557181093,
|
|
"learning_rate": 1.89242089652313e-05,
|
|
"loss": 1.0678,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 1.6419437340153453,
|
|
"grad_norm": 0.4164983853962145,
|
|
"learning_rate": 1.8918446535476683e-05,
|
|
"loss": 1.0713,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 1.6445012787723785,
|
|
"grad_norm": 0.36634278907361967,
|
|
"learning_rate": 1.8912669595967182e-05,
|
|
"loss": 1.0845,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 1.6470588235294117,
|
|
"grad_norm": 0.3377854105852521,
|
|
"learning_rate": 1.890687815610151e-05,
|
|
"loss": 1.1325,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 1.6496163682864449,
|
|
"grad_norm": 0.2921364211079459,
|
|
"learning_rate": 1.8901072225301983e-05,
|
|
"loss": 1.0417,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 1.6521739130434783,
|
|
"grad_norm": 0.40803324585389733,
|
|
"learning_rate": 1.8895251813014486e-05,
|
|
"loss": 1.0985,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 1.6547314578005117,
|
|
"grad_norm": 0.4777584379650545,
|
|
"learning_rate": 1.8889416928708465e-05,
|
|
"loss": 1.0579,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 1.6572890025575449,
|
|
"grad_norm": 0.4575863335013247,
|
|
"learning_rate": 1.8883567581876913e-05,
|
|
"loss": 1.075,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 1.659846547314578,
|
|
"grad_norm": 0.44868767506108537,
|
|
"learning_rate": 1.887770378203635e-05,
|
|
"loss": 1.082,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 1.6624040920716112,
|
|
"grad_norm": 0.3990360823870846,
|
|
"learning_rate": 1.8871825538726815e-05,
|
|
"loss": 1.0618,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 1.6649616368286444,
|
|
"grad_norm": 0.384455268117493,
|
|
"learning_rate": 1.8865932861511836e-05,
|
|
"loss": 1.0883,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 1.6675191815856778,
|
|
"grad_norm": 0.4308655650983798,
|
|
"learning_rate": 1.8860025759978436e-05,
|
|
"loss": 1.1136,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 1.670076726342711,
|
|
"grad_norm": 0.5161027640726775,
|
|
"learning_rate": 1.8854104243737096e-05,
|
|
"loss": 1.0876,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 1.6726342710997444,
|
|
"grad_norm": 0.5710337903727111,
|
|
"learning_rate": 1.8848168322421756e-05,
|
|
"loss": 1.0921,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 1.6751918158567776,
|
|
"grad_norm": 0.4680011964164238,
|
|
"learning_rate": 1.884221800568979e-05,
|
|
"loss": 1.0817,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 1.6777493606138107,
|
|
"grad_norm": 0.273509418810932,
|
|
"learning_rate": 1.8836253303221985e-05,
|
|
"loss": 1.0676,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 1.680306905370844,
|
|
"grad_norm": 0.36238937602325755,
|
|
"learning_rate": 1.8830274224722544e-05,
|
|
"loss": 1.0694,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 1.682864450127877,
|
|
"grad_norm": 0.4331370312585361,
|
|
"learning_rate": 1.8824280779919055e-05,
|
|
"loss": 1.0939,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 1.6854219948849105,
|
|
"grad_norm": 0.42161084086226236,
|
|
"learning_rate": 1.8818272978562472e-05,
|
|
"loss": 1.0949,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 1.6879795396419437,
|
|
"grad_norm": 0.42114600096809945,
|
|
"learning_rate": 1.8812250830427116e-05,
|
|
"loss": 1.1071,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 1.690537084398977,
|
|
"grad_norm": 0.2580305989521523,
|
|
"learning_rate": 1.8806214345310648e-05,
|
|
"loss": 1.0884,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 1.6930946291560103,
|
|
"grad_norm": 0.2790098578226022,
|
|
"learning_rate": 1.8800163533034048e-05,
|
|
"loss": 1.0786,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 1.6956521739130435,
|
|
"grad_norm": 0.3952483114126335,
|
|
"learning_rate": 1.879409840344161e-05,
|
|
"loss": 1.1025,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 1.6982097186700766,
|
|
"grad_norm": 0.34837002184241345,
|
|
"learning_rate": 1.8788018966400923e-05,
|
|
"loss": 1.0862,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 1.7007672634271098,
|
|
"grad_norm": 0.23347425632455518,
|
|
"learning_rate": 1.878192523180285e-05,
|
|
"loss": 1.0903,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 1.7033248081841432,
|
|
"grad_norm": 0.258084870513599,
|
|
"learning_rate": 1.877581720956151e-05,
|
|
"loss": 1.0659,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 1.7058823529411766,
|
|
"grad_norm": 0.2955310030807304,
|
|
"learning_rate": 1.876969490961428e-05,
|
|
"loss": 1.0803,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 1.7084398976982098,
|
|
"grad_norm": 0.34485101895191056,
|
|
"learning_rate": 1.8763558341921762e-05,
|
|
"loss": 1.0729,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 1.710997442455243,
|
|
"grad_norm": 0.25932977011662367,
|
|
"learning_rate": 1.8757407516467762e-05,
|
|
"loss": 1.1017,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 1.7135549872122762,
|
|
"grad_norm": 0.23771298856204617,
|
|
"learning_rate": 1.8751242443259286e-05,
|
|
"loss": 1.0771,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 1.7161125319693094,
|
|
"grad_norm": 0.3403000739473665,
|
|
"learning_rate": 1.874506313232653e-05,
|
|
"loss": 1.0972,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 1.7186700767263428,
|
|
"grad_norm": 0.36624614786635146,
|
|
"learning_rate": 1.873886959372284e-05,
|
|
"loss": 1.0948,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 1.721227621483376,
|
|
"grad_norm": 0.23241780598609607,
|
|
"learning_rate": 1.8732661837524722e-05,
|
|
"loss": 1.0726,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 1.7237851662404093,
|
|
"grad_norm": 0.27573330219222747,
|
|
"learning_rate": 1.8726439873831803e-05,
|
|
"loss": 1.1154,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 1.7263427109974425,
|
|
"grad_norm": 0.3289571952505283,
|
|
"learning_rate": 1.8720203712766833e-05,
|
|
"loss": 1.0855,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 1.7289002557544757,
|
|
"grad_norm": 0.26315983835648826,
|
|
"learning_rate": 1.8713953364475654e-05,
|
|
"loss": 1.0561,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 1.7314578005115089,
|
|
"grad_norm": 0.2933737539222408,
|
|
"learning_rate": 1.8707688839127187e-05,
|
|
"loss": 1.0717,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 1.734015345268542,
|
|
"grad_norm": 0.24075336640916348,
|
|
"learning_rate": 1.8701410146913427e-05,
|
|
"loss": 1.0733,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 1.7365728900255755,
|
|
"grad_norm": 0.2969635924636881,
|
|
"learning_rate": 1.869511729804942e-05,
|
|
"loss": 1.0736,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 1.7391304347826086,
|
|
"grad_norm": 0.2302120367596696,
|
|
"learning_rate": 1.8688810302773225e-05,
|
|
"loss": 1.0718,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.741687979539642,
|
|
"grad_norm": 0.31123990252305606,
|
|
"learning_rate": 1.8682489171345942e-05,
|
|
"loss": 1.0633,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.7442455242966752,
|
|
"grad_norm": 0.25671775642481637,
|
|
"learning_rate": 1.8676153914051648e-05,
|
|
"loss": 1.1055,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 1.7468030690537084,
|
|
"grad_norm": 0.2731165902037635,
|
|
"learning_rate": 1.866980454119741e-05,
|
|
"loss": 1.1019,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 1.7493606138107416,
|
|
"grad_norm": 0.29946202186623655,
|
|
"learning_rate": 1.8663441063113266e-05,
|
|
"loss": 1.0856,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 1.7519181585677748,
|
|
"grad_norm": 0.2743108383298565,
|
|
"learning_rate": 1.8657063490152193e-05,
|
|
"loss": 1.0797,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 1.7544757033248082,
|
|
"grad_norm": 0.2910690805954212,
|
|
"learning_rate": 1.8650671832690106e-05,
|
|
"loss": 1.1068,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 1.7570332480818416,
|
|
"grad_norm": 0.25617556691443527,
|
|
"learning_rate": 1.864426610112583e-05,
|
|
"loss": 1.0801,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 1.7595907928388748,
|
|
"grad_norm": 0.2446643852273966,
|
|
"learning_rate": 1.8637846305881092e-05,
|
|
"loss": 1.0712,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 1.762148337595908,
|
|
"grad_norm": 0.24853300895824507,
|
|
"learning_rate": 1.8631412457400494e-05,
|
|
"loss": 1.0518,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 0.2250526521940477,
|
|
"learning_rate": 1.862496456615151e-05,
|
|
"loss": 1.0802,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 1.7672634271099743,
|
|
"grad_norm": 0.23033386861703295,
|
|
"learning_rate": 1.861850264262445e-05,
|
|
"loss": 1.0921,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 1.7698209718670077,
|
|
"grad_norm": 0.22393185289398734,
|
|
"learning_rate": 1.8612026697332466e-05,
|
|
"loss": 1.0824,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 1.772378516624041,
|
|
"grad_norm": 0.24371247518659098,
|
|
"learning_rate": 1.860553674081151e-05,
|
|
"loss": 1.0958,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 1.7749360613810743,
|
|
"grad_norm": 0.21684995978781324,
|
|
"learning_rate": 1.859903278362034e-05,
|
|
"loss": 1.0511,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 1.7774936061381075,
|
|
"grad_norm": 0.24359803588661344,
|
|
"learning_rate": 1.8592514836340485e-05,
|
|
"loss": 1.064,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 1.7800511508951407,
|
|
"grad_norm": 0.2806613621237684,
|
|
"learning_rate": 1.8585982909576243e-05,
|
|
"loss": 1.0974,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 1.7826086956521738,
|
|
"grad_norm": 0.2951317541501585,
|
|
"learning_rate": 1.857943701395464e-05,
|
|
"loss": 1.0745,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 1.785166240409207,
|
|
"grad_norm": 0.2602691127905397,
|
|
"learning_rate": 1.857287716012545e-05,
|
|
"loss": 1.094,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 1.7877237851662404,
|
|
"grad_norm": 0.2878865850607815,
|
|
"learning_rate": 1.8566303358761134e-05,
|
|
"loss": 1.0764,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 1.7902813299232738,
|
|
"grad_norm": 0.25826524614522556,
|
|
"learning_rate": 1.8559715620556865e-05,
|
|
"loss": 1.095,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 1.792838874680307,
|
|
"grad_norm": 0.3113734244197743,
|
|
"learning_rate": 1.855311395623048e-05,
|
|
"loss": 1.0636,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 1.7953964194373402,
|
|
"grad_norm": 0.32545837268145317,
|
|
"learning_rate": 1.854649837652247e-05,
|
|
"loss": 1.0836,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 1.7979539641943734,
|
|
"grad_norm": 0.285984682125429,
|
|
"learning_rate": 1.8539868892195972e-05,
|
|
"loss": 1.0848,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 1.8005115089514065,
|
|
"grad_norm": 0.27758608852953665,
|
|
"learning_rate": 1.8533225514036742e-05,
|
|
"loss": 1.0663,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 1.80306905370844,
|
|
"grad_norm": 0.27148772448252917,
|
|
"learning_rate": 1.852656825285314e-05,
|
|
"loss": 1.094,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 1.8056265984654731,
|
|
"grad_norm": 0.30810009717755804,
|
|
"learning_rate": 1.8519897119476115e-05,
|
|
"loss": 1.0455,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 1.8081841432225065,
|
|
"grad_norm": 0.2763175632842481,
|
|
"learning_rate": 1.8513212124759185e-05,
|
|
"loss": 1.0525,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 1.8107416879795397,
|
|
"grad_norm": 0.2555077301269018,
|
|
"learning_rate": 1.8506513279578415e-05,
|
|
"loss": 1.0708,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 1.813299232736573,
|
|
"grad_norm": 0.2861828394638753,
|
|
"learning_rate": 1.849980059483241e-05,
|
|
"loss": 1.0269,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 1.815856777493606,
|
|
"grad_norm": 0.32694363610851984,
|
|
"learning_rate": 1.849307408144229e-05,
|
|
"loss": 1.0742,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 1.8184143222506393,
|
|
"grad_norm": 0.33550420038638934,
|
|
"learning_rate": 1.8486333750351668e-05,
|
|
"loss": 1.1291,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 1.8209718670076727,
|
|
"grad_norm": 0.30494475043620173,
|
|
"learning_rate": 1.8479579612526642e-05,
|
|
"loss": 1.0754,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 1.8235294117647058,
|
|
"grad_norm": 0.2449819480488345,
|
|
"learning_rate": 1.8472811678955773e-05,
|
|
"loss": 1.083,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 1.8260869565217392,
|
|
"grad_norm": 0.26042670531487994,
|
|
"learning_rate": 1.8466029960650066e-05,
|
|
"loss": 1.0749,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 1.8286445012787724,
|
|
"grad_norm": 0.3057228350277353,
|
|
"learning_rate": 1.845923446864295e-05,
|
|
"loss": 1.0549,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 1.8312020460358056,
|
|
"grad_norm": 0.2500852141764497,
|
|
"learning_rate": 1.845242521399027e-05,
|
|
"loss": 1.0721,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 1.8337595907928388,
|
|
"grad_norm": 0.2675252870460311,
|
|
"learning_rate": 1.8445602207770254e-05,
|
|
"loss": 1.0449,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 1.836317135549872,
|
|
"grad_norm": 0.2836719734304398,
|
|
"learning_rate": 1.8438765461083504e-05,
|
|
"loss": 1.0905,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 1.8388746803069054,
|
|
"grad_norm": 0.34699165997108533,
|
|
"learning_rate": 1.843191498505299e-05,
|
|
"loss": 1.0901,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 1.8414322250639388,
|
|
"grad_norm": 0.2722070954863811,
|
|
"learning_rate": 1.8425050790823994e-05,
|
|
"loss": 1.0964,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 1.843989769820972,
|
|
"grad_norm": 0.258368289769939,
|
|
"learning_rate": 1.8418172889564145e-05,
|
|
"loss": 1.0962,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 1.8465473145780051,
|
|
"grad_norm": 0.25936143701246717,
|
|
"learning_rate": 1.8411281292463345e-05,
|
|
"loss": 1.0545,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 1.8491048593350383,
|
|
"grad_norm": 0.3060957581043503,
|
|
"learning_rate": 1.8404376010733802e-05,
|
|
"loss": 1.0815,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 1.8516624040920715,
|
|
"grad_norm": 0.2815365945528782,
|
|
"learning_rate": 1.8397457055609973e-05,
|
|
"loss": 1.0759,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 1.854219948849105,
|
|
"grad_norm": 0.2745951540225352,
|
|
"learning_rate": 1.8390524438348565e-05,
|
|
"loss": 1.1021,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 1.856777493606138,
|
|
"grad_norm": 0.27846031555437806,
|
|
"learning_rate": 1.8383578170228514e-05,
|
|
"loss": 1.0248,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 1.8593350383631715,
|
|
"grad_norm": 0.2938959273434096,
|
|
"learning_rate": 1.8376618262550966e-05,
|
|
"loss": 1.0528,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 1.8618925831202047,
|
|
"grad_norm": 0.2993316558221603,
|
|
"learning_rate": 1.836964472663925e-05,
|
|
"loss": 1.058,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 1.8644501278772379,
|
|
"grad_norm": 0.28817201575308804,
|
|
"learning_rate": 1.8362657573838874e-05,
|
|
"loss": 1.1157,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 1.867007672634271,
|
|
"grad_norm": 0.22467467671098768,
|
|
"learning_rate": 1.8355656815517505e-05,
|
|
"loss": 1.0711,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 1.8695652173913042,
|
|
"grad_norm": 0.29149108866988305,
|
|
"learning_rate": 1.8348642463064937e-05,
|
|
"loss": 1.0414,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 1.8721227621483376,
|
|
"grad_norm": 0.39401431973372464,
|
|
"learning_rate": 1.8341614527893077e-05,
|
|
"loss": 1.0791,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 1.8746803069053708,
|
|
"grad_norm": 0.4335182479065654,
|
|
"learning_rate": 1.833457302143594e-05,
|
|
"loss": 1.0878,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 1.8772378516624042,
|
|
"grad_norm": 0.43497766670833005,
|
|
"learning_rate": 1.832751795514962e-05,
|
|
"loss": 1.0484,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 1.8797953964194374,
|
|
"grad_norm": 0.2997553952148685,
|
|
"learning_rate": 1.832044934051226e-05,
|
|
"loss": 1.0762,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 1.8823529411764706,
|
|
"grad_norm": 0.23441660095601177,
|
|
"learning_rate": 1.8313367189024065e-05,
|
|
"loss": 1.1082,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 1.8849104859335037,
|
|
"grad_norm": 0.23816717696848114,
|
|
"learning_rate": 1.8306271512207242e-05,
|
|
"loss": 1.0834,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 1.887468030690537,
|
|
"grad_norm": 0.29809886717421774,
|
|
"learning_rate": 1.829916232160602e-05,
|
|
"loss": 1.087,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 1.8900255754475703,
|
|
"grad_norm": 0.36580006827207345,
|
|
"learning_rate": 1.829203962878661e-05,
|
|
"loss": 1.0718,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 1.8925831202046037,
|
|
"grad_norm": 0.36472500474679165,
|
|
"learning_rate": 1.8284903445337184e-05,
|
|
"loss": 1.0435,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 1.895140664961637,
|
|
"grad_norm": 0.2569898458683152,
|
|
"learning_rate": 1.8277753782867865e-05,
|
|
"loss": 1.0569,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 1.89769820971867,
|
|
"grad_norm": 0.2807015519670205,
|
|
"learning_rate": 1.8270590653010706e-05,
|
|
"loss": 1.0623,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 1.9002557544757033,
|
|
"grad_norm": 0.2706420270561887,
|
|
"learning_rate": 1.8263414067419676e-05,
|
|
"loss": 1.101,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 1.9028132992327365,
|
|
"grad_norm": 0.28562929161394046,
|
|
"learning_rate": 1.8256224037770628e-05,
|
|
"loss": 1.0524,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 1.9053708439897699,
|
|
"grad_norm": 0.2774733347803849,
|
|
"learning_rate": 1.824902057576129e-05,
|
|
"loss": 1.0511,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 1.907928388746803,
|
|
"grad_norm": 0.22198709105225659,
|
|
"learning_rate": 1.8241803693111245e-05,
|
|
"loss": 1.075,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 1.9104859335038364,
|
|
"grad_norm": 0.287788512970941,
|
|
"learning_rate": 1.8234573401561914e-05,
|
|
"loss": 1.0665,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 1.9130434782608696,
|
|
"grad_norm": 0.2909301551397291,
|
|
"learning_rate": 1.8227329712876525e-05,
|
|
"loss": 1.0802,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 1.9156010230179028,
|
|
"grad_norm": 0.25392349276614573,
|
|
"learning_rate": 1.8220072638840105e-05,
|
|
"loss": 1.1035,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 1.918158567774936,
|
|
"grad_norm": 0.22821936416155694,
|
|
"learning_rate": 1.8212802191259465e-05,
|
|
"loss": 1.0571,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 1.9207161125319692,
|
|
"grad_norm": 0.3130516886250542,
|
|
"learning_rate": 1.8205518381963165e-05,
|
|
"loss": 1.1095,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 1.9232736572890026,
|
|
"grad_norm": 0.3857586516868388,
|
|
"learning_rate": 1.8198221222801506e-05,
|
|
"loss": 1.06,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 1.9258312020460358,
|
|
"grad_norm": 0.315792024279407,
|
|
"learning_rate": 1.8190910725646512e-05,
|
|
"loss": 1.0772,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 1.9283887468030692,
|
|
"grad_norm": 0.26686727973038904,
|
|
"learning_rate": 1.8183586902391905e-05,
|
|
"loss": 1.0708,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 1.9309462915601023,
|
|
"grad_norm": 0.3669775155609857,
|
|
"learning_rate": 1.8176249764953088e-05,
|
|
"loss": 1.0393,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 1.9335038363171355,
|
|
"grad_norm": 0.3411186812565117,
|
|
"learning_rate": 1.8168899325267122e-05,
|
|
"loss": 1.0777,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 1.9360613810741687,
|
|
"grad_norm": 0.29525106020949826,
|
|
"learning_rate": 1.8161535595292717e-05,
|
|
"loss": 1.0738,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 1.938618925831202,
|
|
"grad_norm": 0.2431416087312154,
|
|
"learning_rate": 1.8154158587010195e-05,
|
|
"loss": 1.0552,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 1.9411764705882353,
|
|
"grad_norm": 0.2528824918629993,
|
|
"learning_rate": 1.8146768312421495e-05,
|
|
"loss": 1.1049,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 1.9437340153452687,
|
|
"grad_norm": 0.27274199937217425,
|
|
"learning_rate": 1.8139364783550128e-05,
|
|
"loss": 1.11,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 1.9462915601023019,
|
|
"grad_norm": 0.27694326525936447,
|
|
"learning_rate": 1.813194801244117e-05,
|
|
"loss": 1.1085,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 1.948849104859335,
|
|
"grad_norm": 0.26284036778935943,
|
|
"learning_rate": 1.8124518011161246e-05,
|
|
"loss": 1.0817,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 1.9514066496163682,
|
|
"grad_norm": 0.34628694859076536,
|
|
"learning_rate": 1.8117074791798503e-05,
|
|
"loss": 1.0723,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 1.9539641943734014,
|
|
"grad_norm": 0.3205449398285809,
|
|
"learning_rate": 1.8109618366462597e-05,
|
|
"loss": 1.0878,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 1.9565217391304348,
|
|
"grad_norm": 0.2930907660937919,
|
|
"learning_rate": 1.8102148747284662e-05,
|
|
"loss": 1.0194,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 1.959079283887468,
|
|
"grad_norm": 0.3199378305398446,
|
|
"learning_rate": 1.8094665946417304e-05,
|
|
"loss": 1.0818,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 1.9616368286445014,
|
|
"grad_norm": 0.3147442131814513,
|
|
"learning_rate": 1.8087169976034568e-05,
|
|
"loss": 1.0524,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 1.9641943734015346,
|
|
"grad_norm": 0.29010540377698546,
|
|
"learning_rate": 1.807966084833193e-05,
|
|
"loss": 1.0804,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 1.9667519181585678,
|
|
"grad_norm": 0.2830375710975825,
|
|
"learning_rate": 1.8072138575526277e-05,
|
|
"loss": 1.0876,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 1.969309462915601,
|
|
"grad_norm": 0.29912181409924526,
|
|
"learning_rate": 1.806460316985587e-05,
|
|
"loss": 1.0674,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.9718670076726341,
|
|
"grad_norm": 0.280637494020639,
|
|
"learning_rate": 1.8057054643580347e-05,
|
|
"loss": 1.059,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.9744245524296675,
|
|
"grad_norm": 0.25437147169201857,
|
|
"learning_rate": 1.8049493008980685e-05,
|
|
"loss": 1.076,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.976982097186701,
|
|
"grad_norm": 0.260015840044801,
|
|
"learning_rate": 1.8041918278359194e-05,
|
|
"loss": 1.0884,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.979539641943734,
|
|
"grad_norm": 0.23338451398624144,
|
|
"learning_rate": 1.8034330464039485e-05,
|
|
"loss": 1.0564,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.9820971867007673,
|
|
"grad_norm": 0.27240262637273416,
|
|
"learning_rate": 1.8026729578366457e-05,
|
|
"loss": 1.0653,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.9846547314578005,
|
|
"grad_norm": 0.2658428330726454,
|
|
"learning_rate": 1.801911563370628e-05,
|
|
"loss": 1.0847,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.9872122762148337,
|
|
"grad_norm": 0.24259844645380865,
|
|
"learning_rate": 1.801148864244636e-05,
|
|
"loss": 1.0617,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.989769820971867,
|
|
"grad_norm": 0.274423591955145,
|
|
"learning_rate": 1.8003848616995333e-05,
|
|
"loss": 1.1046,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.9923273657289002,
|
|
"grad_norm": 0.270074412347766,
|
|
"learning_rate": 1.7996195569783053e-05,
|
|
"loss": 1.0841,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.9948849104859336,
|
|
"grad_norm": 0.32727342222060607,
|
|
"learning_rate": 1.798852951326054e-05,
|
|
"loss": 1.064,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.9974424552429668,
|
|
"grad_norm": 0.28041604224998723,
|
|
"learning_rate": 1.7980850459899997e-05,
|
|
"loss": 1.0748,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.230649257113214,
|
|
"learning_rate": 1.7973158422194754e-05,
|
|
"loss": 1.0504,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 2.002557544757033,
|
|
"grad_norm": 0.27721442928112094,
|
|
"learning_rate": 1.7965453412659284e-05,
|
|
"loss": 1.0561,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 2.0051150895140664,
|
|
"grad_norm": 0.3484629274944669,
|
|
"learning_rate": 1.795773544382915e-05,
|
|
"loss": 1.0484,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 2.0076726342710995,
|
|
"grad_norm": 0.35248757109292245,
|
|
"learning_rate": 1.795000452826101e-05,
|
|
"loss": 1.0494,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 2.010230179028133,
|
|
"grad_norm": 0.31602726514395096,
|
|
"learning_rate": 1.794226067853257e-05,
|
|
"loss": 1.1343,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 2.0127877237851663,
|
|
"grad_norm": 0.30632695925595954,
|
|
"learning_rate": 1.79345039072426e-05,
|
|
"loss": 1.0648,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 2.0153452685421995,
|
|
"grad_norm": 0.33328827891250323,
|
|
"learning_rate": 1.7926734227010876e-05,
|
|
"loss": 1.0801,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 2.0179028132992327,
|
|
"grad_norm": 0.35618373914463364,
|
|
"learning_rate": 1.7918951650478188e-05,
|
|
"loss": 1.0613,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 2.020460358056266,
|
|
"grad_norm": 0.3085542598082131,
|
|
"learning_rate": 1.7911156190306296e-05,
|
|
"loss": 1.0476,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 2.023017902813299,
|
|
"grad_norm": 0.22686489493321832,
|
|
"learning_rate": 1.7903347859177926e-05,
|
|
"loss": 1.0486,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 2.0255754475703327,
|
|
"grad_norm": 0.2750201664093288,
|
|
"learning_rate": 1.7895526669796747e-05,
|
|
"loss": 1.0543,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 2.028132992327366,
|
|
"grad_norm": 0.2998881689120612,
|
|
"learning_rate": 1.7887692634887345e-05,
|
|
"loss": 1.0434,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 2.030690537084399,
|
|
"grad_norm": 0.260904922673988,
|
|
"learning_rate": 1.7879845767195204e-05,
|
|
"loss": 1.0443,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 2.0332480818414322,
|
|
"grad_norm": 0.2465816351987358,
|
|
"learning_rate": 1.787198607948669e-05,
|
|
"loss": 1.0516,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 2.0358056265984654,
|
|
"grad_norm": 0.23239060808440448,
|
|
"learning_rate": 1.786411358454902e-05,
|
|
"loss": 1.0588,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 2.0383631713554986,
|
|
"grad_norm": 0.26101630597920855,
|
|
"learning_rate": 1.785622829519025e-05,
|
|
"loss": 1.0835,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 2.040920716112532,
|
|
"grad_norm": 0.3040971752066545,
|
|
"learning_rate": 1.7848330224239256e-05,
|
|
"loss": 1.0563,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 2.0434782608695654,
|
|
"grad_norm": 0.26487253530894395,
|
|
"learning_rate": 1.7840419384545706e-05,
|
|
"loss": 1.0579,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 2.0460358056265986,
|
|
"grad_norm": 0.2689601096947907,
|
|
"learning_rate": 1.7832495788980035e-05,
|
|
"loss": 1.1015,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 2.0485933503836318,
|
|
"grad_norm": 0.25525460785840065,
|
|
"learning_rate": 1.7824559450433446e-05,
|
|
"loss": 1.0537,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 2.051150895140665,
|
|
"grad_norm": 0.345599384998098,
|
|
"learning_rate": 1.7816610381817864e-05,
|
|
"loss": 1.0604,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 2.053708439897698,
|
|
"grad_norm": 0.3359389407416057,
|
|
"learning_rate": 1.780864859606592e-05,
|
|
"loss": 1.0664,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 2.0562659846547313,
|
|
"grad_norm": 0.2813553104050823,
|
|
"learning_rate": 1.780067410613095e-05,
|
|
"loss": 1.0937,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 0.2548220560875847,
|
|
"learning_rate": 1.7792686924986946e-05,
|
|
"loss": 1.0441,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 2.061381074168798,
|
|
"grad_norm": 0.28792647000401994,
|
|
"learning_rate": 1.7784687065628554e-05,
|
|
"loss": 1.058,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 2.0639386189258313,
|
|
"grad_norm": 0.2603601267230107,
|
|
"learning_rate": 1.777667454107104e-05,
|
|
"loss": 1.0992,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 2.0664961636828645,
|
|
"grad_norm": 0.2583588654263776,
|
|
"learning_rate": 1.776864936435029e-05,
|
|
"loss": 1.0735,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 2.0690537084398977,
|
|
"grad_norm": 0.30719716854376583,
|
|
"learning_rate": 1.7760611548522755e-05,
|
|
"loss": 1.0498,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 2.071611253196931,
|
|
"grad_norm": 0.30807492892970295,
|
|
"learning_rate": 1.7752561106665463e-05,
|
|
"loss": 1.0548,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 2.074168797953964,
|
|
"grad_norm": 0.3210704099635407,
|
|
"learning_rate": 1.7744498051875984e-05,
|
|
"loss": 1.077,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 2.0767263427109977,
|
|
"grad_norm": 0.4282126010865939,
|
|
"learning_rate": 1.7736422397272396e-05,
|
|
"loss": 1.0494,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 2.079283887468031,
|
|
"grad_norm": 0.4051125030459934,
|
|
"learning_rate": 1.772833415599329e-05,
|
|
"loss": 1.0511,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 2.081841432225064,
|
|
"grad_norm": 0.2991528183767012,
|
|
"learning_rate": 1.7720233341197726e-05,
|
|
"loss": 1.1121,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 2.084398976982097,
|
|
"grad_norm": 0.22783217071200507,
|
|
"learning_rate": 1.7712119966065225e-05,
|
|
"loss": 1.0383,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 2.0869565217391304,
|
|
"grad_norm": 0.3516616820022178,
|
|
"learning_rate": 1.770399404379574e-05,
|
|
"loss": 1.0498,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 2.0895140664961636,
|
|
"grad_norm": 0.2606641623626611,
|
|
"learning_rate": 1.7695855587609637e-05,
|
|
"loss": 1.0594,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 2.0920716112531967,
|
|
"grad_norm": 0.269085192714615,
|
|
"learning_rate": 1.7687704610747676e-05,
|
|
"loss": 1.0419,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 2.0946291560102304,
|
|
"grad_norm": 0.28768629596697776,
|
|
"learning_rate": 1.767954112647099e-05,
|
|
"loss": 1.0435,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 2.0971867007672635,
|
|
"grad_norm": 0.27429737921035624,
|
|
"learning_rate": 1.7671365148061053e-05,
|
|
"loss": 1.0458,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 2.0997442455242967,
|
|
"grad_norm": 0.29736519534073375,
|
|
"learning_rate": 1.7663176688819673e-05,
|
|
"loss": 1.0566,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 2.10230179028133,
|
|
"grad_norm": 0.26021437570192907,
|
|
"learning_rate": 1.765497576206896e-05,
|
|
"loss": 1.0422,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 2.104859335038363,
|
|
"grad_norm": 0.2783440308095714,
|
|
"learning_rate": 1.764676238115131e-05,
|
|
"loss": 1.0776,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 2.1074168797953963,
|
|
"grad_norm": 0.3339846285282316,
|
|
"learning_rate": 1.763853655942938e-05,
|
|
"loss": 1.0674,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 2.10997442455243,
|
|
"grad_norm": 0.2223362385153581,
|
|
"learning_rate": 1.7630298310286065e-05,
|
|
"loss": 1.0699,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 2.112531969309463,
|
|
"grad_norm": 0.33059613735162624,
|
|
"learning_rate": 1.7622047647124488e-05,
|
|
"loss": 1.0634,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 2.1150895140664963,
|
|
"grad_norm": 0.3414911305158879,
|
|
"learning_rate": 1.761378458336796e-05,
|
|
"loss": 1.0548,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 2.1176470588235294,
|
|
"grad_norm": 0.32041930375116484,
|
|
"learning_rate": 1.760550913245996e-05,
|
|
"loss": 1.0621,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 2.1202046035805626,
|
|
"grad_norm": 0.2971788267573472,
|
|
"learning_rate": 1.7597221307864142e-05,
|
|
"loss": 1.0704,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 2.122762148337596,
|
|
"grad_norm": 0.27537162097267065,
|
|
"learning_rate": 1.7588921123064273e-05,
|
|
"loss": 1.0961,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 2.125319693094629,
|
|
"grad_norm": 0.29232241446373336,
|
|
"learning_rate": 1.7580608591564233e-05,
|
|
"loss": 1.0916,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 2.1278772378516626,
|
|
"grad_norm": 0.3815701080685027,
|
|
"learning_rate": 1.757228372688799e-05,
|
|
"loss": 1.0848,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 2.130434782608696,
|
|
"grad_norm": 0.33830135607419565,
|
|
"learning_rate": 1.7563946542579584e-05,
|
|
"loss": 1.0824,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 2.132992327365729,
|
|
"grad_norm": 0.26436755888688523,
|
|
"learning_rate": 1.7555597052203088e-05,
|
|
"loss": 1.0424,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 2.135549872122762,
|
|
"grad_norm": 0.2204259325114956,
|
|
"learning_rate": 1.7547235269342602e-05,
|
|
"loss": 1.0749,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 2.1381074168797953,
|
|
"grad_norm": 0.31500508880378464,
|
|
"learning_rate": 1.7538861207602225e-05,
|
|
"loss": 1.0871,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 2.1406649616368285,
|
|
"grad_norm": 0.33104625224299034,
|
|
"learning_rate": 1.753047488060603e-05,
|
|
"loss": 1.0257,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 2.1432225063938617,
|
|
"grad_norm": 0.2325551980906377,
|
|
"learning_rate": 1.7522076301998048e-05,
|
|
"loss": 1.0907,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 2.1457800511508953,
|
|
"grad_norm": 0.2464976826758584,
|
|
"learning_rate": 1.7513665485442238e-05,
|
|
"loss": 1.067,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 2.1483375959079285,
|
|
"grad_norm": 0.25290511781194314,
|
|
"learning_rate": 1.750524244462248e-05,
|
|
"loss": 1.0893,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 2.1508951406649617,
|
|
"grad_norm": 0.3247901788745791,
|
|
"learning_rate": 1.7496807193242528e-05,
|
|
"loss": 1.0638,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 2.153452685421995,
|
|
"grad_norm": 0.34958915516133227,
|
|
"learning_rate": 1.748835974502601e-05,
|
|
"loss": 1.0825,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 2.156010230179028,
|
|
"grad_norm": 0.24243104695456325,
|
|
"learning_rate": 1.7479900113716398e-05,
|
|
"loss": 1.0537,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 2.1585677749360612,
|
|
"grad_norm": 0.2734369268109971,
|
|
"learning_rate": 1.7471428313076984e-05,
|
|
"loss": 1.1031,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 2.1611253196930944,
|
|
"grad_norm": 0.3380184912512867,
|
|
"learning_rate": 1.7462944356890853e-05,
|
|
"loss": 1.0589,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 2.163682864450128,
|
|
"grad_norm": 0.3625402818137926,
|
|
"learning_rate": 1.7454448258960877e-05,
|
|
"loss": 1.0561,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 2.166240409207161,
|
|
"grad_norm": 0.34638148620089215,
|
|
"learning_rate": 1.744594003310967e-05,
|
|
"loss": 1.0186,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 2.1687979539641944,
|
|
"grad_norm": 0.24740728690176142,
|
|
"learning_rate": 1.743741969317959e-05,
|
|
"loss": 1.1099,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 2.1713554987212276,
|
|
"grad_norm": 0.287155398140135,
|
|
"learning_rate": 1.7428887253032695e-05,
|
|
"loss": 1.0691,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 2.1739130434782608,
|
|
"grad_norm": 0.3566062867329238,
|
|
"learning_rate": 1.7420342726550728e-05,
|
|
"loss": 1.0701,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 2.176470588235294,
|
|
"grad_norm": 0.3096727205958978,
|
|
"learning_rate": 1.74117861276351e-05,
|
|
"loss": 1.0716,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 2.1790281329923276,
|
|
"grad_norm": 0.25874536932280473,
|
|
"learning_rate": 1.740321747020687e-05,
|
|
"loss": 1.0893,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 2.1815856777493607,
|
|
"grad_norm": 0.21538442833683963,
|
|
"learning_rate": 1.7394636768206702e-05,
|
|
"loss": 1.0266,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 2.184143222506394,
|
|
"grad_norm": 0.2871943030157397,
|
|
"learning_rate": 1.738604403559486e-05,
|
|
"loss": 1.0085,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 2.186700767263427,
|
|
"grad_norm": 0.2851621085345804,
|
|
"learning_rate": 1.7377439286351184e-05,
|
|
"loss": 1.0622,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 2.1892583120204603,
|
|
"grad_norm": 0.26228336638762867,
|
|
"learning_rate": 1.736882253447506e-05,
|
|
"loss": 1.083,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 2.1918158567774935,
|
|
"grad_norm": 0.26992050889733915,
|
|
"learning_rate": 1.736019379398542e-05,
|
|
"loss": 1.1006,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 2.1943734015345266,
|
|
"grad_norm": 0.23555655653113924,
|
|
"learning_rate": 1.7351553078920665e-05,
|
|
"loss": 1.0914,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 2.1969309462915603,
|
|
"grad_norm": 0.30209071932451825,
|
|
"learning_rate": 1.734290040333871e-05,
|
|
"loss": 1.0873,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 2.1994884910485935,
|
|
"grad_norm": 0.23936877597438264,
|
|
"learning_rate": 1.733423578131691e-05,
|
|
"loss": 1.0835,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 2.2020460358056266,
|
|
"grad_norm": 0.3366403647300894,
|
|
"learning_rate": 1.732555922695207e-05,
|
|
"loss": 1.0743,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 2.20460358056266,
|
|
"grad_norm": 0.30248308613139313,
|
|
"learning_rate": 1.73168707543604e-05,
|
|
"loss": 1.0482,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 2.207161125319693,
|
|
"grad_norm": 0.26759196361130394,
|
|
"learning_rate": 1.73081703776775e-05,
|
|
"loss": 1.0686,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 2.209718670076726,
|
|
"grad_norm": 0.2424062745806639,
|
|
"learning_rate": 1.7299458111058336e-05,
|
|
"loss": 1.0738,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 2.21227621483376,
|
|
"grad_norm": 0.24086304886593904,
|
|
"learning_rate": 1.7290733968677226e-05,
|
|
"loss": 1.0313,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 2.214833759590793,
|
|
"grad_norm": 0.30184358263466177,
|
|
"learning_rate": 1.7281997964727803e-05,
|
|
"loss": 1.0602,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 2.217391304347826,
|
|
"grad_norm": 0.2366294082979442,
|
|
"learning_rate": 1.7273250113423e-05,
|
|
"loss": 1.1046,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 2.2199488491048593,
|
|
"grad_norm": 0.26905581826310315,
|
|
"learning_rate": 1.726449042899502e-05,
|
|
"loss": 1.0437,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 2.2225063938618925,
|
|
"grad_norm": 0.36508543225667806,
|
|
"learning_rate": 1.725571892569533e-05,
|
|
"loss": 1.0809,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 2.2250639386189257,
|
|
"grad_norm": 0.30221117179280654,
|
|
"learning_rate": 1.7246935617794608e-05,
|
|
"loss": 1.0664,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 2.227621483375959,
|
|
"grad_norm": 0.2269380846996494,
|
|
"learning_rate": 1.723814051958275e-05,
|
|
"loss": 1.045,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 2.2301790281329925,
|
|
"grad_norm": 0.3848192034817777,
|
|
"learning_rate": 1.7229333645368834e-05,
|
|
"loss": 1.0661,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 2.2327365728900257,
|
|
"grad_norm": 0.4724477310420707,
|
|
"learning_rate": 1.722051500948109e-05,
|
|
"loss": 1.0846,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 2.235294117647059,
|
|
"grad_norm": 0.3561338471365552,
|
|
"learning_rate": 1.7211684626266887e-05,
|
|
"loss": 1.0718,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 2.237851662404092,
|
|
"grad_norm": 0.24533531015000096,
|
|
"learning_rate": 1.7202842510092706e-05,
|
|
"loss": 1.0428,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 2.2404092071611252,
|
|
"grad_norm": 0.2999534454935499,
|
|
"learning_rate": 1.7193988675344125e-05,
|
|
"loss": 1.0598,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 2.2429667519181584,
|
|
"grad_norm": 0.3931502655829081,
|
|
"learning_rate": 1.7185123136425775e-05,
|
|
"loss": 1.0486,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 2.2455242966751916,
|
|
"grad_norm": 0.4099239641868052,
|
|
"learning_rate": 1.7176245907761327e-05,
|
|
"loss": 1.0567,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 2.2480818414322252,
|
|
"grad_norm": 0.2859379832887241,
|
|
"learning_rate": 1.7167357003793485e-05,
|
|
"loss": 1.0567,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 2.2506393861892584,
|
|
"grad_norm": 0.29262327466969734,
|
|
"learning_rate": 1.7158456438983934e-05,
|
|
"loss": 1.0299,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 2.2531969309462916,
|
|
"grad_norm": 0.43158299248544585,
|
|
"learning_rate": 1.7149544227813343e-05,
|
|
"loss": 1.05,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 2.2557544757033248,
|
|
"grad_norm": 0.3011090401640172,
|
|
"learning_rate": 1.7140620384781316e-05,
|
|
"loss": 1.0166,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 2.258312020460358,
|
|
"grad_norm": 0.2826762526500697,
|
|
"learning_rate": 1.7131684924406392e-05,
|
|
"loss": 1.0561,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 2.260869565217391,
|
|
"grad_norm": 0.40076272547936787,
|
|
"learning_rate": 1.7122737861226007e-05,
|
|
"loss": 1.0536,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 2.2634271099744243,
|
|
"grad_norm": 0.3893952639906247,
|
|
"learning_rate": 1.711377920979647e-05,
|
|
"loss": 1.0717,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 2.265984654731458,
|
|
"grad_norm": 0.2701415754560129,
|
|
"learning_rate": 1.7104808984692946e-05,
|
|
"loss": 1.0788,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 2.268542199488491,
|
|
"grad_norm": 0.3118978955533469,
|
|
"learning_rate": 1.7095827200509436e-05,
|
|
"loss": 1.0358,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 2.2710997442455243,
|
|
"grad_norm": 0.4681497183113763,
|
|
"learning_rate": 1.7086833871858735e-05,
|
|
"loss": 1.0405,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 2.2736572890025575,
|
|
"grad_norm": 0.44886562710116457,
|
|
"learning_rate": 1.707782901337243e-05,
|
|
"loss": 1.0635,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 2.2762148337595907,
|
|
"grad_norm": 0.24326783713209693,
|
|
"learning_rate": 1.7068812639700862e-05,
|
|
"loss": 1.0995,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 2.2787723785166243,
|
|
"grad_norm": 0.34628521799460377,
|
|
"learning_rate": 1.7059784765513106e-05,
|
|
"loss": 1.0772,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 2.2813299232736575,
|
|
"grad_norm": 0.3903166631143913,
|
|
"learning_rate": 1.705074540549695e-05,
|
|
"loss": 1.0609,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 2.2838874680306906,
|
|
"grad_norm": 0.3263912141551758,
|
|
"learning_rate": 1.704169457435887e-05,
|
|
"loss": 1.0661,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 2.286445012787724,
|
|
"grad_norm": 0.2566336981081094,
|
|
"learning_rate": 1.7032632286823995e-05,
|
|
"loss": 1.0853,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 2.289002557544757,
|
|
"grad_norm": 0.36154048413903833,
|
|
"learning_rate": 1.702355855763611e-05,
|
|
"loss": 1.0723,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 2.29156010230179,
|
|
"grad_norm": 0.2971617301340999,
|
|
"learning_rate": 1.70144734015576e-05,
|
|
"loss": 1.0619,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 2.2941176470588234,
|
|
"grad_norm": 0.2572103383141402,
|
|
"learning_rate": 1.700537683336944e-05,
|
|
"loss": 1.0589,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 2.296675191815857,
|
|
"grad_norm": 0.37750177979394905,
|
|
"learning_rate": 1.699626886787119e-05,
|
|
"loss": 1.0361,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 2.29923273657289,
|
|
"grad_norm": 0.35765757522418873,
|
|
"learning_rate": 1.698714951988093e-05,
|
|
"loss": 1.071,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 2.3017902813299234,
|
|
"grad_norm": 0.30989044748347006,
|
|
"learning_rate": 1.6978018804235278e-05,
|
|
"loss": 1.0555,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 2.3043478260869565,
|
|
"grad_norm": 0.24476809290635856,
|
|
"learning_rate": 1.6968876735789326e-05,
|
|
"loss": 1.0483,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 2.3069053708439897,
|
|
"grad_norm": 0.308551372008468,
|
|
"learning_rate": 1.695972332941666e-05,
|
|
"loss": 1.0551,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 2.309462915601023,
|
|
"grad_norm": 0.37111491476604536,
|
|
"learning_rate": 1.695055860000929e-05,
|
|
"loss": 1.0743,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 2.312020460358056,
|
|
"grad_norm": 0.29147416337800386,
|
|
"learning_rate": 1.6941382562477664e-05,
|
|
"loss": 1.0003,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 2.3145780051150897,
|
|
"grad_norm": 0.26326878890729166,
|
|
"learning_rate": 1.6932195231750616e-05,
|
|
"loss": 1.0351,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 2.317135549872123,
|
|
"grad_norm": 0.29839767577203885,
|
|
"learning_rate": 1.6922996622775363e-05,
|
|
"loss": 1.0445,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 2.319693094629156,
|
|
"grad_norm": 0.23637128109675618,
|
|
"learning_rate": 1.691378675051747e-05,
|
|
"loss": 1.0519,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 2.3222506393861893,
|
|
"grad_norm": 0.25442257071130125,
|
|
"learning_rate": 1.6904565629960814e-05,
|
|
"loss": 1.0902,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 2.3248081841432224,
|
|
"grad_norm": 0.3303656891744051,
|
|
"learning_rate": 1.6895333276107588e-05,
|
|
"loss": 1.0265,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 2.3273657289002556,
|
|
"grad_norm": 0.2612217404110996,
|
|
"learning_rate": 1.688608970397825e-05,
|
|
"loss": 1.1046,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 2.329923273657289,
|
|
"grad_norm": 0.271721798226581,
|
|
"learning_rate": 1.6876834928611524e-05,
|
|
"loss": 1.0784,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 2.3324808184143224,
|
|
"grad_norm": 0.22229862393309946,
|
|
"learning_rate": 1.6867568965064336e-05,
|
|
"loss": 1.0364,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 2.3350383631713556,
|
|
"grad_norm": 0.23741009658476048,
|
|
"learning_rate": 1.685829182841184e-05,
|
|
"loss": 1.0707,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 2.337595907928389,
|
|
"grad_norm": 0.28874176637750065,
|
|
"learning_rate": 1.684900353374735e-05,
|
|
"loss": 1.0702,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 2.340153452685422,
|
|
"grad_norm": 0.30379227509184065,
|
|
"learning_rate": 1.683970409618235e-05,
|
|
"loss": 1.0689,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 2.342710997442455,
|
|
"grad_norm": 0.2726310509927992,
|
|
"learning_rate": 1.683039353084644e-05,
|
|
"loss": 1.0905,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 2.3452685421994883,
|
|
"grad_norm": 0.2713331067951481,
|
|
"learning_rate": 1.6821071852887322e-05,
|
|
"loss": 1.0317,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 2.3478260869565215,
|
|
"grad_norm": 0.3293005148131402,
|
|
"learning_rate": 1.681173907747079e-05,
|
|
"loss": 1.0572,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 2.350383631713555,
|
|
"grad_norm": 0.2660221814623652,
|
|
"learning_rate": 1.680239521978068e-05,
|
|
"loss": 1.0429,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 0.2412158860005583,
|
|
"learning_rate": 1.679304029501887e-05,
|
|
"loss": 1.0452,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 2.3554987212276215,
|
|
"grad_norm": 0.33605356950268017,
|
|
"learning_rate": 1.6783674318405233e-05,
|
|
"loss": 1.0496,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 2.3580562659846547,
|
|
"grad_norm": 0.29348949393829404,
|
|
"learning_rate": 1.677429730517763e-05,
|
|
"loss": 1.0471,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 2.360613810741688,
|
|
"grad_norm": 0.27205789977362044,
|
|
"learning_rate": 1.6764909270591875e-05,
|
|
"loss": 1.049,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 2.363171355498721,
|
|
"grad_norm": 0.24380065073942686,
|
|
"learning_rate": 1.6755510229921713e-05,
|
|
"loss": 1.0568,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 2.3657289002557547,
|
|
"grad_norm": 0.2607905003163443,
|
|
"learning_rate": 1.6746100198458795e-05,
|
|
"loss": 1.0447,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 2.368286445012788,
|
|
"grad_norm": 0.25646849705097663,
|
|
"learning_rate": 1.673667919151266e-05,
|
|
"loss": 1.0213,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 2.370843989769821,
|
|
"grad_norm": 0.24557852833345492,
|
|
"learning_rate": 1.6727247224410686e-05,
|
|
"loss": 1.079,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 2.373401534526854,
|
|
"grad_norm": 0.2536896072712956,
|
|
"learning_rate": 1.67178043124981e-05,
|
|
"loss": 1.0864,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 2.3759590792838874,
|
|
"grad_norm": 0.2921088303385537,
|
|
"learning_rate": 1.6708350471137927e-05,
|
|
"loss": 1.0564,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 2.3785166240409206,
|
|
"grad_norm": 0.20366681064359315,
|
|
"learning_rate": 1.669888571571098e-05,
|
|
"loss": 1.0815,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 2.381074168797954,
|
|
"grad_norm": 0.2708885776774786,
|
|
"learning_rate": 1.6689410061615823e-05,
|
|
"loss": 1.0453,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 2.3836317135549874,
|
|
"grad_norm": 0.26422900568518476,
|
|
"learning_rate": 1.6679923524268748e-05,
|
|
"loss": 1.0691,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 2.3861892583120206,
|
|
"grad_norm": 0.24062139672551194,
|
|
"learning_rate": 1.6670426119103762e-05,
|
|
"loss": 1.0527,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 2.3887468030690537,
|
|
"grad_norm": 0.2440568759213169,
|
|
"learning_rate": 1.666091786157255e-05,
|
|
"loss": 1.039,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 2.391304347826087,
|
|
"grad_norm": 0.24192631220648755,
|
|
"learning_rate": 1.6651398767144454e-05,
|
|
"loss": 1.0368,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 2.39386189258312,
|
|
"grad_norm": 0.3094662604619502,
|
|
"learning_rate": 1.664186885130644e-05,
|
|
"loss": 1.0612,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 2.3964194373401533,
|
|
"grad_norm": 0.22698815376801923,
|
|
"learning_rate": 1.6632328129563088e-05,
|
|
"loss": 1.0573,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 2.398976982097187,
|
|
"grad_norm": 0.25713439762667506,
|
|
"learning_rate": 1.6622776617436556e-05,
|
|
"loss": 1.0689,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 2.40153452685422,
|
|
"grad_norm": 0.21070288001877646,
|
|
"learning_rate": 1.6613214330466557e-05,
|
|
"loss": 1.0514,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 2.4040920716112533,
|
|
"grad_norm": 0.2650104302111488,
|
|
"learning_rate": 1.6603641284210335e-05,
|
|
"loss": 1.0607,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 2.4066496163682864,
|
|
"grad_norm": 0.24280091189228237,
|
|
"learning_rate": 1.6594057494242634e-05,
|
|
"loss": 1.0526,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 2.4092071611253196,
|
|
"grad_norm": 0.2255724092281544,
|
|
"learning_rate": 1.6584462976155683e-05,
|
|
"loss": 1.0584,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 2.411764705882353,
|
|
"grad_norm": 0.2704536970024839,
|
|
"learning_rate": 1.6574857745559168e-05,
|
|
"loss": 1.0621,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 2.414322250639386,
|
|
"grad_norm": 0.29272610932834264,
|
|
"learning_rate": 1.656524181808019e-05,
|
|
"loss": 1.0625,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 2.4168797953964196,
|
|
"grad_norm": 0.28911787491946217,
|
|
"learning_rate": 1.655561520936327e-05,
|
|
"loss": 1.0165,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 2.419437340153453,
|
|
"grad_norm": 0.2532789709507061,
|
|
"learning_rate": 1.6545977935070293e-05,
|
|
"loss": 1.036,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 2.421994884910486,
|
|
"grad_norm": 0.2522741919476773,
|
|
"learning_rate": 1.6536330010880502e-05,
|
|
"loss": 1.0879,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 2.424552429667519,
|
|
"grad_norm": 0.2902148618078098,
|
|
"learning_rate": 1.652667145249047e-05,
|
|
"loss": 1.0447,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 2.4271099744245523,
|
|
"grad_norm": 0.2266116217612757,
|
|
"learning_rate": 1.6517002275614062e-05,
|
|
"loss": 1.0603,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 2.4296675191815855,
|
|
"grad_norm": 0.2855681782290051,
|
|
"learning_rate": 1.6507322495982433e-05,
|
|
"loss": 1.0415,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 2.4322250639386187,
|
|
"grad_norm": 0.2666978671553076,
|
|
"learning_rate": 1.6497632129343964e-05,
|
|
"loss": 1.057,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 2.4347826086956523,
|
|
"grad_norm": 0.25398223147396237,
|
|
"learning_rate": 1.6487931191464293e-05,
|
|
"loss": 1.0225,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 2.4373401534526855,
|
|
"grad_norm": 0.27478774153195795,
|
|
"learning_rate": 1.647821969812623e-05,
|
|
"loss": 1.0743,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 2.4398976982097187,
|
|
"grad_norm": 0.2548269730970245,
|
|
"learning_rate": 1.6468497665129767e-05,
|
|
"loss": 1.0753,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 2.442455242966752,
|
|
"grad_norm": 0.2531646552603803,
|
|
"learning_rate": 1.645876510829205e-05,
|
|
"loss": 1.0502,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 2.445012787723785,
|
|
"grad_norm": 0.2716259730414166,
|
|
"learning_rate": 1.6449022043447333e-05,
|
|
"loss": 1.0604,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 2.4475703324808182,
|
|
"grad_norm": 0.2759652629992187,
|
|
"learning_rate": 1.6439268486446982e-05,
|
|
"loss": 1.0307,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 2.4501278772378514,
|
|
"grad_norm": 0.284229730108131,
|
|
"learning_rate": 1.642950445315941e-05,
|
|
"loss": 1.0244,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 2.452685421994885,
|
|
"grad_norm": 0.2857191939202473,
|
|
"learning_rate": 1.6419729959470107e-05,
|
|
"loss": 1.0475,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 2.455242966751918,
|
|
"grad_norm": 0.24411876551827455,
|
|
"learning_rate": 1.6409945021281547e-05,
|
|
"loss": 1.0205,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 2.4578005115089514,
|
|
"grad_norm": 0.2839219346381256,
|
|
"learning_rate": 1.6400149654513224e-05,
|
|
"loss": 1.0902,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 2.4603580562659846,
|
|
"grad_norm": 0.290894600450773,
|
|
"learning_rate": 1.6390343875101582e-05,
|
|
"loss": 1.0655,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 2.4629156010230178,
|
|
"grad_norm": 0.25018640254339125,
|
|
"learning_rate": 1.6380527699000012e-05,
|
|
"loss": 1.075,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 2.4654731457800514,
|
|
"grad_norm": 0.314947984707885,
|
|
"learning_rate": 1.6370701142178815e-05,
|
|
"loss": 1.0802,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 2.4680306905370846,
|
|
"grad_norm": 0.23513441288297676,
|
|
"learning_rate": 1.636086422062519e-05,
|
|
"loss": 1.0315,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 2.4705882352941178,
|
|
"grad_norm": 0.26967522371119773,
|
|
"learning_rate": 1.635101695034319e-05,
|
|
"loss": 1.0454,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 2.473145780051151,
|
|
"grad_norm": 0.2673917447835626,
|
|
"learning_rate": 1.6341159347353714e-05,
|
|
"loss": 1.0577,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 2.475703324808184,
|
|
"grad_norm": 0.24623838061921519,
|
|
"learning_rate": 1.633129142769446e-05,
|
|
"loss": 1.0607,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 2.4782608695652173,
|
|
"grad_norm": 0.5975989314807109,
|
|
"learning_rate": 1.6321413207419915e-05,
|
|
"loss": 1.0624,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 2.4808184143222505,
|
|
"grad_norm": 0.2783985268403012,
|
|
"learning_rate": 1.6311524702601328e-05,
|
|
"loss": 1.0277,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 2.483375959079284,
|
|
"grad_norm": 0.2948227168148377,
|
|
"learning_rate": 1.6301625929326682e-05,
|
|
"loss": 1.0509,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 2.4859335038363173,
|
|
"grad_norm": 0.25464495418366273,
|
|
"learning_rate": 1.6291716903700657e-05,
|
|
"loss": 1.0743,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 2.4884910485933505,
|
|
"grad_norm": 0.32267891042610297,
|
|
"learning_rate": 1.6281797641844615e-05,
|
|
"loss": 1.0528,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 2.4910485933503836,
|
|
"grad_norm": 0.24461174022768228,
|
|
"learning_rate": 1.6271868159896583e-05,
|
|
"loss": 1.0536,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 2.493606138107417,
|
|
"grad_norm": 0.3184259095166065,
|
|
"learning_rate": 1.6261928474011205e-05,
|
|
"loss": 1.0295,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 2.49616368286445,
|
|
"grad_norm": 0.31223168542424856,
|
|
"learning_rate": 1.6251978600359727e-05,
|
|
"loss": 1.0611,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 2.498721227621483,
|
|
"grad_norm": 0.24470883821957645,
|
|
"learning_rate": 1.6242018555129968e-05,
|
|
"loss": 1.0501,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 2.501278772378517,
|
|
"grad_norm": 0.263841680832215,
|
|
"learning_rate": 1.6232048354526305e-05,
|
|
"loss": 1.0632,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 2.50383631713555,
|
|
"grad_norm": 0.2799350053468126,
|
|
"learning_rate": 1.6222068014769626e-05,
|
|
"loss": 1.0669,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 2.506393861892583,
|
|
"grad_norm": 0.23708656285849256,
|
|
"learning_rate": 1.6212077552097326e-05,
|
|
"loss": 1.0242,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 2.5089514066496164,
|
|
"grad_norm": 0.32106303705514144,
|
|
"learning_rate": 1.6202076982763258e-05,
|
|
"loss": 1.038,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 2.5115089514066495,
|
|
"grad_norm": 0.32641459248285415,
|
|
"learning_rate": 1.6192066323037723e-05,
|
|
"loss": 1.0192,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 2.5140664961636827,
|
|
"grad_norm": 0.2374782294678397,
|
|
"learning_rate": 1.618204558920744e-05,
|
|
"loss": 1.0317,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 2.516624040920716,
|
|
"grad_norm": 0.2669950742681541,
|
|
"learning_rate": 1.6172014797575512e-05,
|
|
"loss": 1.0604,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 2.5191815856777495,
|
|
"grad_norm": 0.3289018657957539,
|
|
"learning_rate": 1.616197396446142e-05,
|
|
"loss": 1.0558,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 2.5217391304347827,
|
|
"grad_norm": 0.30014120894320534,
|
|
"learning_rate": 1.6151923106200964e-05,
|
|
"loss": 1.0282,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 2.524296675191816,
|
|
"grad_norm": 0.22934126760741957,
|
|
"learning_rate": 1.6141862239146263e-05,
|
|
"loss": 1.0442,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 2.526854219948849,
|
|
"grad_norm": 0.3082443169061738,
|
|
"learning_rate": 1.613179137966572e-05,
|
|
"loss": 1.0671,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 2.5294117647058822,
|
|
"grad_norm": 0.34264852115767747,
|
|
"learning_rate": 1.612171054414399e-05,
|
|
"loss": 1.0659,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 2.531969309462916,
|
|
"grad_norm": 0.28840855857878017,
|
|
"learning_rate": 1.6111619748981967e-05,
|
|
"loss": 1.0757,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 2.5345268542199486,
|
|
"grad_norm": 0.29679625325903564,
|
|
"learning_rate": 1.610151901059674e-05,
|
|
"loss": 1.0574,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 2.5370843989769822,
|
|
"grad_norm": 0.2701305485919972,
|
|
"learning_rate": 1.6091408345421583e-05,
|
|
"loss": 1.076,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 2.5396419437340154,
|
|
"grad_norm": 0.27772319714999755,
|
|
"learning_rate": 1.6081287769905914e-05,
|
|
"loss": 1.0557,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 2.5421994884910486,
|
|
"grad_norm": 0.2575298835482317,
|
|
"learning_rate": 1.6071157300515274e-05,
|
|
"loss": 1.0371,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 2.544757033248082,
|
|
"grad_norm": 0.2434229348885953,
|
|
"learning_rate": 1.6061016953731307e-05,
|
|
"loss": 1.0293,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 2.547314578005115,
|
|
"grad_norm": 0.24931228820010734,
|
|
"learning_rate": 1.6050866746051722e-05,
|
|
"loss": 1.0497,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 2.5498721227621486,
|
|
"grad_norm": 0.24970615225374868,
|
|
"learning_rate": 1.6040706693990272e-05,
|
|
"loss": 1.0507,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 2.5524296675191813,
|
|
"grad_norm": 0.2705848075384666,
|
|
"learning_rate": 1.6030536814076722e-05,
|
|
"loss": 1.051,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 2.554987212276215,
|
|
"grad_norm": 0.2645976951028759,
|
|
"learning_rate": 1.602035712285684e-05,
|
|
"loss": 1.044,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 2.557544757033248,
|
|
"grad_norm": 0.25280588284501737,
|
|
"learning_rate": 1.6010167636892338e-05,
|
|
"loss": 1.0466,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 2.5601023017902813,
|
|
"grad_norm": 0.23309975174376094,
|
|
"learning_rate": 1.5999968372760882e-05,
|
|
"loss": 1.0503,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 2.5626598465473145,
|
|
"grad_norm": 0.24003131974818753,
|
|
"learning_rate": 1.5989759347056028e-05,
|
|
"loss": 1.0428,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 2.5652173913043477,
|
|
"grad_norm": 0.22803670250684518,
|
|
"learning_rate": 1.5979540576387226e-05,
|
|
"loss": 1.067,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 2.5677749360613813,
|
|
"grad_norm": 0.23366692767216873,
|
|
"learning_rate": 1.596931207737978e-05,
|
|
"loss": 1.0735,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 2.5703324808184145,
|
|
"grad_norm": 0.2514628572179653,
|
|
"learning_rate": 1.5959073866674812e-05,
|
|
"loss": 1.0683,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 2.5728900255754477,
|
|
"grad_norm": 0.2647695835957155,
|
|
"learning_rate": 1.594882596092926e-05,
|
|
"loss": 1.006,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 2.575447570332481,
|
|
"grad_norm": 0.2705206567562451,
|
|
"learning_rate": 1.5938568376815816e-05,
|
|
"loss": 1.0815,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 2.578005115089514,
|
|
"grad_norm": 0.26218100830771535,
|
|
"learning_rate": 1.5928301131022933e-05,
|
|
"loss": 1.0712,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 2.580562659846547,
|
|
"grad_norm": 0.24704018764157912,
|
|
"learning_rate": 1.5918024240254778e-05,
|
|
"loss": 1.069,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 2.5831202046035804,
|
|
"grad_norm": 0.3099818232532923,
|
|
"learning_rate": 1.5907737721231205e-05,
|
|
"loss": 1.0485,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 2.585677749360614,
|
|
"grad_norm": 0.2976698121714401,
|
|
"learning_rate": 1.5897441590687747e-05,
|
|
"loss": 1.0577,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 2.588235294117647,
|
|
"grad_norm": 0.25285713641828206,
|
|
"learning_rate": 1.5887135865375552e-05,
|
|
"loss": 1.0603,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 2.5907928388746804,
|
|
"grad_norm": 0.2526446484384057,
|
|
"learning_rate": 1.5876820562061402e-05,
|
|
"loss": 1.0433,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 2.5933503836317136,
|
|
"grad_norm": 0.29067294932967996,
|
|
"learning_rate": 1.586649569752765e-05,
|
|
"loss": 1.0616,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 2.5959079283887467,
|
|
"grad_norm": 0.282910218177146,
|
|
"learning_rate": 1.5856161288572195e-05,
|
|
"loss": 1.0413,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 2.59846547314578,
|
|
"grad_norm": 0.2268843181296163,
|
|
"learning_rate": 1.5845817352008485e-05,
|
|
"loss": 1.0407,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 2.601023017902813,
|
|
"grad_norm": 0.22762472803069236,
|
|
"learning_rate": 1.583546390466545e-05,
|
|
"loss": 1.0536,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 2.6035805626598467,
|
|
"grad_norm": 0.23603794648210832,
|
|
"learning_rate": 1.58251009633875e-05,
|
|
"loss": 1.0571,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 2.60613810741688,
|
|
"grad_norm": 0.2676423332930833,
|
|
"learning_rate": 1.5814728545034503e-05,
|
|
"loss": 1.0297,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.25371119273646303,
|
|
"learning_rate": 1.5804346666481728e-05,
|
|
"loss": 1.037,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 2.6112531969309463,
|
|
"grad_norm": 0.23765073500378178,
|
|
"learning_rate": 1.5793955344619846e-05,
|
|
"loss": 1.0493,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 2.6138107416879794,
|
|
"grad_norm": 0.28479895070770733,
|
|
"learning_rate": 1.5783554596354885e-05,
|
|
"loss": 1.0428,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 2.6163682864450126,
|
|
"grad_norm": 0.2610596840924324,
|
|
"learning_rate": 1.577314443860821e-05,
|
|
"loss": 1.0659,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 2.618925831202046,
|
|
"grad_norm": 0.24670717715351206,
|
|
"learning_rate": 1.57627248883165e-05,
|
|
"loss": 1.0434,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 2.6214833759590794,
|
|
"grad_norm": 0.22640840073229135,
|
|
"learning_rate": 1.575229596243171e-05,
|
|
"loss": 1.043,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 2.6240409207161126,
|
|
"grad_norm": 0.25314200985521523,
|
|
"learning_rate": 1.574185767792106e-05,
|
|
"loss": 1.0494,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 2.626598465473146,
|
|
"grad_norm": 0.21470094174624627,
|
|
"learning_rate": 1.573141005176697e-05,
|
|
"loss": 1.0568,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 2.629156010230179,
|
|
"grad_norm": 0.23151889692704267,
|
|
"learning_rate": 1.5720953100967085e-05,
|
|
"loss": 1.0648,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 2.631713554987212,
|
|
"grad_norm": 0.21397184877158426,
|
|
"learning_rate": 1.5710486842534206e-05,
|
|
"loss": 1.0663,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 2.634271099744246,
|
|
"grad_norm": 0.22192997813660584,
|
|
"learning_rate": 1.5700011293496285e-05,
|
|
"loss": 1.0534,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 2.6368286445012785,
|
|
"grad_norm": 0.21407356154899657,
|
|
"learning_rate": 1.568952647089638e-05,
|
|
"loss": 1.059,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 2.639386189258312,
|
|
"grad_norm": 0.21832618515669033,
|
|
"learning_rate": 1.5679032391792648e-05,
|
|
"loss": 1.0221,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 2.6419437340153453,
|
|
"grad_norm": 0.24431871394272658,
|
|
"learning_rate": 1.5668529073258298e-05,
|
|
"loss": 1.0858,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 2.6445012787723785,
|
|
"grad_norm": 0.31234951434869057,
|
|
"learning_rate": 1.5658016532381565e-05,
|
|
"loss": 1.06,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 0.2080542192295102,
|
|
"learning_rate": 1.5647494786265705e-05,
|
|
"loss": 1.0651,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 2.649616368286445,
|
|
"grad_norm": 0.24670278561413833,
|
|
"learning_rate": 1.5636963852028936e-05,
|
|
"loss": 1.0373,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 2.6521739130434785,
|
|
"grad_norm": 0.23750220801463004,
|
|
"learning_rate": 1.5626423746804433e-05,
|
|
"loss": 1.0426,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 2.6547314578005117,
|
|
"grad_norm": 0.24041568140574793,
|
|
"learning_rate": 1.5615874487740287e-05,
|
|
"loss": 1.0504,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 2.657289002557545,
|
|
"grad_norm": 0.2389633958150457,
|
|
"learning_rate": 1.560531609199948e-05,
|
|
"loss": 1.0572,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 2.659846547314578,
|
|
"grad_norm": 0.2770548151196396,
|
|
"learning_rate": 1.559474857675986e-05,
|
|
"loss": 1.068,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 2.662404092071611,
|
|
"grad_norm": 0.266725154908083,
|
|
"learning_rate": 1.5584171959214126e-05,
|
|
"loss": 1.0449,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 2.6649616368286444,
|
|
"grad_norm": 0.25482885945652345,
|
|
"learning_rate": 1.557358625656976e-05,
|
|
"loss": 1.0784,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 2.6675191815856776,
|
|
"grad_norm": 0.264472394184579,
|
|
"learning_rate": 1.5562991486049045e-05,
|
|
"loss": 1.0118,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 2.670076726342711,
|
|
"grad_norm": 0.2848797989882817,
|
|
"learning_rate": 1.555238766488901e-05,
|
|
"loss": 1.0555,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 2.6726342710997444,
|
|
"grad_norm": 0.24695033243914596,
|
|
"learning_rate": 1.5541774810341404e-05,
|
|
"loss": 1.0402,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 2.6751918158567776,
|
|
"grad_norm": 0.20315866222350132,
|
|
"learning_rate": 1.5531152939672683e-05,
|
|
"loss": 1.0251,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 2.6777493606138107,
|
|
"grad_norm": 0.2608581931242649,
|
|
"learning_rate": 1.5520522070163962e-05,
|
|
"loss": 1.0549,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 2.680306905370844,
|
|
"grad_norm": 0.3085807293166213,
|
|
"learning_rate": 1.550988221911101e-05,
|
|
"loss": 1.0586,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 2.682864450127877,
|
|
"grad_norm": 0.22686082652143869,
|
|
"learning_rate": 1.549923340382419e-05,
|
|
"loss": 1.0315,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 2.6854219948849103,
|
|
"grad_norm": 0.23840859030860576,
|
|
"learning_rate": 1.548857564162846e-05,
|
|
"loss": 1.0542,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 2.687979539641944,
|
|
"grad_norm": 0.2828144148836396,
|
|
"learning_rate": 1.5477908949863335e-05,
|
|
"loss": 1.0546,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 2.690537084398977,
|
|
"grad_norm": 0.24462451577997144,
|
|
"learning_rate": 1.5467233345882858e-05,
|
|
"loss": 1.05,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 2.6930946291560103,
|
|
"grad_norm": 0.2608389325913873,
|
|
"learning_rate": 1.5456548847055565e-05,
|
|
"loss": 1.0582,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 2.6956521739130435,
|
|
"grad_norm": 0.2341653521141245,
|
|
"learning_rate": 1.5445855470764467e-05,
|
|
"loss": 1.0227,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 2.6982097186700766,
|
|
"grad_norm": 0.2001748409496552,
|
|
"learning_rate": 1.5435153234407023e-05,
|
|
"loss": 1.0361,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 2.70076726342711,
|
|
"grad_norm": 0.24778418959062198,
|
|
"learning_rate": 1.5424442155395095e-05,
|
|
"loss": 1.0556,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 2.703324808184143,
|
|
"grad_norm": 0.23891064433631373,
|
|
"learning_rate": 1.5413722251154947e-05,
|
|
"loss": 1.0583,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 2.7058823529411766,
|
|
"grad_norm": 0.18730639273619554,
|
|
"learning_rate": 1.540299353912719e-05,
|
|
"loss": 1.0461,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 2.70843989769821,
|
|
"grad_norm": 0.22764007423409213,
|
|
"learning_rate": 1.5392256036766767e-05,
|
|
"loss": 1.0723,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 2.710997442455243,
|
|
"grad_norm": 0.2161337514937876,
|
|
"learning_rate": 1.5381509761542925e-05,
|
|
"loss": 1.0303,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 2.713554987212276,
|
|
"grad_norm": 0.23665490844389125,
|
|
"learning_rate": 1.537075473093918e-05,
|
|
"loss": 1.072,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 2.7161125319693094,
|
|
"grad_norm": 0.2171745194472315,
|
|
"learning_rate": 1.535999096245329e-05,
|
|
"loss": 1.0609,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 2.718670076726343,
|
|
"grad_norm": 0.27479490086390757,
|
|
"learning_rate": 1.5349218473597244e-05,
|
|
"loss": 1.0976,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 2.7212276214833757,
|
|
"grad_norm": 0.23802159891837593,
|
|
"learning_rate": 1.5338437281897196e-05,
|
|
"loss": 1.0561,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 2.7237851662404093,
|
|
"grad_norm": 0.23413108216980624,
|
|
"learning_rate": 1.532764740489348e-05,
|
|
"loss": 1.0249,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 2.7263427109974425,
|
|
"grad_norm": 0.23839123328370654,
|
|
"learning_rate": 1.5316848860140545e-05,
|
|
"loss": 1.0448,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 2.7289002557544757,
|
|
"grad_norm": 0.26889749126936374,
|
|
"learning_rate": 1.530604166520695e-05,
|
|
"loss": 1.0538,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 2.731457800511509,
|
|
"grad_norm": 0.23104275616772496,
|
|
"learning_rate": 1.529522583767533e-05,
|
|
"loss": 1.0709,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 2.734015345268542,
|
|
"grad_norm": 0.26947945752974595,
|
|
"learning_rate": 1.5284401395142356e-05,
|
|
"loss": 1.0476,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 2.7365728900255757,
|
|
"grad_norm": 0.2650970504236315,
|
|
"learning_rate": 1.5273568355218714e-05,
|
|
"loss": 1.0906,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 2.7391304347826084,
|
|
"grad_norm": 0.2426600100365933,
|
|
"learning_rate": 1.5262726735529096e-05,
|
|
"loss": 1.0421,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 2.741687979539642,
|
|
"grad_norm": 0.2565653498953779,
|
|
"learning_rate": 1.5251876553712129e-05,
|
|
"loss": 1.0714,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 2.7442455242966752,
|
|
"grad_norm": 0.2590844357725753,
|
|
"learning_rate": 1.5241017827420379e-05,
|
|
"loss": 1.0529,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 2.7468030690537084,
|
|
"grad_norm": 0.2661157616076656,
|
|
"learning_rate": 1.523015057432032e-05,
|
|
"loss": 1.0413,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 2.7493606138107416,
|
|
"grad_norm": 0.2316877382855349,
|
|
"learning_rate": 1.5219274812092297e-05,
|
|
"loss": 1.0965,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 2.7519181585677748,
|
|
"grad_norm": 0.281689753856549,
|
|
"learning_rate": 1.5208390558430486e-05,
|
|
"loss": 1.0506,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 2.7544757033248084,
|
|
"grad_norm": 0.25889609476509934,
|
|
"learning_rate": 1.5197497831042891e-05,
|
|
"loss": 1.0701,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 2.7570332480818416,
|
|
"grad_norm": 0.25370938447354224,
|
|
"learning_rate": 1.5186596647651299e-05,
|
|
"loss": 1.0344,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 2.7595907928388748,
|
|
"grad_norm": 0.21590996086487077,
|
|
"learning_rate": 1.5175687025991254e-05,
|
|
"loss": 1.0111,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 2.762148337595908,
|
|
"grad_norm": 0.25136209115240976,
|
|
"learning_rate": 1.5164768983812031e-05,
|
|
"loss": 1.0594,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 2.764705882352941,
|
|
"grad_norm": 0.2296309073317973,
|
|
"learning_rate": 1.5153842538876595e-05,
|
|
"loss": 1.0195,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 2.7672634271099743,
|
|
"grad_norm": 0.2188880236827278,
|
|
"learning_rate": 1.5142907708961594e-05,
|
|
"loss": 1.0563,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 2.7698209718670075,
|
|
"grad_norm": 0.29043124524993463,
|
|
"learning_rate": 1.5131964511857307e-05,
|
|
"loss": 1.0579,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 2.772378516624041,
|
|
"grad_norm": 0.23042976434473456,
|
|
"learning_rate": 1.512101296536764e-05,
|
|
"loss": 1.0594,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 2.7749360613810743,
|
|
"grad_norm": 0.3064542379695439,
|
|
"learning_rate": 1.5110053087310067e-05,
|
|
"loss": 1.0347,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 2.7774936061381075,
|
|
"grad_norm": 0.2990911954190306,
|
|
"learning_rate": 1.5099084895515633e-05,
|
|
"loss": 1.0872,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 2.7800511508951407,
|
|
"grad_norm": 0.30238830537129957,
|
|
"learning_rate": 1.5088108407828887e-05,
|
|
"loss": 1.0102,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 2.782608695652174,
|
|
"grad_norm": 0.22800852447745912,
|
|
"learning_rate": 1.5077123642107901e-05,
|
|
"loss": 1.0373,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 2.785166240409207,
|
|
"grad_norm": 0.26466118290058793,
|
|
"learning_rate": 1.5066130616224194e-05,
|
|
"loss": 1.0601,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 2.78772378516624,
|
|
"grad_norm": 0.3134236905423725,
|
|
"learning_rate": 1.5055129348062733e-05,
|
|
"loss": 1.0282,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 2.790281329923274,
|
|
"grad_norm": 0.30040919493276264,
|
|
"learning_rate": 1.5044119855521899e-05,
|
|
"loss": 1.0028,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 2.792838874680307,
|
|
"grad_norm": 0.3018437088485077,
|
|
"learning_rate": 1.5033102156513442e-05,
|
|
"loss": 1.0642,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 2.79539641943734,
|
|
"grad_norm": 0.2594288455529522,
|
|
"learning_rate": 1.5022076268962474e-05,
|
|
"loss": 1.0651,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 2.7979539641943734,
|
|
"grad_norm": 0.2427672329241251,
|
|
"learning_rate": 1.5011042210807416e-05,
|
|
"loss": 1.0499,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 2.8005115089514065,
|
|
"grad_norm": 0.2753688016374087,
|
|
"learning_rate": 1.5000000000000002e-05,
|
|
"loss": 1.0441,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 2.80306905370844,
|
|
"grad_norm": 0.333646004575826,
|
|
"learning_rate": 1.4988949654505212e-05,
|
|
"loss": 1.0954,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 2.805626598465473,
|
|
"grad_norm": 0.24884374092942535,
|
|
"learning_rate": 1.4977891192301266e-05,
|
|
"loss": 1.0616,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 2.8081841432225065,
|
|
"grad_norm": 0.25576802318021363,
|
|
"learning_rate": 1.4966824631379595e-05,
|
|
"loss": 1.0767,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 2.8107416879795397,
|
|
"grad_norm": 0.2726811004318987,
|
|
"learning_rate": 1.49557499897448e-05,
|
|
"loss": 1.0629,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 2.813299232736573,
|
|
"grad_norm": 0.2490020562964201,
|
|
"learning_rate": 1.4944667285414629e-05,
|
|
"loss": 1.0401,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 2.815856777493606,
|
|
"grad_norm": 0.230153454763048,
|
|
"learning_rate": 1.4933576536419951e-05,
|
|
"loss": 1.0681,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 2.8184143222506393,
|
|
"grad_norm": 0.29290021173573333,
|
|
"learning_rate": 1.492247776080472e-05,
|
|
"loss": 1.0478,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 2.820971867007673,
|
|
"grad_norm": 0.22373455728798555,
|
|
"learning_rate": 1.4911370976625951e-05,
|
|
"loss": 1.0646,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 2.8235294117647056,
|
|
"grad_norm": 0.2867670697761132,
|
|
"learning_rate": 1.4900256201953686e-05,
|
|
"loss": 1.0395,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 2.8260869565217392,
|
|
"grad_norm": 0.2580511336465639,
|
|
"learning_rate": 1.488913345487097e-05,
|
|
"loss": 1.0299,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 2.8286445012787724,
|
|
"grad_norm": 0.30823901300584283,
|
|
"learning_rate": 1.4878002753473814e-05,
|
|
"loss": 1.0588,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 2.8312020460358056,
|
|
"grad_norm": 0.26061529857491966,
|
|
"learning_rate": 1.486686411587118e-05,
|
|
"loss": 1.0544,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 2.833759590792839,
|
|
"grad_norm": 0.3411340236384177,
|
|
"learning_rate": 1.4855717560184925e-05,
|
|
"loss": 1.0673,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 2.836317135549872,
|
|
"grad_norm": 0.3112034427743734,
|
|
"learning_rate": 1.4844563104549808e-05,
|
|
"loss": 1.0702,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 2.8388746803069056,
|
|
"grad_norm": 0.26159448325094614,
|
|
"learning_rate": 1.4833400767113425e-05,
|
|
"loss": 1.0518,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 2.8414322250639388,
|
|
"grad_norm": 0.24843885045239295,
|
|
"learning_rate": 1.48222305660362e-05,
|
|
"loss": 1.0519,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 2.843989769820972,
|
|
"grad_norm": 0.34052436576940476,
|
|
"learning_rate": 1.4811052519491358e-05,
|
|
"loss": 1.0621,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 2.846547314578005,
|
|
"grad_norm": 0.25035667041534276,
|
|
"learning_rate": 1.4799866645664875e-05,
|
|
"loss": 1.0495,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 2.8491048593350383,
|
|
"grad_norm": 0.23950107492766087,
|
|
"learning_rate": 1.4788672962755474e-05,
|
|
"loss": 1.0474,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 2.8516624040920715,
|
|
"grad_norm": 0.2228748439468561,
|
|
"learning_rate": 1.4777471488974573e-05,
|
|
"loss": 1.056,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 2.8542199488491047,
|
|
"grad_norm": 0.21686894636285,
|
|
"learning_rate": 1.476626224254627e-05,
|
|
"loss": 1.0473,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 2.8567774936061383,
|
|
"grad_norm": 0.21336673271033718,
|
|
"learning_rate": 1.475504524170731e-05,
|
|
"loss": 1.0327,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 2.8593350383631715,
|
|
"grad_norm": 0.2412247096897979,
|
|
"learning_rate": 1.4743820504707054e-05,
|
|
"loss": 1.0603,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 2.8618925831202047,
|
|
"grad_norm": 0.20338495510222906,
|
|
"learning_rate": 1.4732588049807442e-05,
|
|
"loss": 1.0345,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 2.864450127877238,
|
|
"grad_norm": 0.2224056939046196,
|
|
"learning_rate": 1.4721347895282977e-05,
|
|
"loss": 1.0932,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 2.867007672634271,
|
|
"grad_norm": 0.21219190570803861,
|
|
"learning_rate": 1.4710100059420693e-05,
|
|
"loss": 1.0577,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 2.869565217391304,
|
|
"grad_norm": 0.23417177032958478,
|
|
"learning_rate": 1.4698844560520107e-05,
|
|
"loss": 1.04,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 2.8721227621483374,
|
|
"grad_norm": 0.21756710346483277,
|
|
"learning_rate": 1.4687581416893218e-05,
|
|
"loss": 1.0115,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 2.874680306905371,
|
|
"grad_norm": 0.27116811809019226,
|
|
"learning_rate": 1.4676310646864455e-05,
|
|
"loss": 1.0925,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 2.877237851662404,
|
|
"grad_norm": 0.20359779513752466,
|
|
"learning_rate": 1.4665032268770656e-05,
|
|
"loss": 1.0662,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 2.8797953964194374,
|
|
"grad_norm": 0.25086860996163834,
|
|
"learning_rate": 1.4653746300961037e-05,
|
|
"loss": 1.0615,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 2.8823529411764706,
|
|
"grad_norm": 0.21619154701357268,
|
|
"learning_rate": 1.4642452761797166e-05,
|
|
"loss": 1.028,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 2.8849104859335037,
|
|
"grad_norm": 0.23657771626030477,
|
|
"learning_rate": 1.4631151669652917e-05,
|
|
"loss": 1.0339,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 2.887468030690537,
|
|
"grad_norm": 0.25435410320469787,
|
|
"learning_rate": 1.4619843042914466e-05,
|
|
"loss": 1.0382,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 2.89002557544757,
|
|
"grad_norm": 0.3165858987447032,
|
|
"learning_rate": 1.4608526899980238e-05,
|
|
"loss": 1.0631,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 2.8925831202046037,
|
|
"grad_norm": 0.3059530735276844,
|
|
"learning_rate": 1.4597203259260893e-05,
|
|
"loss": 1.0742,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 2.895140664961637,
|
|
"grad_norm": 0.23231123365328338,
|
|
"learning_rate": 1.4585872139179284e-05,
|
|
"loss": 1.0108,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 2.89769820971867,
|
|
"grad_norm": 0.32159788413714113,
|
|
"learning_rate": 1.457453355817044e-05,
|
|
"loss": 1.0343,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 2.9002557544757033,
|
|
"grad_norm": 0.2624561212579556,
|
|
"learning_rate": 1.456318753468152e-05,
|
|
"loss": 1.0344,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 2.9028132992327365,
|
|
"grad_norm": 0.21340797781295,
|
|
"learning_rate": 1.455183408717179e-05,
|
|
"loss": 1.0582,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 2.90537084398977,
|
|
"grad_norm": 0.27498982896150626,
|
|
"learning_rate": 1.4540473234112607e-05,
|
|
"loss": 1.0319,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 2.907928388746803,
|
|
"grad_norm": 0.26787413886350847,
|
|
"learning_rate": 1.4529104993987364e-05,
|
|
"loss": 1.094,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 2.9104859335038364,
|
|
"grad_norm": 0.22411507204789752,
|
|
"learning_rate": 1.4517729385291479e-05,
|
|
"loss": 1.0289,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 2.9130434782608696,
|
|
"grad_norm": 0.3186727715150146,
|
|
"learning_rate": 1.4506346426532356e-05,
|
|
"loss": 1.0474,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 2.915601023017903,
|
|
"grad_norm": 0.23017658335190225,
|
|
"learning_rate": 1.4494956136229356e-05,
|
|
"loss": 1.0406,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 2.918158567774936,
|
|
"grad_norm": 0.2469732487522561,
|
|
"learning_rate": 1.448355853291377e-05,
|
|
"loss": 1.0545,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 2.920716112531969,
|
|
"grad_norm": 0.34257461951959434,
|
|
"learning_rate": 1.4472153635128787e-05,
|
|
"loss": 1.0649,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 2.923273657289003,
|
|
"grad_norm": 0.26582238607210484,
|
|
"learning_rate": 1.4460741461429457e-05,
|
|
"loss": 1.0643,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 2.9258312020460355,
|
|
"grad_norm": 0.238713886041743,
|
|
"learning_rate": 1.4449322030382681e-05,
|
|
"loss": 1.0375,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 2.928388746803069,
|
|
"grad_norm": 0.28544164960503227,
|
|
"learning_rate": 1.4437895360567156e-05,
|
|
"loss": 1.0459,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 2.9309462915601023,
|
|
"grad_norm": 0.30617216188801405,
|
|
"learning_rate": 1.4426461470573358e-05,
|
|
"loss": 1.0352,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 2.9335038363171355,
|
|
"grad_norm": 0.23250706835607923,
|
|
"learning_rate": 1.4415020379003513e-05,
|
|
"loss": 1.0547,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 2.9360613810741687,
|
|
"grad_norm": 0.23449213816934886,
|
|
"learning_rate": 1.4403572104471559e-05,
|
|
"loss": 1.0506,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 2.938618925831202,
|
|
"grad_norm": 0.26285727807721854,
|
|
"learning_rate": 1.4392116665603123e-05,
|
|
"loss": 1.067,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 0.25864228967500363,
|
|
"learning_rate": 1.4380654081035492e-05,
|
|
"loss": 1.0566,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 2.9437340153452687,
|
|
"grad_norm": 0.2197313587417355,
|
|
"learning_rate": 1.4369184369417573e-05,
|
|
"loss": 1.069,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 2.946291560102302,
|
|
"grad_norm": 0.22175625255078796,
|
|
"learning_rate": 1.4357707549409865e-05,
|
|
"loss": 1.0393,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 2.948849104859335,
|
|
"grad_norm": 0.20734806987916835,
|
|
"learning_rate": 1.4346223639684445e-05,
|
|
"loss": 1.0629,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 2.9514066496163682,
|
|
"grad_norm": 0.20844980678105798,
|
|
"learning_rate": 1.4334732658924906e-05,
|
|
"loss": 1.0683,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 2.9539641943734014,
|
|
"grad_norm": 0.1986457605182691,
|
|
"learning_rate": 1.4323234625826363e-05,
|
|
"loss": 1.082,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 2.9565217391304346,
|
|
"grad_norm": 0.2173974733436024,
|
|
"learning_rate": 1.4311729559095391e-05,
|
|
"loss": 1.0579,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 2.959079283887468,
|
|
"grad_norm": 0.23569051033252647,
|
|
"learning_rate": 1.430021747745002e-05,
|
|
"loss": 1.0501,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 2.9616368286445014,
|
|
"grad_norm": 0.1958953354096487,
|
|
"learning_rate": 1.4288698399619682e-05,
|
|
"loss": 1.0423,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 2.9641943734015346,
|
|
"grad_norm": 0.24550680925330018,
|
|
"learning_rate": 1.4277172344345203e-05,
|
|
"loss": 1.0429,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 2.9667519181585678,
|
|
"grad_norm": 0.22335624269922177,
|
|
"learning_rate": 1.4265639330378751e-05,
|
|
"loss": 1.0637,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 2.969309462915601,
|
|
"grad_norm": 0.19207777433952558,
|
|
"learning_rate": 1.4254099376483814e-05,
|
|
"loss": 1.032,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 2.971867007672634,
|
|
"grad_norm": 0.21933228277599973,
|
|
"learning_rate": 1.424255250143518e-05,
|
|
"loss": 1.0399,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 2.9744245524296673,
|
|
"grad_norm": 0.2042696972237095,
|
|
"learning_rate": 1.423099872401889e-05,
|
|
"loss": 1.082,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 2.976982097186701,
|
|
"grad_norm": 0.23521017440946976,
|
|
"learning_rate": 1.4219438063032223e-05,
|
|
"loss": 1.0337,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 2.979539641943734,
|
|
"grad_norm": 0.23773407464153606,
|
|
"learning_rate": 1.4207870537283645e-05,
|
|
"loss": 1.0464,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 2.9820971867007673,
|
|
"grad_norm": 0.19999456866670134,
|
|
"learning_rate": 1.4196296165592804e-05,
|
|
"loss": 1.0738,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 2.9846547314578005,
|
|
"grad_norm": 0.24196149952728568,
|
|
"learning_rate": 1.4184714966790472e-05,
|
|
"loss": 1.0515,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 2.9872122762148337,
|
|
"grad_norm": 0.2078635385282362,
|
|
"learning_rate": 1.4173126959718542e-05,
|
|
"loss": 1.0685,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 2.9897698209718673,
|
|
"grad_norm": 0.22519888128468324,
|
|
"learning_rate": 1.416153216322997e-05,
|
|
"loss": 1.0406,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 2.9923273657289,
|
|
"grad_norm": 0.23526057180385235,
|
|
"learning_rate": 1.4149930596188768e-05,
|
|
"loss": 1.0388,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 2.9948849104859336,
|
|
"grad_norm": 0.23228687433861023,
|
|
"learning_rate": 1.4138322277469962e-05,
|
|
"loss": 1.035,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 2.997442455242967,
|
|
"grad_norm": 0.23799687340205392,
|
|
"learning_rate": 1.412670722595956e-05,
|
|
"loss": 1.0798,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.22605319189413042,
|
|
"learning_rate": 1.4115085460554524e-05,
|
|
"loss": 1.0724,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 3.002557544757033,
|
|
"grad_norm": 0.22583372556086656,
|
|
"learning_rate": 1.410345700016274e-05,
|
|
"loss": 1.0653,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 3.0051150895140664,
|
|
"grad_norm": 0.20810235633737204,
|
|
"learning_rate": 1.4091821863702983e-05,
|
|
"loss": 1.0641,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 3.0076726342710995,
|
|
"grad_norm": 0.20645828983892262,
|
|
"learning_rate": 1.4080180070104897e-05,
|
|
"loss": 1.0426,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 3.010230179028133,
|
|
"grad_norm": 0.20345366792505884,
|
|
"learning_rate": 1.406853163830895e-05,
|
|
"loss": 1.0849,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 3.0127877237851663,
|
|
"grad_norm": 0.21212291453565033,
|
|
"learning_rate": 1.4056876587266413e-05,
|
|
"loss": 1.0687,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 3.0153452685421995,
|
|
"grad_norm": 0.19908450369242628,
|
|
"learning_rate": 1.4045214935939323e-05,
|
|
"loss": 1.0193,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 3.0179028132992327,
|
|
"grad_norm": 0.22127953869549283,
|
|
"learning_rate": 1.4033546703300465e-05,
|
|
"loss": 1.027,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 3.020460358056266,
|
|
"grad_norm": 0.2284795334598278,
|
|
"learning_rate": 1.402187190833331e-05,
|
|
"loss": 1.041,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 3.023017902813299,
|
|
"grad_norm": 0.2062329065326131,
|
|
"learning_rate": 1.4010190570032034e-05,
|
|
"loss": 1.0371,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 3.0255754475703327,
|
|
"grad_norm": 0.19478100964489237,
|
|
"learning_rate": 1.3998502707401437e-05,
|
|
"loss": 1.0578,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 3.028132992327366,
|
|
"grad_norm": 0.22168971452412287,
|
|
"learning_rate": 1.398680833945694e-05,
|
|
"loss": 1.023,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 3.030690537084399,
|
|
"grad_norm": 0.2040809628837293,
|
|
"learning_rate": 1.3975107485224552e-05,
|
|
"loss": 1.0382,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 3.0332480818414322,
|
|
"grad_norm": 0.2051983553640489,
|
|
"learning_rate": 1.3963400163740828e-05,
|
|
"loss": 1.0186,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 3.0358056265984654,
|
|
"grad_norm": 0.2350671015231016,
|
|
"learning_rate": 1.395168639405285e-05,
|
|
"loss": 1.0455,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 3.0383631713554986,
|
|
"grad_norm": 0.22621448501355076,
|
|
"learning_rate": 1.3939966195218188e-05,
|
|
"loss": 1.0074,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 3.040920716112532,
|
|
"grad_norm": 0.23737640534776971,
|
|
"learning_rate": 1.3928239586304873e-05,
|
|
"loss": 1.0437,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.2323257168547048,
|
|
"learning_rate": 1.3916506586391364e-05,
|
|
"loss": 1.0327,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 3.0460358056265986,
|
|
"grad_norm": 0.22305161499533654,
|
|
"learning_rate": 1.390476721456652e-05,
|
|
"loss": 1.0099,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 3.0485933503836318,
|
|
"grad_norm": 0.23535858097990897,
|
|
"learning_rate": 1.3893021489929564e-05,
|
|
"loss": 1.051,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 3.051150895140665,
|
|
"grad_norm": 0.20326385048979087,
|
|
"learning_rate": 1.3881269431590052e-05,
|
|
"loss": 1.057,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 3.053708439897698,
|
|
"grad_norm": 0.21150204467244554,
|
|
"learning_rate": 1.3869511058667855e-05,
|
|
"loss": 1.0296,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 3.0562659846547313,
|
|
"grad_norm": 0.2227085234968232,
|
|
"learning_rate": 1.3857746390293106e-05,
|
|
"loss": 1.0342,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 3.0588235294117645,
|
|
"grad_norm": 0.24189335016155378,
|
|
"learning_rate": 1.3845975445606184e-05,
|
|
"loss": 1.0491,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 3.061381074168798,
|
|
"grad_norm": 0.21700679291777608,
|
|
"learning_rate": 1.383419824375768e-05,
|
|
"loss": 1.0458,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 3.0639386189258313,
|
|
"grad_norm": 0.2325789506958363,
|
|
"learning_rate": 1.382241480390837e-05,
|
|
"loss": 1.0451,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 3.0664961636828645,
|
|
"grad_norm": 0.21783084381710976,
|
|
"learning_rate": 1.3810625145229174e-05,
|
|
"loss": 1.0621,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 3.0690537084398977,
|
|
"grad_norm": 0.259978348441225,
|
|
"learning_rate": 1.3798829286901122e-05,
|
|
"loss": 1.0216,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 3.071611253196931,
|
|
"grad_norm": 0.2531231166315013,
|
|
"learning_rate": 1.3787027248115341e-05,
|
|
"loss": 1.0344,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 3.074168797953964,
|
|
"grad_norm": 0.25693037958499804,
|
|
"learning_rate": 1.3775219048073011e-05,
|
|
"loss": 1.0571,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 3.0767263427109977,
|
|
"grad_norm": 0.22329447917802453,
|
|
"learning_rate": 1.376340470598534e-05,
|
|
"loss": 1.0621,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 3.079283887468031,
|
|
"grad_norm": 0.24363305905238922,
|
|
"learning_rate": 1.3751584241073517e-05,
|
|
"loss": 1.0627,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 3.081841432225064,
|
|
"grad_norm": 0.252245006887946,
|
|
"learning_rate": 1.3739757672568703e-05,
|
|
"loss": 1.0619,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 3.084398976982097,
|
|
"grad_norm": 0.24187527332738293,
|
|
"learning_rate": 1.3727925019711981e-05,
|
|
"loss": 1.0324,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 3.0869565217391304,
|
|
"grad_norm": 0.2140650570738505,
|
|
"learning_rate": 1.3716086301754343e-05,
|
|
"loss": 1.0538,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 3.0895140664961636,
|
|
"grad_norm": 0.26828049735013604,
|
|
"learning_rate": 1.3704241537956643e-05,
|
|
"loss": 1.0806,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 3.0920716112531967,
|
|
"grad_norm": 0.20662196910585112,
|
|
"learning_rate": 1.3692390747589564e-05,
|
|
"loss": 1.0272,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 3.0946291560102304,
|
|
"grad_norm": 0.23564415225665816,
|
|
"learning_rate": 1.3680533949933607e-05,
|
|
"loss": 1.0499,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 3.0971867007672635,
|
|
"grad_norm": 0.20991526952221617,
|
|
"learning_rate": 1.3668671164279039e-05,
|
|
"loss": 1.0514,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 3.0997442455242967,
|
|
"grad_norm": 0.22870151484413298,
|
|
"learning_rate": 1.3656802409925874e-05,
|
|
"loss": 1.0134,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 3.10230179028133,
|
|
"grad_norm": 0.21877781759998727,
|
|
"learning_rate": 1.3644927706183824e-05,
|
|
"loss": 1.0851,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 3.104859335038363,
|
|
"grad_norm": 0.2327125173805525,
|
|
"learning_rate": 1.3633047072372301e-05,
|
|
"loss": 1.0311,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 3.1074168797953963,
|
|
"grad_norm": 0.22202571636713042,
|
|
"learning_rate": 1.3621160527820343e-05,
|
|
"loss": 1.0737,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 3.10997442455243,
|
|
"grad_norm": 0.2154525697553689,
|
|
"learning_rate": 1.3609268091866621e-05,
|
|
"loss": 1.0298,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 3.112531969309463,
|
|
"grad_norm": 0.24440602961960542,
|
|
"learning_rate": 1.3597369783859385e-05,
|
|
"loss": 1.0637,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 3.1150895140664963,
|
|
"grad_norm": 0.22947504540372743,
|
|
"learning_rate": 1.3585465623156434e-05,
|
|
"loss": 1.0358,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 3.1176470588235294,
|
|
"grad_norm": 0.20546693748205078,
|
|
"learning_rate": 1.3573555629125097e-05,
|
|
"loss": 1.0531,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 3.1202046035805626,
|
|
"grad_norm": 0.2376207772257609,
|
|
"learning_rate": 1.3561639821142187e-05,
|
|
"loss": 1.0422,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 3.122762148337596,
|
|
"grad_norm": 0.2075906124157621,
|
|
"learning_rate": 1.3549718218593982e-05,
|
|
"loss": 1.0373,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 3.125319693094629,
|
|
"grad_norm": 0.2710877805734423,
|
|
"learning_rate": 1.3537790840876179e-05,
|
|
"loss": 0.9867,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 3.1278772378516626,
|
|
"grad_norm": 0.21873389694947254,
|
|
"learning_rate": 1.3525857707393878e-05,
|
|
"loss": 1.0493,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 3.130434782608696,
|
|
"grad_norm": 0.23140954420047274,
|
|
"learning_rate": 1.3513918837561544e-05,
|
|
"loss": 1.0192,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 3.132992327365729,
|
|
"grad_norm": 0.21413826548960174,
|
|
"learning_rate": 1.3501974250802967e-05,
|
|
"loss": 1.0233,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 3.135549872122762,
|
|
"grad_norm": 0.2211593381832046,
|
|
"learning_rate": 1.3490023966551249e-05,
|
|
"loss": 1.0415,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 3.1381074168797953,
|
|
"grad_norm": 0.23108631631913867,
|
|
"learning_rate": 1.3478068004248747e-05,
|
|
"loss": 1.0399,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 3.1406649616368285,
|
|
"grad_norm": 0.22275279756167513,
|
|
"learning_rate": 1.346610638334707e-05,
|
|
"loss": 1.0596,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 3.1432225063938617,
|
|
"grad_norm": 0.2524231602837744,
|
|
"learning_rate": 1.3454139123307023e-05,
|
|
"loss": 1.065,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 3.1457800511508953,
|
|
"grad_norm": 0.2196098109454718,
|
|
"learning_rate": 1.3442166243598598e-05,
|
|
"loss": 1.0497,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 3.1483375959079285,
|
|
"grad_norm": 0.2392235318659055,
|
|
"learning_rate": 1.3430187763700914e-05,
|
|
"loss": 1.0579,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 3.1508951406649617,
|
|
"grad_norm": 0.2252882411678263,
|
|
"learning_rate": 1.341820370310221e-05,
|
|
"loss": 1.037,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 3.153452685421995,
|
|
"grad_norm": 0.21957606499611643,
|
|
"learning_rate": 1.3406214081299807e-05,
|
|
"loss": 1.077,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 3.156010230179028,
|
|
"grad_norm": 0.2158883136158835,
|
|
"learning_rate": 1.3394218917800064e-05,
|
|
"loss": 1.0576,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 3.1585677749360612,
|
|
"grad_norm": 0.23206630107006462,
|
|
"learning_rate": 1.3382218232118367e-05,
|
|
"loss": 1.046,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 3.1611253196930944,
|
|
"grad_norm": 0.22650165934718894,
|
|
"learning_rate": 1.3370212043779078e-05,
|
|
"loss": 1.0513,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 3.163682864450128,
|
|
"grad_norm": 0.2146494581025888,
|
|
"learning_rate": 1.335820037231552e-05,
|
|
"loss": 1.0418,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 3.166240409207161,
|
|
"grad_norm": 0.22693672785502703,
|
|
"learning_rate": 1.3346183237269925e-05,
|
|
"loss": 1.044,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 3.1687979539641944,
|
|
"grad_norm": 0.24944388113412067,
|
|
"learning_rate": 1.3334160658193425e-05,
|
|
"loss": 1.0085,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 3.1713554987212276,
|
|
"grad_norm": 0.2323240702756201,
|
|
"learning_rate": 1.3322132654646003e-05,
|
|
"loss": 1.0348,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 3.1739130434782608,
|
|
"grad_norm": 0.23314120380593967,
|
|
"learning_rate": 1.3310099246196466e-05,
|
|
"loss": 1.0255,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 3.176470588235294,
|
|
"grad_norm": 0.22959022702139156,
|
|
"learning_rate": 1.3298060452422421e-05,
|
|
"loss": 1.0303,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 3.1790281329923276,
|
|
"grad_norm": 0.1945764817333214,
|
|
"learning_rate": 1.3286016292910229e-05,
|
|
"loss": 1.0366,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 3.1815856777493607,
|
|
"grad_norm": 0.2049881448552149,
|
|
"learning_rate": 1.327396678725499e-05,
|
|
"loss": 1.0224,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 3.184143222506394,
|
|
"grad_norm": 0.245199876694944,
|
|
"learning_rate": 1.3261911955060493e-05,
|
|
"loss": 0.9968,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 3.186700767263427,
|
|
"grad_norm": 0.19541276884697034,
|
|
"learning_rate": 1.3249851815939197e-05,
|
|
"loss": 1.0502,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 3.1892583120204603,
|
|
"grad_norm": 0.22313066289223873,
|
|
"learning_rate": 1.3237786389512191e-05,
|
|
"loss": 1.0577,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 3.1918158567774935,
|
|
"grad_norm": 0.23691814508572034,
|
|
"learning_rate": 1.3225715695409171e-05,
|
|
"loss": 1.0407,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 3.1943734015345266,
|
|
"grad_norm": 0.19364764369376442,
|
|
"learning_rate": 1.3213639753268406e-05,
|
|
"loss": 1.0289,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 3.1969309462915603,
|
|
"grad_norm": 0.19636310287160377,
|
|
"learning_rate": 1.3201558582736693e-05,
|
|
"loss": 1.0389,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 3.1994884910485935,
|
|
"grad_norm": 0.1876664287484004,
|
|
"learning_rate": 1.3189472203469347e-05,
|
|
"loss": 1.0167,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 3.2020460358056266,
|
|
"grad_norm": 0.19365316134612506,
|
|
"learning_rate": 1.3177380635130144e-05,
|
|
"loss": 1.0522,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 3.20460358056266,
|
|
"grad_norm": 0.17412371216897868,
|
|
"learning_rate": 1.3165283897391315e-05,
|
|
"loss": 1.0125,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 3.207161125319693,
|
|
"grad_norm": 0.21377597350657065,
|
|
"learning_rate": 1.3153182009933495e-05,
|
|
"loss": 1.035,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 3.209718670076726,
|
|
"grad_norm": 0.18072951551049465,
|
|
"learning_rate": 1.3141074992445695e-05,
|
|
"loss": 1.0354,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 3.21227621483376,
|
|
"grad_norm": 0.21819804516231073,
|
|
"learning_rate": 1.3128962864625281e-05,
|
|
"loss": 1.0288,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 3.214833759590793,
|
|
"grad_norm": 0.22829327535687294,
|
|
"learning_rate": 1.3116845646177923e-05,
|
|
"loss": 1.0329,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 3.217391304347826,
|
|
"grad_norm": 0.22096551556124827,
|
|
"learning_rate": 1.3104723356817582e-05,
|
|
"loss": 1.0272,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 3.2199488491048593,
|
|
"grad_norm": 0.19427368545567542,
|
|
"learning_rate": 1.309259601626646e-05,
|
|
"loss": 1.0757,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 3.2225063938618925,
|
|
"grad_norm": 0.2517142880283656,
|
|
"learning_rate": 1.3080463644254986e-05,
|
|
"loss": 1.0449,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 3.2250639386189257,
|
|
"grad_norm": 0.21438511450639225,
|
|
"learning_rate": 1.3068326260521769e-05,
|
|
"loss": 1.0253,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 3.227621483375959,
|
|
"grad_norm": 0.23939604240119217,
|
|
"learning_rate": 1.3056183884813568e-05,
|
|
"loss": 1.0055,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 3.2301790281329925,
|
|
"grad_norm": 0.24913816729402657,
|
|
"learning_rate": 1.3044036536885284e-05,
|
|
"loss": 1.0305,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 3.2327365728900257,
|
|
"grad_norm": 0.22985968452270927,
|
|
"learning_rate": 1.3031884236499877e-05,
|
|
"loss": 1.0356,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 3.235294117647059,
|
|
"grad_norm": 0.2432127136491896,
|
|
"learning_rate": 1.3019727003428387e-05,
|
|
"loss": 1.0327,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 3.237851662404092,
|
|
"grad_norm": 0.21511626506563813,
|
|
"learning_rate": 1.300756485744987e-05,
|
|
"loss": 1.0351,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 3.2404092071611252,
|
|
"grad_norm": 0.21620331140589194,
|
|
"learning_rate": 1.2995397818351381e-05,
|
|
"loss": 1.0272,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 3.2429667519181584,
|
|
"grad_norm": 0.24918797088173247,
|
|
"learning_rate": 1.2983225905927924e-05,
|
|
"loss": 0.9923,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 3.2455242966751916,
|
|
"grad_norm": 0.2033868759774891,
|
|
"learning_rate": 1.2971049139982448e-05,
|
|
"loss": 1.0526,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 3.2480818414322252,
|
|
"grad_norm": 0.24065409839804014,
|
|
"learning_rate": 1.2958867540325785e-05,
|
|
"loss": 1.0283,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 3.2506393861892584,
|
|
"grad_norm": 0.23975735377063542,
|
|
"learning_rate": 1.294668112677664e-05,
|
|
"loss": 1.0467,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 3.2531969309462916,
|
|
"grad_norm": 0.20321738007355677,
|
|
"learning_rate": 1.2934489919161541e-05,
|
|
"loss": 1.0292,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 3.2557544757033248,
|
|
"grad_norm": 0.22563988593724132,
|
|
"learning_rate": 1.292229393731482e-05,
|
|
"loss": 1.0273,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 3.258312020460358,
|
|
"grad_norm": 0.2108784426288754,
|
|
"learning_rate": 1.2910093201078584e-05,
|
|
"loss": 1.041,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 3.260869565217391,
|
|
"grad_norm": 0.25182826531670705,
|
|
"learning_rate": 1.289788773030266e-05,
|
|
"loss": 1.0507,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 3.2634271099744243,
|
|
"grad_norm": 0.23260866121986465,
|
|
"learning_rate": 1.2885677544844592e-05,
|
|
"loss": 1.0073,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 3.265984654731458,
|
|
"grad_norm": 0.20778832907058722,
|
|
"learning_rate": 1.2873462664569583e-05,
|
|
"loss": 1.063,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 3.268542199488491,
|
|
"grad_norm": 0.24704017386773852,
|
|
"learning_rate": 1.2861243109350485e-05,
|
|
"loss": 1.0275,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 3.2710997442455243,
|
|
"grad_norm": 0.20143011397018976,
|
|
"learning_rate": 1.2849018899067746e-05,
|
|
"loss": 1.0786,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 3.2736572890025575,
|
|
"grad_norm": 0.19780957370773475,
|
|
"learning_rate": 1.2836790053609396e-05,
|
|
"loss": 1.0475,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 3.2762148337595907,
|
|
"grad_norm": 0.21001290371983408,
|
|
"learning_rate": 1.2824556592870993e-05,
|
|
"loss": 1.0544,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 3.2787723785166243,
|
|
"grad_norm": 0.2314545925289747,
|
|
"learning_rate": 1.2812318536755624e-05,
|
|
"loss": 1.0432,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 3.2813299232736575,
|
|
"grad_norm": 0.21988256589877733,
|
|
"learning_rate": 1.2800075905173834e-05,
|
|
"loss": 1.0432,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 3.2838874680306906,
|
|
"grad_norm": 0.26832633674704665,
|
|
"learning_rate": 1.2787828718043622e-05,
|
|
"loss": 1.0379,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 3.286445012787724,
|
|
"grad_norm": 0.2234222589374059,
|
|
"learning_rate": 1.2775576995290397e-05,
|
|
"loss": 1.0421,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 3.289002557544757,
|
|
"grad_norm": 0.20516563803916263,
|
|
"learning_rate": 1.276332075684694e-05,
|
|
"loss": 1.0392,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 3.29156010230179,
|
|
"grad_norm": 0.2404590656925125,
|
|
"learning_rate": 1.2751060022653393e-05,
|
|
"loss": 1.0283,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 3.2941176470588234,
|
|
"grad_norm": 0.19864113603292302,
|
|
"learning_rate": 1.2738794812657194e-05,
|
|
"loss": 1.0144,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 3.296675191815857,
|
|
"grad_norm": 0.2323436030300969,
|
|
"learning_rate": 1.2726525146813078e-05,
|
|
"loss": 1.0151,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 3.29923273657289,
|
|
"grad_norm": 0.24929371156784427,
|
|
"learning_rate": 1.2714251045083028e-05,
|
|
"loss": 1.0137,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 3.3017902813299234,
|
|
"grad_norm": 0.20413376158858587,
|
|
"learning_rate": 1.2701972527436235e-05,
|
|
"loss": 1.0233,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 3.3043478260869565,
|
|
"grad_norm": 0.21637513281635873,
|
|
"learning_rate": 1.2689689613849083e-05,
|
|
"loss": 1.0586,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 3.3069053708439897,
|
|
"grad_norm": 0.18194714637573692,
|
|
"learning_rate": 1.2677402324305099e-05,
|
|
"loss": 0.994,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 3.309462915601023,
|
|
"grad_norm": 0.19606411156722506,
|
|
"learning_rate": 1.266511067879494e-05,
|
|
"loss": 1.0283,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 3.312020460358056,
|
|
"grad_norm": 0.19517256802808283,
|
|
"learning_rate": 1.265281469731634e-05,
|
|
"loss": 1.0373,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 3.3145780051150897,
|
|
"grad_norm": 0.17867307264513901,
|
|
"learning_rate": 1.2640514399874095e-05,
|
|
"loss": 1.0517,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 3.317135549872123,
|
|
"grad_norm": 0.19814474828943063,
|
|
"learning_rate": 1.2628209806480024e-05,
|
|
"loss": 1.0068,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 3.319693094629156,
|
|
"grad_norm": 0.21270750338094424,
|
|
"learning_rate": 1.2615900937152923e-05,
|
|
"loss": 1.0236,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 3.3222506393861893,
|
|
"grad_norm": 0.21625825452151415,
|
|
"learning_rate": 1.2603587811918558e-05,
|
|
"loss": 1.0495,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 3.3248081841432224,
|
|
"grad_norm": 0.23776899893360745,
|
|
"learning_rate": 1.2591270450809612e-05,
|
|
"loss": 1.0741,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 3.3273657289002556,
|
|
"grad_norm": 0.22428186293001376,
|
|
"learning_rate": 1.2578948873865662e-05,
|
|
"loss": 1.0132,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 3.329923273657289,
|
|
"grad_norm": 0.20864902455184137,
|
|
"learning_rate": 1.2566623101133144e-05,
|
|
"loss": 1.0464,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 3.3324808184143224,
|
|
"grad_norm": 0.2685355350833958,
|
|
"learning_rate": 1.2554293152665316e-05,
|
|
"loss": 1.0247,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 3.3350383631713556,
|
|
"grad_norm": 0.2527986356697781,
|
|
"learning_rate": 1.2541959048522239e-05,
|
|
"loss": 1.0399,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 3.337595907928389,
|
|
"grad_norm": 0.22197339925214596,
|
|
"learning_rate": 1.2529620808770723e-05,
|
|
"loss": 1.0157,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 3.340153452685422,
|
|
"grad_norm": 0.3107261506811511,
|
|
"learning_rate": 1.251727845348432e-05,
|
|
"loss": 1.0495,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 3.342710997442455,
|
|
"grad_norm": 0.2643689123746537,
|
|
"learning_rate": 1.2504932002743262e-05,
|
|
"loss": 1.001,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 3.3452685421994883,
|
|
"grad_norm": 0.2364739279711792,
|
|
"learning_rate": 1.2492581476634458e-05,
|
|
"loss": 1.045,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 3.3478260869565215,
|
|
"grad_norm": 0.28136518049730547,
|
|
"learning_rate": 1.2480226895251439e-05,
|
|
"loss": 1.0285,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 3.350383631713555,
|
|
"grad_norm": 0.2523350080360508,
|
|
"learning_rate": 1.2467868278694342e-05,
|
|
"loss": 1.0658,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 3.3529411764705883,
|
|
"grad_norm": 0.20529584681597104,
|
|
"learning_rate": 1.245550564706986e-05,
|
|
"loss": 1.0372,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 3.3554987212276215,
|
|
"grad_norm": 0.26187724014211844,
|
|
"learning_rate": 1.2443139020491216e-05,
|
|
"loss": 1.0295,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 3.3580562659846547,
|
|
"grad_norm": 0.2759180573007528,
|
|
"learning_rate": 1.2430768419078143e-05,
|
|
"loss": 1.0312,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 3.360613810741688,
|
|
"grad_norm": 0.2020495956799633,
|
|
"learning_rate": 1.2418393862956837e-05,
|
|
"loss": 1.0419,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 3.363171355498721,
|
|
"grad_norm": 0.2369272520944126,
|
|
"learning_rate": 1.2406015372259925e-05,
|
|
"loss": 1.0122,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 3.3657289002557547,
|
|
"grad_norm": 0.2184979100214276,
|
|
"learning_rate": 1.2393632967126441e-05,
|
|
"loss": 1.0327,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 3.368286445012788,
|
|
"grad_norm": 0.23858603204557072,
|
|
"learning_rate": 1.2381246667701781e-05,
|
|
"loss": 1.0475,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 3.370843989769821,
|
|
"grad_norm": 0.26756479784593945,
|
|
"learning_rate": 1.236885649413768e-05,
|
|
"loss": 1.0426,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 3.373401534526854,
|
|
"grad_norm": 0.1892302039091279,
|
|
"learning_rate": 1.2356462466592177e-05,
|
|
"loss": 1.0412,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 3.3759590792838874,
|
|
"grad_norm": 0.29335988888765785,
|
|
"learning_rate": 1.2344064605229577e-05,
|
|
"loss": 1.0175,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 3.3785166240409206,
|
|
"grad_norm": 0.21447038773497848,
|
|
"learning_rate": 1.2331662930220424e-05,
|
|
"loss": 1.018,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 3.381074168797954,
|
|
"grad_norm": 0.24164773212365756,
|
|
"learning_rate": 1.2319257461741478e-05,
|
|
"loss": 1.029,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 3.3836317135549874,
|
|
"grad_norm": 0.23724415736018667,
|
|
"learning_rate": 1.2306848219975649e-05,
|
|
"loss": 1.017,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 3.3861892583120206,
|
|
"grad_norm": 0.2146728306264026,
|
|
"learning_rate": 1.2294435225112005e-05,
|
|
"loss": 1.0301,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 3.3887468030690537,
|
|
"grad_norm": 0.18212095256468025,
|
|
"learning_rate": 1.2282018497345705e-05,
|
|
"loss": 1.0361,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 3.391304347826087,
|
|
"grad_norm": 0.23148682510609303,
|
|
"learning_rate": 1.2269598056877996e-05,
|
|
"loss": 1.0385,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 3.39386189258312,
|
|
"grad_norm": 0.20473257376707585,
|
|
"learning_rate": 1.2257173923916154e-05,
|
|
"loss": 1.0208,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 3.3964194373401533,
|
|
"grad_norm": 0.20995062344103757,
|
|
"learning_rate": 1.2244746118673467e-05,
|
|
"loss": 1.0116,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 3.398976982097187,
|
|
"grad_norm": 0.23774156769953378,
|
|
"learning_rate": 1.22323146613692e-05,
|
|
"loss": 1.0742,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 3.40153452685422,
|
|
"grad_norm": 0.20830692559875352,
|
|
"learning_rate": 1.2219879572228555e-05,
|
|
"loss": 1.0565,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 3.4040920716112533,
|
|
"grad_norm": 0.2147028468697588,
|
|
"learning_rate": 1.2207440871482644e-05,
|
|
"loss": 1.0294,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 3.4066496163682864,
|
|
"grad_norm": 0.24756067918436106,
|
|
"learning_rate": 1.2194998579368451e-05,
|
|
"loss": 1.0479,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 3.4092071611253196,
|
|
"grad_norm": 0.2056045421373826,
|
|
"learning_rate": 1.2182552716128818e-05,
|
|
"loss": 1.0236,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 3.411764705882353,
|
|
"grad_norm": 0.2079215269898909,
|
|
"learning_rate": 1.2170103302012374e-05,
|
|
"loss": 1.0513,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 3.414322250639386,
|
|
"grad_norm": 0.19554068307435188,
|
|
"learning_rate": 1.2157650357273547e-05,
|
|
"loss": 1.0389,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 3.4168797953964196,
|
|
"grad_norm": 0.20840944979090947,
|
|
"learning_rate": 1.2145193902172496e-05,
|
|
"loss": 1.0355,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 3.419437340153453,
|
|
"grad_norm": 0.21130712097196197,
|
|
"learning_rate": 1.2132733956975093e-05,
|
|
"loss": 1.0322,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 3.421994884910486,
|
|
"grad_norm": 0.17958150894777242,
|
|
"learning_rate": 1.2120270541952892e-05,
|
|
"loss": 1.0227,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 3.424552429667519,
|
|
"grad_norm": 0.2225571229441682,
|
|
"learning_rate": 1.210780367738309e-05,
|
|
"loss": 1.0285,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 3.4271099744245523,
|
|
"grad_norm": 0.1885954682977986,
|
|
"learning_rate": 1.2095333383548495e-05,
|
|
"loss": 1.0812,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 3.4296675191815855,
|
|
"grad_norm": 0.2099948092443905,
|
|
"learning_rate": 1.2082859680737495e-05,
|
|
"loss": 1.0716,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 3.4322250639386187,
|
|
"grad_norm": 0.2256939428442792,
|
|
"learning_rate": 1.2070382589244026e-05,
|
|
"loss": 1.0311,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 3.4347826086956523,
|
|
"grad_norm": 0.23072791297771425,
|
|
"learning_rate": 1.2057902129367536e-05,
|
|
"loss": 1.0467,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 3.4373401534526855,
|
|
"grad_norm": 0.2057602125391487,
|
|
"learning_rate": 1.204541832141295e-05,
|
|
"loss": 1.028,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 3.4398976982097187,
|
|
"grad_norm": 0.2520074046407619,
|
|
"learning_rate": 1.2032931185690646e-05,
|
|
"loss": 1.0163,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 3.442455242966752,
|
|
"grad_norm": 0.2421964192866277,
|
|
"learning_rate": 1.202044074251641e-05,
|
|
"loss": 1.063,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 3.445012787723785,
|
|
"grad_norm": 0.20429551187516548,
|
|
"learning_rate": 1.2007947012211419e-05,
|
|
"loss": 1.0361,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 3.4475703324808182,
|
|
"grad_norm": 0.2520787216839294,
|
|
"learning_rate": 1.199545001510218e-05,
|
|
"loss": 1.054,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 3.4501278772378514,
|
|
"grad_norm": 0.24681543428956615,
|
|
"learning_rate": 1.1982949771520535e-05,
|
|
"loss": 1.0605,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 3.452685421994885,
|
|
"grad_norm": 0.20282034999970464,
|
|
"learning_rate": 1.1970446301803598e-05,
|
|
"loss": 1.0461,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 3.455242966751918,
|
|
"grad_norm": 0.22677677047988842,
|
|
"learning_rate": 1.1957939626293726e-05,
|
|
"loss": 1.0459,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 3.4578005115089514,
|
|
"grad_norm": 0.23929950706752162,
|
|
"learning_rate": 1.1945429765338507e-05,
|
|
"loss": 1.0531,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 3.4603580562659846,
|
|
"grad_norm": 0.2096490071983182,
|
|
"learning_rate": 1.1932916739290694e-05,
|
|
"loss": 1.0148,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 3.4629156010230178,
|
|
"grad_norm": 0.20618185619438542,
|
|
"learning_rate": 1.1920400568508201e-05,
|
|
"loss": 1.0375,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 3.4654731457800514,
|
|
"grad_norm": 0.23186283780985562,
|
|
"learning_rate": 1.1907881273354059e-05,
|
|
"loss": 1.0276,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 3.4680306905370846,
|
|
"grad_norm": 0.21691929515578598,
|
|
"learning_rate": 1.1895358874196377e-05,
|
|
"loss": 1.0368,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 3.4705882352941178,
|
|
"grad_norm": 0.20410519325755752,
|
|
"learning_rate": 1.188283339140831e-05,
|
|
"loss": 1.038,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 3.473145780051151,
|
|
"grad_norm": 0.22863334112386996,
|
|
"learning_rate": 1.1870304845368043e-05,
|
|
"loss": 1.0433,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 3.475703324808184,
|
|
"grad_norm": 0.2126661663430652,
|
|
"learning_rate": 1.1857773256458732e-05,
|
|
"loss": 1.0605,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.24272298207990836,
|
|
"learning_rate": 1.184523864506849e-05,
|
|
"loss": 1.0476,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 3.4808184143222505,
|
|
"grad_norm": 0.20098243757734405,
|
|
"learning_rate": 1.1832701031590345e-05,
|
|
"loss": 1.032,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 3.483375959079284,
|
|
"grad_norm": 0.2516527217412891,
|
|
"learning_rate": 1.1820160436422213e-05,
|
|
"loss": 1.0392,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 3.4859335038363173,
|
|
"grad_norm": 0.22312520765078486,
|
|
"learning_rate": 1.1807616879966856e-05,
|
|
"loss": 1.0549,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 3.4884910485933505,
|
|
"grad_norm": 0.23508194911007732,
|
|
"learning_rate": 1.1795070382631856e-05,
|
|
"loss": 1.0257,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 3.4910485933503836,
|
|
"grad_norm": 0.2056219883277526,
|
|
"learning_rate": 1.1782520964829583e-05,
|
|
"loss": 1.0616,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 3.493606138107417,
|
|
"grad_norm": 0.22297849379676427,
|
|
"learning_rate": 1.1769968646977148e-05,
|
|
"loss": 1.08,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 3.49616368286445,
|
|
"grad_norm": 0.1917605236627194,
|
|
"learning_rate": 1.1757413449496393e-05,
|
|
"loss": 1.0582,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 3.498721227621483,
|
|
"grad_norm": 0.22264832355995012,
|
|
"learning_rate": 1.174485539281384e-05,
|
|
"loss": 0.9999,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 3.501278772378517,
|
|
"grad_norm": 0.18053830121135175,
|
|
"learning_rate": 1.1732294497360658e-05,
|
|
"loss": 1.0481,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 3.50383631713555,
|
|
"grad_norm": 0.25413658020729973,
|
|
"learning_rate": 1.1719730783572645e-05,
|
|
"loss": 1.0526,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 3.506393861892583,
|
|
"grad_norm": 0.20438148687464178,
|
|
"learning_rate": 1.1707164271890168e-05,
|
|
"loss": 1.0465,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 3.5089514066496164,
|
|
"grad_norm": 0.27411869672391553,
|
|
"learning_rate": 1.1694594982758164e-05,
|
|
"loss": 1.0672,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 3.5115089514066495,
|
|
"grad_norm": 0.27020394951486204,
|
|
"learning_rate": 1.1682022936626076e-05,
|
|
"loss": 1.0249,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 3.5140664961636827,
|
|
"grad_norm": 0.20542313494356507,
|
|
"learning_rate": 1.166944815394784e-05,
|
|
"loss": 1.0444,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 3.516624040920716,
|
|
"grad_norm": 0.2696771035530231,
|
|
"learning_rate": 1.165687065518184e-05,
|
|
"loss": 1.0164,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 3.5191815856777495,
|
|
"grad_norm": 0.21834933315057503,
|
|
"learning_rate": 1.1644290460790879e-05,
|
|
"loss": 1.0231,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 3.5217391304347827,
|
|
"grad_norm": 0.25602165129241816,
|
|
"learning_rate": 1.163170759124215e-05,
|
|
"loss": 1.0499,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 3.524296675191816,
|
|
"grad_norm": 0.2466307590095287,
|
|
"learning_rate": 1.161912206700719e-05,
|
|
"loss": 1.0179,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 3.526854219948849,
|
|
"grad_norm": 0.1990877095514582,
|
|
"learning_rate": 1.1606533908561866e-05,
|
|
"loss": 1.0825,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 3.5294117647058822,
|
|
"grad_norm": 0.2262880860449741,
|
|
"learning_rate": 1.1593943136386316e-05,
|
|
"loss": 1.0239,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 3.531969309462916,
|
|
"grad_norm": 0.23639713675723853,
|
|
"learning_rate": 1.1581349770964946e-05,
|
|
"loss": 1.0797,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 3.5345268542199486,
|
|
"grad_norm": 0.19143592758217978,
|
|
"learning_rate": 1.1568753832786376e-05,
|
|
"loss": 1.0482,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 3.5370843989769822,
|
|
"grad_norm": 0.21395077968188803,
|
|
"learning_rate": 1.1556155342343405e-05,
|
|
"loss": 1.0341,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 3.5396419437340154,
|
|
"grad_norm": 0.20517427967195068,
|
|
"learning_rate": 1.154355432013299e-05,
|
|
"loss": 1.0657,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 3.5421994884910486,
|
|
"grad_norm": 0.19022344547536582,
|
|
"learning_rate": 1.1530950786656205e-05,
|
|
"loss": 1.0428,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 3.544757033248082,
|
|
"grad_norm": 0.24857892965208156,
|
|
"learning_rate": 1.1518344762418216e-05,
|
|
"loss": 1.0614,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 3.547314578005115,
|
|
"grad_norm": 0.17434032950673256,
|
|
"learning_rate": 1.150573626792823e-05,
|
|
"loss": 1.0119,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 3.5498721227621486,
|
|
"grad_norm": 0.221669736437551,
|
|
"learning_rate": 1.1493125323699486e-05,
|
|
"loss": 1.0325,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 3.5524296675191813,
|
|
"grad_norm": 0.19550877444868983,
|
|
"learning_rate": 1.1480511950249195e-05,
|
|
"loss": 1.0621,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 3.554987212276215,
|
|
"grad_norm": 0.20320983764425946,
|
|
"learning_rate": 1.1467896168098533e-05,
|
|
"loss": 1.0688,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 3.557544757033248,
|
|
"grad_norm": 0.21236236447911172,
|
|
"learning_rate": 1.1455277997772585e-05,
|
|
"loss": 0.9992,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 3.5601023017902813,
|
|
"grad_norm": 0.1946876189282923,
|
|
"learning_rate": 1.1442657459800323e-05,
|
|
"loss": 1.0298,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 3.5626598465473145,
|
|
"grad_norm": 0.20833695509734265,
|
|
"learning_rate": 1.143003457471458e-05,
|
|
"loss": 1.0481,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 3.5652173913043477,
|
|
"grad_norm": 0.19849397670530705,
|
|
"learning_rate": 1.1417409363051992e-05,
|
|
"loss": 1.0508,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 3.5677749360613813,
|
|
"grad_norm": 0.1862173592034928,
|
|
"learning_rate": 1.1404781845352999e-05,
|
|
"loss": 1.0586,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 3.5703324808184145,
|
|
"grad_norm": 0.20151362231655162,
|
|
"learning_rate": 1.1392152042161774e-05,
|
|
"loss": 1.0319,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 3.5728900255754477,
|
|
"grad_norm": 0.23404342439834142,
|
|
"learning_rate": 1.1379519974026226e-05,
|
|
"loss": 1.0151,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 3.575447570332481,
|
|
"grad_norm": 0.18584316354206787,
|
|
"learning_rate": 1.136688566149793e-05,
|
|
"loss": 1.0516,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 3.578005115089514,
|
|
"grad_norm": 0.2357364264338847,
|
|
"learning_rate": 1.1354249125132131e-05,
|
|
"loss": 1.0558,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 3.580562659846547,
|
|
"grad_norm": 0.255370311471337,
|
|
"learning_rate": 1.1341610385487677e-05,
|
|
"loss": 1.0159,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 3.5831202046035804,
|
|
"grad_norm": 0.2015566724373594,
|
|
"learning_rate": 1.1328969463127009e-05,
|
|
"loss": 1.0256,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 3.585677749360614,
|
|
"grad_norm": 0.2717588011458947,
|
|
"learning_rate": 1.1316326378616121e-05,
|
|
"loss": 1.0452,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 3.588235294117647,
|
|
"grad_norm": 0.226800697503035,
|
|
"learning_rate": 1.1303681152524514e-05,
|
|
"loss": 1.0417,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 3.5907928388746804,
|
|
"grad_norm": 0.20628829171202948,
|
|
"learning_rate": 1.129103380542519e-05,
|
|
"loss": 1.0483,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 3.5933503836317136,
|
|
"grad_norm": 0.2260665953032841,
|
|
"learning_rate": 1.1278384357894585e-05,
|
|
"loss": 1.0407,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 3.5959079283887467,
|
|
"grad_norm": 0.20513785218039995,
|
|
"learning_rate": 1.1265732830512561e-05,
|
|
"loss": 1.0391,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 3.59846547314578,
|
|
"grad_norm": 0.21444285296757887,
|
|
"learning_rate": 1.125307924386236e-05,
|
|
"loss": 1.0456,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 3.601023017902813,
|
|
"grad_norm": 0.2652819565444848,
|
|
"learning_rate": 1.1240423618530578e-05,
|
|
"loss": 1.0501,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 3.6035805626598467,
|
|
"grad_norm": 0.23632809050025924,
|
|
"learning_rate": 1.122776597510713e-05,
|
|
"loss": 1.0294,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 3.60613810741688,
|
|
"grad_norm": 0.2185806876530497,
|
|
"learning_rate": 1.1215106334185201e-05,
|
|
"loss": 1.0024,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 3.608695652173913,
|
|
"grad_norm": 0.24854116957417377,
|
|
"learning_rate": 1.1202444716361247e-05,
|
|
"loss": 1.0451,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 3.6112531969309463,
|
|
"grad_norm": 0.2045525689869136,
|
|
"learning_rate": 1.1189781142234917e-05,
|
|
"loss": 1.0635,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 3.6138107416879794,
|
|
"grad_norm": 0.2399433598230184,
|
|
"learning_rate": 1.1177115632409064e-05,
|
|
"loss": 1.0177,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 3.6163682864450126,
|
|
"grad_norm": 0.2415017313404832,
|
|
"learning_rate": 1.1164448207489673e-05,
|
|
"loss": 1.0379,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 3.618925831202046,
|
|
"grad_norm": 0.21319360249943278,
|
|
"learning_rate": 1.1151778888085856e-05,
|
|
"loss": 1.0179,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 3.6214833759590794,
|
|
"grad_norm": 0.24881166658392342,
|
|
"learning_rate": 1.1139107694809806e-05,
|
|
"loss": 1.0392,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 3.6240409207161126,
|
|
"grad_norm": 0.19415985264760977,
|
|
"learning_rate": 1.1126434648276756e-05,
|
|
"loss": 1.0124,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 3.626598465473146,
|
|
"grad_norm": 0.25642703103922565,
|
|
"learning_rate": 1.1113759769104965e-05,
|
|
"loss": 1.0496,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 3.629156010230179,
|
|
"grad_norm": 0.2492878689877881,
|
|
"learning_rate": 1.1101083077915667e-05,
|
|
"loss": 1.043,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 3.631713554987212,
|
|
"grad_norm": 0.1983125579481505,
|
|
"learning_rate": 1.1088404595333046e-05,
|
|
"loss": 1.0449,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 3.634271099744246,
|
|
"grad_norm": 0.21827713474511093,
|
|
"learning_rate": 1.1075724341984201e-05,
|
|
"loss": 1.0622,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 3.6368286445012785,
|
|
"grad_norm": 0.23619084555258635,
|
|
"learning_rate": 1.1063042338499113e-05,
|
|
"loss": 1.015,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 3.639386189258312,
|
|
"grad_norm": 0.20336660531825468,
|
|
"learning_rate": 1.1050358605510606e-05,
|
|
"loss": 1.0413,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 3.6419437340153453,
|
|
"grad_norm": 0.2421386235557971,
|
|
"learning_rate": 1.1037673163654321e-05,
|
|
"loss": 1.0307,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 3.6445012787723785,
|
|
"grad_norm": 0.22360499286457716,
|
|
"learning_rate": 1.1024986033568683e-05,
|
|
"loss": 1.0605,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 3.6470588235294117,
|
|
"grad_norm": 0.2378376933825962,
|
|
"learning_rate": 1.101229723589485e-05,
|
|
"loss": 1.0192,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 3.649616368286445,
|
|
"grad_norm": 0.22968460013912853,
|
|
"learning_rate": 1.099960679127671e-05,
|
|
"loss": 1.0349,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 3.6521739130434785,
|
|
"grad_norm": 0.23158540102865127,
|
|
"learning_rate": 1.0986914720360821e-05,
|
|
"loss": 1.0253,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 3.6547314578005117,
|
|
"grad_norm": 0.22013393117978197,
|
|
"learning_rate": 1.097422104379639e-05,
|
|
"loss": 1.018,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 3.657289002557545,
|
|
"grad_norm": 0.22220097208242998,
|
|
"learning_rate": 1.0961525782235233e-05,
|
|
"loss": 1.0473,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 3.659846547314578,
|
|
"grad_norm": 0.22194116899976712,
|
|
"learning_rate": 1.0948828956331752e-05,
|
|
"loss": 1.0424,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 3.662404092071611,
|
|
"grad_norm": 0.1983453396349903,
|
|
"learning_rate": 1.0936130586742881e-05,
|
|
"loss": 1.0453,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 3.6649616368286444,
|
|
"grad_norm": 0.2327743943604014,
|
|
"learning_rate": 1.0923430694128074e-05,
|
|
"loss": 1.0193,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 3.6675191815856776,
|
|
"grad_norm": 0.21867884439727386,
|
|
"learning_rate": 1.091072929914927e-05,
|
|
"loss": 1.0256,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 3.670076726342711,
|
|
"grad_norm": 0.23080732244405422,
|
|
"learning_rate": 1.0898026422470838e-05,
|
|
"loss": 1.0232,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 3.6726342710997444,
|
|
"grad_norm": 0.22857566907679472,
|
|
"learning_rate": 1.0885322084759566e-05,
|
|
"loss": 1.0536,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 3.6751918158567776,
|
|
"grad_norm": 0.2520804757587095,
|
|
"learning_rate": 1.0872616306684616e-05,
|
|
"loss": 1.0287,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 3.6777493606138107,
|
|
"grad_norm": 0.2469698171523125,
|
|
"learning_rate": 1.0859909108917497e-05,
|
|
"loss": 1.0909,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 3.680306905370844,
|
|
"grad_norm": 0.2327692634720372,
|
|
"learning_rate": 1.084720051213202e-05,
|
|
"loss": 1.0193,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 3.682864450127877,
|
|
"grad_norm": 0.23658961049768784,
|
|
"learning_rate": 1.0834490537004286e-05,
|
|
"loss": 1.0212,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 3.6854219948849103,
|
|
"grad_norm": 0.20942394628132058,
|
|
"learning_rate": 1.0821779204212623e-05,
|
|
"loss": 1.0249,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 3.687979539641944,
|
|
"grad_norm": 0.23145657493822064,
|
|
"learning_rate": 1.0809066534437576e-05,
|
|
"loss": 1.0179,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 3.690537084398977,
|
|
"grad_norm": 0.1999453161376075,
|
|
"learning_rate": 1.0796352548361863e-05,
|
|
"loss": 1.0026,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 3.6930946291560103,
|
|
"grad_norm": 0.22035660036843002,
|
|
"learning_rate": 1.0783637266670348e-05,
|
|
"loss": 1.0287,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 3.6956521739130435,
|
|
"grad_norm": 0.19317194516834582,
|
|
"learning_rate": 1.0770920710049997e-05,
|
|
"loss": 1.0507,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 3.6982097186700766,
|
|
"grad_norm": 0.2457010945328612,
|
|
"learning_rate": 1.0758202899189852e-05,
|
|
"loss": 1.0135,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 3.70076726342711,
|
|
"grad_norm": 0.18287871278152357,
|
|
"learning_rate": 1.0745483854780996e-05,
|
|
"loss": 1.0408,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 3.703324808184143,
|
|
"grad_norm": 0.23748668263508885,
|
|
"learning_rate": 1.073276359751652e-05,
|
|
"loss": 1.0642,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 3.7058823529411766,
|
|
"grad_norm": 0.22123508756316554,
|
|
"learning_rate": 1.0720042148091487e-05,
|
|
"loss": 1.0136,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 3.70843989769821,
|
|
"grad_norm": 0.23936061656812962,
|
|
"learning_rate": 1.0707319527202902e-05,
|
|
"loss": 1.0297,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 3.710997442455243,
|
|
"grad_norm": 0.27579723622779695,
|
|
"learning_rate": 1.0694595755549668e-05,
|
|
"loss": 1.0088,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 3.713554987212276,
|
|
"grad_norm": 0.2295449569053256,
|
|
"learning_rate": 1.0681870853832572e-05,
|
|
"loss": 1.0411,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 3.7161125319693094,
|
|
"grad_norm": 0.21165912842223478,
|
|
"learning_rate": 1.066914484275423e-05,
|
|
"loss": 1.0237,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 3.718670076726343,
|
|
"grad_norm": 0.22373624538155187,
|
|
"learning_rate": 1.0656417743019065e-05,
|
|
"loss": 1.0661,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 3.7212276214833757,
|
|
"grad_norm": 0.18604305862261736,
|
|
"learning_rate": 1.0643689575333276e-05,
|
|
"loss": 1.0205,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 3.7237851662404093,
|
|
"grad_norm": 0.22160309843387682,
|
|
"learning_rate": 1.0630960360404793e-05,
|
|
"loss": 1.0179,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 3.7263427109974425,
|
|
"grad_norm": 0.1910813020463846,
|
|
"learning_rate": 1.061823011894326e-05,
|
|
"loss": 1.0622,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 3.7289002557544757,
|
|
"grad_norm": 0.22862715748972842,
|
|
"learning_rate": 1.0605498871659974e-05,
|
|
"loss": 1.0185,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 3.731457800511509,
|
|
"grad_norm": 0.20341936295394042,
|
|
"learning_rate": 1.0592766639267885e-05,
|
|
"loss": 1.0534,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 3.734015345268542,
|
|
"grad_norm": 0.2403253522185079,
|
|
"learning_rate": 1.0580033442481532e-05,
|
|
"loss": 1.0384,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 3.7365728900255757,
|
|
"grad_norm": 0.22338961464147264,
|
|
"learning_rate": 1.0567299302017038e-05,
|
|
"loss": 1.0143,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 3.7391304347826084,
|
|
"grad_norm": 0.2117212049005623,
|
|
"learning_rate": 1.0554564238592051e-05,
|
|
"loss": 1.021,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 3.741687979539642,
|
|
"grad_norm": 0.2254372260082909,
|
|
"learning_rate": 1.0541828272925721e-05,
|
|
"loss": 1.0292,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 3.7442455242966752,
|
|
"grad_norm": 0.1922734992717323,
|
|
"learning_rate": 1.0529091425738669e-05,
|
|
"loss": 1.0489,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 3.7468030690537084,
|
|
"grad_norm": 0.21486062627786348,
|
|
"learning_rate": 1.0516353717752947e-05,
|
|
"loss": 1.0359,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 3.7493606138107416,
|
|
"grad_norm": 0.19407217948842267,
|
|
"learning_rate": 1.0503615169692012e-05,
|
|
"loss": 1.0342,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 3.7519181585677748,
|
|
"grad_norm": 0.1785805281257786,
|
|
"learning_rate": 1.0490875802280685e-05,
|
|
"loss": 1.0353,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 3.7544757033248084,
|
|
"grad_norm": 0.20291577459751503,
|
|
"learning_rate": 1.0478135636245122e-05,
|
|
"loss": 1.0306,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 3.7570332480818416,
|
|
"grad_norm": 0.1982096205595046,
|
|
"learning_rate": 1.046539469231277e-05,
|
|
"loss": 1.0548,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 3.7595907928388748,
|
|
"grad_norm": 0.20930042720158404,
|
|
"learning_rate": 1.0452652991212357e-05,
|
|
"loss": 1.0094,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 3.762148337595908,
|
|
"grad_norm": 0.19919273397375814,
|
|
"learning_rate": 1.0439910553673829e-05,
|
|
"loss": 1.0439,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 3.764705882352941,
|
|
"grad_norm": 0.22254826567261315,
|
|
"learning_rate": 1.0427167400428331e-05,
|
|
"loss": 1.0373,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 3.7672634271099743,
|
|
"grad_norm": 0.22854611711688827,
|
|
"learning_rate": 1.0414423552208184e-05,
|
|
"loss": 1.0199,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 3.7698209718670075,
|
|
"grad_norm": 0.3654589035727414,
|
|
"learning_rate": 1.0401679029746828e-05,
|
|
"loss": 1.0311,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 3.772378516624041,
|
|
"grad_norm": 0.19477682817923897,
|
|
"learning_rate": 1.038893385377881e-05,
|
|
"loss": 1.0445,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 3.7749360613810743,
|
|
"grad_norm": 0.2035068833502665,
|
|
"learning_rate": 1.0376188045039723e-05,
|
|
"loss": 1.035,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 3.7774936061381075,
|
|
"grad_norm": 0.20207740056727894,
|
|
"learning_rate": 1.0363441624266213e-05,
|
|
"loss": 1.0054,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 3.7800511508951407,
|
|
"grad_norm": 0.23108316839210677,
|
|
"learning_rate": 1.0350694612195905e-05,
|
|
"loss": 1.0299,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 3.782608695652174,
|
|
"grad_norm": 0.19921910618488686,
|
|
"learning_rate": 1.0337947029567388e-05,
|
|
"loss": 1.013,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 3.785166240409207,
|
|
"grad_norm": 0.19609376442655463,
|
|
"learning_rate": 1.0325198897120183e-05,
|
|
"loss": 1.0239,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 3.78772378516624,
|
|
"grad_norm": 0.2039103534692172,
|
|
"learning_rate": 1.0312450235594706e-05,
|
|
"loss": 1.0262,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 3.790281329923274,
|
|
"grad_norm": 0.19686683259289736,
|
|
"learning_rate": 1.0299701065732235e-05,
|
|
"loss": 1.0444,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 3.792838874680307,
|
|
"grad_norm": 0.2031103792356114,
|
|
"learning_rate": 1.0286951408274865e-05,
|
|
"loss": 1.0993,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 3.79539641943734,
|
|
"grad_norm": 0.2263801739639009,
|
|
"learning_rate": 1.0274201283965497e-05,
|
|
"loss": 1.0409,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 3.7979539641943734,
|
|
"grad_norm": 0.17572315424279408,
|
|
"learning_rate": 1.0261450713547785e-05,
|
|
"loss": 1.075,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 3.8005115089514065,
|
|
"grad_norm": 0.27023491274755906,
|
|
"learning_rate": 1.0248699717766107e-05,
|
|
"loss": 1.0679,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 3.80306905370844,
|
|
"grad_norm": 0.1713633148592625,
|
|
"learning_rate": 1.023594831736554e-05,
|
|
"loss": 1.0484,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 3.805626598465473,
|
|
"grad_norm": 0.2367623046752298,
|
|
"learning_rate": 1.0223196533091813e-05,
|
|
"loss": 1.0287,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 3.8081841432225065,
|
|
"grad_norm": 0.1984118987646221,
|
|
"learning_rate": 1.0210444385691282e-05,
|
|
"loss": 1.0373,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 3.8107416879795397,
|
|
"grad_norm": 0.19013291547902408,
|
|
"learning_rate": 1.0197691895910895e-05,
|
|
"loss": 1.0396,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 3.813299232736573,
|
|
"grad_norm": 0.2262690201508357,
|
|
"learning_rate": 1.0184939084498153e-05,
|
|
"loss": 1.0383,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 3.815856777493606,
|
|
"grad_norm": 0.21345095926753077,
|
|
"learning_rate": 1.0172185972201082e-05,
|
|
"loss": 1.0341,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 3.8184143222506393,
|
|
"grad_norm": 0.18180827453898485,
|
|
"learning_rate": 1.01594325797682e-05,
|
|
"loss": 1.0419,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 3.820971867007673,
|
|
"grad_norm": 0.23760325057681905,
|
|
"learning_rate": 1.0146678927948484e-05,
|
|
"loss": 1.0178,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 3.8235294117647056,
|
|
"grad_norm": 0.18084043730292876,
|
|
"learning_rate": 1.013392503749132e-05,
|
|
"loss": 1.0701,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 3.8260869565217392,
|
|
"grad_norm": 0.18619103410121773,
|
|
"learning_rate": 1.0121170929146493e-05,
|
|
"loss": 1.0359,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 3.8286445012787724,
|
|
"grad_norm": 0.1814058213229099,
|
|
"learning_rate": 1.0108416623664142e-05,
|
|
"loss": 1.0483,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 3.8312020460358056,
|
|
"grad_norm": 0.17659823284048892,
|
|
"learning_rate": 1.0095662141794725e-05,
|
|
"loss": 1.0167,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 3.833759590792839,
|
|
"grad_norm": 0.18093838446366517,
|
|
"learning_rate": 1.0082907504288977e-05,
|
|
"loss": 1.0271,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 3.836317135549872,
|
|
"grad_norm": 0.19401662423230362,
|
|
"learning_rate": 1.0070152731897911e-05,
|
|
"loss": 1.0525,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 3.8388746803069056,
|
|
"grad_norm": 0.17897896363370017,
|
|
"learning_rate": 1.0057397845372734e-05,
|
|
"loss": 1.0354,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 3.8414322250639388,
|
|
"grad_norm": 0.18581636595029996,
|
|
"learning_rate": 1.004464286546485e-05,
|
|
"loss": 1.0439,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 3.843989769820972,
|
|
"grad_norm": 0.17458922536736418,
|
|
"learning_rate": 1.0031887812925818e-05,
|
|
"loss": 1.0073,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 3.846547314578005,
|
|
"grad_norm": 0.18401279215992355,
|
|
"learning_rate": 1.0019132708507307e-05,
|
|
"loss": 1.0549,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 3.8491048593350383,
|
|
"grad_norm": 0.17886260918603583,
|
|
"learning_rate": 1.0006377572961075e-05,
|
|
"loss": 1.056,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 3.8516624040920715,
|
|
"grad_norm": 0.18640944420175584,
|
|
"learning_rate": 9.99362242703893e-06,
|
|
"loss": 1.0317,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 3.8542199488491047,
|
|
"grad_norm": 0.1724777242125077,
|
|
"learning_rate": 9.980867291492697e-06,
|
|
"loss": 1.0496,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 3.8567774936061383,
|
|
"grad_norm": 0.17736614296923925,
|
|
"learning_rate": 9.968112187074187e-06,
|
|
"loss": 1.0321,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 3.8593350383631715,
|
|
"grad_norm": 0.18919776197181185,
|
|
"learning_rate": 9.955357134535153e-06,
|
|
"loss": 1.0612,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 3.8618925831202047,
|
|
"grad_norm": 0.17013450287572257,
|
|
"learning_rate": 9.94260215462727e-06,
|
|
"loss": 1.0371,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 3.864450127877238,
|
|
"grad_norm": 0.1795391930284376,
|
|
"learning_rate": 9.929847268102092e-06,
|
|
"loss": 1.0116,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 3.867007672634271,
|
|
"grad_norm": 0.18010343872623125,
|
|
"learning_rate": 9.917092495711023e-06,
|
|
"loss": 0.9975,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 3.869565217391304,
|
|
"grad_norm": 0.2018143041172149,
|
|
"learning_rate": 9.904337858205282e-06,
|
|
"loss": 1.0261,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 3.8721227621483374,
|
|
"grad_norm": 0.20189193249637963,
|
|
"learning_rate": 9.891583376335861e-06,
|
|
"loss": 1.036,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 3.874680306905371,
|
|
"grad_norm": 0.18604316403857601,
|
|
"learning_rate": 9.87882907085351e-06,
|
|
"loss": 1.0353,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 3.877237851662404,
|
|
"grad_norm": 0.1764086076077849,
|
|
"learning_rate": 9.866074962508684e-06,
|
|
"loss": 1.048,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 3.8797953964194374,
|
|
"grad_norm": 0.18861859299069214,
|
|
"learning_rate": 9.85332107205152e-06,
|
|
"loss": 1.0719,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 3.8823529411764706,
|
|
"grad_norm": 0.1729886347071538,
|
|
"learning_rate": 9.840567420231802e-06,
|
|
"loss": 1.0436,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 3.8849104859335037,
|
|
"grad_norm": 0.20230041478663247,
|
|
"learning_rate": 9.82781402779892e-06,
|
|
"loss": 1.0611,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 3.887468030690537,
|
|
"grad_norm": 0.19599063188718716,
|
|
"learning_rate": 9.815060915501852e-06,
|
|
"loss": 1.0517,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 3.89002557544757,
|
|
"grad_norm": 0.20556197980895194,
|
|
"learning_rate": 9.802308104089109e-06,
|
|
"loss": 1.0249,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 3.8925831202046037,
|
|
"grad_norm": 0.21413593644142717,
|
|
"learning_rate": 9.789555614308721e-06,
|
|
"loss": 0.9947,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 3.895140664961637,
|
|
"grad_norm": 0.20287758208508144,
|
|
"learning_rate": 9.77680346690819e-06,
|
|
"loss": 1.0352,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 3.89769820971867,
|
|
"grad_norm": 0.19248950316327032,
|
|
"learning_rate": 9.764051682634462e-06,
|
|
"loss": 1.0275,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 3.9002557544757033,
|
|
"grad_norm": 0.22258046212032104,
|
|
"learning_rate": 9.751300282233895e-06,
|
|
"loss": 1.0534,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 3.9028132992327365,
|
|
"grad_norm": 0.21347571901775975,
|
|
"learning_rate": 9.738549286452218e-06,
|
|
"loss": 1.038,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 3.90537084398977,
|
|
"grad_norm": 0.2280185995042673,
|
|
"learning_rate": 9.725798716034507e-06,
|
|
"loss": 1.0286,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 3.907928388746803,
|
|
"grad_norm": 0.20202933779134605,
|
|
"learning_rate": 9.713048591725138e-06,
|
|
"loss": 1.0448,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 3.9104859335038364,
|
|
"grad_norm": 0.20920944736139577,
|
|
"learning_rate": 9.700298934267766e-06,
|
|
"loss": 1.0069,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.19240200507914293,
|
|
"learning_rate": 9.687549764405296e-06,
|
|
"loss": 1.0376,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 3.915601023017903,
|
|
"grad_norm": 0.20292905124684749,
|
|
"learning_rate": 9.674801102879817e-06,
|
|
"loss": 1.0274,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 3.918158567774936,
|
|
"grad_norm": 0.19062905855598355,
|
|
"learning_rate": 9.662052970432617e-06,
|
|
"loss": 1.0407,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 3.920716112531969,
|
|
"grad_norm": 0.21406493946615143,
|
|
"learning_rate": 9.6493053878041e-06,
|
|
"loss": 1.0401,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 3.923273657289003,
|
|
"grad_norm": 0.19190236583371453,
|
|
"learning_rate": 9.63655837573379e-06,
|
|
"loss": 1.0521,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 3.9258312020460355,
|
|
"grad_norm": 0.22868484745745557,
|
|
"learning_rate": 9.623811954960279e-06,
|
|
"loss": 1.0396,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 3.928388746803069,
|
|
"grad_norm": 0.1896213962401851,
|
|
"learning_rate": 9.611066146221192e-06,
|
|
"loss": 1.0272,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 3.9309462915601023,
|
|
"grad_norm": 0.208558000446644,
|
|
"learning_rate": 9.598320970253175e-06,
|
|
"loss": 1.0263,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 3.9335038363171355,
|
|
"grad_norm": 0.18215621037833685,
|
|
"learning_rate": 9.585576447791817e-06,
|
|
"loss": 1.044,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 3.9360613810741687,
|
|
"grad_norm": 0.17351304593560926,
|
|
"learning_rate": 9.572832599571674e-06,
|
|
"loss": 1.0268,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 3.938618925831202,
|
|
"grad_norm": 0.22389061474679745,
|
|
"learning_rate": 9.560089446326175e-06,
|
|
"loss": 1.0313,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 3.9411764705882355,
|
|
"grad_norm": 0.17547633776625562,
|
|
"learning_rate": 9.547347008787648e-06,
|
|
"loss": 1.0321,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 3.9437340153452687,
|
|
"grad_norm": 0.21231411571444475,
|
|
"learning_rate": 9.534605307687233e-06,
|
|
"loss": 1.0027,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 3.946291560102302,
|
|
"grad_norm": 0.1792239552721382,
|
|
"learning_rate": 9.52186436375488e-06,
|
|
"loss": 1.0272,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 3.948849104859335,
|
|
"grad_norm": 0.21595336710565813,
|
|
"learning_rate": 9.509124197719317e-06,
|
|
"loss": 1.0074,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 3.9514066496163682,
|
|
"grad_norm": 0.20310879984969743,
|
|
"learning_rate": 9.496384830307988e-06,
|
|
"loss": 1.0481,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 3.9539641943734014,
|
|
"grad_norm": 0.20949639165674833,
|
|
"learning_rate": 9.483646282247056e-06,
|
|
"loss": 1.0167,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 3.9565217391304346,
|
|
"grad_norm": 0.23427285497954728,
|
|
"learning_rate": 9.470908574261333e-06,
|
|
"loss": 1.0478,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 3.959079283887468,
|
|
"grad_norm": 0.1881836520862583,
|
|
"learning_rate": 9.458171727074284e-06,
|
|
"loss": 1.0257,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 3.9616368286445014,
|
|
"grad_norm": 0.22079043196824938,
|
|
"learning_rate": 9.44543576140795e-06,
|
|
"loss": 1.0904,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 3.9641943734015346,
|
|
"grad_norm": 0.18959168411837335,
|
|
"learning_rate": 9.432700697982962e-06,
|
|
"loss": 1.0562,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 3.9667519181585678,
|
|
"grad_norm": 0.1881932409897208,
|
|
"learning_rate": 9.419966557518472e-06,
|
|
"loss": 1.048,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 3.969309462915601,
|
|
"grad_norm": 0.20694575807793056,
|
|
"learning_rate": 9.407233360732119e-06,
|
|
"loss": 1.0453,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 3.971867007672634,
|
|
"grad_norm": 0.21141511803194477,
|
|
"learning_rate": 9.39450112834003e-06,
|
|
"loss": 1.0416,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 3.9744245524296673,
|
|
"grad_norm": 0.19924380600743072,
|
|
"learning_rate": 9.381769881056744e-06,
|
|
"loss": 1.0302,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 3.976982097186701,
|
|
"grad_norm": 0.18443702573710982,
|
|
"learning_rate": 9.36903963959521e-06,
|
|
"loss": 1.0509,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 3.979539641943734,
|
|
"grad_norm": 0.2130900807101153,
|
|
"learning_rate": 9.356310424666725e-06,
|
|
"loss": 1.0674,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 3.9820971867007673,
|
|
"grad_norm": 0.18076464736813797,
|
|
"learning_rate": 9.343582256980937e-06,
|
|
"loss": 1.0327,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 3.9846547314578005,
|
|
"grad_norm": 0.19770573119978005,
|
|
"learning_rate": 9.330855157245776e-06,
|
|
"loss": 1.049,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 3.9872122762148337,
|
|
"grad_norm": 0.18941088064084555,
|
|
"learning_rate": 9.318129146167432e-06,
|
|
"loss": 1.0285,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 3.9897698209718673,
|
|
"grad_norm": 0.21949442372495884,
|
|
"learning_rate": 9.305404244450337e-06,
|
|
"loss": 1.0447,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 3.9923273657289,
|
|
"grad_norm": 0.19665403880426255,
|
|
"learning_rate": 9.292680472797101e-06,
|
|
"loss": 1.0411,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 3.9948849104859336,
|
|
"grad_norm": 0.19058036356127872,
|
|
"learning_rate": 9.279957851908513e-06,
|
|
"loss": 1.0535,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 3.997442455242967,
|
|
"grad_norm": 0.18814319318672243,
|
|
"learning_rate": 9.267236402483482e-06,
|
|
"loss": 1.036,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.1865356816625339,
|
|
"learning_rate": 9.254516145219006e-06,
|
|
"loss": 1.0435,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 4.002557544757034,
|
|
"grad_norm": 0.19230450271770366,
|
|
"learning_rate": 9.241797100810152e-06,
|
|
"loss": 1.0143,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 4.005115089514066,
|
|
"grad_norm": 0.19899721133072965,
|
|
"learning_rate": 9.229079289950005e-06,
|
|
"loss": 1.0249,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 4.0076726342711,
|
|
"grad_norm": 0.21185878359559354,
|
|
"learning_rate": 9.216362733329657e-06,
|
|
"loss": 0.9987,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 4.010230179028133,
|
|
"grad_norm": 0.1985629222033457,
|
|
"learning_rate": 9.203647451638138e-06,
|
|
"loss": 1.0198,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 4.012787723785166,
|
|
"grad_norm": 0.1930121039553769,
|
|
"learning_rate": 9.190933465562426e-06,
|
|
"loss": 1.0328,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 4.015345268542199,
|
|
"grad_norm": 0.2189356848452908,
|
|
"learning_rate": 9.17822079578738e-06,
|
|
"loss": 1.0358,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 4.017902813299233,
|
|
"grad_norm": 0.18197666560197398,
|
|
"learning_rate": 9.165509462995716e-06,
|
|
"loss": 1.0312,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 4.020460358056266,
|
|
"grad_norm": 0.22141370700870244,
|
|
"learning_rate": 9.152799487867981e-06,
|
|
"loss": 1.0167,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 4.023017902813299,
|
|
"grad_norm": 0.2061928144217363,
|
|
"learning_rate": 9.140090891082506e-06,
|
|
"loss": 1.0173,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 4.025575447570333,
|
|
"grad_norm": 0.1855420730525284,
|
|
"learning_rate": 9.127383693315387e-06,
|
|
"loss": 1.0122,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 4.028132992327365,
|
|
"grad_norm": 0.19054702381827276,
|
|
"learning_rate": 9.114677915240436e-06,
|
|
"loss": 1.0207,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 4.030690537084399,
|
|
"grad_norm": 0.17786433578081798,
|
|
"learning_rate": 9.101973577529164e-06,
|
|
"loss": 1.0339,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 4.033248081841432,
|
|
"grad_norm": 0.18910562787321678,
|
|
"learning_rate": 9.089270700850733e-06,
|
|
"loss": 1.0007,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 4.035805626598465,
|
|
"grad_norm": 0.18519350419636166,
|
|
"learning_rate": 9.076569305871926e-06,
|
|
"loss": 1.0314,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 4.038363171355499,
|
|
"grad_norm": 0.21754655747857035,
|
|
"learning_rate": 9.063869413257124e-06,
|
|
"loss": 1.0302,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 4.040920716112532,
|
|
"grad_norm": 0.18004679417947927,
|
|
"learning_rate": 9.051171043668251e-06,
|
|
"loss": 1.0476,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 4.043478260869565,
|
|
"grad_norm": 0.2168920363400877,
|
|
"learning_rate": 9.038474217764768e-06,
|
|
"loss": 1.025,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 4.046035805626598,
|
|
"grad_norm": 0.19274796431055907,
|
|
"learning_rate": 9.025778956203611e-06,
|
|
"loss": 1.0098,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 4.048593350383632,
|
|
"grad_norm": 0.19201028214018007,
|
|
"learning_rate": 9.013085279639178e-06,
|
|
"loss": 1.0017,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 4.051150895140665,
|
|
"grad_norm": 0.19629486524205142,
|
|
"learning_rate": 9.000393208723291e-06,
|
|
"loss": 1.0219,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 4.053708439897698,
|
|
"grad_norm": 0.19752451256428386,
|
|
"learning_rate": 8.987702764105151e-06,
|
|
"loss": 1.0177,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 4.056265984654732,
|
|
"grad_norm": 0.20166118830323768,
|
|
"learning_rate": 8.975013966431323e-06,
|
|
"loss": 1.0601,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 4.0588235294117645,
|
|
"grad_norm": 0.17326861120237855,
|
|
"learning_rate": 8.96232683634568e-06,
|
|
"loss": 0.9847,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 4.061381074168798,
|
|
"grad_norm": 0.1898245941021511,
|
|
"learning_rate": 8.949641394489399e-06,
|
|
"loss": 1.0099,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 4.063938618925831,
|
|
"grad_norm": 0.1700392821316134,
|
|
"learning_rate": 8.93695766150089e-06,
|
|
"loss": 1.0538,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 4.0664961636828645,
|
|
"grad_norm": 0.1682061615806585,
|
|
"learning_rate": 8.9242756580158e-06,
|
|
"loss": 1.0172,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 4.069053708439898,
|
|
"grad_norm": 0.19303997092308417,
|
|
"learning_rate": 8.911595404666957e-06,
|
|
"loss": 1.0546,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 4.071611253196931,
|
|
"grad_norm": 0.1654939906619837,
|
|
"learning_rate": 8.898916922084336e-06,
|
|
"loss": 1.0464,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 4.0741687979539645,
|
|
"grad_norm": 0.18143405806846177,
|
|
"learning_rate": 8.88624023089504e-06,
|
|
"loss": 1.0545,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 4.076726342710997,
|
|
"grad_norm": 0.20747010533584376,
|
|
"learning_rate": 8.873565351723249e-06,
|
|
"loss": 1.0589,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 4.079283887468031,
|
|
"grad_norm": 0.15953653305890375,
|
|
"learning_rate": 8.8608923051902e-06,
|
|
"loss": 1.0179,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 4.081841432225064,
|
|
"grad_norm": 0.2035902582767619,
|
|
"learning_rate": 8.848221111914147e-06,
|
|
"loss": 1.0447,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 4.084398976982097,
|
|
"grad_norm": 0.15347759439362155,
|
|
"learning_rate": 8.835551792510329e-06,
|
|
"loss": 1.0307,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 4.086956521739131,
|
|
"grad_norm": 0.20574769500088766,
|
|
"learning_rate": 8.822884367590941e-06,
|
|
"loss": 0.9952,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 4.089514066496164,
|
|
"grad_norm": 0.1835496415175651,
|
|
"learning_rate": 8.810218857765085e-06,
|
|
"loss": 1.0005,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 4.092071611253197,
|
|
"grad_norm": 0.20530099186755948,
|
|
"learning_rate": 8.79755528363876e-06,
|
|
"loss": 1.0361,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 4.09462915601023,
|
|
"grad_norm": 0.2026938929869877,
|
|
"learning_rate": 8.7848936658148e-06,
|
|
"loss": 1.0328,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 4.0971867007672635,
|
|
"grad_norm": 0.1907662170906002,
|
|
"learning_rate": 8.772234024892872e-06,
|
|
"loss": 1.0133,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 4.099744245524296,
|
|
"grad_norm": 0.19617684565754476,
|
|
"learning_rate": 8.759576381469425e-06,
|
|
"loss": 1.0027,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 4.10230179028133,
|
|
"grad_norm": 0.17534476994793663,
|
|
"learning_rate": 8.746920756137642e-06,
|
|
"loss": 1.0437,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 4.1048593350383635,
|
|
"grad_norm": 0.20521166727954332,
|
|
"learning_rate": 8.734267169487444e-06,
|
|
"loss": 1.0265,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 4.107416879795396,
|
|
"grad_norm": 0.17225400361630142,
|
|
"learning_rate": 8.721615642105417e-06,
|
|
"loss": 1.0338,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 4.10997442455243,
|
|
"grad_norm": 0.21382338032724127,
|
|
"learning_rate": 8.708966194574814e-06,
|
|
"loss": 1.0083,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 4.112531969309463,
|
|
"grad_norm": 0.16180422908572098,
|
|
"learning_rate": 8.696318847475487e-06,
|
|
"loss": 1.0169,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 4.115089514066496,
|
|
"grad_norm": 0.23650182130816144,
|
|
"learning_rate": 8.68367362138388e-06,
|
|
"loss": 1.0323,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 4.117647058823529,
|
|
"grad_norm": 0.18535588146645351,
|
|
"learning_rate": 8.671030536872995e-06,
|
|
"loss": 1.0299,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 4.120204603580563,
|
|
"grad_norm": 0.17955290128121904,
|
|
"learning_rate": 8.658389614512325e-06,
|
|
"loss": 1.0189,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 4.122762148337596,
|
|
"grad_norm": 0.1782288851096717,
|
|
"learning_rate": 8.645750874867876e-06,
|
|
"loss": 1.0134,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 4.125319693094629,
|
|
"grad_norm": 0.18693604034380645,
|
|
"learning_rate": 8.633114338502073e-06,
|
|
"loss": 1.0403,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 4.127877237851663,
|
|
"grad_norm": 0.18248123513699424,
|
|
"learning_rate": 8.62048002597378e-06,
|
|
"loss": 1.0288,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 4.130434782608695,
|
|
"grad_norm": 0.18165634630490243,
|
|
"learning_rate": 8.607847957838227e-06,
|
|
"loss": 1.0301,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 4.132992327365729,
|
|
"grad_norm": 0.1803487141905229,
|
|
"learning_rate": 8.595218154647001e-06,
|
|
"loss": 1.0301,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 4.135549872122763,
|
|
"grad_norm": 0.18173901474688528,
|
|
"learning_rate": 8.58259063694801e-06,
|
|
"loss": 1.0222,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 4.138107416879795,
|
|
"grad_norm": 0.18078862560079437,
|
|
"learning_rate": 8.56996542528542e-06,
|
|
"loss": 1.0235,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 4.140664961636829,
|
|
"grad_norm": 0.1803693056043885,
|
|
"learning_rate": 8.55734254019968e-06,
|
|
"loss": 0.9988,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 4.143222506393862,
|
|
"grad_norm": 0.1865048325076587,
|
|
"learning_rate": 8.544722002227417e-06,
|
|
"loss": 1.0538,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 4.145780051150895,
|
|
"grad_norm": 0.17978097814336544,
|
|
"learning_rate": 8.532103831901472e-06,
|
|
"loss": 1.035,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 4.148337595907928,
|
|
"grad_norm": 0.23624978152806544,
|
|
"learning_rate": 8.519488049750808e-06,
|
|
"loss": 1.0298,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 4.150895140664962,
|
|
"grad_norm": 0.16381055698474817,
|
|
"learning_rate": 8.506874676300514e-06,
|
|
"loss": 1.0485,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 4.153452685421995,
|
|
"grad_norm": 0.19963138199162672,
|
|
"learning_rate": 8.494263732071772e-06,
|
|
"loss": 1.0092,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 4.156010230179028,
|
|
"grad_norm": 0.19251260911612733,
|
|
"learning_rate": 8.481655237581785e-06,
|
|
"loss": 1.0209,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 4.158567774936062,
|
|
"grad_norm": 0.17091450724555518,
|
|
"learning_rate": 8.469049213343798e-06,
|
|
"loss": 1.0358,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 4.161125319693094,
|
|
"grad_norm": 0.18111441891291247,
|
|
"learning_rate": 8.456445679867013e-06,
|
|
"loss": 1.0235,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 4.163682864450128,
|
|
"grad_norm": 0.1742001195215167,
|
|
"learning_rate": 8.443844657656596e-06,
|
|
"loss": 1.0436,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 4.166240409207161,
|
|
"grad_norm": 0.17755175605855264,
|
|
"learning_rate": 8.431246167213627e-06,
|
|
"loss": 1.0444,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 4.168797953964194,
|
|
"grad_norm": 0.17719860198513576,
|
|
"learning_rate": 8.418650229035054e-06,
|
|
"loss": 1.0321,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 4.171355498721228,
|
|
"grad_norm": 0.1606826181735471,
|
|
"learning_rate": 8.406056863613689e-06,
|
|
"loss": 1.0539,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 4.173913043478261,
|
|
"grad_norm": 0.1739885726513299,
|
|
"learning_rate": 8.393466091438139e-06,
|
|
"loss": 1.0282,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 4.176470588235294,
|
|
"grad_norm": 0.18218865497775108,
|
|
"learning_rate": 8.380877932992815e-06,
|
|
"loss": 1.0239,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 4.179028132992327,
|
|
"grad_norm": 0.16523774532642985,
|
|
"learning_rate": 8.368292408757853e-06,
|
|
"loss": 1.02,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 4.181585677749361,
|
|
"grad_norm": 0.17345180693087728,
|
|
"learning_rate": 8.355709539209121e-06,
|
|
"loss": 1.0392,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 4.1841432225063935,
|
|
"grad_norm": 0.17255097246631376,
|
|
"learning_rate": 8.343129344818162e-06,
|
|
"loss": 1.0714,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 4.186700767263427,
|
|
"grad_norm": 0.1814224170983909,
|
|
"learning_rate": 8.33055184605216e-06,
|
|
"loss": 1.0217,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 4.189258312020461,
|
|
"grad_norm": 0.1748560906889792,
|
|
"learning_rate": 8.317977063373925e-06,
|
|
"loss": 1.0391,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 4.1918158567774935,
|
|
"grad_norm": 0.18435771096605524,
|
|
"learning_rate": 8.305405017241837e-06,
|
|
"loss": 1.0215,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 4.194373401534527,
|
|
"grad_norm": 0.16909940397166726,
|
|
"learning_rate": 8.292835728109835e-06,
|
|
"loss": 1.0141,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 4.19693094629156,
|
|
"grad_norm": 0.16864611479976394,
|
|
"learning_rate": 8.28026921642736e-06,
|
|
"loss": 0.995,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 4.1994884910485935,
|
|
"grad_norm": 0.1832641724885349,
|
|
"learning_rate": 8.267705502639342e-06,
|
|
"loss": 1.0443,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 4.202046035805626,
|
|
"grad_norm": 0.15678971891456242,
|
|
"learning_rate": 8.255144607186161e-06,
|
|
"loss": 0.9988,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 4.20460358056266,
|
|
"grad_norm": 0.17026684913571113,
|
|
"learning_rate": 8.242586550503607e-06,
|
|
"loss": 1.0413,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 4.207161125319693,
|
|
"grad_norm": 0.17089179054567286,
|
|
"learning_rate": 8.230031353022855e-06,
|
|
"loss": 1.0305,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 4.209718670076726,
|
|
"grad_norm": 0.17613488393658056,
|
|
"learning_rate": 8.217479035170422e-06,
|
|
"loss": 1.0075,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 4.21227621483376,
|
|
"grad_norm": 0.15804554349273428,
|
|
"learning_rate": 8.204929617368147e-06,
|
|
"loss": 1.0119,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 4.2148337595907925,
|
|
"grad_norm": 0.20718638597658195,
|
|
"learning_rate": 8.192383120033147e-06,
|
|
"loss": 1.0239,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 4.217391304347826,
|
|
"grad_norm": 0.1845223450299457,
|
|
"learning_rate": 8.179839563577789e-06,
|
|
"loss": 1.0044,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 4.21994884910486,
|
|
"grad_norm": 0.1740911877816002,
|
|
"learning_rate": 8.167298968409658e-06,
|
|
"loss": 1.0114,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 4.2225063938618925,
|
|
"grad_norm": 0.17787524858695802,
|
|
"learning_rate": 8.154761354931513e-06,
|
|
"loss": 1.0342,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 4.225063938618926,
|
|
"grad_norm": 0.17981590233123262,
|
|
"learning_rate": 8.142226743541273e-06,
|
|
"loss": 1.0196,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 4.227621483375959,
|
|
"grad_norm": 0.15945346875306546,
|
|
"learning_rate": 8.12969515463196e-06,
|
|
"loss": 1.0319,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 4.2301790281329925,
|
|
"grad_norm": 0.1782254652095104,
|
|
"learning_rate": 8.117166608591693e-06,
|
|
"loss": 1.027,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 4.232736572890025,
|
|
"grad_norm": 0.16769675527664904,
|
|
"learning_rate": 8.104641125803628e-06,
|
|
"loss": 1.0512,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 4.235294117647059,
|
|
"grad_norm": 0.17673772312426278,
|
|
"learning_rate": 8.092118726645943e-06,
|
|
"loss": 1.0289,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 4.2378516624040925,
|
|
"grad_norm": 0.17775412310787495,
|
|
"learning_rate": 8.0795994314918e-06,
|
|
"loss": 1.0134,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 4.240409207161125,
|
|
"grad_norm": 0.165083768711067,
|
|
"learning_rate": 8.067083260709309e-06,
|
|
"loss": 1.0482,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 4.242966751918159,
|
|
"grad_norm": 0.19604799862438058,
|
|
"learning_rate": 8.054570234661498e-06,
|
|
"loss": 1.0317,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 4.245524296675192,
|
|
"grad_norm": 0.16528010613818045,
|
|
"learning_rate": 8.042060373706275e-06,
|
|
"loss": 1.0348,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 4.248081841432225,
|
|
"grad_norm": 0.1804031281677697,
|
|
"learning_rate": 8.029553698196405e-06,
|
|
"loss": 1.0401,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 4.250639386189258,
|
|
"grad_norm": 0.176393933273107,
|
|
"learning_rate": 8.017050228479467e-06,
|
|
"loss": 1.0356,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 4.253196930946292,
|
|
"grad_norm": 0.19395943497159726,
|
|
"learning_rate": 8.004549984897822e-06,
|
|
"loss": 1.0191,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 4.255754475703325,
|
|
"grad_norm": 0.17246963598612605,
|
|
"learning_rate": 7.992052987788586e-06,
|
|
"loss": 1.0162,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 4.258312020460358,
|
|
"grad_norm": 0.18066442113845643,
|
|
"learning_rate": 7.979559257483591e-06,
|
|
"loss": 1.0229,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 4.260869565217392,
|
|
"grad_norm": 0.1680697165366633,
|
|
"learning_rate": 7.967068814309359e-06,
|
|
"loss": 1.0202,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 4.263427109974424,
|
|
"grad_norm": 0.17705957749246876,
|
|
"learning_rate": 7.954581678587054e-06,
|
|
"loss": 1.0324,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 4.265984654731458,
|
|
"grad_norm": 0.16130768348650035,
|
|
"learning_rate": 7.942097870632467e-06,
|
|
"loss": 0.9793,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 4.268542199488491,
|
|
"grad_norm": 0.17498237044992782,
|
|
"learning_rate": 7.929617410755977e-06,
|
|
"loss": 1.0249,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 4.271099744245524,
|
|
"grad_norm": 0.1925424733299812,
|
|
"learning_rate": 7.917140319262507e-06,
|
|
"loss": 1.0365,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 4.273657289002558,
|
|
"grad_norm": 0.18797309789320532,
|
|
"learning_rate": 7.90466661645151e-06,
|
|
"loss": 1.0118,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 4.276214833759591,
|
|
"grad_norm": 0.16573297446104532,
|
|
"learning_rate": 7.892196322616912e-06,
|
|
"loss": 1.0247,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 4.278772378516624,
|
|
"grad_norm": 0.1925991067748996,
|
|
"learning_rate": 7.879729458047111e-06,
|
|
"loss": 0.978,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 4.281329923273657,
|
|
"grad_norm": 0.1758834459188358,
|
|
"learning_rate": 7.86726604302491e-06,
|
|
"loss": 1.0175,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 4.283887468030691,
|
|
"grad_norm": 0.16487956982839647,
|
|
"learning_rate": 7.854806097827507e-06,
|
|
"loss": 1.0288,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 4.286445012787723,
|
|
"grad_norm": 0.1787793037572042,
|
|
"learning_rate": 7.842349642726458e-06,
|
|
"loss": 1.0166,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 4.289002557544757,
|
|
"grad_norm": 0.1841366036398648,
|
|
"learning_rate": 7.829896697987627e-06,
|
|
"loss": 1.0348,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 4.291560102301791,
|
|
"grad_norm": 0.1576001038888875,
|
|
"learning_rate": 7.817447283871187e-06,
|
|
"loss": 1.0342,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 4.294117647058823,
|
|
"grad_norm": 0.17981916810192364,
|
|
"learning_rate": 7.80500142063155e-06,
|
|
"loss": 1.0214,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 4.296675191815857,
|
|
"grad_norm": 0.17518421051117097,
|
|
"learning_rate": 7.792559128517363e-06,
|
|
"loss": 1.0404,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 4.29923273657289,
|
|
"grad_norm": 0.16823487687822244,
|
|
"learning_rate": 7.780120427771449e-06,
|
|
"loss": 1.0112,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 4.301790281329923,
|
|
"grad_norm": 0.16558738219755195,
|
|
"learning_rate": 7.7676853386308e-06,
|
|
"loss": 1.0605,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 4.304347826086957,
|
|
"grad_norm": 0.17794613732094552,
|
|
"learning_rate": 7.755253881326535e-06,
|
|
"loss": 1.0371,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 4.30690537084399,
|
|
"grad_norm": 0.19300577747925785,
|
|
"learning_rate": 7.742826076083848e-06,
|
|
"loss": 1.06,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 4.309462915601023,
|
|
"grad_norm": 0.16066023211525512,
|
|
"learning_rate": 7.730401943122007e-06,
|
|
"loss": 1.0084,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 4.312020460358056,
|
|
"grad_norm": 0.1947539405327399,
|
|
"learning_rate": 7.717981502654297e-06,
|
|
"loss": 1.0418,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 4.31457800511509,
|
|
"grad_norm": 0.16039175830465094,
|
|
"learning_rate": 7.705564774888001e-06,
|
|
"loss": 1.0039,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 4.3171355498721224,
|
|
"grad_norm": 0.18746085529738926,
|
|
"learning_rate": 7.693151780024354e-06,
|
|
"loss": 1.0041,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 4.319693094629156,
|
|
"grad_norm": 0.17014035483962622,
|
|
"learning_rate": 7.680742538258524e-06,
|
|
"loss": 1.0087,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 4.322250639386189,
|
|
"grad_norm": 0.19178845859382257,
|
|
"learning_rate": 7.668337069779577e-06,
|
|
"loss": 1.0716,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 4.324808184143222,
|
|
"grad_norm": 0.16691270419041054,
|
|
"learning_rate": 7.655935394770425e-06,
|
|
"loss": 1.0185,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 4.327365728900256,
|
|
"grad_norm": 0.17518851447109943,
|
|
"learning_rate": 7.643537533407828e-06,
|
|
"loss": 1.0173,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 4.329923273657289,
|
|
"grad_norm": 0.16145421958943196,
|
|
"learning_rate": 7.631143505862325e-06,
|
|
"loss": 1.0351,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 4.332480818414322,
|
|
"grad_norm": 0.37204295825399436,
|
|
"learning_rate": 7.618753332298219e-06,
|
|
"loss": 1.0303,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 4.335038363171355,
|
|
"grad_norm": 0.15830617963945456,
|
|
"learning_rate": 7.606367032873562e-06,
|
|
"loss": 1.0129,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 4.337595907928389,
|
|
"grad_norm": 0.18979652677231215,
|
|
"learning_rate": 7.593984627740075e-06,
|
|
"loss": 1.0526,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 4.340153452685422,
|
|
"grad_norm": 0.1876359842591056,
|
|
"learning_rate": 7.5816061370431674e-06,
|
|
"loss": 1.0181,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 4.342710997442455,
|
|
"grad_norm": 0.18251068037823034,
|
|
"learning_rate": 7.569231580921858e-06,
|
|
"loss": 0.996,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 4.345268542199489,
|
|
"grad_norm": 0.17542644898051862,
|
|
"learning_rate": 7.556860979508791e-06,
|
|
"loss": 1.0301,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.1927803590994827,
|
|
"learning_rate": 7.544494352930145e-06,
|
|
"loss": 1.03,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 4.350383631713555,
|
|
"grad_norm": 0.16917148556319608,
|
|
"learning_rate": 7.532131721305659e-06,
|
|
"loss": 0.9895,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 4.352941176470588,
|
|
"grad_norm": 0.18346223176780307,
|
|
"learning_rate": 7.519773104748562e-06,
|
|
"loss": 1.0428,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 4.3554987212276215,
|
|
"grad_norm": 0.1628922532881499,
|
|
"learning_rate": 7.507418523365542e-06,
|
|
"loss": 1.058,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 4.358056265984655,
|
|
"grad_norm": 0.1876763139643933,
|
|
"learning_rate": 7.495067997256742e-06,
|
|
"loss": 1.0112,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 4.360613810741688,
|
|
"grad_norm": 0.15693274545823557,
|
|
"learning_rate": 7.482721546515683e-06,
|
|
"loss": 1.0281,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 4.3631713554987215,
|
|
"grad_norm": 0.18630090934243648,
|
|
"learning_rate": 7.47037919122928e-06,
|
|
"loss": 1.0418,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 4.365728900255754,
|
|
"grad_norm": 0.16500214550907966,
|
|
"learning_rate": 7.458040951477763e-06,
|
|
"loss": 1.0279,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 4.368286445012788,
|
|
"grad_norm": 0.18494529984039387,
|
|
"learning_rate": 7.4457068473346836e-06,
|
|
"loss": 1.0155,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 4.370843989769821,
|
|
"grad_norm": 0.19216574362796557,
|
|
"learning_rate": 7.43337689886686e-06,
|
|
"loss": 1.0423,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 4.373401534526854,
|
|
"grad_norm": 0.16751025476175924,
|
|
"learning_rate": 7.42105112613434e-06,
|
|
"loss": 1.0317,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 4.375959079283888,
|
|
"grad_norm": 0.20151154222401438,
|
|
"learning_rate": 7.408729549190393e-06,
|
|
"loss": 1.0536,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 4.378516624040921,
|
|
"grad_norm": 0.18065737789912834,
|
|
"learning_rate": 7.3964121880814445e-06,
|
|
"loss": 1.0549,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 4.381074168797954,
|
|
"grad_norm": 0.17160881413407147,
|
|
"learning_rate": 7.3840990628470824e-06,
|
|
"loss": 1.0168,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 4.383631713554987,
|
|
"grad_norm": 0.1786512550850061,
|
|
"learning_rate": 7.371790193519979e-06,
|
|
"loss": 1.0435,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 4.3861892583120206,
|
|
"grad_norm": 0.19232717850899114,
|
|
"learning_rate": 7.359485600125904e-06,
|
|
"loss": 1.0389,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 4.388746803069053,
|
|
"grad_norm": 0.18440121677046997,
|
|
"learning_rate": 7.347185302683662e-06,
|
|
"loss": 1.0264,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 4.391304347826087,
|
|
"grad_norm": 0.19371415512946702,
|
|
"learning_rate": 7.334889321205063e-06,
|
|
"loss": 1.0622,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 4.3938618925831205,
|
|
"grad_norm": 0.19249478474991424,
|
|
"learning_rate": 7.322597675694904e-06,
|
|
"loss": 1.0029,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 4.396419437340153,
|
|
"grad_norm": 0.19009338152933727,
|
|
"learning_rate": 7.31031038615092e-06,
|
|
"loss": 1.0165,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 4.398976982097187,
|
|
"grad_norm": 0.18669974928276975,
|
|
"learning_rate": 7.298027472563768e-06,
|
|
"loss": 1.0357,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 4.40153452685422,
|
|
"grad_norm": 0.1650051526675111,
|
|
"learning_rate": 7.285748954916973e-06,
|
|
"loss": 1.0562,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 4.404092071611253,
|
|
"grad_norm": 0.1917534223305165,
|
|
"learning_rate": 7.273474853186922e-06,
|
|
"loss": 1.0409,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 4.406649616368286,
|
|
"grad_norm": 0.17737384077233112,
|
|
"learning_rate": 7.261205187342809e-06,
|
|
"loss": 1.0464,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 4.40920716112532,
|
|
"grad_norm": 0.17939864900718247,
|
|
"learning_rate": 7.248939977346612e-06,
|
|
"loss": 1.0153,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 4.411764705882353,
|
|
"grad_norm": 0.16822250340936998,
|
|
"learning_rate": 7.236679243153062e-06,
|
|
"loss": 1.0274,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 4.414322250639386,
|
|
"grad_norm": 0.2012483436702938,
|
|
"learning_rate": 7.224423004709607e-06,
|
|
"loss": 1.0302,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 4.41687979539642,
|
|
"grad_norm": 0.16437642340196237,
|
|
"learning_rate": 7.212171281956377e-06,
|
|
"loss": 1.0173,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 4.419437340153452,
|
|
"grad_norm": 0.18420316116672247,
|
|
"learning_rate": 7.199924094826167e-06,
|
|
"loss": 1.0154,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 4.421994884910486,
|
|
"grad_norm": 0.17063629208523548,
|
|
"learning_rate": 7.187681463244377e-06,
|
|
"loss": 1.0252,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 4.42455242966752,
|
|
"grad_norm": 0.2071747152600751,
|
|
"learning_rate": 7.175443407129008e-06,
|
|
"loss": 1.0643,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 4.427109974424552,
|
|
"grad_norm": 0.1596268627900996,
|
|
"learning_rate": 7.163209946390608e-06,
|
|
"loss": 1.0094,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 4.429667519181586,
|
|
"grad_norm": 0.17222832212411637,
|
|
"learning_rate": 7.1509811009322574e-06,
|
|
"loss": 1.0011,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 4.432225063938619,
|
|
"grad_norm": 0.18768984848570255,
|
|
"learning_rate": 7.138756890649516e-06,
|
|
"loss": 1.0344,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 4.434782608695652,
|
|
"grad_norm": 0.20394581557700622,
|
|
"learning_rate": 7.126537335430417e-06,
|
|
"loss": 1.0187,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 4.437340153452685,
|
|
"grad_norm": 0.1930227044611592,
|
|
"learning_rate": 7.1143224551554115e-06,
|
|
"loss": 1.0391,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 4.439897698209719,
|
|
"grad_norm": 0.19780011138369127,
|
|
"learning_rate": 7.102112269697341e-06,
|
|
"loss": 1.0599,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 4.442455242966752,
|
|
"grad_norm": 0.18641195549148987,
|
|
"learning_rate": 7.08990679892142e-06,
|
|
"loss": 1.0205,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 4.445012787723785,
|
|
"grad_norm": 0.1745033043017393,
|
|
"learning_rate": 7.077706062685181e-06,
|
|
"loss": 1.0254,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 4.447570332480819,
|
|
"grad_norm": 0.1875404190434515,
|
|
"learning_rate": 7.065510080838465e-06,
|
|
"loss": 1.0375,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 4.450127877237851,
|
|
"grad_norm": 0.17560201588299784,
|
|
"learning_rate": 7.053318873223365e-06,
|
|
"loss": 0.9962,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 4.452685421994885,
|
|
"grad_norm": 0.16337995441327988,
|
|
"learning_rate": 7.041132459674216e-06,
|
|
"loss": 1.0151,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 4.455242966751918,
|
|
"grad_norm": 0.17910647034147473,
|
|
"learning_rate": 7.028950860017555e-06,
|
|
"loss": 1.059,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 4.457800511508951,
|
|
"grad_norm": 0.1645714876947052,
|
|
"learning_rate": 7.016774094072077e-06,
|
|
"loss": 1.0151,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 4.460358056265985,
|
|
"grad_norm": 0.18052975261895468,
|
|
"learning_rate": 7.004602181648626e-06,
|
|
"loss": 1.0226,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 4.462915601023018,
|
|
"grad_norm": 0.15506591744701947,
|
|
"learning_rate": 6.992435142550133e-06,
|
|
"loss": 1.0315,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 4.465473145780051,
|
|
"grad_norm": 0.18883014972610887,
|
|
"learning_rate": 6.980272996571617e-06,
|
|
"loss": 1.035,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 4.468030690537084,
|
|
"grad_norm": 0.17244955302277767,
|
|
"learning_rate": 6.968115763500127e-06,
|
|
"loss": 1.0212,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 4.470588235294118,
|
|
"grad_norm": 0.17237420999484432,
|
|
"learning_rate": 6.95596346311472e-06,
|
|
"loss": 1.0262,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 4.4731457800511505,
|
|
"grad_norm": 0.18044664054131004,
|
|
"learning_rate": 6.943816115186432e-06,
|
|
"loss": 1.0285,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 4.475703324808184,
|
|
"grad_norm": 0.16838623145296286,
|
|
"learning_rate": 6.931673739478235e-06,
|
|
"loss": 1.0526,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 4.478260869565218,
|
|
"grad_norm": 0.16324922887275686,
|
|
"learning_rate": 6.919536355745018e-06,
|
|
"loss": 1.0174,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 4.4808184143222505,
|
|
"grad_norm": 0.16440559510930122,
|
|
"learning_rate": 6.907403983733543e-06,
|
|
"loss": 1.035,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 4.483375959079284,
|
|
"grad_norm": 0.15720327328308067,
|
|
"learning_rate": 6.895276643182423e-06,
|
|
"loss": 1.047,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 4.485933503836317,
|
|
"grad_norm": 0.16163765669193314,
|
|
"learning_rate": 6.883154353822079e-06,
|
|
"loss": 1.0465,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 4.4884910485933505,
|
|
"grad_norm": 0.17497015050920636,
|
|
"learning_rate": 6.871037135374722e-06,
|
|
"loss": 1.0184,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 4.491048593350383,
|
|
"grad_norm": 0.15908864283642854,
|
|
"learning_rate": 6.858925007554308e-06,
|
|
"loss": 1.0307,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 4.493606138107417,
|
|
"grad_norm": 0.18008191707505186,
|
|
"learning_rate": 6.8468179900665095e-06,
|
|
"loss": 1.0363,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 4.4961636828644505,
|
|
"grad_norm": 0.1854747706459379,
|
|
"learning_rate": 6.834716102608689e-06,
|
|
"loss": 1.0083,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 4.498721227621483,
|
|
"grad_norm": 0.1919413504278039,
|
|
"learning_rate": 6.8226193648698605e-06,
|
|
"loss": 0.996,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 4.501278772378517,
|
|
"grad_norm": 0.16472038994778412,
|
|
"learning_rate": 6.810527796530655e-06,
|
|
"loss": 1.0476,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 4.5038363171355495,
|
|
"grad_norm": 0.1877483916121461,
|
|
"learning_rate": 6.798441417263311e-06,
|
|
"loss": 1.042,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 4.506393861892583,
|
|
"grad_norm": 0.1524530347847294,
|
|
"learning_rate": 6.786360246731595e-06,
|
|
"loss": 1.0535,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 4.508951406649617,
|
|
"grad_norm": 0.16736289193940437,
|
|
"learning_rate": 6.774284304590832e-06,
|
|
"loss": 1.0384,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 4.5115089514066495,
|
|
"grad_norm": 0.1509168260512166,
|
|
"learning_rate": 6.762213610487813e-06,
|
|
"loss": 1.0124,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 4.514066496163683,
|
|
"grad_norm": 0.15987500159184168,
|
|
"learning_rate": 6.75014818406081e-06,
|
|
"loss": 1.0282,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 4.516624040920716,
|
|
"grad_norm": 0.16208604821494524,
|
|
"learning_rate": 6.7380880449395105e-06,
|
|
"loss": 1.017,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 4.5191815856777495,
|
|
"grad_norm": 0.1750240175749838,
|
|
"learning_rate": 6.726033212745009e-06,
|
|
"loss": 1.0448,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 4.521739130434782,
|
|
"grad_norm": 0.17627152188563489,
|
|
"learning_rate": 6.713983707089773e-06,
|
|
"loss": 1.0431,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 4.524296675191816,
|
|
"grad_norm": 0.172403571140956,
|
|
"learning_rate": 6.7019395475775805e-06,
|
|
"loss": 1.0014,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 4.526854219948849,
|
|
"grad_norm": 0.16551799261888245,
|
|
"learning_rate": 6.6899007538035376e-06,
|
|
"loss": 1.0277,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 4.529411764705882,
|
|
"grad_norm": 0.17935995088209722,
|
|
"learning_rate": 6.6778673453539984e-06,
|
|
"loss": 1.0214,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 4.531969309462916,
|
|
"grad_norm": 0.14762155206935834,
|
|
"learning_rate": 6.66583934180658e-06,
|
|
"loss": 1.0254,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 4.534526854219949,
|
|
"grad_norm": 0.18205952935739028,
|
|
"learning_rate": 6.653816762730079e-06,
|
|
"loss": 1.0128,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 4.537084398976982,
|
|
"grad_norm": 0.16531567285520288,
|
|
"learning_rate": 6.641799627684481e-06,
|
|
"loss": 1.0117,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 4.539641943734015,
|
|
"grad_norm": 0.1761641546294807,
|
|
"learning_rate": 6.629787956220924e-06,
|
|
"loss": 1.0047,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 4.542199488491049,
|
|
"grad_norm": 0.16044890357588265,
|
|
"learning_rate": 6.617781767881635e-06,
|
|
"loss": 1.0193,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 4.544757033248082,
|
|
"grad_norm": 0.159801416179025,
|
|
"learning_rate": 6.6057810821999406e-06,
|
|
"loss": 1.0344,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 4.547314578005115,
|
|
"grad_norm": 0.18194045342283055,
|
|
"learning_rate": 6.593785918700197e-06,
|
|
"loss": 1.046,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 4.549872122762149,
|
|
"grad_norm": 0.15701008924351048,
|
|
"learning_rate": 6.581796296897795e-06,
|
|
"loss": 1.0264,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 4.552429667519181,
|
|
"grad_norm": 0.16610935282488204,
|
|
"learning_rate": 6.569812236299089e-06,
|
|
"loss": 1.0207,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 4.554987212276215,
|
|
"grad_norm": 0.15940091408671517,
|
|
"learning_rate": 6.557833756401404e-06,
|
|
"loss": 1.049,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 4.557544757033249,
|
|
"grad_norm": 0.16618353240025538,
|
|
"learning_rate": 6.545860876692979e-06,
|
|
"loss": 1.0266,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 4.560102301790281,
|
|
"grad_norm": 0.17022750553375388,
|
|
"learning_rate": 6.533893616652932e-06,
|
|
"loss": 1.0791,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 4.562659846547315,
|
|
"grad_norm": 0.17223278557669286,
|
|
"learning_rate": 6.521931995751258e-06,
|
|
"loss": 1.001,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 4.565217391304348,
|
|
"grad_norm": 0.18588830803208972,
|
|
"learning_rate": 6.509976033448755e-06,
|
|
"loss": 1.0029,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 4.567774936061381,
|
|
"grad_norm": 0.15803052054999583,
|
|
"learning_rate": 6.498025749197036e-06,
|
|
"loss": 1.0085,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 4.570332480818414,
|
|
"grad_norm": 0.17758373561683846,
|
|
"learning_rate": 6.486081162438458e-06,
|
|
"loss": 1.0215,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 4.572890025575448,
|
|
"grad_norm": 0.1675050184516244,
|
|
"learning_rate": 6.4741422926061225e-06,
|
|
"loss": 1.0101,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 4.57544757033248,
|
|
"grad_norm": 0.1802049784719144,
|
|
"learning_rate": 6.462209159123825e-06,
|
|
"loss": 1.0594,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 4.578005115089514,
|
|
"grad_norm": 0.15407960949128488,
|
|
"learning_rate": 6.450281781406022e-06,
|
|
"loss": 1.0351,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 4.580562659846548,
|
|
"grad_norm": 0.17251700051840302,
|
|
"learning_rate": 6.438360178857818e-06,
|
|
"loss": 1.0237,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 4.58312020460358,
|
|
"grad_norm": 0.17736986767063925,
|
|
"learning_rate": 6.426444370874906e-06,
|
|
"loss": 1.0262,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 4.585677749360614,
|
|
"grad_norm": 0.18476336736016494,
|
|
"learning_rate": 6.414534376843566e-06,
|
|
"loss": 1.018,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 4.588235294117647,
|
|
"grad_norm": 0.17911429354068129,
|
|
"learning_rate": 6.402630216140618e-06,
|
|
"loss": 1.0286,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 4.59079283887468,
|
|
"grad_norm": 0.17311984725832297,
|
|
"learning_rate": 6.39073190813338e-06,
|
|
"loss": 1.0103,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 4.593350383631714,
|
|
"grad_norm": 0.1621278479186866,
|
|
"learning_rate": 6.37883947217966e-06,
|
|
"loss": 1.0228,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 4.595907928388747,
|
|
"grad_norm": 0.18444591716270403,
|
|
"learning_rate": 6.366952927627703e-06,
|
|
"loss": 1.0306,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 4.59846547314578,
|
|
"grad_norm": 0.1659804117379894,
|
|
"learning_rate": 6.355072293816178e-06,
|
|
"loss": 1.0072,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 4.601023017902813,
|
|
"grad_norm": 0.16571291930690385,
|
|
"learning_rate": 6.34319759007413e-06,
|
|
"loss": 1.0122,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 4.603580562659847,
|
|
"grad_norm": 0.1720471422264511,
|
|
"learning_rate": 6.331328835720961e-06,
|
|
"loss": 1.0465,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 4.6061381074168795,
|
|
"grad_norm": 0.16256427527474918,
|
|
"learning_rate": 6.319466050066395e-06,
|
|
"loss": 1.0069,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 4.608695652173913,
|
|
"grad_norm": 0.16289290458169317,
|
|
"learning_rate": 6.307609252410438e-06,
|
|
"loss": 0.9955,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 4.611253196930946,
|
|
"grad_norm": 0.16420344005471815,
|
|
"learning_rate": 6.295758462043362e-06,
|
|
"loss": 1.021,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 4.6138107416879794,
|
|
"grad_norm": 0.16431618715461352,
|
|
"learning_rate": 6.283913698245659e-06,
|
|
"loss": 0.9887,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 4.616368286445013,
|
|
"grad_norm": 0.162477757683666,
|
|
"learning_rate": 6.272074980288021e-06,
|
|
"loss": 1.0315,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 4.618925831202046,
|
|
"grad_norm": 0.1420949863331362,
|
|
"learning_rate": 6.2602423274313e-06,
|
|
"loss": 0.9946,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 4.621483375959079,
|
|
"grad_norm": 0.1617352765159284,
|
|
"learning_rate": 6.248415758926485e-06,
|
|
"loss": 1.0247,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 4.624040920716112,
|
|
"grad_norm": 0.14727458038419122,
|
|
"learning_rate": 6.236595294014662e-06,
|
|
"loss": 1.0695,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 4.626598465473146,
|
|
"grad_norm": 0.15513852752076332,
|
|
"learning_rate": 6.22478095192699e-06,
|
|
"loss": 1.0361,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 4.629156010230179,
|
|
"grad_norm": 0.15023148854538287,
|
|
"learning_rate": 6.212972751884663e-06,
|
|
"loss": 1.0263,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 4.631713554987212,
|
|
"grad_norm": 0.16087300720694614,
|
|
"learning_rate": 6.201170713098883e-06,
|
|
"loss": 1.0248,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 4.634271099744246,
|
|
"grad_norm": 0.15834981601790443,
|
|
"learning_rate": 6.189374854770832e-06,
|
|
"loss": 1.053,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 4.6368286445012785,
|
|
"grad_norm": 0.1573655447598696,
|
|
"learning_rate": 6.177585196091631e-06,
|
|
"loss": 0.9904,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 4.639386189258312,
|
|
"grad_norm": 0.158683133829273,
|
|
"learning_rate": 6.16580175624232e-06,
|
|
"loss": 1.0595,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 4.641943734015345,
|
|
"grad_norm": 0.1597812398342448,
|
|
"learning_rate": 6.15402455439382e-06,
|
|
"loss": 1.0517,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 4.6445012787723785,
|
|
"grad_norm": 0.15551450371650033,
|
|
"learning_rate": 6.142253609706898e-06,
|
|
"loss": 1.054,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 4.647058823529412,
|
|
"grad_norm": 0.19632917660508345,
|
|
"learning_rate": 6.130488941332151e-06,
|
|
"loss": 1.0512,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 4.649616368286445,
|
|
"grad_norm": 0.15643968941800954,
|
|
"learning_rate": 6.118730568409951e-06,
|
|
"loss": 1.039,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 4.6521739130434785,
|
|
"grad_norm": 0.20652844984094032,
|
|
"learning_rate": 6.106978510070443e-06,
|
|
"loss": 1.0129,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 4.654731457800511,
|
|
"grad_norm": 0.15097637750201956,
|
|
"learning_rate": 6.095232785433485e-06,
|
|
"loss": 1.0003,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 4.657289002557545,
|
|
"grad_norm": 0.20892906717171159,
|
|
"learning_rate": 6.083493413608639e-06,
|
|
"loss": 1.0032,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 4.659846547314578,
|
|
"grad_norm": 0.14676895460609313,
|
|
"learning_rate": 6.0717604136951315e-06,
|
|
"loss": 1.0575,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 4.662404092071611,
|
|
"grad_norm": 0.1744598380072282,
|
|
"learning_rate": 6.0600338047818155e-06,
|
|
"loss": 1.0012,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 4.664961636828645,
|
|
"grad_norm": 0.15898084906509888,
|
|
"learning_rate": 6.048313605947153e-06,
|
|
"loss": 1.0152,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 4.667519181585678,
|
|
"grad_norm": 0.18500242483627394,
|
|
"learning_rate": 6.036599836259175e-06,
|
|
"loss": 1.0202,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 4.670076726342711,
|
|
"grad_norm": 0.17586881973502083,
|
|
"learning_rate": 6.024892514775451e-06,
|
|
"loss": 1.0152,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 4.672634271099744,
|
|
"grad_norm": 0.1751917297897623,
|
|
"learning_rate": 6.013191660543063e-06,
|
|
"loss": 1.0185,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 4.675191815856778,
|
|
"grad_norm": 0.16539844174921248,
|
|
"learning_rate": 6.001497292598566e-06,
|
|
"loss": 1.0091,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 4.677749360613811,
|
|
"grad_norm": 0.16305138932194513,
|
|
"learning_rate": 5.98980942996797e-06,
|
|
"loss": 1.0171,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 4.680306905370844,
|
|
"grad_norm": 0.1978081666622713,
|
|
"learning_rate": 5.97812809166669e-06,
|
|
"loss": 1.04,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 4.6828644501278776,
|
|
"grad_norm": 0.14529737115947974,
|
|
"learning_rate": 5.966453296699541e-06,
|
|
"loss": 1.0219,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 4.68542199488491,
|
|
"grad_norm": 0.19132792503166993,
|
|
"learning_rate": 5.954785064060678e-06,
|
|
"loss": 1.0466,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 4.687979539641944,
|
|
"grad_norm": 0.14925809757481498,
|
|
"learning_rate": 5.943123412733587e-06,
|
|
"loss": 1.0168,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 4.690537084398977,
|
|
"grad_norm": 0.19480783069632648,
|
|
"learning_rate": 5.931468361691053e-06,
|
|
"loss": 1.074,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 4.69309462915601,
|
|
"grad_norm": 0.1597024405029427,
|
|
"learning_rate": 5.919819929895106e-06,
|
|
"loss": 1.0365,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 4.695652173913043,
|
|
"grad_norm": 0.179287834985346,
|
|
"learning_rate": 5.9081781362970205e-06,
|
|
"loss": 1.0461,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 4.698209718670077,
|
|
"grad_norm": 0.16882218098581764,
|
|
"learning_rate": 5.896542999837265e-06,
|
|
"loss": 1.0305,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 4.70076726342711,
|
|
"grad_norm": 0.14058129617791865,
|
|
"learning_rate": 5.8849145394454806e-06,
|
|
"loss": 0.9987,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 4.703324808184143,
|
|
"grad_norm": 0.18349693674349288,
|
|
"learning_rate": 5.873292774040442e-06,
|
|
"loss": 0.9943,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 4.705882352941177,
|
|
"grad_norm": 0.1610970199108,
|
|
"learning_rate": 5.861677722530037e-06,
|
|
"loss": 1.0579,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 4.708439897698209,
|
|
"grad_norm": 0.166987113555728,
|
|
"learning_rate": 5.850069403811235e-06,
|
|
"loss": 1.0181,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 4.710997442455243,
|
|
"grad_norm": 0.1677755864894642,
|
|
"learning_rate": 5.8384678367700325e-06,
|
|
"loss": 1.0125,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 4.713554987212277,
|
|
"grad_norm": 0.1779899568878102,
|
|
"learning_rate": 5.826873040281462e-06,
|
|
"loss": 1.0157,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 4.716112531969309,
|
|
"grad_norm": 0.16348039752545065,
|
|
"learning_rate": 5.81528503320953e-06,
|
|
"loss": 1.0343,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 4.718670076726343,
|
|
"grad_norm": 0.1670971620135551,
|
|
"learning_rate": 5.8037038344072e-06,
|
|
"loss": 1.0318,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 4.721227621483376,
|
|
"grad_norm": 0.18617223073968917,
|
|
"learning_rate": 5.792129462716355e-06,
|
|
"loss": 1.0219,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 4.723785166240409,
|
|
"grad_norm": 0.15449905092529612,
|
|
"learning_rate": 5.780561936967779e-06,
|
|
"loss": 1.0272,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 4.726342710997442,
|
|
"grad_norm": 0.1750868480359856,
|
|
"learning_rate": 5.769001275981112e-06,
|
|
"loss": 1.0565,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 4.728900255754476,
|
|
"grad_norm": 0.1663229129876114,
|
|
"learning_rate": 5.757447498564821e-06,
|
|
"loss": 1.0535,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 4.731457800511509,
|
|
"grad_norm": 0.15809631122185844,
|
|
"learning_rate": 5.745900623516189e-06,
|
|
"loss": 1.021,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 4.734015345268542,
|
|
"grad_norm": 0.16459750473842777,
|
|
"learning_rate": 5.734360669621255e-06,
|
|
"loss": 1.0248,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 4.736572890025576,
|
|
"grad_norm": 0.15287249372875325,
|
|
"learning_rate": 5.722827655654801e-06,
|
|
"loss": 1.0156,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 4.739130434782608,
|
|
"grad_norm": 0.1605211421637796,
|
|
"learning_rate": 5.711301600380317e-06,
|
|
"loss": 1.0569,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 4.741687979539642,
|
|
"grad_norm": 0.14939498740260876,
|
|
"learning_rate": 5.699782522549983e-06,
|
|
"loss": 1.0509,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 4.744245524296675,
|
|
"grad_norm": 0.16398542522125342,
|
|
"learning_rate": 5.688270440904613e-06,
|
|
"loss": 1.0273,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 4.746803069053708,
|
|
"grad_norm": 0.16733173044314129,
|
|
"learning_rate": 5.6767653741736405e-06,
|
|
"loss": 0.9938,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 4.749360613810742,
|
|
"grad_norm": 0.1505426061615439,
|
|
"learning_rate": 5.665267341075098e-06,
|
|
"loss": 1.0144,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 4.751918158567775,
|
|
"grad_norm": 0.1527851077672571,
|
|
"learning_rate": 5.653776360315562e-06,
|
|
"loss": 1.0478,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 4.754475703324808,
|
|
"grad_norm": 0.16913240191236387,
|
|
"learning_rate": 5.642292450590134e-06,
|
|
"loss": 1.0122,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 4.757033248081841,
|
|
"grad_norm": 0.158875356158748,
|
|
"learning_rate": 5.630815630582429e-06,
|
|
"loss": 1.0413,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 4.759590792838875,
|
|
"grad_norm": 0.14953756040104652,
|
|
"learning_rate": 5.61934591896451e-06,
|
|
"loss": 1.0337,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 4.762148337595908,
|
|
"grad_norm": 0.17219828313172605,
|
|
"learning_rate": 5.60788333439688e-06,
|
|
"loss": 1.0287,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 4.764705882352941,
|
|
"grad_norm": 0.1659776610530445,
|
|
"learning_rate": 5.596427895528443e-06,
|
|
"loss": 1.0443,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 4.767263427109975,
|
|
"grad_norm": 0.1676484186832149,
|
|
"learning_rate": 5.584979620996491e-06,
|
|
"loss": 1.0489,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 4.7698209718670075,
|
|
"grad_norm": 0.1623795959715509,
|
|
"learning_rate": 5.573538529426645e-06,
|
|
"loss": 1.0144,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 4.772378516624041,
|
|
"grad_norm": 0.16256260144035772,
|
|
"learning_rate": 5.562104639432845e-06,
|
|
"loss": 1.0427,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 4.774936061381074,
|
|
"grad_norm": 0.17175961986303814,
|
|
"learning_rate": 5.550677969617319e-06,
|
|
"loss": 1.0162,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 4.7774936061381075,
|
|
"grad_norm": 0.1542050330321217,
|
|
"learning_rate": 5.539258538570544e-06,
|
|
"loss": 1.0164,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 4.78005115089514,
|
|
"grad_norm": 0.15918533657676529,
|
|
"learning_rate": 5.527846364871219e-06,
|
|
"loss": 1.0309,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.1403676241793028,
|
|
"learning_rate": 5.516441467086231e-06,
|
|
"loss": 1.0228,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 4.7851662404092075,
|
|
"grad_norm": 0.14773251181856192,
|
|
"learning_rate": 5.505043863770646e-06,
|
|
"loss": 1.0734,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 4.78772378516624,
|
|
"grad_norm": 0.16196858898805197,
|
|
"learning_rate": 5.493653573467647e-06,
|
|
"loss": 1.0048,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 4.790281329923274,
|
|
"grad_norm": 0.15355301379517172,
|
|
"learning_rate": 5.4822706147085205e-06,
|
|
"loss": 1.0125,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 4.792838874680307,
|
|
"grad_norm": 0.18982539717495267,
|
|
"learning_rate": 5.470895006012637e-06,
|
|
"loss": 0.9959,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 4.79539641943734,
|
|
"grad_norm": 0.1573171337655545,
|
|
"learning_rate": 5.459526765887397e-06,
|
|
"loss": 1.0297,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 4.797953964194374,
|
|
"grad_norm": 0.16351573968402464,
|
|
"learning_rate": 5.448165912828214e-06,
|
|
"loss": 0.9945,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 4.8005115089514065,
|
|
"grad_norm": 0.18629349709548856,
|
|
"learning_rate": 5.4368124653184835e-06,
|
|
"loss": 1.0363,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 4.80306905370844,
|
|
"grad_norm": 0.17008978855695026,
|
|
"learning_rate": 5.4254664418295634e-06,
|
|
"loss": 1.0273,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 4.805626598465473,
|
|
"grad_norm": 0.16524085689648021,
|
|
"learning_rate": 5.414127860820719e-06,
|
|
"loss": 1.0098,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 4.8081841432225065,
|
|
"grad_norm": 0.18739927868121126,
|
|
"learning_rate": 5.402796740739109e-06,
|
|
"loss": 1.0057,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 4.810741687979539,
|
|
"grad_norm": 0.17551431540439197,
|
|
"learning_rate": 5.391473100019767e-06,
|
|
"loss": 1.0378,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 4.813299232736573,
|
|
"grad_norm": 0.20076574431883742,
|
|
"learning_rate": 5.380156957085536e-06,
|
|
"loss": 1.0054,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 4.8158567774936065,
|
|
"grad_norm": 0.1633457331284817,
|
|
"learning_rate": 5.3688483303470895e-06,
|
|
"loss": 0.9945,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 4.818414322250639,
|
|
"grad_norm": 0.18981752589117254,
|
|
"learning_rate": 5.3575472382028386e-06,
|
|
"loss": 1.018,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 4.820971867007673,
|
|
"grad_norm": 0.1796254125656967,
|
|
"learning_rate": 5.346253699038966e-06,
|
|
"loss": 1.0175,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 4.823529411764706,
|
|
"grad_norm": 0.18612504881053146,
|
|
"learning_rate": 5.334967731229348e-06,
|
|
"loss": 1.0343,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 4.826086956521739,
|
|
"grad_norm": 0.1896503989682664,
|
|
"learning_rate": 5.323689353135546e-06,
|
|
"loss": 1.033,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 4.828644501278772,
|
|
"grad_norm": 0.17351769644886408,
|
|
"learning_rate": 5.312418583106784e-06,
|
|
"loss": 1.0341,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 4.831202046035806,
|
|
"grad_norm": 0.19813048664100952,
|
|
"learning_rate": 5.301155439479893e-06,
|
|
"loss": 1.0189,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 4.833759590792839,
|
|
"grad_norm": 0.17414587401870055,
|
|
"learning_rate": 5.289899940579315e-06,
|
|
"loss": 0.9979,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 4.836317135549872,
|
|
"grad_norm": 0.17954394790720937,
|
|
"learning_rate": 5.278652104717026e-06,
|
|
"loss": 1.033,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 4.838874680306906,
|
|
"grad_norm": 0.18225354012614833,
|
|
"learning_rate": 5.267411950192558e-06,
|
|
"loss": 1.0006,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 4.841432225063938,
|
|
"grad_norm": 0.19171250300846782,
|
|
"learning_rate": 5.256179495292953e-06,
|
|
"loss": 0.976,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 4.843989769820972,
|
|
"grad_norm": 0.16560762200333132,
|
|
"learning_rate": 5.244954758292691e-06,
|
|
"loss": 1.03,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 4.846547314578006,
|
|
"grad_norm": 0.17384349031638302,
|
|
"learning_rate": 5.233737757453733e-06,
|
|
"loss": 1.017,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 4.849104859335038,
|
|
"grad_norm": 0.18200737855014837,
|
|
"learning_rate": 5.222528511025429e-06,
|
|
"loss": 1.0544,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 4.851662404092072,
|
|
"grad_norm": 0.1674383880489774,
|
|
"learning_rate": 5.2113270372445334e-06,
|
|
"loss": 1.0199,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 4.854219948849105,
|
|
"grad_norm": 0.16206185822222566,
|
|
"learning_rate": 5.200133354335129e-06,
|
|
"loss": 1.0297,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 4.856777493606138,
|
|
"grad_norm": 0.16330979230562037,
|
|
"learning_rate": 5.188947480508644e-06,
|
|
"loss": 1.0618,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 4.859335038363171,
|
|
"grad_norm": 0.1641289208809162,
|
|
"learning_rate": 5.177769433963801e-06,
|
|
"loss": 1.0095,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 4.861892583120205,
|
|
"grad_norm": 0.16857653947800838,
|
|
"learning_rate": 5.166599232886579e-06,
|
|
"loss": 1.0132,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 4.864450127877237,
|
|
"grad_norm": 0.15123752972525892,
|
|
"learning_rate": 5.155436895450197e-06,
|
|
"loss": 1.0231,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 4.867007672634271,
|
|
"grad_norm": 0.18007827051394826,
|
|
"learning_rate": 5.144282439815075e-06,
|
|
"loss": 1.0299,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 4.869565217391305,
|
|
"grad_norm": 0.17145491388315698,
|
|
"learning_rate": 5.133135884128828e-06,
|
|
"loss": 1.0426,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 4.872122762148337,
|
|
"grad_norm": 0.15111451411798363,
|
|
"learning_rate": 5.121997246526188e-06,
|
|
"loss": 1.0335,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 4.874680306905371,
|
|
"grad_norm": 0.17562740075351813,
|
|
"learning_rate": 5.110866545129031e-06,
|
|
"loss": 1.0226,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 4.877237851662404,
|
|
"grad_norm": 0.14883986205754957,
|
|
"learning_rate": 5.099743798046315e-06,
|
|
"loss": 1.03,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 4.879795396419437,
|
|
"grad_norm": 0.16425606815927463,
|
|
"learning_rate": 5.088629023374052e-06,
|
|
"loss": 1.0524,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 4.882352941176471,
|
|
"grad_norm": 0.15699998164150683,
|
|
"learning_rate": 5.0775222391952826e-06,
|
|
"loss": 1.0598,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 4.884910485933504,
|
|
"grad_norm": 0.16747367530556498,
|
|
"learning_rate": 5.06642346358005e-06,
|
|
"loss": 1.0197,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 4.887468030690537,
|
|
"grad_norm": 0.19072243056188606,
|
|
"learning_rate": 5.055332714585372e-06,
|
|
"loss": 1.001,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 4.89002557544757,
|
|
"grad_norm": 0.16853967810789172,
|
|
"learning_rate": 5.044250010255202e-06,
|
|
"loss": 1.0432,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 4.892583120204604,
|
|
"grad_norm": 0.17828385119329374,
|
|
"learning_rate": 5.033175368620406e-06,
|
|
"loss": 1.0314,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 4.8951406649616365,
|
|
"grad_norm": 0.15062414843555882,
|
|
"learning_rate": 5.022108807698735e-06,
|
|
"loss": 1.0358,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 4.89769820971867,
|
|
"grad_norm": 0.17399854674836523,
|
|
"learning_rate": 5.0110503454947926e-06,
|
|
"loss": 1.0265,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 4.900255754475703,
|
|
"grad_norm": 0.16505478849391259,
|
|
"learning_rate": 5.000000000000003e-06,
|
|
"loss": 1.0495,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 4.9028132992327365,
|
|
"grad_norm": 0.1446909805445552,
|
|
"learning_rate": 4.988957789192583e-06,
|
|
"loss": 1.0044,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 4.90537084398977,
|
|
"grad_norm": 0.16047225013403066,
|
|
"learning_rate": 4.97792373103753e-06,
|
|
"loss": 0.977,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 4.907928388746803,
|
|
"grad_norm": 0.15267602057033672,
|
|
"learning_rate": 4.966897843486561e-06,
|
|
"loss": 1.0563,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 4.910485933503836,
|
|
"grad_norm": 0.14094891470116488,
|
|
"learning_rate": 4.955880144478101e-06,
|
|
"loss": 1.0172,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 4.913043478260869,
|
|
"grad_norm": 0.16225336285064607,
|
|
"learning_rate": 4.944870651937267e-06,
|
|
"loss": 1.0332,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 4.915601023017903,
|
|
"grad_norm": 0.15352807995544615,
|
|
"learning_rate": 4.933869383775809e-06,
|
|
"loss": 1.0285,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 4.918158567774936,
|
|
"grad_norm": 0.14893755036217834,
|
|
"learning_rate": 4.922876357892103e-06,
|
|
"loss": 1.0082,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 4.920716112531969,
|
|
"grad_norm": 0.17251988177114058,
|
|
"learning_rate": 4.911891592171113e-06,
|
|
"loss": 1.0131,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 4.923273657289003,
|
|
"grad_norm": 0.15340872718806947,
|
|
"learning_rate": 4.900915104484372e-06,
|
|
"loss": 1.0502,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 4.9258312020460355,
|
|
"grad_norm": 0.16259551968874744,
|
|
"learning_rate": 4.889946912689936e-06,
|
|
"loss": 1.0457,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 4.928388746803069,
|
|
"grad_norm": 0.15432669889294595,
|
|
"learning_rate": 4.878987034632361e-06,
|
|
"loss": 1.0491,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 4.930946291560103,
|
|
"grad_norm": 0.16399149074989694,
|
|
"learning_rate": 4.8680354881426935e-06,
|
|
"loss": 1.011,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 4.9335038363171355,
|
|
"grad_norm": 0.17537267004354543,
|
|
"learning_rate": 4.857092291038411e-06,
|
|
"loss": 1.0356,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 4.936061381074169,
|
|
"grad_norm": 0.15804425089068397,
|
|
"learning_rate": 4.846157461123411e-06,
|
|
"loss": 1.0556,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 4.938618925831202,
|
|
"grad_norm": 0.1644217524312441,
|
|
"learning_rate": 4.8352310161879724e-06,
|
|
"loss": 1.0521,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 4.9411764705882355,
|
|
"grad_norm": 0.166490586450367,
|
|
"learning_rate": 4.824312974008748e-06,
|
|
"loss": 1.0348,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 4.943734015345268,
|
|
"grad_norm": 0.15262614264530625,
|
|
"learning_rate": 4.813403352348703e-06,
|
|
"loss": 1.003,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 4.946291560102302,
|
|
"grad_norm": 0.16914604106371434,
|
|
"learning_rate": 4.8025021689571095e-06,
|
|
"loss": 1.0261,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 4.948849104859335,
|
|
"grad_norm": 0.14949420788516232,
|
|
"learning_rate": 4.791609441569517e-06,
|
|
"loss": 1.013,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 4.951406649616368,
|
|
"grad_norm": 0.18410232609002486,
|
|
"learning_rate": 4.780725187907707e-06,
|
|
"loss": 1.0211,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 4.953964194373402,
|
|
"grad_norm": 0.14300056243568887,
|
|
"learning_rate": 4.769849425679683e-06,
|
|
"loss": 1.0222,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 4.956521739130435,
|
|
"grad_norm": 0.17246451645014146,
|
|
"learning_rate": 4.758982172579621e-06,
|
|
"loss": 0.9967,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 4.959079283887468,
|
|
"grad_norm": 0.17259140226193048,
|
|
"learning_rate": 4.748123446287875e-06,
|
|
"loss": 1.0321,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 4.961636828644501,
|
|
"grad_norm": 1.1109363534677956,
|
|
"learning_rate": 4.737273264470909e-06,
|
|
"loss": 1.0923,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 4.964194373401535,
|
|
"grad_norm": 0.17074890567417172,
|
|
"learning_rate": 4.726431644781284e-06,
|
|
"loss": 1.0245,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 4.966751918158568,
|
|
"grad_norm": 0.15432050773937248,
|
|
"learning_rate": 4.715598604857648e-06,
|
|
"loss": 1.0378,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 4.969309462915601,
|
|
"grad_norm": 0.15888604747270782,
|
|
"learning_rate": 4.704774162324673e-06,
|
|
"loss": 1.0287,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 4.971867007672635,
|
|
"grad_norm": 0.17597082523498278,
|
|
"learning_rate": 4.6939583347930525e-06,
|
|
"loss": 1.0024,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 4.974424552429667,
|
|
"grad_norm": 0.15465610920055028,
|
|
"learning_rate": 4.6831511398594574e-06,
|
|
"loss": 1.0216,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 4.976982097186701,
|
|
"grad_norm": 0.16914400485984177,
|
|
"learning_rate": 4.672352595106525e-06,
|
|
"loss": 1.0595,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 4.979539641943734,
|
|
"grad_norm": 0.17772012019293779,
|
|
"learning_rate": 4.661562718102808e-06,
|
|
"loss": 1.0056,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 4.982097186700767,
|
|
"grad_norm": 0.14226899552306443,
|
|
"learning_rate": 4.65078152640276e-06,
|
|
"loss": 1.0221,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 4.9846547314578,
|
|
"grad_norm": 0.14866025187075746,
|
|
"learning_rate": 4.640009037546711e-06,
|
|
"loss": 1.0534,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 4.987212276214834,
|
|
"grad_norm": 0.18309163357147787,
|
|
"learning_rate": 4.629245269060826e-06,
|
|
"loss": 1.046,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 4.989769820971867,
|
|
"grad_norm": 0.14195791571684566,
|
|
"learning_rate": 4.61849023845708e-06,
|
|
"loss": 1.0119,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 4.9923273657289,
|
|
"grad_norm": 0.15240227847957083,
|
|
"learning_rate": 4.607743963233233e-06,
|
|
"loss": 1.0373,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 4.994884910485934,
|
|
"grad_norm": 0.1706260447764414,
|
|
"learning_rate": 4.5970064608728085e-06,
|
|
"loss": 0.9995,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 4.997442455242966,
|
|
"grad_norm": 0.16263531281395652,
|
|
"learning_rate": 4.586277748845056e-06,
|
|
"loss": 1.0053,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.15411495644560275,
|
|
"learning_rate": 4.575557844604905e-06,
|
|
"loss": 1.0268,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 5.002557544757034,
|
|
"grad_norm": 0.15615925966080388,
|
|
"learning_rate": 4.5648467655929815e-06,
|
|
"loss": 1.0199,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 5.005115089514066,
|
|
"grad_norm": 0.16045903540647527,
|
|
"learning_rate": 4.554144529235537e-06,
|
|
"loss": 1.0277,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 5.0076726342711,
|
|
"grad_norm": 0.16031341969126212,
|
|
"learning_rate": 4.543451152944438e-06,
|
|
"loss": 1.0562,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 5.010230179028133,
|
|
"grad_norm": 0.1429706019310508,
|
|
"learning_rate": 4.532766654117146e-06,
|
|
"loss": 1.031,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 5.012787723785166,
|
|
"grad_norm": 0.15753846906492294,
|
|
"learning_rate": 4.5220910501366635e-06,
|
|
"loss": 1.0368,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 5.015345268542199,
|
|
"grad_norm": 0.14579202507979455,
|
|
"learning_rate": 4.511424358371544e-06,
|
|
"loss": 1.0358,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 5.017902813299233,
|
|
"grad_norm": 0.15694921661063782,
|
|
"learning_rate": 4.500766596175813e-06,
|
|
"loss": 1.0037,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 5.020460358056266,
|
|
"grad_norm": 0.16268209756361607,
|
|
"learning_rate": 4.490117780888994e-06,
|
|
"loss": 1.0191,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 5.023017902813299,
|
|
"grad_norm": 0.13601692002794843,
|
|
"learning_rate": 4.479477929836039e-06,
|
|
"loss": 1.0225,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 5.025575447570333,
|
|
"grad_norm": 0.1513485213042126,
|
|
"learning_rate": 4.4688470603273184e-06,
|
|
"loss": 0.9987,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 5.028132992327365,
|
|
"grad_norm": 0.14505501997147888,
|
|
"learning_rate": 4.458225189658598e-06,
|
|
"loss": 1.0244,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 5.030690537084399,
|
|
"grad_norm": 0.15866972934335427,
|
|
"learning_rate": 4.447612335110991e-06,
|
|
"loss": 1.0147,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 5.033248081841432,
|
|
"grad_norm": 0.15717036214065513,
|
|
"learning_rate": 4.43700851395096e-06,
|
|
"loss": 1.0056,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 5.035805626598465,
|
|
"grad_norm": 0.15634999112536652,
|
|
"learning_rate": 4.426413743430241e-06,
|
|
"loss": 1.0486,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 5.038363171355499,
|
|
"grad_norm": 0.1549586768650421,
|
|
"learning_rate": 4.415828040785877e-06,
|
|
"loss": 1.0046,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 5.040920716112532,
|
|
"grad_norm": 0.1643495461245206,
|
|
"learning_rate": 4.405251423240138e-06,
|
|
"loss": 1.0158,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 5.043478260869565,
|
|
"grad_norm": 0.14558675280550004,
|
|
"learning_rate": 4.3946839080005236e-06,
|
|
"loss": 1.0167,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 5.046035805626598,
|
|
"grad_norm": 0.16057769002475886,
|
|
"learning_rate": 4.384125512259718e-06,
|
|
"loss": 1.0412,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 5.048593350383632,
|
|
"grad_norm": 0.1589654545230765,
|
|
"learning_rate": 4.373576253195568e-06,
|
|
"loss": 1.0058,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 5.051150895140665,
|
|
"grad_norm": 0.14004326798784272,
|
|
"learning_rate": 4.363036147971069e-06,
|
|
"loss": 0.9958,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 5.053708439897698,
|
|
"grad_norm": 0.16704739125788623,
|
|
"learning_rate": 4.352505213734298e-06,
|
|
"loss": 1.0202,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 5.056265984654732,
|
|
"grad_norm": 0.15270263482532218,
|
|
"learning_rate": 4.3419834676184395e-06,
|
|
"loss": 1.0221,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 5.0588235294117645,
|
|
"grad_norm": 0.15264750560420307,
|
|
"learning_rate": 4.331470926741707e-06,
|
|
"loss": 1.0264,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 5.061381074168798,
|
|
"grad_norm": 0.1675831575968936,
|
|
"learning_rate": 4.320967608207354e-06,
|
|
"loss": 1.0256,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 5.063938618925831,
|
|
"grad_norm": 0.15506176173449848,
|
|
"learning_rate": 4.3104735291036214e-06,
|
|
"loss": 1.0246,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 5.0664961636828645,
|
|
"grad_norm": 0.147438074557832,
|
|
"learning_rate": 4.299988706503716e-06,
|
|
"loss": 0.9895,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 5.069053708439898,
|
|
"grad_norm": 0.13712823238173896,
|
|
"learning_rate": 4.289513157465796e-06,
|
|
"loss": 1.0069,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 5.071611253196931,
|
|
"grad_norm": 0.1530445973165712,
|
|
"learning_rate": 4.279046899032918e-06,
|
|
"loss": 1.028,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 5.0741687979539645,
|
|
"grad_norm": 0.1487111811647309,
|
|
"learning_rate": 4.268589948233034e-06,
|
|
"loss": 0.9806,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 5.076726342710997,
|
|
"grad_norm": 0.1536495899212468,
|
|
"learning_rate": 4.258142322078944e-06,
|
|
"loss": 1.0141,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 5.079283887468031,
|
|
"grad_norm": 0.1420705753526825,
|
|
"learning_rate": 4.247704037568289e-06,
|
|
"loss": 1.0484,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 5.081841432225064,
|
|
"grad_norm": 0.14854933088338998,
|
|
"learning_rate": 4.237275111683502e-06,
|
|
"loss": 1.0176,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 5.084398976982097,
|
|
"grad_norm": 0.15085396882702742,
|
|
"learning_rate": 4.226855561391792e-06,
|
|
"loss": 1.0241,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 5.086956521739131,
|
|
"grad_norm": 0.13480571166529362,
|
|
"learning_rate": 4.2164454036451185e-06,
|
|
"loss": 1.0105,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 5.089514066496164,
|
|
"grad_norm": 0.15439478858765343,
|
|
"learning_rate": 4.2060446553801585e-06,
|
|
"loss": 1.0571,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 5.092071611253197,
|
|
"grad_norm": 0.14887589003918353,
|
|
"learning_rate": 4.195653333518271e-06,
|
|
"loss": 1.0309,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 5.09462915601023,
|
|
"grad_norm": 0.14823587280930983,
|
|
"learning_rate": 4.1852714549654985e-06,
|
|
"loss": 1.0286,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 5.0971867007672635,
|
|
"grad_norm": 0.1502816473196306,
|
|
"learning_rate": 4.1748990366125005e-06,
|
|
"loss": 1.0092,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 5.099744245524296,
|
|
"grad_norm": 0.13426636004437947,
|
|
"learning_rate": 4.164536095334557e-06,
|
|
"loss": 1.0055,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 5.10230179028133,
|
|
"grad_norm": 0.14869672831898953,
|
|
"learning_rate": 4.154182647991519e-06,
|
|
"loss": 1.0492,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 5.1048593350383635,
|
|
"grad_norm": 0.15755018419795028,
|
|
"learning_rate": 4.143838711427808e-06,
|
|
"loss": 1.0103,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 5.107416879795396,
|
|
"grad_norm": 0.1503017786383216,
|
|
"learning_rate": 4.133504302472356e-06,
|
|
"loss": 1.0015,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 5.10997442455243,
|
|
"grad_norm": 0.14022700208845976,
|
|
"learning_rate": 4.123179437938596e-06,
|
|
"loss": 1.0394,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 5.112531969309463,
|
|
"grad_norm": 0.149747082086179,
|
|
"learning_rate": 4.112864134624447e-06,
|
|
"loss": 1.0406,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 5.115089514066496,
|
|
"grad_norm": 0.15174138196167658,
|
|
"learning_rate": 4.102558409312256e-06,
|
|
"loss": 1.022,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 5.117647058823529,
|
|
"grad_norm": 0.14846170493390945,
|
|
"learning_rate": 4.092262278768797e-06,
|
|
"loss": 1.0132,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 5.120204603580563,
|
|
"grad_norm": 0.14541949365283377,
|
|
"learning_rate": 4.0819757597452246e-06,
|
|
"loss": 1.0328,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 5.122762148337596,
|
|
"grad_norm": 0.16073985913183766,
|
|
"learning_rate": 4.0716988689770695e-06,
|
|
"loss": 1.0067,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 5.125319693094629,
|
|
"grad_norm": 0.14371815787004755,
|
|
"learning_rate": 4.061431623184188e-06,
|
|
"loss": 1.0289,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 5.127877237851663,
|
|
"grad_norm": 0.14339076964243316,
|
|
"learning_rate": 4.051174039070742e-06,
|
|
"loss": 0.9812,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 5.130434782608695,
|
|
"grad_norm": 0.1437711220903366,
|
|
"learning_rate": 4.040926133325188e-06,
|
|
"loss": 1.0059,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 5.132992327365729,
|
|
"grad_norm": 0.1432806446083087,
|
|
"learning_rate": 4.030687922620223e-06,
|
|
"loss": 1.0183,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 5.135549872122763,
|
|
"grad_norm": 0.14407049755074497,
|
|
"learning_rate": 4.020459423612777e-06,
|
|
"loss": 1.0328,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 5.138107416879795,
|
|
"grad_norm": 0.14311456671607106,
|
|
"learning_rate": 4.010240652943974e-06,
|
|
"loss": 1.0247,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 5.140664961636829,
|
|
"grad_norm": 0.14651674275116736,
|
|
"learning_rate": 4.000031627239123e-06,
|
|
"loss": 1.0271,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 5.143222506393862,
|
|
"grad_norm": 0.14244659447949104,
|
|
"learning_rate": 3.989832363107664e-06,
|
|
"loss": 0.9729,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 5.145780051150895,
|
|
"grad_norm": 0.1474525383109307,
|
|
"learning_rate": 3.9796428771431625e-06,
|
|
"loss": 1.0208,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 5.148337595907928,
|
|
"grad_norm": 0.14684653759057748,
|
|
"learning_rate": 3.96946318592328e-06,
|
|
"loss": 0.9944,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 5.150895140664962,
|
|
"grad_norm": 0.14793817657477276,
|
|
"learning_rate": 3.959293306009734e-06,
|
|
"loss": 1.0606,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 5.153452685421995,
|
|
"grad_norm": 0.13847357302909763,
|
|
"learning_rate": 3.949133253948284e-06,
|
|
"loss": 1.0035,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 5.156010230179028,
|
|
"grad_norm": 0.14747847539008258,
|
|
"learning_rate": 3.938983046268695e-06,
|
|
"loss": 0.9869,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 5.158567774936062,
|
|
"grad_norm": 0.14511374476416694,
|
|
"learning_rate": 3.9288426994847285e-06,
|
|
"loss": 1.0238,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 5.161125319693094,
|
|
"grad_norm": 0.15030414965811079,
|
|
"learning_rate": 3.918712230094091e-06,
|
|
"loss": 1.0521,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 5.163682864450128,
|
|
"grad_norm": 0.14420923408617164,
|
|
"learning_rate": 3.908591654578417e-06,
|
|
"loss": 0.9878,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 5.166240409207161,
|
|
"grad_norm": 0.1369795797536583,
|
|
"learning_rate": 3.89848098940326e-06,
|
|
"loss": 1.0203,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 5.168797953964194,
|
|
"grad_norm": 0.15862135307508646,
|
|
"learning_rate": 3.888380251018035e-06,
|
|
"loss": 1.0112,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 5.171355498721228,
|
|
"grad_norm": 0.13968732984433663,
|
|
"learning_rate": 3.878289455856013e-06,
|
|
"loss": 1.0589,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 5.173913043478261,
|
|
"grad_norm": 0.14444481777607088,
|
|
"learning_rate": 3.868208620334282e-06,
|
|
"loss": 1.0065,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 5.176470588235294,
|
|
"grad_norm": 0.14184611750434217,
|
|
"learning_rate": 3.858137760853737e-06,
|
|
"loss": 1.0189,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 5.179028132992327,
|
|
"grad_norm": 0.14923144029216218,
|
|
"learning_rate": 3.84807689379904e-06,
|
|
"loss": 1.0052,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 5.181585677749361,
|
|
"grad_norm": 0.15459564247502722,
|
|
"learning_rate": 3.838026035538581e-06,
|
|
"loss": 0.9946,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 5.1841432225063935,
|
|
"grad_norm": 0.1418795966374483,
|
|
"learning_rate": 3.827985202424488e-06,
|
|
"loss": 1.0234,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 5.186700767263427,
|
|
"grad_norm": 0.1553154903132494,
|
|
"learning_rate": 3.817954410792565e-06,
|
|
"loss": 1.0137,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 5.189258312020461,
|
|
"grad_norm": 0.14275503896178632,
|
|
"learning_rate": 3.8079336769622834e-06,
|
|
"loss": 1.0289,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 5.1918158567774935,
|
|
"grad_norm": 0.13897565956134958,
|
|
"learning_rate": 3.7979230172367453e-06,
|
|
"loss": 1.0148,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 5.194373401534527,
|
|
"grad_norm": 0.14252828284486727,
|
|
"learning_rate": 3.7879224479026745e-06,
|
|
"loss": 1.0068,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 5.19693094629156,
|
|
"grad_norm": 0.1517901716492953,
|
|
"learning_rate": 3.7779319852303766e-06,
|
|
"loss": 1.0572,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 5.1994884910485935,
|
|
"grad_norm": 0.1439259357160915,
|
|
"learning_rate": 3.7679516454736977e-06,
|
|
"loss": 1.0446,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 5.202046035805626,
|
|
"grad_norm": 0.1371345617669485,
|
|
"learning_rate": 3.757981444870035e-06,
|
|
"loss": 0.9957,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 5.20460358056266,
|
|
"grad_norm": 0.16004739713130242,
|
|
"learning_rate": 3.748021399640279e-06,
|
|
"loss": 1.0276,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 5.207161125319693,
|
|
"grad_norm": 0.1441426514349444,
|
|
"learning_rate": 3.7380715259888e-06,
|
|
"loss": 1.0344,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 5.209718670076726,
|
|
"grad_norm": 0.14152534835692054,
|
|
"learning_rate": 3.7281318401034183e-06,
|
|
"loss": 0.9949,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 5.21227621483376,
|
|
"grad_norm": 0.1481149663167974,
|
|
"learning_rate": 3.718202358155384e-06,
|
|
"loss": 1.0545,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 5.2148337595907925,
|
|
"grad_norm": 0.13716666870403715,
|
|
"learning_rate": 3.7082830962993497e-06,
|
|
"loss": 1.0388,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 5.217391304347826,
|
|
"grad_norm": 0.1427599492035968,
|
|
"learning_rate": 3.6983740706733207e-06,
|
|
"loss": 0.9945,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 5.21994884910486,
|
|
"grad_norm": 0.14437989757241948,
|
|
"learning_rate": 3.688475297398674e-06,
|
|
"loss": 1.037,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 5.2225063938618925,
|
|
"grad_norm": 0.1407689885502161,
|
|
"learning_rate": 3.6785867925800856e-06,
|
|
"loss": 1.0019,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 5.225063938618926,
|
|
"grad_norm": 0.1381622930416597,
|
|
"learning_rate": 3.668708572305546e-06,
|
|
"loss": 1.0384,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 5.227621483375959,
|
|
"grad_norm": 0.13975927307572164,
|
|
"learning_rate": 3.658840652646287e-06,
|
|
"loss": 1.0018,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 5.2301790281329925,
|
|
"grad_norm": 0.15578171256673842,
|
|
"learning_rate": 3.6489830496568067e-06,
|
|
"loss": 1.0221,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 5.232736572890025,
|
|
"grad_norm": 0.14587450260403836,
|
|
"learning_rate": 3.639135779374813e-06,
|
|
"loss": 1.0462,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 5.235294117647059,
|
|
"grad_norm": 0.14336907869458113,
|
|
"learning_rate": 3.6292988578211863e-06,
|
|
"loss": 1.0242,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 5.2378516624040925,
|
|
"grad_norm": 0.13614785911809554,
|
|
"learning_rate": 3.619472300999992e-06,
|
|
"loss": 1.002,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 5.240409207161125,
|
|
"grad_norm": 0.14654873047839187,
|
|
"learning_rate": 3.6096561248984186e-06,
|
|
"loss": 1.0365,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 5.242966751918159,
|
|
"grad_norm": 0.14832735168435557,
|
|
"learning_rate": 3.5998503454867807e-06,
|
|
"loss": 1.0206,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 5.245524296675192,
|
|
"grad_norm": 0.15182549845090051,
|
|
"learning_rate": 3.5900549787184534e-06,
|
|
"loss": 1.0086,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 5.248081841432225,
|
|
"grad_norm": 0.15218834374865772,
|
|
"learning_rate": 3.580270040529894e-06,
|
|
"loss": 1.0457,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 5.250639386189258,
|
|
"grad_norm": 0.1386445311628316,
|
|
"learning_rate": 3.570495546840591e-06,
|
|
"loss": 1.0316,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 5.253196930946292,
|
|
"grad_norm": 0.1415172130548022,
|
|
"learning_rate": 3.560731513553022e-06,
|
|
"loss": 1.033,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 5.255754475703325,
|
|
"grad_norm": 0.134688736061587,
|
|
"learning_rate": 3.5509779565526683e-06,
|
|
"loss": 1.0341,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 5.258312020460358,
|
|
"grad_norm": 0.14665953403303808,
|
|
"learning_rate": 3.5412348917079507e-06,
|
|
"loss": 1.0621,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 5.260869565217392,
|
|
"grad_norm": 0.13619183573807261,
|
|
"learning_rate": 3.5315023348702325e-06,
|
|
"loss": 1.0366,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 5.263427109974424,
|
|
"grad_norm": 0.13658849089622857,
|
|
"learning_rate": 3.521780301873773e-06,
|
|
"loss": 1.0008,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 5.265984654731458,
|
|
"grad_norm": 0.14630387436677678,
|
|
"learning_rate": 3.512068808535707e-06,
|
|
"loss": 1.0147,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 5.268542199488491,
|
|
"grad_norm": 0.13734073999332427,
|
|
"learning_rate": 3.502367870656035e-06,
|
|
"loss": 1.028,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 5.271099744245524,
|
|
"grad_norm": 0.1355644028489033,
|
|
"learning_rate": 3.492677504017573e-06,
|
|
"loss": 1.0026,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 5.273657289002558,
|
|
"grad_norm": 0.14119902993384847,
|
|
"learning_rate": 3.4829977243859414e-06,
|
|
"loss": 1.0093,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 5.276214833759591,
|
|
"grad_norm": 0.14118557253626327,
|
|
"learning_rate": 3.4733285475095324e-06,
|
|
"loss": 1.0255,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 5.278772378516624,
|
|
"grad_norm": 0.13630213438701977,
|
|
"learning_rate": 3.4636699891195e-06,
|
|
"loss": 1.0176,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 5.281329923273657,
|
|
"grad_norm": 0.1355438862392238,
|
|
"learning_rate": 3.454022064929711e-06,
|
|
"loss": 1.0355,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 5.283887468030691,
|
|
"grad_norm": 0.1335405410237401,
|
|
"learning_rate": 3.4443847906367313e-06,
|
|
"loss": 0.9999,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 5.286445012787723,
|
|
"grad_norm": 0.13568542243072879,
|
|
"learning_rate": 3.4347581819198095e-06,
|
|
"loss": 1.0069,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 5.289002557544757,
|
|
"grad_norm": 0.14279750042804518,
|
|
"learning_rate": 3.425142254440835e-06,
|
|
"loss": 1.0316,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 5.291560102301791,
|
|
"grad_norm": 0.1421562223189775,
|
|
"learning_rate": 3.4155370238443185e-06,
|
|
"loss": 0.9929,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 5.294117647058823,
|
|
"grad_norm": 0.13090998129388792,
|
|
"learning_rate": 3.405942505757367e-06,
|
|
"loss": 1.0235,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 5.296675191815857,
|
|
"grad_norm": 0.1447611334505954,
|
|
"learning_rate": 3.3963587157896694e-06,
|
|
"loss": 0.9883,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 5.29923273657289,
|
|
"grad_norm": 0.1486460622906693,
|
|
"learning_rate": 3.386785669533447e-06,
|
|
"loss": 1.0614,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 5.301790281329923,
|
|
"grad_norm": 0.13082209863415079,
|
|
"learning_rate": 3.377223382563446e-06,
|
|
"loss": 1.019,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 5.304347826086957,
|
|
"grad_norm": 0.14431855838963542,
|
|
"learning_rate": 3.367671870436915e-06,
|
|
"loss": 1.0744,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 5.30690537084399,
|
|
"grad_norm": 0.13501366283453947,
|
|
"learning_rate": 3.358131148693564e-06,
|
|
"loss": 1.0204,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 5.309462915601023,
|
|
"grad_norm": 0.13647498103708036,
|
|
"learning_rate": 3.3486012328555505e-06,
|
|
"loss": 1.0361,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 5.312020460358056,
|
|
"grad_norm": 0.13678423051822214,
|
|
"learning_rate": 3.33908213842745e-06,
|
|
"loss": 1.0416,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 5.31457800511509,
|
|
"grad_norm": 0.15117370323671084,
|
|
"learning_rate": 3.3295738808962388e-06,
|
|
"loss": 1.0398,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 5.3171355498721224,
|
|
"grad_norm": 0.13218102548293045,
|
|
"learning_rate": 3.3200764757312555e-06,
|
|
"loss": 1.0211,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 5.319693094629156,
|
|
"grad_norm": 0.13875158228376064,
|
|
"learning_rate": 3.310589938384179e-06,
|
|
"loss": 1.0246,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 5.322250639386189,
|
|
"grad_norm": 0.1390888027343779,
|
|
"learning_rate": 3.301114284289021e-06,
|
|
"loss": 1.0228,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 5.324808184143222,
|
|
"grad_norm": 0.14311106791965889,
|
|
"learning_rate": 3.291649528862074e-06,
|
|
"loss": 1.0366,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 5.327365728900256,
|
|
"grad_norm": 0.1329482436934704,
|
|
"learning_rate": 3.2821956875019045e-06,
|
|
"loss": 0.9983,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 5.329923273657289,
|
|
"grad_norm": 0.1353254341465528,
|
|
"learning_rate": 3.272752775589316e-06,
|
|
"loss": 1.0262,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 5.332480818414322,
|
|
"grad_norm": 0.14279181335598803,
|
|
"learning_rate": 3.2633208084873445e-06,
|
|
"loss": 1.0214,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 5.335038363171355,
|
|
"grad_norm": 0.14938681808695,
|
|
"learning_rate": 3.253899801541206e-06,
|
|
"loss": 1.0458,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 5.337595907928389,
|
|
"grad_norm": 0.13903091402439763,
|
|
"learning_rate": 3.244489770078286e-06,
|
|
"loss": 1.0699,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 5.340153452685422,
|
|
"grad_norm": 0.14447995472723943,
|
|
"learning_rate": 3.2350907294081258e-06,
|
|
"loss": 0.9936,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 5.342710997442455,
|
|
"grad_norm": 0.14276869094442168,
|
|
"learning_rate": 3.2257026948223726e-06,
|
|
"loss": 1.0565,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 5.345268542199489,
|
|
"grad_norm": 0.14335515694613532,
|
|
"learning_rate": 3.2163256815947674e-06,
|
|
"loss": 0.9993,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 5.3478260869565215,
|
|
"grad_norm": 0.14665513927933138,
|
|
"learning_rate": 3.206959704981133e-06,
|
|
"loss": 1.0555,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 5.350383631713555,
|
|
"grad_norm": 0.1322833527352921,
|
|
"learning_rate": 3.197604780219323e-06,
|
|
"loss": 0.9652,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 5.352941176470588,
|
|
"grad_norm": 0.13906561826785738,
|
|
"learning_rate": 3.188260922529215e-06,
|
|
"loss": 1.0432,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 5.3554987212276215,
|
|
"grad_norm": 0.14254937224329012,
|
|
"learning_rate": 3.1789281471126786e-06,
|
|
"loss": 1.0175,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 5.358056265984655,
|
|
"grad_norm": 0.14911195774932937,
|
|
"learning_rate": 3.1696064691535634e-06,
|
|
"loss": 1.0024,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 5.360613810741688,
|
|
"grad_norm": 0.1296333526942248,
|
|
"learning_rate": 3.1602959038176516e-06,
|
|
"loss": 1.016,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 5.3631713554987215,
|
|
"grad_norm": 0.14492528039945102,
|
|
"learning_rate": 3.1509964662526484e-06,
|
|
"loss": 1.0072,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 5.365728900255754,
|
|
"grad_norm": 0.14261896658846623,
|
|
"learning_rate": 3.1417081715881623e-06,
|
|
"loss": 0.997,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 5.368286445012788,
|
|
"grad_norm": 0.15062841301973245,
|
|
"learning_rate": 3.132431034935667e-06,
|
|
"loss": 1.0286,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 5.370843989769821,
|
|
"grad_norm": 0.14079332067477582,
|
|
"learning_rate": 3.1231650713884832e-06,
|
|
"loss": 1.0331,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 5.373401534526854,
|
|
"grad_norm": 0.13555419460898196,
|
|
"learning_rate": 3.1139102960217493e-06,
|
|
"loss": 1.0041,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 5.375959079283888,
|
|
"grad_norm": 0.13880524146849596,
|
|
"learning_rate": 3.1046667238924155e-06,
|
|
"loss": 1.0423,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 5.378516624040921,
|
|
"grad_norm": 0.1511402878049476,
|
|
"learning_rate": 3.0954343700391897e-06,
|
|
"loss": 1.0349,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 5.381074168797954,
|
|
"grad_norm": 0.14254863702344298,
|
|
"learning_rate": 3.0862132494825325e-06,
|
|
"loss": 1.026,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 5.383631713554987,
|
|
"grad_norm": 0.1352194409726658,
|
|
"learning_rate": 3.0770033772246376e-06,
|
|
"loss": 0.9938,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 5.3861892583120206,
|
|
"grad_norm": 0.14319029352124846,
|
|
"learning_rate": 3.067804768249386e-06,
|
|
"loss": 0.9968,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 5.388746803069053,
|
|
"grad_norm": 0.1348404188548053,
|
|
"learning_rate": 3.058617437522342e-06,
|
|
"loss": 1.0166,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 5.391304347826087,
|
|
"grad_norm": 0.14010852729827156,
|
|
"learning_rate": 3.0494413999907125e-06,
|
|
"loss": 1.0066,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 5.3938618925831205,
|
|
"grad_norm": 0.1351055036158788,
|
|
"learning_rate": 3.0402766705833455e-06,
|
|
"loss": 1.0052,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 5.396419437340153,
|
|
"grad_norm": 0.13186613064153313,
|
|
"learning_rate": 3.0311232642106768e-06,
|
|
"loss": 0.9969,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 5.398976982097187,
|
|
"grad_norm": 0.1408809630359071,
|
|
"learning_rate": 3.021981195764726e-06,
|
|
"loss": 1.0283,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 5.40153452685422,
|
|
"grad_norm": 0.12965889759923607,
|
|
"learning_rate": 3.0128504801190716e-06,
|
|
"loss": 1.0179,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 5.404092071611253,
|
|
"grad_norm": 0.13945206906826596,
|
|
"learning_rate": 3.003731132128811e-06,
|
|
"loss": 1.0099,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 5.406649616368286,
|
|
"grad_norm": 0.1400549514773388,
|
|
"learning_rate": 2.9946231666305627e-06,
|
|
"loss": 0.998,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 5.40920716112532,
|
|
"grad_norm": 0.13519306803227119,
|
|
"learning_rate": 2.9855265984424042e-06,
|
|
"loss": 1.0069,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 5.411764705882353,
|
|
"grad_norm": 0.12988356378358373,
|
|
"learning_rate": 2.976441442363893e-06,
|
|
"loss": 0.9928,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 5.414322250639386,
|
|
"grad_norm": 0.13225437647406532,
|
|
"learning_rate": 2.967367713176007e-06,
|
|
"loss": 1.0082,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 5.41687979539642,
|
|
"grad_norm": 0.13453763452834291,
|
|
"learning_rate": 2.9583054256411326e-06,
|
|
"loss": 0.9779,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 5.419437340153452,
|
|
"grad_norm": 0.13933174777230192,
|
|
"learning_rate": 2.9492545945030517e-06,
|
|
"loss": 0.9947,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 5.421994884910486,
|
|
"grad_norm": 0.13265772100907866,
|
|
"learning_rate": 2.940215234486894e-06,
|
|
"loss": 1.0304,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 5.42455242966752,
|
|
"grad_norm": 0.13461066684644984,
|
|
"learning_rate": 2.9311873602991435e-06,
|
|
"loss": 1.0265,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 5.427109974424552,
|
|
"grad_norm": 0.13302962430701365,
|
|
"learning_rate": 2.922170986627573e-06,
|
|
"loss": 0.9907,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 5.429667519181586,
|
|
"grad_norm": 0.1372156107097446,
|
|
"learning_rate": 2.913166128141265e-06,
|
|
"loss": 1.0362,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 5.432225063938619,
|
|
"grad_norm": 0.13526418969755188,
|
|
"learning_rate": 2.9041727994905686e-06,
|
|
"loss": 1.0335,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 5.434782608695652,
|
|
"grad_norm": 0.14056788233892892,
|
|
"learning_rate": 2.895191015307055e-06,
|
|
"loss": 0.9863,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 5.437340153452685,
|
|
"grad_norm": 0.13830914570568487,
|
|
"learning_rate": 2.8862207902035334e-06,
|
|
"loss": 1.0279,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 5.439897698209719,
|
|
"grad_norm": 0.13255464251905436,
|
|
"learning_rate": 2.877262138773994e-06,
|
|
"loss": 1.0074,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 5.442455242966752,
|
|
"grad_norm": 0.13094809127879986,
|
|
"learning_rate": 2.8683150755936107e-06,
|
|
"loss": 1.0007,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 5.445012787723785,
|
|
"grad_norm": 0.13969902391137623,
|
|
"learning_rate": 2.859379615218685e-06,
|
|
"loss": 1.0183,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 5.447570332480819,
|
|
"grad_norm": 0.13298200813066383,
|
|
"learning_rate": 2.850455772186658e-06,
|
|
"loss": 1.0553,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 5.450127877237851,
|
|
"grad_norm": 0.13752465215056384,
|
|
"learning_rate": 2.8415435610160667e-06,
|
|
"loss": 1.0029,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 5.452685421994885,
|
|
"grad_norm": 0.13776730476333435,
|
|
"learning_rate": 2.8326429962065184e-06,
|
|
"loss": 1.0591,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 5.455242966751918,
|
|
"grad_norm": 0.15290697841832607,
|
|
"learning_rate": 2.8237540922386764e-06,
|
|
"loss": 1.0234,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 5.457800511508951,
|
|
"grad_norm": 0.1435647245473299,
|
|
"learning_rate": 2.8148768635742286e-06,
|
|
"loss": 1.0408,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 5.460358056265985,
|
|
"grad_norm": 0.1348972282036283,
|
|
"learning_rate": 2.8060113246558783e-06,
|
|
"loss": 1.0582,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 5.462915601023018,
|
|
"grad_norm": 0.14312694503231538,
|
|
"learning_rate": 2.7971574899072938e-06,
|
|
"loss": 1.0557,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 5.465473145780051,
|
|
"grad_norm": 0.14626596664710145,
|
|
"learning_rate": 2.7883153737331136e-06,
|
|
"loss": 1.0213,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 5.468030690537084,
|
|
"grad_norm": 0.12723321182479033,
|
|
"learning_rate": 2.7794849905189138e-06,
|
|
"loss": 1.0258,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 5.470588235294118,
|
|
"grad_norm": 0.1297835067922189,
|
|
"learning_rate": 2.7706663546311705e-06,
|
|
"loss": 0.9791,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 5.4731457800511505,
|
|
"grad_norm": 0.14065052834912603,
|
|
"learning_rate": 2.761859480417255e-06,
|
|
"loss": 1.0364,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 5.475703324808184,
|
|
"grad_norm": 0.14903101964341123,
|
|
"learning_rate": 2.753064382205396e-06,
|
|
"loss": 1.046,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 5.478260869565218,
|
|
"grad_norm": 0.12884063957129957,
|
|
"learning_rate": 2.7442810743046742e-06,
|
|
"loss": 1.0377,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 5.4808184143222505,
|
|
"grad_norm": 0.13327063753076238,
|
|
"learning_rate": 2.735509571004982e-06,
|
|
"loss": 1.0095,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 5.483375959079284,
|
|
"grad_norm": 0.1571390786677921,
|
|
"learning_rate": 2.7267498865770005e-06,
|
|
"loss": 0.9769,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 5.485933503836317,
|
|
"grad_norm": 0.1320156220064998,
|
|
"learning_rate": 2.718002035272197e-06,
|
|
"loss": 1.0057,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 5.4884910485933505,
|
|
"grad_norm": 0.1360636747597633,
|
|
"learning_rate": 2.7092660313227748e-06,
|
|
"loss": 1.0064,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 5.491048593350383,
|
|
"grad_norm": 0.13394654726028757,
|
|
"learning_rate": 2.700541888941667e-06,
|
|
"loss": 1.0025,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 5.493606138107417,
|
|
"grad_norm": 0.1460012982176339,
|
|
"learning_rate": 2.6918296223225026e-06,
|
|
"loss": 1.0227,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 5.4961636828644505,
|
|
"grad_norm": 0.13049152775591077,
|
|
"learning_rate": 2.683129245639603e-06,
|
|
"loss": 1.0393,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 5.498721227621483,
|
|
"grad_norm": 0.15254103744247385,
|
|
"learning_rate": 2.6744407730479325e-06,
|
|
"loss": 1.0279,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 5.501278772378517,
|
|
"grad_norm": 0.1440023793657765,
|
|
"learning_rate": 2.66576421868309e-06,
|
|
"loss": 1.0295,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 5.5038363171355495,
|
|
"grad_norm": 0.13606809517331622,
|
|
"learning_rate": 2.6570995966612945e-06,
|
|
"loss": 1.0299,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 5.506393861892583,
|
|
"grad_norm": 0.13926181662872325,
|
|
"learning_rate": 2.6484469210793384e-06,
|
|
"loss": 1.037,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 5.508951406649617,
|
|
"grad_norm": 0.14473456019169403,
|
|
"learning_rate": 2.6398062060145867e-06,
|
|
"loss": 1.017,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 5.5115089514066495,
|
|
"grad_norm": 0.13272081994045937,
|
|
"learning_rate": 2.631177465524938e-06,
|
|
"loss": 1.0217,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 5.514066496163683,
|
|
"grad_norm": 0.14026203110310534,
|
|
"learning_rate": 2.6225607136488194e-06,
|
|
"loss": 1.0021,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 5.516624040920716,
|
|
"grad_norm": 0.13205919977316974,
|
|
"learning_rate": 2.613955964405146e-06,
|
|
"loss": 1.052,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 5.5191815856777495,
|
|
"grad_norm": 0.13360379756882199,
|
|
"learning_rate": 2.605363231793302e-06,
|
|
"loss": 1.0499,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 5.521739130434782,
|
|
"grad_norm": 0.14208435941220482,
|
|
"learning_rate": 2.5967825297931328e-06,
|
|
"loss": 1.0172,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 5.524296675191816,
|
|
"grad_norm": 0.13295870010362018,
|
|
"learning_rate": 2.5882138723649018e-06,
|
|
"loss": 1.0334,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 5.526854219948849,
|
|
"grad_norm": 0.12489034371588933,
|
|
"learning_rate": 2.5796572734492777e-06,
|
|
"loss": 1.0103,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 5.529411764705882,
|
|
"grad_norm": 0.13244599397256537,
|
|
"learning_rate": 2.571112746967309e-06,
|
|
"loss": 1.0218,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 5.531969309462916,
|
|
"grad_norm": 0.15003256070846932,
|
|
"learning_rate": 2.5625803068204126e-06,
|
|
"loss": 1.0759,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 5.534526854219949,
|
|
"grad_norm": 0.1356632599292978,
|
|
"learning_rate": 2.554059966890332e-06,
|
|
"loss": 1.0042,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 5.537084398976982,
|
|
"grad_norm": 0.15088785982749878,
|
|
"learning_rate": 2.545551741039125e-06,
|
|
"loss": 1.0084,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 5.539641943734015,
|
|
"grad_norm": 0.13549191741444538,
|
|
"learning_rate": 2.5370556431091486e-06,
|
|
"loss": 1.0447,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 5.542199488491049,
|
|
"grad_norm": 0.1345097927774657,
|
|
"learning_rate": 2.5285716869230192e-06,
|
|
"loss": 1.0352,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 5.544757033248082,
|
|
"grad_norm": 0.1377603438588639,
|
|
"learning_rate": 2.5200998862836044e-06,
|
|
"loss": 1.0456,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 5.547314578005115,
|
|
"grad_norm": 0.13719837282442893,
|
|
"learning_rate": 2.5116402549739904e-06,
|
|
"loss": 1.0111,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 5.549872122762149,
|
|
"grad_norm": 0.12784774794791698,
|
|
"learning_rate": 2.503192806757474e-06,
|
|
"loss": 1.0555,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 5.552429667519181,
|
|
"grad_norm": 0.1377625979101254,
|
|
"learning_rate": 2.494757555377524e-06,
|
|
"loss": 1.0217,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 5.554987212276215,
|
|
"grad_norm": 0.13849942681054245,
|
|
"learning_rate": 2.486334514557761e-06,
|
|
"loss": 1.0175,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 5.557544757033249,
|
|
"grad_norm": 0.14070221787371265,
|
|
"learning_rate": 2.477923698001955e-06,
|
|
"loss": 1.03,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 5.560102301790281,
|
|
"grad_norm": 0.12917105115289923,
|
|
"learning_rate": 2.469525119393974e-06,
|
|
"loss": 1.0316,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 5.562659846547315,
|
|
"grad_norm": 0.14393204543904917,
|
|
"learning_rate": 2.461138792397779e-06,
|
|
"loss": 1.0429,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 5.565217391304348,
|
|
"grad_norm": 0.1350830986575781,
|
|
"learning_rate": 2.4527647306574e-06,
|
|
"loss": 1.0005,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 5.567774936061381,
|
|
"grad_norm": 0.1272869817285887,
|
|
"learning_rate": 2.4444029477969157e-06,
|
|
"loss": 1.0083,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 5.570332480818414,
|
|
"grad_norm": 0.1329875176980315,
|
|
"learning_rate": 2.4360534574204196e-06,
|
|
"loss": 1.0064,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 5.572890025575448,
|
|
"grad_norm": 0.13284521850316935,
|
|
"learning_rate": 2.427716273112011e-06,
|
|
"loss": 1.026,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 5.57544757033248,
|
|
"grad_norm": 0.13655729094534802,
|
|
"learning_rate": 2.4193914084357708e-06,
|
|
"loss": 1.0311,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 5.578005115089514,
|
|
"grad_norm": 0.13249886049800538,
|
|
"learning_rate": 2.4110788769357305e-06,
|
|
"loss": 1.0245,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 5.580562659846548,
|
|
"grad_norm": 0.14032611666517894,
|
|
"learning_rate": 2.402778692135861e-06,
|
|
"loss": 1.0218,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 5.58312020460358,
|
|
"grad_norm": 0.13366091002172387,
|
|
"learning_rate": 2.394490867540039e-06,
|
|
"loss": 1.0275,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 5.585677749360614,
|
|
"grad_norm": 0.13700684117392312,
|
|
"learning_rate": 2.3862154166320417e-06,
|
|
"loss": 1.0055,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 0.13884798487973146,
|
|
"learning_rate": 2.3779523528755143e-06,
|
|
"loss": 1.0298,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 5.59079283887468,
|
|
"grad_norm": 0.14068128211510497,
|
|
"learning_rate": 2.3697016897139345e-06,
|
|
"loss": 1.0568,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 5.593350383631714,
|
|
"grad_norm": 0.1367538445761975,
|
|
"learning_rate": 2.361463440570623e-06,
|
|
"loss": 1.0211,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 5.595907928388747,
|
|
"grad_norm": 0.137882423029852,
|
|
"learning_rate": 2.353237618848695e-06,
|
|
"loss": 1.0388,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 5.59846547314578,
|
|
"grad_norm": 0.13627762962811446,
|
|
"learning_rate": 2.3450242379310427e-06,
|
|
"loss": 1.0423,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 5.601023017902813,
|
|
"grad_norm": 0.13080557028764447,
|
|
"learning_rate": 2.3368233111803305e-06,
|
|
"loss": 1.0209,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 5.603580562659847,
|
|
"grad_norm": 0.13373365809565754,
|
|
"learning_rate": 2.328634851938949e-06,
|
|
"loss": 1.0548,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 5.6061381074168795,
|
|
"grad_norm": 0.14670903806258018,
|
|
"learning_rate": 2.3204588735290155e-06,
|
|
"loss": 1.0283,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 5.608695652173913,
|
|
"grad_norm": 0.1351316953465856,
|
|
"learning_rate": 2.312295389252326e-06,
|
|
"loss": 1.0253,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 5.611253196930946,
|
|
"grad_norm": 0.14536763822784776,
|
|
"learning_rate": 2.304144412390367e-06,
|
|
"loss": 1.0289,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 5.6138107416879794,
|
|
"grad_norm": 0.1373151541315976,
|
|
"learning_rate": 2.2960059562042647e-06,
|
|
"loss": 1.0227,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 5.616368286445013,
|
|
"grad_norm": 0.12983515898716327,
|
|
"learning_rate": 2.2878800339347763e-06,
|
|
"loss": 1.0256,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 5.618925831202046,
|
|
"grad_norm": 0.12825544867685706,
|
|
"learning_rate": 2.279766658802275e-06,
|
|
"loss": 1.0468,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 5.621483375959079,
|
|
"grad_norm": 0.14977773117762613,
|
|
"learning_rate": 2.2716658440067085e-06,
|
|
"loss": 1.0045,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 5.624040920716112,
|
|
"grad_norm": 0.163815240244753,
|
|
"learning_rate": 2.2635776027276056e-06,
|
|
"loss": 1.0211,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 5.626598465473146,
|
|
"grad_norm": 0.1311668589781632,
|
|
"learning_rate": 2.255501948124017e-06,
|
|
"loss": 1.0318,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 5.629156010230179,
|
|
"grad_norm": 0.13085196604157895,
|
|
"learning_rate": 2.247438893334537e-06,
|
|
"loss": 1.0219,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 5.631713554987212,
|
|
"grad_norm": 0.1273903714267332,
|
|
"learning_rate": 2.2393884514772457e-06,
|
|
"loss": 0.9929,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 5.634271099744246,
|
|
"grad_norm": 0.13914897324377146,
|
|
"learning_rate": 2.231350635649713e-06,
|
|
"loss": 1.0452,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 5.6368286445012785,
|
|
"grad_norm": 0.13636448611829766,
|
|
"learning_rate": 2.223325458928961e-06,
|
|
"loss": 1.0078,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 5.639386189258312,
|
|
"grad_norm": 0.13875063448351502,
|
|
"learning_rate": 2.2153129343714484e-06,
|
|
"loss": 1.044,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 5.641943734015345,
|
|
"grad_norm": 0.1268762090418032,
|
|
"learning_rate": 2.207313075013059e-06,
|
|
"loss": 1.021,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 5.6445012787723785,
|
|
"grad_norm": 0.14115564139986136,
|
|
"learning_rate": 2.1993258938690533e-06,
|
|
"loss": 0.9935,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 5.647058823529412,
|
|
"grad_norm": 0.13114159318824248,
|
|
"learning_rate": 2.191351403934082e-06,
|
|
"loss": 1.0314,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 5.649616368286445,
|
|
"grad_norm": 0.12884976286632582,
|
|
"learning_rate": 2.183389618182139e-06,
|
|
"loss": 1.0046,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 5.6521739130434785,
|
|
"grad_norm": 0.12995582182420992,
|
|
"learning_rate": 2.1754405495665553e-06,
|
|
"loss": 1.0373,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 5.654731457800511,
|
|
"grad_norm": 0.13421458626767693,
|
|
"learning_rate": 2.1675042110199664e-06,
|
|
"loss": 1.016,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 5.657289002557545,
|
|
"grad_norm": 0.13511795554454278,
|
|
"learning_rate": 2.1595806154542965e-06,
|
|
"loss": 1.0203,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 5.659846547314578,
|
|
"grad_norm": 0.12526718028345482,
|
|
"learning_rate": 2.1516697757607464e-06,
|
|
"loss": 1.048,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 5.662404092071611,
|
|
"grad_norm": 0.13609131915375153,
|
|
"learning_rate": 2.143771704809753e-06,
|
|
"loss": 1.0221,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 5.664961636828645,
|
|
"grad_norm": 0.13389453092548842,
|
|
"learning_rate": 2.1358864154509838e-06,
|
|
"loss": 0.995,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 5.667519181585678,
|
|
"grad_norm": 0.13120531247951384,
|
|
"learning_rate": 2.128013920513311e-06,
|
|
"loss": 1.002,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 5.670076726342711,
|
|
"grad_norm": 0.12595917765897047,
|
|
"learning_rate": 2.1201542328047965e-06,
|
|
"loss": 1.0307,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 5.672634271099744,
|
|
"grad_norm": 0.1327291524503786,
|
|
"learning_rate": 2.112307365112657e-06,
|
|
"loss": 1.0042,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 5.675191815856778,
|
|
"grad_norm": 0.14073038841763177,
|
|
"learning_rate": 2.1044733302032527e-06,
|
|
"loss": 1.0089,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 5.677749360613811,
|
|
"grad_norm": 0.13145348857222067,
|
|
"learning_rate": 2.0966521408220753e-06,
|
|
"loss": 1.0191,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 5.680306905370844,
|
|
"grad_norm": 0.13758179967194598,
|
|
"learning_rate": 2.088843809693708e-06,
|
|
"loss": 1.0389,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 5.6828644501278776,
|
|
"grad_norm": 0.12934306601192186,
|
|
"learning_rate": 2.081048349521814e-06,
|
|
"loss": 1.0386,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 5.68542199488491,
|
|
"grad_norm": 0.12132994106171455,
|
|
"learning_rate": 2.0732657729891236e-06,
|
|
"loss": 1.0237,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 5.687979539641944,
|
|
"grad_norm": 0.12639844337210293,
|
|
"learning_rate": 2.065496092757403e-06,
|
|
"loss": 1.0039,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 5.690537084398977,
|
|
"grad_norm": 0.1397408236378054,
|
|
"learning_rate": 2.0577393214674335e-06,
|
|
"loss": 1.0782,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 5.69309462915601,
|
|
"grad_norm": 0.12975569414651789,
|
|
"learning_rate": 2.049995471738995e-06,
|
|
"loss": 1.029,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 5.695652173913043,
|
|
"grad_norm": 0.13025784101096557,
|
|
"learning_rate": 2.042264556170853e-06,
|
|
"loss": 0.9846,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 5.698209718670077,
|
|
"grad_norm": 0.1282941793591346,
|
|
"learning_rate": 2.034546587340719e-06,
|
|
"loss": 1.0143,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 5.70076726342711,
|
|
"grad_norm": 0.13236558983338137,
|
|
"learning_rate": 2.026841577805245e-06,
|
|
"loss": 1.0534,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 5.703324808184143,
|
|
"grad_norm": 0.13423342295188723,
|
|
"learning_rate": 2.019149540100005e-06,
|
|
"loss": 1.0568,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 5.705882352941177,
|
|
"grad_norm": 0.13468947441049006,
|
|
"learning_rate": 2.0114704867394598e-06,
|
|
"loss": 1.014,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 5.708439897698209,
|
|
"grad_norm": 0.13388666927274886,
|
|
"learning_rate": 2.0038044302169492e-06,
|
|
"loss": 1.0246,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 5.710997442455243,
|
|
"grad_norm": 0.13458582769078975,
|
|
"learning_rate": 1.9961513830046663e-06,
|
|
"loss": 1.0335,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 5.713554987212277,
|
|
"grad_norm": 0.1334530516759338,
|
|
"learning_rate": 1.988511357553644e-06,
|
|
"loss": 1.0107,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 5.716112531969309,
|
|
"grad_norm": 0.13432155143391286,
|
|
"learning_rate": 1.980884366293725e-06,
|
|
"loss": 1.002,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 5.718670076726343,
|
|
"grad_norm": 0.1321302038455819,
|
|
"learning_rate": 1.973270421633543e-06,
|
|
"loss": 1.0281,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 5.721227621483376,
|
|
"grad_norm": 0.13482083547904436,
|
|
"learning_rate": 1.965669535960516e-06,
|
|
"loss": 1.0032,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 5.723785166240409,
|
|
"grad_norm": 0.1362582011621695,
|
|
"learning_rate": 1.9580817216408075e-06,
|
|
"loss": 1.0151,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 5.726342710997442,
|
|
"grad_norm": 0.13381683599607858,
|
|
"learning_rate": 1.9505069910193164e-06,
|
|
"loss": 0.9876,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 5.728900255754476,
|
|
"grad_norm": 0.12202356902109507,
|
|
"learning_rate": 1.9429453564196543e-06,
|
|
"loss": 1.0203,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 5.731457800511509,
|
|
"grad_norm": 0.12193705736628206,
|
|
"learning_rate": 1.9353968301441306e-06,
|
|
"loss": 0.9752,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 5.734015345268542,
|
|
"grad_norm": 0.1264989543927549,
|
|
"learning_rate": 1.927861424473726e-06,
|
|
"loss": 1.025,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 5.736572890025576,
|
|
"grad_norm": 0.14123473229613026,
|
|
"learning_rate": 1.920339151668069e-06,
|
|
"loss": 1.0125,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 5.739130434782608,
|
|
"grad_norm": 0.12538976213285152,
|
|
"learning_rate": 1.9128300239654353e-06,
|
|
"loss": 1.0103,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 5.741687979539642,
|
|
"grad_norm": 0.12777815103030538,
|
|
"learning_rate": 1.9053340535827004e-06,
|
|
"loss": 1.0365,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 5.744245524296675,
|
|
"grad_norm": 0.9983046758036718,
|
|
"learning_rate": 1.8978512527153414e-06,
|
|
"loss": 1.0208,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 5.746803069053708,
|
|
"grad_norm": 0.13869698166830857,
|
|
"learning_rate": 1.8903816335374048e-06,
|
|
"loss": 1.0092,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 5.749360613810742,
|
|
"grad_norm": 0.13909895674572456,
|
|
"learning_rate": 1.882925208201498e-06,
|
|
"loss": 0.9976,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 5.751918158567775,
|
|
"grad_norm": 0.13223029843900272,
|
|
"learning_rate": 1.8754819888387576e-06,
|
|
"loss": 1.0226,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 5.754475703324808,
|
|
"grad_norm": 0.1355611449623982,
|
|
"learning_rate": 1.868051987558832e-06,
|
|
"loss": 1.0547,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 5.757033248081841,
|
|
"grad_norm": 0.1335592612771471,
|
|
"learning_rate": 1.8606352164498754e-06,
|
|
"loss": 1.022,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 5.759590792838875,
|
|
"grad_norm": 0.13517321815446315,
|
|
"learning_rate": 1.8532316875785084e-06,
|
|
"loss": 1.059,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 5.762148337595908,
|
|
"grad_norm": 0.12900109188000092,
|
|
"learning_rate": 1.8458414129898072e-06,
|
|
"loss": 1.0121,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 5.764705882352941,
|
|
"grad_norm": 0.13164593690766663,
|
|
"learning_rate": 1.8384644047072864e-06,
|
|
"loss": 1.0363,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 5.767263427109975,
|
|
"grad_norm": 0.12836234729861262,
|
|
"learning_rate": 1.8311006747328775e-06,
|
|
"loss": 1.0342,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 5.7698209718670075,
|
|
"grad_norm": 0.13352486032417052,
|
|
"learning_rate": 1.8237502350469161e-06,
|
|
"loss": 1.028,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 5.772378516624041,
|
|
"grad_norm": 0.12666547237956713,
|
|
"learning_rate": 1.8164130976080962e-06,
|
|
"loss": 0.9998,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 5.774936061381074,
|
|
"grad_norm": 0.12597408036958038,
|
|
"learning_rate": 1.8090892743534904e-06,
|
|
"loss": 0.9861,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 5.7774936061381075,
|
|
"grad_norm": 0.13091969265184827,
|
|
"learning_rate": 1.8017787771984973e-06,
|
|
"loss": 1.0196,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 5.78005115089514,
|
|
"grad_norm": 0.1328229090332335,
|
|
"learning_rate": 1.7944816180368408e-06,
|
|
"loss": 1.0422,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 5.782608695652174,
|
|
"grad_norm": 0.12677176745235394,
|
|
"learning_rate": 1.7871978087405384e-06,
|
|
"loss": 1.0097,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 5.7851662404092075,
|
|
"grad_norm": 0.12437893059639113,
|
|
"learning_rate": 1.7799273611598943e-06,
|
|
"loss": 1.0121,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 5.78772378516624,
|
|
"grad_norm": 0.1251367564202301,
|
|
"learning_rate": 1.772670287123479e-06,
|
|
"loss": 0.9939,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 5.790281329923274,
|
|
"grad_norm": 0.1302978820127013,
|
|
"learning_rate": 1.765426598438088e-06,
|
|
"loss": 1.0377,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 5.792838874680307,
|
|
"grad_norm": 0.12296911765019702,
|
|
"learning_rate": 1.7581963068887554e-06,
|
|
"loss": 1.0082,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 5.79539641943734,
|
|
"grad_norm": 0.1310292740348814,
|
|
"learning_rate": 1.7509794242387135e-06,
|
|
"loss": 1.0455,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 5.797953964194374,
|
|
"grad_norm": 0.11962773068304663,
|
|
"learning_rate": 1.7437759622293771e-06,
|
|
"loss": 1.0301,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 5.8005115089514065,
|
|
"grad_norm": 0.1338997971252641,
|
|
"learning_rate": 1.7365859325803269e-06,
|
|
"loss": 1.028,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 5.80306905370844,
|
|
"grad_norm": 0.12161266269112997,
|
|
"learning_rate": 1.7294093469892948e-06,
|
|
"loss": 1.0253,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 5.805626598465473,
|
|
"grad_norm": 0.12194546591797659,
|
|
"learning_rate": 1.7222462171321397e-06,
|
|
"loss": 1.0112,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 5.8081841432225065,
|
|
"grad_norm": 0.12690399558973253,
|
|
"learning_rate": 1.7150965546628184e-06,
|
|
"loss": 1.0168,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 5.810741687979539,
|
|
"grad_norm": 0.1329159422591136,
|
|
"learning_rate": 1.7079603712133908e-06,
|
|
"loss": 0.9867,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 5.813299232736573,
|
|
"grad_norm": 0.12116530026113131,
|
|
"learning_rate": 1.7008376783939772e-06,
|
|
"loss": 1.0085,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 5.8158567774936065,
|
|
"grad_norm": 0.12935715986878404,
|
|
"learning_rate": 1.6937284877927596e-06,
|
|
"loss": 1.0162,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 5.818414322250639,
|
|
"grad_norm": 0.12690629229315065,
|
|
"learning_rate": 1.6866328109759377e-06,
|
|
"loss": 0.9794,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 5.820971867007673,
|
|
"grad_norm": 0.12407793133570494,
|
|
"learning_rate": 1.6795506594877388e-06,
|
|
"loss": 1.031,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 5.823529411764706,
|
|
"grad_norm": 0.12704984040936246,
|
|
"learning_rate": 1.6724820448503852e-06,
|
|
"loss": 1.0204,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 5.826086956521739,
|
|
"grad_norm": 0.13001027110393584,
|
|
"learning_rate": 1.6654269785640608e-06,
|
|
"loss": 1.0448,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 5.828644501278772,
|
|
"grad_norm": 0.11915860756194478,
|
|
"learning_rate": 1.658385472106926e-06,
|
|
"loss": 1.0146,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 5.831202046035806,
|
|
"grad_norm": 0.12897358959587038,
|
|
"learning_rate": 1.6513575369350654e-06,
|
|
"loss": 1.021,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 5.833759590792839,
|
|
"grad_norm": 0.13505425066582885,
|
|
"learning_rate": 1.6443431844824975e-06,
|
|
"loss": 1.0002,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 5.836317135549872,
|
|
"grad_norm": 0.12555260697675938,
|
|
"learning_rate": 1.637342426161126e-06,
|
|
"loss": 1.0013,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 5.838874680306906,
|
|
"grad_norm": 0.1276721077986895,
|
|
"learning_rate": 1.630355273360752e-06,
|
|
"loss": 1.0083,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 5.841432225063938,
|
|
"grad_norm": 0.12628248303483217,
|
|
"learning_rate": 1.623381737449038e-06,
|
|
"loss": 1.0495,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 5.843989769820972,
|
|
"grad_norm": 0.13396531513865312,
|
|
"learning_rate": 1.6164218297714884e-06,
|
|
"loss": 0.9778,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 5.846547314578006,
|
|
"grad_norm": 0.13405119018709796,
|
|
"learning_rate": 1.609475561651438e-06,
|
|
"loss": 0.9882,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 5.849104859335038,
|
|
"grad_norm": 0.11946775190358987,
|
|
"learning_rate": 1.6025429443900286e-06,
|
|
"loss": 1.0402,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 5.851662404092072,
|
|
"grad_norm": 0.1286546110791319,
|
|
"learning_rate": 1.5956239892661995e-06,
|
|
"loss": 1.0323,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 5.854219948849105,
|
|
"grad_norm": 0.12706067523411144,
|
|
"learning_rate": 1.588718707536656e-06,
|
|
"loss": 1.0153,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 5.856777493606138,
|
|
"grad_norm": 0.12632255275977317,
|
|
"learning_rate": 1.5818271104358574e-06,
|
|
"loss": 1.0359,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 5.859335038363171,
|
|
"grad_norm": 0.12022429130741803,
|
|
"learning_rate": 1.5749492091760054e-06,
|
|
"loss": 1.0272,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 5.861892583120205,
|
|
"grad_norm": 0.12754203390815988,
|
|
"learning_rate": 1.5680850149470139e-06,
|
|
"loss": 1.0141,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 5.864450127877237,
|
|
"grad_norm": 0.12789955923845803,
|
|
"learning_rate": 1.5612345389164974e-06,
|
|
"loss": 1.0213,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 5.867007672634271,
|
|
"grad_norm": 0.13105545311215508,
|
|
"learning_rate": 1.5543977922297494e-06,
|
|
"loss": 1.0203,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 5.869565217391305,
|
|
"grad_norm": 0.12692375648838364,
|
|
"learning_rate": 1.5475747860097335e-06,
|
|
"loss": 1.0175,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 5.872122762148337,
|
|
"grad_norm": 0.12758413074272634,
|
|
"learning_rate": 1.5407655313570525e-06,
|
|
"loss": 1.0187,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 5.874680306905371,
|
|
"grad_norm": 0.1347266986438743,
|
|
"learning_rate": 1.5339700393499357e-06,
|
|
"loss": 0.978,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 5.877237851662404,
|
|
"grad_norm": 0.1286412634763229,
|
|
"learning_rate": 1.5271883210442285e-06,
|
|
"loss": 1.0243,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 5.879795396419437,
|
|
"grad_norm": 0.13598473504010955,
|
|
"learning_rate": 1.5204203874733604e-06,
|
|
"loss": 1.0458,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 5.882352941176471,
|
|
"grad_norm": 0.12217909066335947,
|
|
"learning_rate": 1.5136662496483346e-06,
|
|
"loss": 1.0159,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 5.884910485933504,
|
|
"grad_norm": 0.13697298325476193,
|
|
"learning_rate": 1.5069259185577112e-06,
|
|
"loss": 1.0234,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 5.887468030690537,
|
|
"grad_norm": 0.12856950834935316,
|
|
"learning_rate": 1.5001994051675894e-06,
|
|
"loss": 1.0005,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 5.89002557544757,
|
|
"grad_norm": 0.12272037964597306,
|
|
"learning_rate": 1.4934867204215864e-06,
|
|
"loss": 1.0182,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 5.892583120204604,
|
|
"grad_norm": 0.12396363368680077,
|
|
"learning_rate": 1.486787875240816e-06,
|
|
"loss": 1.0023,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 5.8951406649616365,
|
|
"grad_norm": 0.12822276354353365,
|
|
"learning_rate": 1.480102880523886e-06,
|
|
"loss": 1.0114,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 5.89769820971867,
|
|
"grad_norm": 0.12823957750976692,
|
|
"learning_rate": 1.4734317471468618e-06,
|
|
"loss": 1.0279,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 5.900255754475703,
|
|
"grad_norm": 0.12481205791568802,
|
|
"learning_rate": 1.4667744859632615e-06,
|
|
"loss": 0.9748,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 5.9028132992327365,
|
|
"grad_norm": 0.12376259417000356,
|
|
"learning_rate": 1.4601311078040304e-06,
|
|
"loss": 1.0291,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 5.90537084398977,
|
|
"grad_norm": 0.12039082706987389,
|
|
"learning_rate": 1.4535016234775324e-06,
|
|
"loss": 0.9835,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 5.907928388746803,
|
|
"grad_norm": 0.1278580324817726,
|
|
"learning_rate": 1.4468860437695243e-06,
|
|
"loss": 1.0276,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 5.910485933503836,
|
|
"grad_norm": 0.12971723157693313,
|
|
"learning_rate": 1.4402843794431354e-06,
|
|
"loss": 1.0085,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 5.913043478260869,
|
|
"grad_norm": 0.12766208083651814,
|
|
"learning_rate": 1.4336966412388674e-06,
|
|
"loss": 1.0392,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 5.915601023017903,
|
|
"grad_norm": 0.12363722996422528,
|
|
"learning_rate": 1.4271228398745552e-06,
|
|
"loss": 1.0063,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 5.918158567774936,
|
|
"grad_norm": 0.12491762028888559,
|
|
"learning_rate": 1.4205629860453641e-06,
|
|
"loss": 1.0598,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 5.920716112531969,
|
|
"grad_norm": 0.12614418988739717,
|
|
"learning_rate": 1.4140170904237616e-06,
|
|
"loss": 1.0078,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 5.923273657289003,
|
|
"grad_norm": 0.12871200444350614,
|
|
"learning_rate": 1.4074851636595165e-06,
|
|
"loss": 0.9912,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 5.9258312020460355,
|
|
"grad_norm": 0.12176341068010405,
|
|
"learning_rate": 1.400967216379663e-06,
|
|
"loss": 1.0023,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 5.928388746803069,
|
|
"grad_norm": 0.12736989149935335,
|
|
"learning_rate": 1.394463259188491e-06,
|
|
"loss": 1.0097,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 5.930946291560103,
|
|
"grad_norm": 0.12401472625813548,
|
|
"learning_rate": 1.3879733026675367e-06,
|
|
"loss": 1.036,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 5.9335038363171355,
|
|
"grad_norm": 0.12937517228342466,
|
|
"learning_rate": 1.3814973573755518e-06,
|
|
"loss": 1.036,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 5.936061381074169,
|
|
"grad_norm": 0.127613205394154,
|
|
"learning_rate": 1.3750354338484916e-06,
|
|
"loss": 0.9881,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 5.938618925831202,
|
|
"grad_norm": 0.12739173803258835,
|
|
"learning_rate": 1.3685875425995064e-06,
|
|
"loss": 1.0191,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 5.9411764705882355,
|
|
"grad_norm": 0.13795008867321654,
|
|
"learning_rate": 1.3621536941189107e-06,
|
|
"loss": 1.0144,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 5.943734015345268,
|
|
"grad_norm": 0.12984194360371934,
|
|
"learning_rate": 1.355733898874173e-06,
|
|
"loss": 1.049,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 5.946291560102302,
|
|
"grad_norm": 0.13129623864662363,
|
|
"learning_rate": 1.3493281673098956e-06,
|
|
"loss": 1.015,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 5.948849104859335,
|
|
"grad_norm": 0.12793818903871373,
|
|
"learning_rate": 1.3429365098478087e-06,
|
|
"loss": 0.9981,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 5.951406649616368,
|
|
"grad_norm": 0.1255755665233896,
|
|
"learning_rate": 1.3365589368867371e-06,
|
|
"loss": 0.9794,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 5.953964194373402,
|
|
"grad_norm": 0.1279352390496069,
|
|
"learning_rate": 1.330195458802591e-06,
|
|
"loss": 1.0249,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 5.956521739130435,
|
|
"grad_norm": 0.128293917496119,
|
|
"learning_rate": 1.323846085948356e-06,
|
|
"loss": 0.9898,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 5.959079283887468,
|
|
"grad_norm": 0.12767639872018413,
|
|
"learning_rate": 1.3175108286540617e-06,
|
|
"loss": 1.0352,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 5.961636828644501,
|
|
"grad_norm": 0.12662645466299385,
|
|
"learning_rate": 1.3111896972267768e-06,
|
|
"loss": 1.0055,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 5.964194373401535,
|
|
"grad_norm": 0.12253304775794958,
|
|
"learning_rate": 1.3048827019505828e-06,
|
|
"loss": 0.9892,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 5.966751918158568,
|
|
"grad_norm": 0.13233724231669944,
|
|
"learning_rate": 1.2985898530865736e-06,
|
|
"loss": 0.9883,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 5.969309462915601,
|
|
"grad_norm": 0.12275354609893704,
|
|
"learning_rate": 1.2923111608728168e-06,
|
|
"loss": 1.0221,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 5.971867007672635,
|
|
"grad_norm": 0.13544461017695578,
|
|
"learning_rate": 1.2860466355243506e-06,
|
|
"loss": 1.0587,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 5.974424552429667,
|
|
"grad_norm": 0.125504059793445,
|
|
"learning_rate": 1.2797962872331693e-06,
|
|
"loss": 1.0096,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 5.976982097186701,
|
|
"grad_norm": 0.13226317160144294,
|
|
"learning_rate": 1.2735601261681985e-06,
|
|
"loss": 1.0489,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 5.979539641943734,
|
|
"grad_norm": 0.12803280744387227,
|
|
"learning_rate": 1.2673381624752813e-06,
|
|
"loss": 1.0307,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 5.982097186700767,
|
|
"grad_norm": 0.12863654527584692,
|
|
"learning_rate": 1.2611304062771613e-06,
|
|
"loss": 1.017,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 5.9846547314578,
|
|
"grad_norm": 0.12401870969986709,
|
|
"learning_rate": 1.254936867673474e-06,
|
|
"loss": 1.0056,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 5.987212276214834,
|
|
"grad_norm": 0.11891932350440772,
|
|
"learning_rate": 1.2487575567407184e-06,
|
|
"loss": 0.9998,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 5.989769820971867,
|
|
"grad_norm": 0.12341714944406178,
|
|
"learning_rate": 1.2425924835322422e-06,
|
|
"loss": 1.0247,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 5.9923273657289,
|
|
"grad_norm": 0.1229416512376773,
|
|
"learning_rate": 1.2364416580782413e-06,
|
|
"loss": 1.0195,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 5.994884910485934,
|
|
"grad_norm": 0.12303637728566778,
|
|
"learning_rate": 1.2303050903857195e-06,
|
|
"loss": 1.0156,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 5.997442455242966,
|
|
"grad_norm": 0.13561743214244987,
|
|
"learning_rate": 1.2241827904384928e-06,
|
|
"loss": 1.0304,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.11664031093263695,
|
|
"learning_rate": 1.2180747681971539e-06,
|
|
"loss": 1.0047,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 6.002557544757034,
|
|
"grad_norm": 0.1230389316598828,
|
|
"learning_rate": 1.211981033599079e-06,
|
|
"loss": 1.0416,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 6.005115089514066,
|
|
"grad_norm": 0.12948288079807183,
|
|
"learning_rate": 1.2059015965583908e-06,
|
|
"loss": 1.0123,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 6.0076726342711,
|
|
"grad_norm": 0.1207876296019636,
|
|
"learning_rate": 1.1998364669659524e-06,
|
|
"loss": 0.9796,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 6.010230179028133,
|
|
"grad_norm": 0.1191785329656778,
|
|
"learning_rate": 1.1937856546893533e-06,
|
|
"loss": 0.9862,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 6.012787723785166,
|
|
"grad_norm": 0.12106597514269477,
|
|
"learning_rate": 1.1877491695728827e-06,
|
|
"loss": 1.0181,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 6.015345268542199,
|
|
"grad_norm": 0.12714775517717014,
|
|
"learning_rate": 1.181727021437531e-06,
|
|
"loss": 0.9901,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 6.017902813299233,
|
|
"grad_norm": 0.12314221662217836,
|
|
"learning_rate": 1.1757192200809487e-06,
|
|
"loss": 1.0139,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 6.020460358056266,
|
|
"grad_norm": 0.1205656248704543,
|
|
"learning_rate": 1.1697257752774581e-06,
|
|
"loss": 1.0064,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 6.023017902813299,
|
|
"grad_norm": 0.12375532206452915,
|
|
"learning_rate": 1.1637466967780186e-06,
|
|
"loss": 1.0055,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 6.025575447570333,
|
|
"grad_norm": 0.13727612152509278,
|
|
"learning_rate": 1.1577819943102132e-06,
|
|
"loss": 1.0334,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 6.028132992327365,
|
|
"grad_norm": 0.13743682672187252,
|
|
"learning_rate": 1.1518316775782456e-06,
|
|
"loss": 1.063,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 6.030690537084399,
|
|
"grad_norm": 0.1269152481030464,
|
|
"learning_rate": 1.1458957562629048e-06,
|
|
"loss": 1.0245,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 6.033248081841432,
|
|
"grad_norm": 0.12054742496527425,
|
|
"learning_rate": 1.1399742400215685e-06,
|
|
"loss": 1.016,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 6.035805626598465,
|
|
"grad_norm": 0.11563655740461991,
|
|
"learning_rate": 1.1340671384881664e-06,
|
|
"loss": 1.0034,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 6.038363171355499,
|
|
"grad_norm": 0.12654719374228424,
|
|
"learning_rate": 1.128174461273187e-06,
|
|
"loss": 1.0303,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 6.040920716112532,
|
|
"grad_norm": 0.13400791982749355,
|
|
"learning_rate": 1.122296217963651e-06,
|
|
"loss": 0.9908,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 6.043478260869565,
|
|
"grad_norm": 0.13721318190820386,
|
|
"learning_rate": 1.116432418123088e-06,
|
|
"loss": 1.0143,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 6.046035805626598,
|
|
"grad_norm": 0.1331473057560735,
|
|
"learning_rate": 1.1105830712915355e-06,
|
|
"loss": 1.0389,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 6.048593350383632,
|
|
"grad_norm": 0.12186052033355585,
|
|
"learning_rate": 1.1047481869855136e-06,
|
|
"loss": 0.9923,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 6.051150895140665,
|
|
"grad_norm": 0.130398414275441,
|
|
"learning_rate": 1.0989277746980186e-06,
|
|
"loss": 0.9989,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 6.053708439897698,
|
|
"grad_norm": 0.1212752348474763,
|
|
"learning_rate": 1.0931218438984903e-06,
|
|
"loss": 1.0002,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 6.056265984654732,
|
|
"grad_norm": 0.12066129403697316,
|
|
"learning_rate": 1.0873304040328193e-06,
|
|
"loss": 0.9855,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 6.0588235294117645,
|
|
"grad_norm": 0.12980745503624036,
|
|
"learning_rate": 1.0815534645233182e-06,
|
|
"loss": 1.0108,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 6.061381074168798,
|
|
"grad_norm": 0.12190895753762201,
|
|
"learning_rate": 1.075791034768704e-06,
|
|
"loss": 1.0134,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 6.063938618925831,
|
|
"grad_norm": 0.11736296572501317,
|
|
"learning_rate": 1.0700431241440888e-06,
|
|
"loss": 0.9819,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 6.0664961636828645,
|
|
"grad_norm": 0.11803134631202541,
|
|
"learning_rate": 1.064309742000963e-06,
|
|
"loss": 0.999,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 6.069053708439898,
|
|
"grad_norm": 0.12274428069266924,
|
|
"learning_rate": 1.0585908976671844e-06,
|
|
"loss": 1.0263,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 6.071611253196931,
|
|
"grad_norm": 0.1280904409678555,
|
|
"learning_rate": 1.052886600446954e-06,
|
|
"loss": 0.9989,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 6.0741687979539645,
|
|
"grad_norm": 0.13800491036101872,
|
|
"learning_rate": 1.0471968596208026e-06,
|
|
"loss": 1.0168,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 6.076726342710997,
|
|
"grad_norm": 0.125255996087832,
|
|
"learning_rate": 1.0415216844455889e-06,
|
|
"loss": 1.0016,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 6.079283887468031,
|
|
"grad_norm": 0.12500402095406113,
|
|
"learning_rate": 1.0358610841544657e-06,
|
|
"loss": 1.0207,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 6.081841432225064,
|
|
"grad_norm": 0.12102753345414748,
|
|
"learning_rate": 1.0302150679568745e-06,
|
|
"loss": 0.9889,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 6.084398976982097,
|
|
"grad_norm": 0.1263965580697967,
|
|
"learning_rate": 1.0245836450385304e-06,
|
|
"loss": 1.0278,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 6.086956521739131,
|
|
"grad_norm": 0.12426986420829644,
|
|
"learning_rate": 1.0189668245614092e-06,
|
|
"loss": 1.0024,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 6.089514066496164,
|
|
"grad_norm": 0.12124987678343191,
|
|
"learning_rate": 1.0133646156637244e-06,
|
|
"loss": 1.0346,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 6.092071611253197,
|
|
"grad_norm": 0.11760759251820775,
|
|
"learning_rate": 1.0077770274599187e-06,
|
|
"loss": 1.0176,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 6.09462915601023,
|
|
"grad_norm": 0.11882704515829542,
|
|
"learning_rate": 1.002204069040652e-06,
|
|
"loss": 0.9894,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 6.0971867007672635,
|
|
"grad_norm": 0.12369290549039276,
|
|
"learning_rate": 9.966457494727777e-07,
|
|
"loss": 1.04,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 6.099744245524296,
|
|
"grad_norm": 0.12345493397851956,
|
|
"learning_rate": 9.91102077799333e-07,
|
|
"loss": 1.0049,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 6.10230179028133,
|
|
"grad_norm": 0.12872126244712379,
|
|
"learning_rate": 9.855730630395244e-07,
|
|
"loss": 0.9933,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 6.1048593350383635,
|
|
"grad_norm": 0.11772835201472491,
|
|
"learning_rate": 9.800587141887173e-07,
|
|
"loss": 1.0285,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 6.107416879795396,
|
|
"grad_norm": 0.12252902927138364,
|
|
"learning_rate": 9.745590402184092e-07,
|
|
"loss": 1.0134,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 6.10997442455243,
|
|
"grad_norm": 0.12214679346044635,
|
|
"learning_rate": 9.690740500762241e-07,
|
|
"loss": 0.9778,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 6.112531969309463,
|
|
"grad_norm": 0.12270563199721099,
|
|
"learning_rate": 9.636037526859032e-07,
|
|
"loss": 1.0048,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 6.115089514066496,
|
|
"grad_norm": 0.13289561214559903,
|
|
"learning_rate": 9.58148156947276e-07,
|
|
"loss": 1.0355,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 6.117647058823529,
|
|
"grad_norm": 0.124015797218616,
|
|
"learning_rate": 9.52707271736254e-07,
|
|
"loss": 0.9894,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 6.120204603580563,
|
|
"grad_norm": 0.12869746602968873,
|
|
"learning_rate": 9.472811059048182e-07,
|
|
"loss": 1.034,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 6.122762148337596,
|
|
"grad_norm": 0.11502225665357182,
|
|
"learning_rate": 9.418696682810014e-07,
|
|
"loss": 1.0279,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 6.125319693094629,
|
|
"grad_norm": 0.12442843747682036,
|
|
"learning_rate": 9.364729676688755e-07,
|
|
"loss": 1.0346,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 6.127877237851663,
|
|
"grad_norm": 0.12203934311867798,
|
|
"learning_rate": 9.310910128485317e-07,
|
|
"loss": 1.0042,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 6.130434782608695,
|
|
"grad_norm": 0.13225053449453802,
|
|
"learning_rate": 9.257238125760781e-07,
|
|
"loss": 0.9979,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 6.132992327365729,
|
|
"grad_norm": 0.11626249473093271,
|
|
"learning_rate": 9.203713755836108e-07,
|
|
"loss": 1.0151,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 6.135549872122763,
|
|
"grad_norm": 0.12565196489418815,
|
|
"learning_rate": 9.150337105792129e-07,
|
|
"loss": 1.0003,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 6.138107416879795,
|
|
"grad_norm": 0.1176707888425743,
|
|
"learning_rate": 9.097108262469268e-07,
|
|
"loss": 1.0174,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 6.140664961636829,
|
|
"grad_norm": 0.1254506125476653,
|
|
"learning_rate": 9.044027312467574e-07,
|
|
"loss": 1.024,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 6.143222506393862,
|
|
"grad_norm": 0.12040306772801906,
|
|
"learning_rate": 8.991094342146423e-07,
|
|
"loss": 1.0238,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 6.145780051150895,
|
|
"grad_norm": 0.12003711394998114,
|
|
"learning_rate": 8.938309437624415e-07,
|
|
"loss": 1.0361,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 6.148337595907928,
|
|
"grad_norm": 0.1222116778211444,
|
|
"learning_rate": 8.885672684779345e-07,
|
|
"loss": 1.0195,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 6.150895140664962,
|
|
"grad_norm": 0.12213600424627216,
|
|
"learning_rate": 8.833184169247877e-07,
|
|
"loss": 1.0147,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 6.153452685421995,
|
|
"grad_norm": 0.11882499943476486,
|
|
"learning_rate": 8.780843976425568e-07,
|
|
"loss": 1.0443,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 6.156010230179028,
|
|
"grad_norm": 0.11944071935758879,
|
|
"learning_rate": 8.728652191466602e-07,
|
|
"loss": 1.0269,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 6.158567774936062,
|
|
"grad_norm": 0.12479032723786981,
|
|
"learning_rate": 8.676608899283789e-07,
|
|
"loss": 1.0407,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 6.161125319693094,
|
|
"grad_norm": 0.1232368778241773,
|
|
"learning_rate": 8.62471418454831e-07,
|
|
"loss": 0.998,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 6.163682864450128,
|
|
"grad_norm": 0.12380002645622601,
|
|
"learning_rate": 8.572968131689585e-07,
|
|
"loss": 1.0215,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 6.166240409207161,
|
|
"grad_norm": 0.11990258505813678,
|
|
"learning_rate": 8.521370824895236e-07,
|
|
"loss": 1.0362,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 6.168797953964194,
|
|
"grad_norm": 0.12763582460814127,
|
|
"learning_rate": 8.469922348110871e-07,
|
|
"loss": 1.0005,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 6.171355498721228,
|
|
"grad_norm": 0.12048771338001237,
|
|
"learning_rate": 8.41862278503991e-07,
|
|
"loss": 1.0154,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 6.173913043478261,
|
|
"grad_norm": 0.11110330026915051,
|
|
"learning_rate": 8.367472219143524e-07,
|
|
"loss": 0.9864,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 6.176470588235294,
|
|
"grad_norm": 0.12274015937027666,
|
|
"learning_rate": 8.316470733640525e-07,
|
|
"loss": 1.01,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 6.179028132992327,
|
|
"grad_norm": 0.11875414799502092,
|
|
"learning_rate": 8.265618411507148e-07,
|
|
"loss": 1.0349,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 6.181585677749361,
|
|
"grad_norm": 0.12112785116554001,
|
|
"learning_rate": 8.214915335476892e-07,
|
|
"loss": 1.0108,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 6.1841432225063935,
|
|
"grad_norm": 0.11843273179000395,
|
|
"learning_rate": 8.164361588040526e-07,
|
|
"loss": 1.0316,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 6.186700767263427,
|
|
"grad_norm": 0.12171206599055973,
|
|
"learning_rate": 8.113957251445837e-07,
|
|
"loss": 1.0181,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 6.189258312020461,
|
|
"grad_norm": 0.1332901069553243,
|
|
"learning_rate": 8.063702407697515e-07,
|
|
"loss": 1.0163,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 6.1918158567774935,
|
|
"grad_norm": 0.12665149802988054,
|
|
"learning_rate": 8.013597138557039e-07,
|
|
"loss": 1.0316,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 6.194373401534527,
|
|
"grad_norm": 0.11748240466353733,
|
|
"learning_rate": 7.963641525542564e-07,
|
|
"loss": 1.0295,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 6.19693094629156,
|
|
"grad_norm": 0.12263136155853388,
|
|
"learning_rate": 7.913835649928792e-07,
|
|
"loss": 1.0443,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 6.1994884910485935,
|
|
"grad_norm": 0.12057268564537553,
|
|
"learning_rate": 7.864179592746679e-07,
|
|
"loss": 0.9758,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 6.202046035805626,
|
|
"grad_norm": 0.11757878694680841,
|
|
"learning_rate": 7.814673434783604e-07,
|
|
"loss": 0.998,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 6.20460358056266,
|
|
"grad_norm": 0.18582779787648557,
|
|
"learning_rate": 7.765317256582949e-07,
|
|
"loss": 1.0115,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 6.207161125319693,
|
|
"grad_norm": 0.13582232353707813,
|
|
"learning_rate": 7.716111138444115e-07,
|
|
"loss": 1.0459,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 6.209718670076726,
|
|
"grad_norm": 0.13389475712289786,
|
|
"learning_rate": 7.667055160422432e-07,
|
|
"loss": 1.0274,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 6.21227621483376,
|
|
"grad_norm": 0.12673104354118297,
|
|
"learning_rate": 7.618149402328867e-07,
|
|
"loss": 1.0011,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 6.2148337595907925,
|
|
"grad_norm": 0.12765584122890725,
|
|
"learning_rate": 7.569393943730064e-07,
|
|
"loss": 1.0635,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 6.217391304347826,
|
|
"grad_norm": 0.11473857666105772,
|
|
"learning_rate": 7.52078886394807e-07,
|
|
"loss": 0.9878,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 6.21994884910486,
|
|
"grad_norm": 0.12228794360420046,
|
|
"learning_rate": 7.472334242060331e-07,
|
|
"loss": 1.0316,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 6.2225063938618925,
|
|
"grad_norm": 0.12426451417815787,
|
|
"learning_rate": 7.424030156899475e-07,
|
|
"loss": 1.0098,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 6.225063938618926,
|
|
"grad_norm": 0.11800919098475897,
|
|
"learning_rate": 7.375876687053252e-07,
|
|
"loss": 1.0508,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 6.227621483375959,
|
|
"grad_norm": 0.1309293626602563,
|
|
"learning_rate": 7.327873910864325e-07,
|
|
"loss": 1.0265,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 6.2301790281329925,
|
|
"grad_norm": 0.12364264713239634,
|
|
"learning_rate": 7.280021906430201e-07,
|
|
"loss": 1.038,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 6.232736572890025,
|
|
"grad_norm": 0.12731230734269985,
|
|
"learning_rate": 7.23232075160315e-07,
|
|
"loss": 0.9938,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 6.235294117647059,
|
|
"grad_norm": 0.11754730324986598,
|
|
"learning_rate": 7.184770523989904e-07,
|
|
"loss": 1.0209,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 6.2378516624040925,
|
|
"grad_norm": 0.12687711722398867,
|
|
"learning_rate": 7.137371300951746e-07,
|
|
"loss": 1.0369,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 6.240409207161125,
|
|
"grad_norm": 0.1226944492744433,
|
|
"learning_rate": 7.090123159604234e-07,
|
|
"loss": 1.0417,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 6.242966751918159,
|
|
"grad_norm": 0.11721843519340895,
|
|
"learning_rate": 7.043026176817158e-07,
|
|
"loss": 0.99,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 6.245524296675192,
|
|
"grad_norm": 0.12080675281454777,
|
|
"learning_rate": 6.996080429214347e-07,
|
|
"loss": 1.0065,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 6.248081841432225,
|
|
"grad_norm": 0.12010992913398671,
|
|
"learning_rate": 6.949285993173593e-07,
|
|
"loss": 1.0359,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 6.250639386189258,
|
|
"grad_norm": 0.11624614678372433,
|
|
"learning_rate": 6.902642944826544e-07,
|
|
"loss": 0.97,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 6.253196930946292,
|
|
"grad_norm": 0.12257573737475404,
|
|
"learning_rate": 6.856151360058505e-07,
|
|
"loss": 1.0192,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 6.255754475703325,
|
|
"grad_norm": 0.1201829684398593,
|
|
"learning_rate": 6.809811314508386e-07,
|
|
"loss": 1.0466,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 6.258312020460358,
|
|
"grad_norm": 0.12401967000820303,
|
|
"learning_rate": 6.763622883568521e-07,
|
|
"loss": 1.0356,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 6.260869565217392,
|
|
"grad_norm": 0.11778396980454381,
|
|
"learning_rate": 6.717586142384624e-07,
|
|
"loss": 1.036,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 6.263427109974424,
|
|
"grad_norm": 0.12185872889499474,
|
|
"learning_rate": 6.671701165855593e-07,
|
|
"loss": 1.0261,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 6.265984654731458,
|
|
"grad_norm": 0.1201489344194391,
|
|
"learning_rate": 6.625968028633389e-07,
|
|
"loss": 1.0119,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 6.268542199488491,
|
|
"grad_norm": 0.11988021977061444,
|
|
"learning_rate": 6.580386805122996e-07,
|
|
"loss": 1.021,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 6.271099744245524,
|
|
"grad_norm": 0.11792524228657224,
|
|
"learning_rate": 6.534957569482214e-07,
|
|
"loss": 1.0635,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 6.273657289002558,
|
|
"grad_norm": 0.11687466392592072,
|
|
"learning_rate": 6.489680395621556e-07,
|
|
"loss": 1.0129,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 6.276214833759591,
|
|
"grad_norm": 0.12220153331468454,
|
|
"learning_rate": 6.444555357204152e-07,
|
|
"loss": 0.9876,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 6.278772378516624,
|
|
"grad_norm": 0.11658584388896727,
|
|
"learning_rate": 6.39958252764562e-07,
|
|
"loss": 1.0258,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 6.281329923273657,
|
|
"grad_norm": 0.11595243705777233,
|
|
"learning_rate": 6.354761980113966e-07,
|
|
"loss": 1.0364,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 6.283887468030691,
|
|
"grad_norm": 0.11948349789713839,
|
|
"learning_rate": 6.31009378752937e-07,
|
|
"loss": 1.0295,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 6.286445012787723,
|
|
"grad_norm": 0.11578209417911318,
|
|
"learning_rate": 6.265578022564233e-07,
|
|
"loss": 1.003,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 6.289002557544757,
|
|
"grad_norm": 0.11954141892522423,
|
|
"learning_rate": 6.221214757642901e-07,
|
|
"loss": 1.0186,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 6.291560102301791,
|
|
"grad_norm": 0.1214032884466788,
|
|
"learning_rate": 6.177004064941616e-07,
|
|
"loss": 1.0325,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 6.294117647058823,
|
|
"grad_norm": 0.11798550854551848,
|
|
"learning_rate": 6.132946016388453e-07,
|
|
"loss": 1.0034,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 6.296675191815857,
|
|
"grad_norm": 0.12025821516068275,
|
|
"learning_rate": 6.089040683663083e-07,
|
|
"loss": 0.9823,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 6.29923273657289,
|
|
"grad_norm": 0.11951253909474888,
|
|
"learning_rate": 6.045288138196725e-07,
|
|
"loss": 1.0409,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 6.301790281329923,
|
|
"grad_norm": 0.11418311978255119,
|
|
"learning_rate": 6.001688451172027e-07,
|
|
"loss": 1.0022,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 6.304347826086957,
|
|
"grad_norm": 0.11934858308797691,
|
|
"learning_rate": 5.958241693522993e-07,
|
|
"loss": 1.0107,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 6.30690537084399,
|
|
"grad_norm": 0.12241414028875457,
|
|
"learning_rate": 5.914947935934756e-07,
|
|
"loss": 0.9971,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 6.309462915601023,
|
|
"grad_norm": 0.11903591318763888,
|
|
"learning_rate": 5.871807248843542e-07,
|
|
"loss": 1.0117,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 6.312020460358056,
|
|
"grad_norm": 0.11896713837542751,
|
|
"learning_rate": 5.828819702436573e-07,
|
|
"loss": 1.0199,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 6.31457800511509,
|
|
"grad_norm": 0.12256891371488562,
|
|
"learning_rate": 5.785985366651892e-07,
|
|
"loss": 1.003,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 6.3171355498721224,
|
|
"grad_norm": 0.1224791957117775,
|
|
"learning_rate": 5.743304311178289e-07,
|
|
"loss": 1.0067,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 6.319693094629156,
|
|
"grad_norm": 0.12119833550268867,
|
|
"learning_rate": 5.70077660545515e-07,
|
|
"loss": 1.0196,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 6.322250639386189,
|
|
"grad_norm": 0.11520605275376457,
|
|
"learning_rate": 5.658402318672418e-07,
|
|
"loss": 1.0127,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 6.324808184143222,
|
|
"grad_norm": 0.11525398133510434,
|
|
"learning_rate": 5.616181519770414e-07,
|
|
"loss": 1.0161,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 6.327365728900256,
|
|
"grad_norm": 0.12176149506861418,
|
|
"learning_rate": 5.574114277439702e-07,
|
|
"loss": 1.0216,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 6.329923273657289,
|
|
"grad_norm": 0.12541686899065785,
|
|
"learning_rate": 5.53220066012109e-07,
|
|
"loss": 1.0263,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 6.332480818414322,
|
|
"grad_norm": 0.12958665943781433,
|
|
"learning_rate": 5.490440736005397e-07,
|
|
"loss": 1.0737,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 6.335038363171355,
|
|
"grad_norm": 0.1273940622092984,
|
|
"learning_rate": 5.448834573033424e-07,
|
|
"loss": 1.028,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 6.337595907928389,
|
|
"grad_norm": 0.11799709709320902,
|
|
"learning_rate": 5.407382238895765e-07,
|
|
"loss": 0.9949,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 6.340153452685422,
|
|
"grad_norm": 0.1220634348791913,
|
|
"learning_rate": 5.366083801032806e-07,
|
|
"loss": 1.0422,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 6.342710997442455,
|
|
"grad_norm": 0.11889607141087616,
|
|
"learning_rate": 5.324939326634515e-07,
|
|
"loss": 1.0017,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 6.345268542199489,
|
|
"grad_norm": 0.12002156059223426,
|
|
"learning_rate": 5.283948882640355e-07,
|
|
"loss": 1.0181,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 6.3478260869565215,
|
|
"grad_norm": 0.11596540294437355,
|
|
"learning_rate": 5.24311253573927e-07,
|
|
"loss": 1.0346,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 6.350383631713555,
|
|
"grad_norm": 0.11502520531650343,
|
|
"learning_rate": 5.202430352369392e-07,
|
|
"loss": 1.0135,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 6.352941176470588,
|
|
"grad_norm": 0.12267491898314155,
|
|
"learning_rate": 5.161902398718121e-07,
|
|
"loss": 1.0435,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 6.3554987212276215,
|
|
"grad_norm": 0.12185761812901445,
|
|
"learning_rate": 5.121528740721871e-07,
|
|
"loss": 1.0377,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 6.358056265984655,
|
|
"grad_norm": 0.11976615175350093,
|
|
"learning_rate": 5.081309444066085e-07,
|
|
"loss": 1.034,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 6.360613810741688,
|
|
"grad_norm": 0.116555412280644,
|
|
"learning_rate": 5.041244574185056e-07,
|
|
"loss": 1.011,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 6.3631713554987215,
|
|
"grad_norm": 0.12515368166748755,
|
|
"learning_rate": 5.001334196261776e-07,
|
|
"loss": 0.9861,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 6.365728900255754,
|
|
"grad_norm": 0.11814447264484773,
|
|
"learning_rate": 4.961578375227982e-07,
|
|
"loss": 1.0146,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 6.368286445012788,
|
|
"grad_norm": 0.12245094109059326,
|
|
"learning_rate": 4.921977175763881e-07,
|
|
"loss": 1.0204,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 6.370843989769821,
|
|
"grad_norm": 0.12283694751475284,
|
|
"learning_rate": 4.882530662298168e-07,
|
|
"loss": 1.0313,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 6.373401534526854,
|
|
"grad_norm": 0.12224108783096758,
|
|
"learning_rate": 4.843238899007829e-07,
|
|
"loss": 1.032,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 6.375959079283888,
|
|
"grad_norm": 0.11751909048944272,
|
|
"learning_rate": 4.804101949818119e-07,
|
|
"loss": 1.0037,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 6.378516624040921,
|
|
"grad_norm": 0.1189722841334927,
|
|
"learning_rate": 4.765119878402424e-07,
|
|
"loss": 1.0218,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 6.381074168797954,
|
|
"grad_norm": 0.12188011601377355,
|
|
"learning_rate": 4.726292748182104e-07,
|
|
"loss": 1.0235,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 6.383631713554987,
|
|
"grad_norm": 0.11601162144284871,
|
|
"learning_rate": 4.687620622326505e-07,
|
|
"loss": 1.0095,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 6.3861892583120206,
|
|
"grad_norm": 0.11794823628283956,
|
|
"learning_rate": 4.6491035637527437e-07,
|
|
"loss": 1.0211,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 6.388746803069053,
|
|
"grad_norm": 0.12080963912657082,
|
|
"learning_rate": 4.6107416351256595e-07,
|
|
"loss": 0.996,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 6.391304347826087,
|
|
"grad_norm": 0.11852593163423941,
|
|
"learning_rate": 4.5725348988577057e-07,
|
|
"loss": 1.0473,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 6.3938618925831205,
|
|
"grad_norm": 0.1154582217572824,
|
|
"learning_rate": 4.5344834171088594e-07,
|
|
"loss": 0.9916,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 6.396419437340153,
|
|
"grad_norm": 0.12611349351005327,
|
|
"learning_rate": 4.496587251786544e-07,
|
|
"loss": 1.0537,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 6.398976982097187,
|
|
"grad_norm": 0.11841147140282605,
|
|
"learning_rate": 4.4588464645453856e-07,
|
|
"loss": 1.0354,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 6.40153452685422,
|
|
"grad_norm": 0.11761246404197793,
|
|
"learning_rate": 4.421261116787323e-07,
|
|
"loss": 1.0056,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 6.404092071611253,
|
|
"grad_norm": 0.116833267265145,
|
|
"learning_rate": 4.383831269661343e-07,
|
|
"loss": 0.9983,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 6.406649616368286,
|
|
"grad_norm": 0.12485584628194238,
|
|
"learning_rate": 4.3465569840635105e-07,
|
|
"loss": 1.0276,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 6.40920716112532,
|
|
"grad_norm": 0.11771747761741529,
|
|
"learning_rate": 4.309438320636705e-07,
|
|
"loss": 1.0119,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 6.411764705882353,
|
|
"grad_norm": 0.1167766752899283,
|
|
"learning_rate": 4.272475339770699e-07,
|
|
"loss": 1.0257,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 6.414322250639386,
|
|
"grad_norm": 0.11997899496687212,
|
|
"learning_rate": 4.235668101601964e-07,
|
|
"loss": 0.9887,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 6.41687979539642,
|
|
"grad_norm": 0.11897278858577053,
|
|
"learning_rate": 4.199016666013533e-07,
|
|
"loss": 1.0162,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 6.419437340153452,
|
|
"grad_norm": 0.1213013490317867,
|
|
"learning_rate": 4.1625210926350413e-07,
|
|
"loss": 1.0141,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 6.421994884910486,
|
|
"grad_norm": 0.12533002989447992,
|
|
"learning_rate": 4.1261814408424806e-07,
|
|
"loss": 1.0251,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 6.42455242966752,
|
|
"grad_norm": 0.12196478149472252,
|
|
"learning_rate": 4.089997769758225e-07,
|
|
"loss": 1.0365,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 6.427109974424552,
|
|
"grad_norm": 0.12143791187790264,
|
|
"learning_rate": 4.0539701382507847e-07,
|
|
"loss": 1.0032,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 6.429667519181586,
|
|
"grad_norm": 0.11682750481108217,
|
|
"learning_rate": 4.018098604934906e-07,
|
|
"loss": 1.0045,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 6.432225063938619,
|
|
"grad_norm": 0.11654420434670919,
|
|
"learning_rate": 3.982383228171338e-07,
|
|
"loss": 1.0122,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 6.434782608695652,
|
|
"grad_norm": 0.12087376970393812,
|
|
"learning_rate": 3.946824066066757e-07,
|
|
"loss": 1.0091,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 6.437340153452685,
|
|
"grad_norm": 0.11198028929740504,
|
|
"learning_rate": 3.9114211764736843e-07,
|
|
"loss": 0.9916,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 6.439897698209719,
|
|
"grad_norm": 0.117876547438714,
|
|
"learning_rate": 3.876174616990402e-07,
|
|
"loss": 0.9688,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 6.442455242966752,
|
|
"grad_norm": 0.11691097425539704,
|
|
"learning_rate": 3.8410844449608966e-07,
|
|
"loss": 1.0262,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 6.445012787723785,
|
|
"grad_norm": 0.12067476965271878,
|
|
"learning_rate": 3.8061507174746326e-07,
|
|
"loss": 1.0357,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 6.447570332480819,
|
|
"grad_norm": 0.11448044711242149,
|
|
"learning_rate": 3.7713734913666254e-07,
|
|
"loss": 1.0278,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 6.450127877237851,
|
|
"grad_norm": 0.11900503374045875,
|
|
"learning_rate": 3.73675282321726e-07,
|
|
"loss": 1.0293,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 6.452685421994885,
|
|
"grad_norm": 0.1237852363860751,
|
|
"learning_rate": 3.7022887693521914e-07,
|
|
"loss": 1.0432,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 6.455242966751918,
|
|
"grad_norm": 0.11395769439497158,
|
|
"learning_rate": 3.6679813858422673e-07,
|
|
"loss": 1.0451,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 6.457800511508951,
|
|
"grad_norm": 0.11755851431433859,
|
|
"learning_rate": 3.6338307285034626e-07,
|
|
"loss": 1.0166,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 6.460358056265985,
|
|
"grad_norm": 0.11537719335337888,
|
|
"learning_rate": 3.5998368528967764e-07,
|
|
"loss": 1.0221,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 6.462915601023018,
|
|
"grad_norm": 0.12098800578611382,
|
|
"learning_rate": 3.5659998143281027e-07,
|
|
"loss": 1.0474,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 6.465473145780051,
|
|
"grad_norm": 0.11989356063597686,
|
|
"learning_rate": 3.532319667848172e-07,
|
|
"loss": 1.0187,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 6.468030690537084,
|
|
"grad_norm": 0.1156244817453119,
|
|
"learning_rate": 3.498796468252508e-07,
|
|
"loss": 0.9894,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 6.470588235294118,
|
|
"grad_norm": 0.11213145863456157,
|
|
"learning_rate": 3.46543027008126e-07,
|
|
"loss": 1.0331,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 6.4731457800511505,
|
|
"grad_norm": 0.11707883319628067,
|
|
"learning_rate": 3.4322211276191176e-07,
|
|
"loss": 1.0259,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 6.475703324808184,
|
|
"grad_norm": 0.11350670721406404,
|
|
"learning_rate": 3.399169094895294e-07,
|
|
"loss": 1.0065,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 6.478260869565218,
|
|
"grad_norm": 0.11452239943111842,
|
|
"learning_rate": 3.366274225683397e-07,
|
|
"loss": 1.0382,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 6.4808184143222505,
|
|
"grad_norm": 0.11645854358551593,
|
|
"learning_rate": 3.3335365735012947e-07,
|
|
"loss": 0.9849,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 6.483375959079284,
|
|
"grad_norm": 0.1150643632230636,
|
|
"learning_rate": 3.3009561916111045e-07,
|
|
"loss": 1.0441,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 6.485933503836317,
|
|
"grad_norm": 0.11565843726243669,
|
|
"learning_rate": 3.2685331330190916e-07,
|
|
"loss": 1.0256,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 6.4884910485933505,
|
|
"grad_norm": 0.12172892123412701,
|
|
"learning_rate": 3.2362674504755385e-07,
|
|
"loss": 1.0006,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 6.491048593350383,
|
|
"grad_norm": 0.11416395245772691,
|
|
"learning_rate": 3.2041591964746767e-07,
|
|
"loss": 0.9981,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 6.493606138107417,
|
|
"grad_norm": 0.11099012627200047,
|
|
"learning_rate": 3.17220842325463e-07,
|
|
"loss": 0.9971,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 6.4961636828644505,
|
|
"grad_norm": 0.12666071845516697,
|
|
"learning_rate": 3.14041518279733e-07,
|
|
"loss": 1.019,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 6.498721227621483,
|
|
"grad_norm": 0.11694427326316041,
|
|
"learning_rate": 3.108779526828365e-07,
|
|
"loss": 1.048,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 6.501278772378517,
|
|
"grad_norm": 0.11663277776194486,
|
|
"learning_rate": 3.0773015068169876e-07,
|
|
"loss": 1.0205,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 6.5038363171355495,
|
|
"grad_norm": 0.11421370105035522,
|
|
"learning_rate": 3.045981173975965e-07,
|
|
"loss": 1.0062,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 6.506393861892583,
|
|
"grad_norm": 0.11416247400561318,
|
|
"learning_rate": 3.0148185792615137e-07,
|
|
"loss": 1.0221,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 6.508951406649617,
|
|
"grad_norm": 0.12004167269390631,
|
|
"learning_rate": 2.9838137733732343e-07,
|
|
"loss": 1.0336,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 6.5115089514066495,
|
|
"grad_norm": 0.12185027359479889,
|
|
"learning_rate": 2.9529668067539986e-07,
|
|
"loss": 1.0085,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 6.514066496163683,
|
|
"grad_norm": 0.11920181864869182,
|
|
"learning_rate": 2.922277729589906e-07,
|
|
"loss": 1.0212,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 6.516624040920716,
|
|
"grad_norm": 0.11457206340363568,
|
|
"learning_rate": 2.891746591810152e-07,
|
|
"loss": 1.0062,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 6.5191815856777495,
|
|
"grad_norm": 0.11396161204686395,
|
|
"learning_rate": 2.86137344308699e-07,
|
|
"loss": 1.0269,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 6.521739130434782,
|
|
"grad_norm": 0.11716042134956894,
|
|
"learning_rate": 2.8311583328356485e-07,
|
|
"loss": 1.0513,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 6.524296675191816,
|
|
"grad_norm": 0.11082138416428153,
|
|
"learning_rate": 2.801101310214205e-07,
|
|
"loss": 1.0133,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 6.526854219948849,
|
|
"grad_norm": 0.11831445098631707,
|
|
"learning_rate": 2.7712024241235757e-07,
|
|
"loss": 1.0184,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 6.529411764705882,
|
|
"grad_norm": 0.11918281125426747,
|
|
"learning_rate": 2.7414617232073505e-07,
|
|
"loss": 1.0344,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 6.531969309462916,
|
|
"grad_norm": 0.11681313613977624,
|
|
"learning_rate": 2.7118792558518237e-07,
|
|
"loss": 1.0219,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 6.534526854219949,
|
|
"grad_norm": 0.12570449518559115,
|
|
"learning_rate": 2.6824550701857966e-07,
|
|
"loss": 1.0192,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 6.537084398976982,
|
|
"grad_norm": 0.11631595597156608,
|
|
"learning_rate": 2.653189214080576e-07,
|
|
"loss": 0.9885,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 6.539641943734015,
|
|
"grad_norm": 0.11976742856004091,
|
|
"learning_rate": 2.624081735149897e-07,
|
|
"loss": 1.0225,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 6.542199488491049,
|
|
"grad_norm": 0.11687676414472607,
|
|
"learning_rate": 2.5951326807498123e-07,
|
|
"loss": 1.0051,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 6.544757033248082,
|
|
"grad_norm": 0.11626243542745685,
|
|
"learning_rate": 2.5663420979785915e-07,
|
|
"loss": 1.0256,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 6.547314578005115,
|
|
"grad_norm": 0.11473271542819383,
|
|
"learning_rate": 2.5377100336767547e-07,
|
|
"loss": 1.0134,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 6.549872122762149,
|
|
"grad_norm": 0.11617767916671155,
|
|
"learning_rate": 2.509236534426851e-07,
|
|
"loss": 1.0045,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 6.552429667519181,
|
|
"grad_norm": 0.11177045938404909,
|
|
"learning_rate": 2.4809216465534913e-07,
|
|
"loss": 1.0377,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 6.554987212276215,
|
|
"grad_norm": 0.11344781404055954,
|
|
"learning_rate": 2.4527654161232153e-07,
|
|
"loss": 1.0037,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 6.557544757033249,
|
|
"grad_norm": 0.12399390000812018,
|
|
"learning_rate": 2.424767888944468e-07,
|
|
"loss": 1.0462,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 6.560102301790281,
|
|
"grad_norm": 0.11847061868510626,
|
|
"learning_rate": 2.3969291105674805e-07,
|
|
"loss": 0.9959,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 6.562659846547315,
|
|
"grad_norm": 0.116920831153564,
|
|
"learning_rate": 2.3692491262841788e-07,
|
|
"loss": 0.9783,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 6.565217391304348,
|
|
"grad_norm": 0.12018087616989655,
|
|
"learning_rate": 2.3417279811281947e-07,
|
|
"loss": 0.9778,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 6.567774936061381,
|
|
"grad_norm": 0.11727845557913934,
|
|
"learning_rate": 2.3143657198746893e-07,
|
|
"loss": 1.042,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 6.570332480818414,
|
|
"grad_norm": 0.1156893274747709,
|
|
"learning_rate": 2.2871623870403649e-07,
|
|
"loss": 1.0302,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 6.572890025575448,
|
|
"grad_norm": 0.11720330890092409,
|
|
"learning_rate": 2.260118026883318e-07,
|
|
"loss": 1.0267,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 6.57544757033248,
|
|
"grad_norm": 0.11688767903985245,
|
|
"learning_rate": 2.233232683403075e-07,
|
|
"loss": 1.0292,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 6.578005115089514,
|
|
"grad_norm": 0.11603026043379294,
|
|
"learning_rate": 2.206506400340369e-07,
|
|
"loss": 1.0017,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 6.580562659846548,
|
|
"grad_norm": 0.11389458080146765,
|
|
"learning_rate": 2.1799392211772074e-07,
|
|
"loss": 1.0082,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 6.58312020460358,
|
|
"grad_norm": 0.1161474107114186,
|
|
"learning_rate": 2.1535311891367373e-07,
|
|
"loss": 1.0219,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 6.585677749360614,
|
|
"grad_norm": 0.11523869949699879,
|
|
"learning_rate": 2.1272823471831573e-07,
|
|
"loss": 1.0048,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 6.588235294117647,
|
|
"grad_norm": 0.11447790591214169,
|
|
"learning_rate": 2.101192738021718e-07,
|
|
"loss": 1.0116,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 6.59079283887468,
|
|
"grad_norm": 0.11643651666513412,
|
|
"learning_rate": 2.0752624040985436e-07,
|
|
"loss": 1.0117,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 6.593350383631714,
|
|
"grad_norm": 0.12040988081003166,
|
|
"learning_rate": 2.0494913876007105e-07,
|
|
"loss": 1.0255,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 6.595907928388747,
|
|
"grad_norm": 0.11872708662460554,
|
|
"learning_rate": 2.0238797304560243e-07,
|
|
"loss": 1.0241,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 6.59846547314578,
|
|
"grad_norm": 0.10983144316407795,
|
|
"learning_rate": 1.9984274743330424e-07,
|
|
"loss": 1.0106,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 6.601023017902813,
|
|
"grad_norm": 0.112895943367732,
|
|
"learning_rate": 1.9731346606410185e-07,
|
|
"loss": 1.0405,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 6.603580562659847,
|
|
"grad_norm": 0.11309181158689928,
|
|
"learning_rate": 1.9480013305297585e-07,
|
|
"loss": 1.0286,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 6.6061381074168795,
|
|
"grad_norm": 0.11579577875848088,
|
|
"learning_rate": 1.9230275248896425e-07,
|
|
"loss": 1.0137,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 6.608695652173913,
|
|
"grad_norm": 0.11932271374275923,
|
|
"learning_rate": 1.8982132843514577e-07,
|
|
"loss": 1.0352,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 6.611253196930946,
|
|
"grad_norm": 0.1187240263728754,
|
|
"learning_rate": 1.8735586492864556e-07,
|
|
"loss": 0.9899,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 6.6138107416879794,
|
|
"grad_norm": 0.12010362235501355,
|
|
"learning_rate": 1.8490636598061605e-07,
|
|
"loss": 1.0202,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 6.616368286445013,
|
|
"grad_norm": 0.11896072789581243,
|
|
"learning_rate": 1.8247283557624062e-07,
|
|
"loss": 1.0801,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 6.618925831202046,
|
|
"grad_norm": 0.11269695438058397,
|
|
"learning_rate": 1.8005527767471998e-07,
|
|
"loss": 1.0323,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 6.621483375959079,
|
|
"grad_norm": 0.11595014960172056,
|
|
"learning_rate": 1.7765369620926899e-07,
|
|
"loss": 1.0247,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 6.624040920716112,
|
|
"grad_norm": 0.11457210948093192,
|
|
"learning_rate": 1.752680950871144e-07,
|
|
"loss": 1.0561,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 6.626598465473146,
|
|
"grad_norm": 0.11577860483951284,
|
|
"learning_rate": 1.7289847818947492e-07,
|
|
"loss": 1.0182,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 6.629156010230179,
|
|
"grad_norm": 0.11240383490721378,
|
|
"learning_rate": 1.7054484937157112e-07,
|
|
"loss": 1.0255,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 6.631713554987212,
|
|
"grad_norm": 0.11631232042116323,
|
|
"learning_rate": 1.6820721246261106e-07,
|
|
"loss": 1.0299,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 6.634271099744246,
|
|
"grad_norm": 0.11273655621311057,
|
|
"learning_rate": 1.6588557126578365e-07,
|
|
"loss": 1.0407,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 6.6368286445012785,
|
|
"grad_norm": 0.11767164102993428,
|
|
"learning_rate": 1.6357992955825297e-07,
|
|
"loss": 1.0145,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 6.639386189258312,
|
|
"grad_norm": 0.11534695075999606,
|
|
"learning_rate": 1.6129029109115401e-07,
|
|
"loss": 1.0106,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 6.641943734015345,
|
|
"grad_norm": 0.11539400507669376,
|
|
"learning_rate": 1.59016659589587e-07,
|
|
"loss": 0.9862,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 6.6445012787723785,
|
|
"grad_norm": 0.11483047616375414,
|
|
"learning_rate": 1.567590387526041e-07,
|
|
"loss": 1.0301,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 6.647058823529412,
|
|
"grad_norm": 0.11260638212850177,
|
|
"learning_rate": 1.5451743225321726e-07,
|
|
"loss": 1.0088,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 6.649616368286445,
|
|
"grad_norm": 0.11619144848069289,
|
|
"learning_rate": 1.5229184373837912e-07,
|
|
"loss": 1.0117,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 6.6521739130434785,
|
|
"grad_norm": 0.12170161725444163,
|
|
"learning_rate": 1.5008227682898337e-07,
|
|
"loss": 1.0345,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 6.654731457800511,
|
|
"grad_norm": 0.11009879990340311,
|
|
"learning_rate": 1.4788873511985656e-07,
|
|
"loss": 1.0074,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 6.657289002557545,
|
|
"grad_norm": 0.11242257451547451,
|
|
"learning_rate": 1.4571122217975298e-07,
|
|
"loss": 1.0295,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 6.659846547314578,
|
|
"grad_norm": 0.11604613398078274,
|
|
"learning_rate": 1.4354974155135203e-07,
|
|
"loss": 1.0287,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 6.662404092071611,
|
|
"grad_norm": 0.11447891191608152,
|
|
"learning_rate": 1.4140429675124633e-07,
|
|
"loss": 1.0059,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 6.664961636828645,
|
|
"grad_norm": 0.11195548180186611,
|
|
"learning_rate": 1.3927489126993932e-07,
|
|
"loss": 1.0347,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 6.667519181585678,
|
|
"grad_norm": 0.11445065696070437,
|
|
"learning_rate": 1.3716152857184306e-07,
|
|
"loss": 1.012,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 6.670076726342711,
|
|
"grad_norm": 0.11614977059279803,
|
|
"learning_rate": 1.350642120952661e-07,
|
|
"loss": 0.9918,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 6.672634271099744,
|
|
"grad_norm": 0.11871269418863775,
|
|
"learning_rate": 1.3298294525241008e-07,
|
|
"loss": 1.0269,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 6.675191815856778,
|
|
"grad_norm": 0.10866128338893077,
|
|
"learning_rate": 1.3091773142936525e-07,
|
|
"loss": 1.0334,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 6.677749360613811,
|
|
"grad_norm": 0.12041795104852608,
|
|
"learning_rate": 1.2886857398610731e-07,
|
|
"loss": 0.9974,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 6.680306905370844,
|
|
"grad_norm": 0.11406194376177828,
|
|
"learning_rate": 1.2683547625648718e-07,
|
|
"loss": 1.0222,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 6.6828644501278776,
|
|
"grad_norm": 0.11240623577621248,
|
|
"learning_rate": 1.2481844154822565e-07,
|
|
"loss": 0.9952,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 6.68542199488491,
|
|
"grad_norm": 0.11514164915047609,
|
|
"learning_rate": 1.2281747314291437e-07,
|
|
"loss": 1.0026,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 6.687979539641944,
|
|
"grad_norm": 0.11222335726022206,
|
|
"learning_rate": 1.208325742960037e-07,
|
|
"loss": 1.0056,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 6.690537084398977,
|
|
"grad_norm": 0.11243016039454592,
|
|
"learning_rate": 1.1886374823679825e-07,
|
|
"loss": 1.0492,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 6.69309462915601,
|
|
"grad_norm": 0.11317201484958644,
|
|
"learning_rate": 1.1691099816845574e-07,
|
|
"loss": 1.0213,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 6.695652173913043,
|
|
"grad_norm": 0.1170626311837824,
|
|
"learning_rate": 1.149743272679793e-07,
|
|
"loss": 0.9974,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 6.698209718670077,
|
|
"grad_norm": 0.12262867677149476,
|
|
"learning_rate": 1.1305373868620961e-07,
|
|
"loss": 0.9967,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 6.70076726342711,
|
|
"grad_norm": 0.11396022297257247,
|
|
"learning_rate": 1.1114923554782608e-07,
|
|
"loss": 0.9956,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 6.703324808184143,
|
|
"grad_norm": 0.11735281558425238,
|
|
"learning_rate": 1.0926082095133572e-07,
|
|
"loss": 1.0193,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 6.705882352941177,
|
|
"grad_norm": 0.12029512917783149,
|
|
"learning_rate": 1.0738849796907091e-07,
|
|
"loss": 1.0473,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 6.708439897698209,
|
|
"grad_norm": 0.11312555151340069,
|
|
"learning_rate": 1.0553226964718277e-07,
|
|
"loss": 1.008,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 6.710997442455243,
|
|
"grad_norm": 0.11541322342299927,
|
|
"learning_rate": 1.0369213900564001e-07,
|
|
"loss": 1.0029,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 6.713554987212277,
|
|
"grad_norm": 0.11302071428638145,
|
|
"learning_rate": 1.0186810903822119e-07,
|
|
"loss": 0.9623,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 6.716112531969309,
|
|
"grad_norm": 0.11291140484686953,
|
|
"learning_rate": 1.0006018271250695e-07,
|
|
"loss": 1.0305,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 6.718670076726343,
|
|
"grad_norm": 0.11524487387426563,
|
|
"learning_rate": 9.826836296988107e-08,
|
|
"loss": 1.0596,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 6.721227621483376,
|
|
"grad_norm": 0.11543260535666969,
|
|
"learning_rate": 9.649265272552277e-08,
|
|
"loss": 1.0237,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 6.723785166240409,
|
|
"grad_norm": 0.11302904037284935,
|
|
"learning_rate": 9.473305486840112e-08,
|
|
"loss": 1.0177,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 6.726342710997442,
|
|
"grad_norm": 0.11210024116892857,
|
|
"learning_rate": 9.29895722612717e-08,
|
|
"loss": 1.0284,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 6.728900255754476,
|
|
"grad_norm": 0.11611360048557691,
|
|
"learning_rate": 9.126220774067218e-08,
|
|
"loss": 1.0313,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 6.731457800511509,
|
|
"grad_norm": 0.11281080704543008,
|
|
"learning_rate": 8.955096411691566e-08,
|
|
"loss": 1.0156,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 6.734015345268542,
|
|
"grad_norm": 0.11192307343079083,
|
|
"learning_rate": 8.785584417409065e-08,
|
|
"loss": 1.0173,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 6.736572890025576,
|
|
"grad_norm": 0.11483249975315203,
|
|
"learning_rate": 8.617685067004777e-08,
|
|
"loss": 1.0269,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 6.739130434782608,
|
|
"grad_norm": 0.11652633110386056,
|
|
"learning_rate": 8.451398633640861e-08,
|
|
"loss": 0.9978,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 6.741687979539642,
|
|
"grad_norm": 0.11193935061569056,
|
|
"learning_rate": 8.286725387854689e-08,
|
|
"loss": 1.0166,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 6.744245524296675,
|
|
"grad_norm": 0.1132575109344062,
|
|
"learning_rate": 8.123665597559393e-08,
|
|
"loss": 1.03,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 6.746803069053708,
|
|
"grad_norm": 0.10909141114205528,
|
|
"learning_rate": 7.962219528042991e-08,
|
|
"loss": 0.9843,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 6.749360613810742,
|
|
"grad_norm": 0.11510554903103819,
|
|
"learning_rate": 7.802387441968262e-08,
|
|
"loss": 1.0058,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 6.751918158567775,
|
|
"grad_norm": 0.1126125629269261,
|
|
"learning_rate": 7.644169599371975e-08,
|
|
"loss": 1.0451,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 6.754475703324808,
|
|
"grad_norm": 0.11361718582807691,
|
|
"learning_rate": 7.487566257664558e-08,
|
|
"loss": 1.0447,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 6.757033248081841,
|
|
"grad_norm": 0.11201362418480085,
|
|
"learning_rate": 7.332577671629982e-08,
|
|
"loss": 1.0003,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 6.759590792838875,
|
|
"grad_norm": 0.11250812055949669,
|
|
"learning_rate": 7.179204093424985e-08,
|
|
"loss": 1.0152,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 6.762148337595908,
|
|
"grad_norm": 0.11340595916397253,
|
|
"learning_rate": 7.027445772578856e-08,
|
|
"loss": 1.0136,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 6.764705882352941,
|
|
"grad_norm": 0.11043173067397596,
|
|
"learning_rate": 6.877302955992649e-08,
|
|
"loss": 1.0039,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 6.767263427109975,
|
|
"grad_norm": 0.11320152606971275,
|
|
"learning_rate": 6.72877588793952e-08,
|
|
"loss": 1.0263,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 6.7698209718670075,
|
|
"grad_norm": 0.11555065643180781,
|
|
"learning_rate": 6.581864810063732e-08,
|
|
"loss": 1.0095,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 6.772378516624041,
|
|
"grad_norm": 0.1114703182443358,
|
|
"learning_rate": 6.436569961380313e-08,
|
|
"loss": 1.0014,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 6.774936061381074,
|
|
"grad_norm": 0.11945044598900786,
|
|
"learning_rate": 6.292891578275063e-08,
|
|
"loss": 1.0308,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 6.7774936061381075,
|
|
"grad_norm": 0.11250868328242511,
|
|
"learning_rate": 6.150829894503662e-08,
|
|
"loss": 1.0107,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 6.78005115089514,
|
|
"grad_norm": 0.11491638958663465,
|
|
"learning_rate": 6.010385141191455e-08,
|
|
"loss": 1.0279,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 6.782608695652174,
|
|
"grad_norm": 0.1160903563132126,
|
|
"learning_rate": 5.8715575468333286e-08,
|
|
"loss": 1.0067,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 6.7851662404092075,
|
|
"grad_norm": 0.11673880519657757,
|
|
"learning_rate": 5.734347337293167e-08,
|
|
"loss": 1.0253,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 6.78772378516624,
|
|
"grad_norm": 0.11345092121417273,
|
|
"learning_rate": 5.598754735803513e-08,
|
|
"loss": 1.0256,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 6.790281329923274,
|
|
"grad_norm": 0.11245719320265857,
|
|
"learning_rate": 5.464779962964795e-08,
|
|
"loss": 1.023,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 6.792838874680307,
|
|
"grad_norm": 0.11318781711220266,
|
|
"learning_rate": 5.332423236745765e-08,
|
|
"loss": 0.9817,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 6.79539641943734,
|
|
"grad_norm": 0.11393255984182678,
|
|
"learning_rate": 5.201684772482507e-08,
|
|
"loss": 0.9919,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 6.797953964194374,
|
|
"grad_norm": 0.1114106983420887,
|
|
"learning_rate": 5.0725647828783196e-08,
|
|
"loss": 0.9949,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 6.8005115089514065,
|
|
"grad_norm": 0.11613702586163382,
|
|
"learning_rate": 4.945063478003276e-08,
|
|
"loss": 1.0246,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 6.80306905370844,
|
|
"grad_norm": 0.11426036986413816,
|
|
"learning_rate": 4.8191810652941096e-08,
|
|
"loss": 1.0434,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 6.805626598465473,
|
|
"grad_norm": 0.11654706791098739,
|
|
"learning_rate": 4.694917749553663e-08,
|
|
"loss": 1.0256,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 6.8081841432225065,
|
|
"grad_norm": 0.10999242921563646,
|
|
"learning_rate": 4.5722737329505495e-08,
|
|
"loss": 0.9802,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 6.810741687979539,
|
|
"grad_norm": 0.11948231260555445,
|
|
"learning_rate": 4.451249215018827e-08,
|
|
"loss": 1.0593,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 6.813299232736573,
|
|
"grad_norm": 0.11285924950704992,
|
|
"learning_rate": 4.331844392657991e-08,
|
|
"loss": 1.026,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 6.8158567774936065,
|
|
"grad_norm": 0.11299230638204774,
|
|
"learning_rate": 4.2140594601320915e-08,
|
|
"loss": 1.0162,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 6.818414322250639,
|
|
"grad_norm": 0.11595950299690573,
|
|
"learning_rate": 4.097894609069841e-08,
|
|
"loss": 0.9853,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 6.820971867007673,
|
|
"grad_norm": 0.1155751170348188,
|
|
"learning_rate": 3.983350028464283e-08,
|
|
"loss": 1.0022,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 6.823529411764706,
|
|
"grad_norm": 0.11385145480733656,
|
|
"learning_rate": 3.870425904672237e-08,
|
|
"loss": 1.0905,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 6.826086956521739,
|
|
"grad_norm": 0.11441498941945787,
|
|
"learning_rate": 3.7591224214141855e-08,
|
|
"loss": 1.032,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 6.828644501278772,
|
|
"grad_norm": 0.11622957694085463,
|
|
"learning_rate": 3.649439759773943e-08,
|
|
"loss": 1.0273,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 6.831202046035806,
|
|
"grad_norm": 0.11310752492427763,
|
|
"learning_rate": 3.541378098198323e-08,
|
|
"loss": 1.0202,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 6.833759590792839,
|
|
"grad_norm": 0.11205928985871322,
|
|
"learning_rate": 3.4349376124969136e-08,
|
|
"loss": 0.9919,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 6.836317135549872,
|
|
"grad_norm": 0.11055590937853152,
|
|
"learning_rate": 3.330118475841859e-08,
|
|
"loss": 1.019,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 6.838874680306906,
|
|
"grad_norm": 0.11098200209047006,
|
|
"learning_rate": 3.22692085876708e-08,
|
|
"loss": 0.9972,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 6.841432225063938,
|
|
"grad_norm": 0.11522340948350532,
|
|
"learning_rate": 3.125344929168828e-08,
|
|
"loss": 1.0004,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 6.843989769820972,
|
|
"grad_norm": 0.11422976235509531,
|
|
"learning_rate": 3.025390852304688e-08,
|
|
"loss": 1.0273,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 6.846547314578006,
|
|
"grad_norm": 0.11018216168196639,
|
|
"learning_rate": 2.927058790793802e-08,
|
|
"loss": 1.0102,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 6.849104859335038,
|
|
"grad_norm": 0.10995140569223621,
|
|
"learning_rate": 2.830348904616198e-08,
|
|
"loss": 0.991,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 6.851662404092072,
|
|
"grad_norm": 0.11543991907521552,
|
|
"learning_rate": 2.7352613511127946e-08,
|
|
"loss": 1.0338,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 6.854219948849105,
|
|
"grad_norm": 0.11129720513762761,
|
|
"learning_rate": 2.6417962849852875e-08,
|
|
"loss": 1.0094,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 6.856777493606138,
|
|
"grad_norm": 0.1097107046759256,
|
|
"learning_rate": 2.549953858295262e-08,
|
|
"loss": 1.0208,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 6.859335038363171,
|
|
"grad_norm": 0.1181695445768175,
|
|
"learning_rate": 2.459734220464638e-08,
|
|
"loss": 1.0015,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 6.861892583120205,
|
|
"grad_norm": 0.11107816598809478,
|
|
"learning_rate": 2.3711375182753347e-08,
|
|
"loss": 1.0261,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 6.864450127877237,
|
|
"grad_norm": 0.10839159774339671,
|
|
"learning_rate": 2.2841638958683855e-08,
|
|
"loss": 1.0135,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 6.867007672634271,
|
|
"grad_norm": 0.1121417586939987,
|
|
"learning_rate": 2.1988134947446004e-08,
|
|
"loss": 1.0035,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 6.869565217391305,
|
|
"grad_norm": 0.11209845991644457,
|
|
"learning_rate": 2.1150864537636817e-08,
|
|
"loss": 1.0321,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 6.872122762148337,
|
|
"grad_norm": 0.11303462530389491,
|
|
"learning_rate": 2.032982909144332e-08,
|
|
"loss": 1.012,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 6.874680306905371,
|
|
"grad_norm": 0.11117791002965544,
|
|
"learning_rate": 1.9525029944637008e-08,
|
|
"loss": 0.9929,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 6.877237851662404,
|
|
"grad_norm": 0.1089777437805983,
|
|
"learning_rate": 1.8736468406579388e-08,
|
|
"loss": 0.9931,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 6.879795396419437,
|
|
"grad_norm": 0.11251100033934079,
|
|
"learning_rate": 1.796414576020755e-08,
|
|
"loss": 1.0153,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 6.882352941176471,
|
|
"grad_norm": 0.11299998984552379,
|
|
"learning_rate": 1.720806326204305e-08,
|
|
"loss": 1.005,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 6.884910485933504,
|
|
"grad_norm": 0.11290626743296132,
|
|
"learning_rate": 1.646822214218524e-08,
|
|
"loss": 1.049,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 6.887468030690537,
|
|
"grad_norm": 0.11186130749976496,
|
|
"learning_rate": 1.5744623604310172e-08,
|
|
"loss": 1.003,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 6.89002557544757,
|
|
"grad_norm": 0.11028332749990057,
|
|
"learning_rate": 1.503726882566503e-08,
|
|
"loss": 0.9892,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 6.892583120204604,
|
|
"grad_norm": 0.11457205700764143,
|
|
"learning_rate": 1.4346158957073696e-08,
|
|
"loss": 1.0261,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 6.8951406649616365,
|
|
"grad_norm": 0.11434079231719742,
|
|
"learning_rate": 1.3671295122928974e-08,
|
|
"loss": 1.0118,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 6.89769820971867,
|
|
"grad_norm": 0.11590548541933458,
|
|
"learning_rate": 1.3012678421191471e-08,
|
|
"loss": 1.0397,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 6.900255754475703,
|
|
"grad_norm": 0.11241776946007812,
|
|
"learning_rate": 1.2370309923388501e-08,
|
|
"loss": 1.0214,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 6.9028132992327365,
|
|
"grad_norm": 0.11386908312296881,
|
|
"learning_rate": 1.1744190674614076e-08,
|
|
"loss": 1.0249,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 6.90537084398977,
|
|
"grad_norm": 0.1111155708841944,
|
|
"learning_rate": 1.1134321693525574e-08,
|
|
"loss": 1.0013,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 6.907928388746803,
|
|
"grad_norm": 0.11383791079341445,
|
|
"learning_rate": 1.0540703972341525e-08,
|
|
"loss": 1.0148,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 6.910485933503836,
|
|
"grad_norm": 0.11458774717785482,
|
|
"learning_rate": 9.963338476840501e-09,
|
|
"loss": 1.029,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 6.913043478260869,
|
|
"grad_norm": 0.11295695096599505,
|
|
"learning_rate": 9.402226146361104e-09,
|
|
"loss": 1.0136,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 6.915601023017903,
|
|
"grad_norm": 0.11389257052620162,
|
|
"learning_rate": 8.857367893796431e-09,
|
|
"loss": 0.9989,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 6.918158567774936,
|
|
"grad_norm": 0.11405136091559014,
|
|
"learning_rate": 8.328764605597395e-09,
|
|
"loss": 1.0239,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 6.920716112531969,
|
|
"grad_norm": 0.11514239271194625,
|
|
"learning_rate": 7.816417141768284e-09,
|
|
"loss": 1.041,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 6.923273657289003,
|
|
"grad_norm": 0.11236159186101047,
|
|
"learning_rate": 7.3203263358678775e-09,
|
|
"loss": 1.0297,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 6.9258312020460355,
|
|
"grad_norm": 0.112779013609661,
|
|
"learning_rate": 6.840492995002779e-09,
|
|
"loss": 1.0177,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 6.928388746803069,
|
|
"grad_norm": 0.11154163182583252,
|
|
"learning_rate": 6.376917899832968e-09,
|
|
"loss": 1.0262,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 6.930946291560103,
|
|
"grad_norm": 0.11358295898234577,
|
|
"learning_rate": 5.929601804566254e-09,
|
|
"loss": 1.0057,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 6.9335038363171355,
|
|
"grad_norm": 0.11003717187565273,
|
|
"learning_rate": 5.498545436957159e-09,
|
|
"loss": 1.0269,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 6.936061381074169,
|
|
"grad_norm": 0.10600474645039837,
|
|
"learning_rate": 5.0837494983091425e-09,
|
|
"loss": 0.9854,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 6.938618925831202,
|
|
"grad_norm": 0.10929642667614789,
|
|
"learning_rate": 4.6852146634668304e-09,
|
|
"loss": 1.0149,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 6.9411764705882355,
|
|
"grad_norm": 0.11582392789733863,
|
|
"learning_rate": 4.302941580823783e-09,
|
|
"loss": 0.9864,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 6.943734015345268,
|
|
"grad_norm": 0.11406855862931596,
|
|
"learning_rate": 3.936930872312506e-09,
|
|
"loss": 1.0296,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 6.946291560102302,
|
|
"grad_norm": 0.11629050448797144,
|
|
"learning_rate": 3.5871831334099992e-09,
|
|
"loss": 1.0319,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 6.948849104859335,
|
|
"grad_norm": 0.11235711633426523,
|
|
"learning_rate": 3.2536989331355406e-09,
|
|
"loss": 1.0061,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 6.951406649616368,
|
|
"grad_norm": 0.11339029722347495,
|
|
"learning_rate": 2.9364788140451296e-09,
|
|
"loss": 1.0558,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 6.953964194373402,
|
|
"grad_norm": 0.1122327401431765,
|
|
"learning_rate": 2.635523292237041e-09,
|
|
"loss": 1.043,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 6.956521739130435,
|
|
"grad_norm": 0.1150922652013077,
|
|
"learning_rate": 2.3508328573462745e-09,
|
|
"loss": 1.0157,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 6.959079283887468,
|
|
"grad_norm": 0.11034749878838018,
|
|
"learning_rate": 2.082407972547884e-09,
|
|
"loss": 1.0172,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 6.961636828644501,
|
|
"grad_norm": 0.11414568906111035,
|
|
"learning_rate": 1.8302490745503166e-09,
|
|
"loss": 1.0294,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 6.964194373401535,
|
|
"grad_norm": 0.11166620944982035,
|
|
"learning_rate": 1.5943565736020739e-09,
|
|
"loss": 1.0242,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 6.966751918158568,
|
|
"grad_norm": 0.11672921275884213,
|
|
"learning_rate": 1.3747308534850512e-09,
|
|
"loss": 1.0372,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 6.969309462915601,
|
|
"grad_norm": 0.11540312400728218,
|
|
"learning_rate": 1.1713722715167575e-09,
|
|
"loss": 1.0515,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 6.971867007672635,
|
|
"grad_norm": 0.11588267312835213,
|
|
"learning_rate": 9.84281158548095e-10,
|
|
"loss": 1.0291,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 6.974424552429667,
|
|
"grad_norm": 0.11642536438528109,
|
|
"learning_rate": 8.134578189644692e-10,
|
|
"loss": 1.013,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 6.976982097186701,
|
|
"grad_norm": 0.11741237126233431,
|
|
"learning_rate": 6.589025306869002e-10,
|
|
"loss": 1.0054,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 6.979539641943734,
|
|
"grad_norm": 0.1116075879721608,
|
|
"learning_rate": 5.206155451642491e-10,
|
|
"loss": 1.0299,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 6.982097186700767,
|
|
"grad_norm": 0.11444442287287329,
|
|
"learning_rate": 3.985970873821021e-10,
|
|
"loss": 1.0413,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 6.9846547314578,
|
|
"grad_norm": 0.12160291833827606,
|
|
"learning_rate": 2.928473558583278e-10,
|
|
"loss": 1.0317,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 6.987212276214834,
|
|
"grad_norm": 0.1124635627813877,
|
|
"learning_rate": 2.033665226386372e-10,
|
|
"loss": 1.0144,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 6.989769820971867,
|
|
"grad_norm": 0.11276149081438312,
|
|
"learning_rate": 1.301547333032449e-10,
|
|
"loss": 1.0007,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 6.9923273657289,
|
|
"grad_norm": 0.10984392228143453,
|
|
"learning_rate": 7.321210696464853e-11,
|
|
"loss": 0.9763,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 6.994884910485934,
|
|
"grad_norm": 0.11019543161726779,
|
|
"learning_rate": 3.253873626429816e-11,
|
|
"loss": 1.0013,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 6.997442455242966,
|
|
"grad_norm": 0.11197749059770203,
|
|
"learning_rate": 8.134687374816708e-12,
|
|
"loss": 1.0472,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.11208987109779546,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.9774,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 2737,
|
|
"total_flos": 9969287656374272.0,
|
|
"train_loss": 1.063590354692078,
|
|
"train_runtime": 97730.0822,
|
|
"train_samples_per_second": 7.163,
|
|
"train_steps_per_second": 0.028
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 2737,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9969287656374272.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|