714 lines
17 KiB
JSON
714 lines
17 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 98.9795918367347,
|
|
"eval_steps": 500,
|
|
"global_step": 29100,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 1.0204081632653061,
|
|
"grad_norm": 1.544520378112793,
|
|
"learning_rate": 1.0170068027210885e-05,
|
|
"loss": 9.6915,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 2.0408163265306123,
|
|
"grad_norm": 1.6265058517456055,
|
|
"learning_rate": 2.0374149659863947e-05,
|
|
"loss": 8.3327,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 3.061224489795918,
|
|
"grad_norm": 2.0470693111419678,
|
|
"learning_rate": 3.0578231292517004e-05,
|
|
"loss": 7.7972,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 4.081632653061225,
|
|
"grad_norm": 1.7373660802841187,
|
|
"learning_rate": 4.078231292517007e-05,
|
|
"loss": 7.508,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 5.1020408163265305,
|
|
"grad_norm": 2.189188241958618,
|
|
"learning_rate": 5.0986394557823136e-05,
|
|
"loss": 7.2126,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 6.122448979591836,
|
|
"grad_norm": 2.1524977684020996,
|
|
"learning_rate": 6.11904761904762e-05,
|
|
"loss": 6.9194,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 7.142857142857143,
|
|
"grad_norm": 2.03401517868042,
|
|
"learning_rate": 7.139455782312926e-05,
|
|
"loss": 6.6392,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 8.16326530612245,
|
|
"grad_norm": 2.0746707916259766,
|
|
"learning_rate": 8.159863945578233e-05,
|
|
"loss": 6.3837,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 9.183673469387756,
|
|
"grad_norm": 2.1161186695098877,
|
|
"learning_rate": 9.180272108843538e-05,
|
|
"loss": 6.1311,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 10.204081632653061,
|
|
"grad_norm": 2.1764469146728516,
|
|
"learning_rate": 9.977702191987907e-05,
|
|
"loss": 5.8614,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 11.224489795918368,
|
|
"grad_norm": 2.414827823638916,
|
|
"learning_rate": 9.86432350718065e-05,
|
|
"loss": 5.5985,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 12.244897959183673,
|
|
"grad_norm": 2.5662031173706055,
|
|
"learning_rate": 9.750944822373394e-05,
|
|
"loss": 5.3029,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 13.26530612244898,
|
|
"grad_norm": 2.821119785308838,
|
|
"learning_rate": 9.637566137566139e-05,
|
|
"loss": 5.0116,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 14.285714285714286,
|
|
"grad_norm": 3.0048000812530518,
|
|
"learning_rate": 9.524187452758882e-05,
|
|
"loss": 4.7344,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 15.306122448979592,
|
|
"grad_norm": 3.242598295211792,
|
|
"learning_rate": 9.410808767951625e-05,
|
|
"loss": 4.4621,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 16.3265306122449,
|
|
"grad_norm": 3.316965103149414,
|
|
"learning_rate": 9.29743008314437e-05,
|
|
"loss": 4.2054,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 17.346938775510203,
|
|
"grad_norm": 3.7419989109039307,
|
|
"learning_rate": 9.184051398337114e-05,
|
|
"loss": 3.9517,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 18.367346938775512,
|
|
"grad_norm": 3.7479987144470215,
|
|
"learning_rate": 9.070672713529857e-05,
|
|
"loss": 3.7011,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 19.387755102040817,
|
|
"grad_norm": 3.7604148387908936,
|
|
"learning_rate": 8.9572940287226e-05,
|
|
"loss": 3.4686,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 20.408163265306122,
|
|
"grad_norm": 4.052367210388184,
|
|
"learning_rate": 8.843915343915344e-05,
|
|
"loss": 3.2285,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 21.428571428571427,
|
|
"grad_norm": 4.340782642364502,
|
|
"learning_rate": 8.730536659108089e-05,
|
|
"loss": 3.0094,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 22.448979591836736,
|
|
"grad_norm": 4.590024948120117,
|
|
"learning_rate": 8.617157974300832e-05,
|
|
"loss": 2.7901,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 23.46938775510204,
|
|
"grad_norm": 4.343008995056152,
|
|
"learning_rate": 8.503779289493575e-05,
|
|
"loss": 2.5887,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 24.489795918367346,
|
|
"grad_norm": 4.200931549072266,
|
|
"learning_rate": 8.39040060468632e-05,
|
|
"loss": 2.3804,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 25.510204081632654,
|
|
"grad_norm": 4.765750408172607,
|
|
"learning_rate": 8.277021919879064e-05,
|
|
"loss": 2.1853,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 26.53061224489796,
|
|
"grad_norm": 4.196296215057373,
|
|
"learning_rate": 8.163643235071807e-05,
|
|
"loss": 2.0026,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 27.551020408163264,
|
|
"grad_norm": 4.485163688659668,
|
|
"learning_rate": 8.05026455026455e-05,
|
|
"loss": 1.8218,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 28.571428571428573,
|
|
"grad_norm": 4.515989780426025,
|
|
"learning_rate": 7.936885865457294e-05,
|
|
"loss": 1.6588,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 29.591836734693878,
|
|
"grad_norm": 4.492111682891846,
|
|
"learning_rate": 7.823507180650039e-05,
|
|
"loss": 1.4945,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 30.612244897959183,
|
|
"grad_norm": 4.51740026473999,
|
|
"learning_rate": 7.710128495842782e-05,
|
|
"loss": 1.3481,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 31.632653061224488,
|
|
"grad_norm": 4.193362236022949,
|
|
"learning_rate": 7.596749811035526e-05,
|
|
"loss": 1.2062,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 32.6530612244898,
|
|
"grad_norm": 4.017958164215088,
|
|
"learning_rate": 7.483371126228269e-05,
|
|
"loss": 1.0725,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 33.673469387755105,
|
|
"grad_norm": 4.180546283721924,
|
|
"learning_rate": 7.369992441421014e-05,
|
|
"loss": 0.9466,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 34.69387755102041,
|
|
"grad_norm": 4.280745983123779,
|
|
"learning_rate": 7.256613756613757e-05,
|
|
"loss": 0.8416,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 35.714285714285715,
|
|
"grad_norm": 3.9538300037384033,
|
|
"learning_rate": 7.143235071806501e-05,
|
|
"loss": 0.7401,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 36.734693877551024,
|
|
"grad_norm": 4.204588890075684,
|
|
"learning_rate": 7.029856386999244e-05,
|
|
"loss": 0.6532,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 37.755102040816325,
|
|
"grad_norm": 3.8845582008361816,
|
|
"learning_rate": 6.916477702191987e-05,
|
|
"loss": 0.5701,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 38.775510204081634,
|
|
"grad_norm": 3.7283339500427246,
|
|
"learning_rate": 6.803099017384732e-05,
|
|
"loss": 0.5032,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 39.795918367346935,
|
|
"grad_norm": 3.3194797039031982,
|
|
"learning_rate": 6.689720332577476e-05,
|
|
"loss": 0.4403,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 40.816326530612244,
|
|
"grad_norm": 3.4429259300231934,
|
|
"learning_rate": 6.57634164777022e-05,
|
|
"loss": 0.3887,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 41.83673469387755,
|
|
"grad_norm": 3.080552577972412,
|
|
"learning_rate": 6.462962962962962e-05,
|
|
"loss": 0.3442,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 42.857142857142854,
|
|
"grad_norm": 3.2737112045288086,
|
|
"learning_rate": 6.349584278155707e-05,
|
|
"loss": 0.3071,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 43.87755102040816,
|
|
"grad_norm": 2.8895883560180664,
|
|
"learning_rate": 6.236205593348451e-05,
|
|
"loss": 0.275,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 44.89795918367347,
|
|
"grad_norm": 3.075352430343628,
|
|
"learning_rate": 6.122826908541194e-05,
|
|
"loss": 0.2475,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 45.91836734693877,
|
|
"grad_norm": 2.932194471359253,
|
|
"learning_rate": 6.009448223733938e-05,
|
|
"loss": 0.2239,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 46.93877551020408,
|
|
"grad_norm": 2.5952064990997314,
|
|
"learning_rate": 5.896069538926682e-05,
|
|
"loss": 0.2045,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 47.95918367346939,
|
|
"grad_norm": 2.456416606903076,
|
|
"learning_rate": 5.7826908541194255e-05,
|
|
"loss": 0.1875,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 48.97959183673469,
|
|
"grad_norm": 2.836243152618408,
|
|
"learning_rate": 5.66931216931217e-05,
|
|
"loss": 0.1717,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 50.0,
|
|
"grad_norm": 2.4769959449768066,
|
|
"learning_rate": 5.5559334845049137e-05,
|
|
"loss": 0.1582,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 51.02040816326531,
|
|
"grad_norm": 1.9502800703048706,
|
|
"learning_rate": 5.442554799697657e-05,
|
|
"loss": 0.1468,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 52.04081632653061,
|
|
"grad_norm": 2.145501136779785,
|
|
"learning_rate": 5.3291761148904005e-05,
|
|
"loss": 0.1366,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 53.06122448979592,
|
|
"grad_norm": 1.8530632257461548,
|
|
"learning_rate": 5.215797430083145e-05,
|
|
"loss": 0.1266,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 54.08163265306123,
|
|
"grad_norm": 2.0811140537261963,
|
|
"learning_rate": 5.1024187452758886e-05,
|
|
"loss": 0.1179,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 55.10204081632653,
|
|
"grad_norm": 1.8534563779830933,
|
|
"learning_rate": 4.9890400604686324e-05,
|
|
"loss": 0.1096,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 56.12244897959184,
|
|
"grad_norm": 1.7965441942214966,
|
|
"learning_rate": 4.875661375661376e-05,
|
|
"loss": 0.1032,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 57.142857142857146,
|
|
"grad_norm": 1.824494481086731,
|
|
"learning_rate": 4.76228269085412e-05,
|
|
"loss": 0.0967,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 58.16326530612245,
|
|
"grad_norm": 1.6980013847351074,
|
|
"learning_rate": 4.6489040060468636e-05,
|
|
"loss": 0.0907,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 59.183673469387756,
|
|
"grad_norm": 1.6149917840957642,
|
|
"learning_rate": 4.5355253212396074e-05,
|
|
"loss": 0.085,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 60.204081632653065,
|
|
"grad_norm": 1.788779854774475,
|
|
"learning_rate": 4.422146636432351e-05,
|
|
"loss": 0.0804,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 61.224489795918366,
|
|
"grad_norm": 1.7672044038772583,
|
|
"learning_rate": 4.308767951625094e-05,
|
|
"loss": 0.0759,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 62.244897959183675,
|
|
"grad_norm": 1.5566641092300415,
|
|
"learning_rate": 4.1953892668178386e-05,
|
|
"loss": 0.0722,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 63.265306122448976,
|
|
"grad_norm": 1.291110873222351,
|
|
"learning_rate": 4.082010582010582e-05,
|
|
"loss": 0.0678,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 64.28571428571429,
|
|
"grad_norm": 1.596009373664856,
|
|
"learning_rate": 3.968631897203326e-05,
|
|
"loss": 0.0639,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 65.3061224489796,
|
|
"grad_norm": 1.4961538314819336,
|
|
"learning_rate": 3.85525321239607e-05,
|
|
"loss": 0.0605,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 66.3265306122449,
|
|
"grad_norm": 1.383008599281311,
|
|
"learning_rate": 3.7418745275888136e-05,
|
|
"loss": 0.0571,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 67.34693877551021,
|
|
"grad_norm": 1.1882243156433105,
|
|
"learning_rate": 3.628495842781557e-05,
|
|
"loss": 0.0541,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 68.36734693877551,
|
|
"grad_norm": 1.4175117015838623,
|
|
"learning_rate": 3.515117157974301e-05,
|
|
"loss": 0.0515,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 69.38775510204081,
|
|
"grad_norm": 1.412561058998108,
|
|
"learning_rate": 3.401738473167045e-05,
|
|
"loss": 0.0492,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 70.40816326530613,
|
|
"grad_norm": 1.358535885810852,
|
|
"learning_rate": 3.2883597883597886e-05,
|
|
"loss": 0.0462,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 71.42857142857143,
|
|
"grad_norm": 1.338392972946167,
|
|
"learning_rate": 3.174981103552532e-05,
|
|
"loss": 0.0443,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 72.44897959183673,
|
|
"grad_norm": 1.3225773572921753,
|
|
"learning_rate": 3.061602418745276e-05,
|
|
"loss": 0.0419,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 73.46938775510205,
|
|
"grad_norm": 1.1727213859558105,
|
|
"learning_rate": 2.9482237339380198e-05,
|
|
"loss": 0.0394,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 74.48979591836735,
|
|
"grad_norm": 1.25161612033844,
|
|
"learning_rate": 2.834845049130764e-05,
|
|
"loss": 0.0375,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 75.51020408163265,
|
|
"grad_norm": 1.240116834640503,
|
|
"learning_rate": 2.7214663643235073e-05,
|
|
"loss": 0.0358,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 76.53061224489795,
|
|
"grad_norm": 0.9712527394294739,
|
|
"learning_rate": 2.6080876795162514e-05,
|
|
"loss": 0.0339,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 77.55102040816327,
|
|
"grad_norm": 1.147048830986023,
|
|
"learning_rate": 2.4947089947089948e-05,
|
|
"loss": 0.0323,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 78.57142857142857,
|
|
"grad_norm": 1.0916506052017212,
|
|
"learning_rate": 2.3813303099017385e-05,
|
|
"loss": 0.0308,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 79.59183673469387,
|
|
"grad_norm": 1.128098964691162,
|
|
"learning_rate": 2.2679516250944823e-05,
|
|
"loss": 0.0294,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 80.61224489795919,
|
|
"grad_norm": 1.0495482683181763,
|
|
"learning_rate": 2.154572940287226e-05,
|
|
"loss": 0.0276,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 81.63265306122449,
|
|
"grad_norm": 0.8648446798324585,
|
|
"learning_rate": 2.0411942554799698e-05,
|
|
"loss": 0.0261,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 82.65306122448979,
|
|
"grad_norm": 1.1346194744110107,
|
|
"learning_rate": 1.9278155706727135e-05,
|
|
"loss": 0.0245,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 83.6734693877551,
|
|
"grad_norm": 0.9076672196388245,
|
|
"learning_rate": 1.8144368858654572e-05,
|
|
"loss": 0.0237,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 84.6938775510204,
|
|
"grad_norm": 1.0035544633865356,
|
|
"learning_rate": 1.701058201058201e-05,
|
|
"loss": 0.0222,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 85.71428571428571,
|
|
"grad_norm": 0.769279956817627,
|
|
"learning_rate": 1.587679516250945e-05,
|
|
"loss": 0.0211,
|
|
"step": 25200
|
|
},
|
|
{
|
|
"epoch": 86.73469387755102,
|
|
"grad_norm": 0.9665892124176025,
|
|
"learning_rate": 1.4743008314436888e-05,
|
|
"loss": 0.0202,
|
|
"step": 25500
|
|
},
|
|
{
|
|
"epoch": 87.75510204081633,
|
|
"grad_norm": 0.9066174626350403,
|
|
"learning_rate": 1.3609221466364324e-05,
|
|
"loss": 0.0193,
|
|
"step": 25800
|
|
},
|
|
{
|
|
"epoch": 88.77551020408163,
|
|
"grad_norm": 0.9459673166275024,
|
|
"learning_rate": 1.2475434618291761e-05,
|
|
"loss": 0.0183,
|
|
"step": 26100
|
|
},
|
|
{
|
|
"epoch": 89.79591836734694,
|
|
"grad_norm": 0.8062217235565186,
|
|
"learning_rate": 1.1341647770219199e-05,
|
|
"loss": 0.0174,
|
|
"step": 26400
|
|
},
|
|
{
|
|
"epoch": 90.81632653061224,
|
|
"grad_norm": 0.8470116853713989,
|
|
"learning_rate": 1.0207860922146636e-05,
|
|
"loss": 0.0167,
|
|
"step": 26700
|
|
},
|
|
{
|
|
"epoch": 91.83673469387755,
|
|
"grad_norm": 0.7526578903198242,
|
|
"learning_rate": 9.074074074074075e-06,
|
|
"loss": 0.016,
|
|
"step": 27000
|
|
},
|
|
{
|
|
"epoch": 92.85714285714286,
|
|
"grad_norm": 0.6859294176101685,
|
|
"learning_rate": 7.940287226001513e-06,
|
|
"loss": 0.0154,
|
|
"step": 27300
|
|
},
|
|
{
|
|
"epoch": 93.87755102040816,
|
|
"grad_norm": 0.574286937713623,
|
|
"learning_rate": 6.8065003779289495e-06,
|
|
"loss": 0.0147,
|
|
"step": 27600
|
|
},
|
|
{
|
|
"epoch": 94.89795918367346,
|
|
"grad_norm": 0.9053287506103516,
|
|
"learning_rate": 5.672713529856388e-06,
|
|
"loss": 0.0143,
|
|
"step": 27900
|
|
},
|
|
{
|
|
"epoch": 95.91836734693878,
|
|
"grad_norm": 0.5810430645942688,
|
|
"learning_rate": 4.538926681783825e-06,
|
|
"loss": 0.0137,
|
|
"step": 28200
|
|
},
|
|
{
|
|
"epoch": 96.93877551020408,
|
|
"grad_norm": 0.5778042674064636,
|
|
"learning_rate": 3.4051398337112627e-06,
|
|
"loss": 0.0132,
|
|
"step": 28500
|
|
},
|
|
{
|
|
"epoch": 97.95918367346938,
|
|
"grad_norm": 0.5646163821220398,
|
|
"learning_rate": 2.2713529856387e-06,
|
|
"loss": 0.0129,
|
|
"step": 28800
|
|
},
|
|
{
|
|
"epoch": 98.9795918367347,
|
|
"grad_norm": 0.6283496022224426,
|
|
"learning_rate": 1.1375661375661376e-06,
|
|
"loss": 0.0126,
|
|
"step": 29100
|
|
}
|
|
],
|
|
"logging_steps": 300,
|
|
"max_steps": 29400,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 100,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 6.08287850496e+16,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|