800 lines
19 KiB
JSON
800 lines
19 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1088,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.009191176470588236,
|
|
"grad_norm": 11.266937255859375,
|
|
"learning_rate": 8.256880733944956e-07,
|
|
"loss": 0.578,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.01838235294117647,
|
|
"grad_norm": 4.71966552734375,
|
|
"learning_rate": 1.743119266055046e-06,
|
|
"loss": 0.4698,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.027573529411764705,
|
|
"grad_norm": 2.152991771697998,
|
|
"learning_rate": 2.6605504587155968e-06,
|
|
"loss": 0.254,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.03676470588235294,
|
|
"grad_norm": 1.5794391632080078,
|
|
"learning_rate": 3.5779816513761473e-06,
|
|
"loss": 0.1899,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.04595588235294118,
|
|
"grad_norm": 2.2544736862182617,
|
|
"learning_rate": 4.4954128440366975e-06,
|
|
"loss": 0.1712,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.05514705882352941,
|
|
"grad_norm": 1.6429979801177979,
|
|
"learning_rate": 5.412844036697248e-06,
|
|
"loss": 0.1554,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.06433823529411764,
|
|
"grad_norm": 1.428564190864563,
|
|
"learning_rate": 6.330275229357799e-06,
|
|
"loss": 0.1509,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.07352941176470588,
|
|
"grad_norm": 1.1606788635253906,
|
|
"learning_rate": 7.247706422018349e-06,
|
|
"loss": 0.144,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.08272058823529412,
|
|
"grad_norm": 1.4222913980484009,
|
|
"learning_rate": 8.1651376146789e-06,
|
|
"loss": 0.1377,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.09191176470588236,
|
|
"grad_norm": 1.059951663017273,
|
|
"learning_rate": 9.08256880733945e-06,
|
|
"loss": 0.128,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.10110294117647059,
|
|
"grad_norm": 1.0378493070602417,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.1237,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.11029411764705882,
|
|
"grad_norm": 1.9791302680969238,
|
|
"learning_rate": 9.99742583072674e-06,
|
|
"loss": 0.1317,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.11948529411764706,
|
|
"grad_norm": 1.28194260597229,
|
|
"learning_rate": 9.98970597344593e-06,
|
|
"loss": 0.1258,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.12867647058823528,
|
|
"grad_norm": 1.228013515472412,
|
|
"learning_rate": 9.976848377045343e-06,
|
|
"loss": 0.1259,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.13786764705882354,
|
|
"grad_norm": 0.9104840755462646,
|
|
"learning_rate": 9.958866280576803e-06,
|
|
"loss": 0.119,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.14705882352941177,
|
|
"grad_norm": 0.957346498966217,
|
|
"learning_rate": 9.935778199624394e-06,
|
|
"loss": 0.1193,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.15625,
|
|
"grad_norm": 1.0997158288955688,
|
|
"learning_rate": 9.90760790723954e-06,
|
|
"loss": 0.1159,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.16544117647058823,
|
|
"grad_norm": 1.1610311269760132,
|
|
"learning_rate": 9.874384409462673e-06,
|
|
"loss": 0.1198,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.17463235294117646,
|
|
"grad_norm": 0.9462277889251709,
|
|
"learning_rate": 9.836141915456646e-06,
|
|
"loss": 0.1109,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.18382352941176472,
|
|
"grad_norm": 0.793471097946167,
|
|
"learning_rate": 9.792919802282656e-06,
|
|
"loss": 0.1158,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.19301470588235295,
|
|
"grad_norm": 1.2811907529830933,
|
|
"learning_rate": 9.744762574354967e-06,
|
|
"loss": 0.1148,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.20220588235294118,
|
|
"grad_norm": 0.8056549429893494,
|
|
"learning_rate": 9.691719817616148e-06,
|
|
"loss": 0.1054,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.2113970588235294,
|
|
"grad_norm": 0.9176375269889832,
|
|
"learning_rate": 9.633846148480024e-06,
|
|
"loss": 0.1097,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.22058823529411764,
|
|
"grad_norm": 0.8464528918266296,
|
|
"learning_rate": 9.571201157594925e-06,
|
|
"loss": 0.1055,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.22977941176470587,
|
|
"grad_norm": 0.8142296671867371,
|
|
"learning_rate": 9.503849348485112e-06,
|
|
"loss": 0.1046,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.23897058823529413,
|
|
"grad_norm": 0.8536105751991272,
|
|
"learning_rate": 9.431860071133592e-06,
|
|
"loss": 0.0973,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.24816176470588236,
|
|
"grad_norm": 1.1437208652496338,
|
|
"learning_rate": 9.355307450574666e-06,
|
|
"loss": 0.1032,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.25735294117647056,
|
|
"grad_norm": 1.4142391681671143,
|
|
"learning_rate": 9.27427031056979e-06,
|
|
"loss": 0.1057,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.2665441176470588,
|
|
"grad_norm": 0.7978008389472961,
|
|
"learning_rate": 9.188832092445281e-06,
|
|
"loss": 0.105,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.2757352941176471,
|
|
"grad_norm": 1.3588639497756958,
|
|
"learning_rate": 9.09908076917548e-06,
|
|
"loss": 0.1048,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.2849264705882353,
|
|
"grad_norm": 0.6924364566802979,
|
|
"learning_rate": 9.00510875479983e-06,
|
|
"loss": 0.1001,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 0.7341586947441101,
|
|
"learning_rate": 8.907012809267107e-06,
|
|
"loss": 0.1045,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.30330882352941174,
|
|
"grad_norm": 0.703032910823822,
|
|
"learning_rate": 8.804893938804839e-06,
|
|
"loss": 0.1029,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 0.8211079239845276,
|
|
"learning_rate": 8.698857291916456e-06,
|
|
"loss": 0.1,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.32169117647058826,
|
|
"grad_norm": 0.7473457455635071,
|
|
"learning_rate": 8.58901205111326e-06,
|
|
"loss": 0.0923,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.33088235294117646,
|
|
"grad_norm": 1.1483430862426758,
|
|
"learning_rate": 8.475471320492728e-06,
|
|
"loss": 0.1128,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.3400735294117647,
|
|
"grad_norm": 0.8971941471099854,
|
|
"learning_rate": 8.35835200927887e-06,
|
|
"loss": 0.1044,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.3492647058823529,
|
|
"grad_norm": 0.7846776247024536,
|
|
"learning_rate": 8.237774711444575e-06,
|
|
"loss": 0.0983,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.3584558823529412,
|
|
"grad_norm": 0.7909674644470215,
|
|
"learning_rate": 8.113863581539905e-06,
|
|
"loss": 0.0908,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.36764705882352944,
|
|
"grad_norm": 0.8026263117790222,
|
|
"learning_rate": 7.986746206854143e-06,
|
|
"loss": 0.0939,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.37683823529411764,
|
|
"grad_norm": 0.6986457705497742,
|
|
"learning_rate": 7.856553476043294e-06,
|
|
"loss": 0.0967,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.3860294117647059,
|
|
"grad_norm": 0.7079197764396667,
|
|
"learning_rate": 7.723419444358261e-06,
|
|
"loss": 0.0953,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.3952205882352941,
|
|
"grad_norm": 1.0171252489089966,
|
|
"learning_rate": 7.5874811956124805e-06,
|
|
"loss": 0.0928,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.40441176470588236,
|
|
"grad_norm": 0.8423835635185242,
|
|
"learning_rate": 7.4488787010311425e-06,
|
|
"loss": 0.0938,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.41360294117647056,
|
|
"grad_norm": 1.0884760618209839,
|
|
"learning_rate": 7.3077546751273494e-06,
|
|
"loss": 0.0925,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.4227941176470588,
|
|
"grad_norm": 0.7641892433166504,
|
|
"learning_rate": 7.164254428753581e-06,
|
|
"loss": 0.0953,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.4319852941176471,
|
|
"grad_norm": 0.8474388122558594,
|
|
"learning_rate": 7.018525719479805e-06,
|
|
"loss": 0.0913,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.4411764705882353,
|
|
"grad_norm": 0.8151172399520874,
|
|
"learning_rate": 6.870718599452279e-06,
|
|
"loss": 0.0974,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.45036764705882354,
|
|
"grad_norm": 0.5582044124603271,
|
|
"learning_rate": 6.7209852608897005e-06,
|
|
"loss": 0.0901,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.45955882352941174,
|
|
"grad_norm": 0.9465530514717102,
|
|
"learning_rate": 6.569479879375795e-06,
|
|
"loss": 0.0936,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.46875,
|
|
"grad_norm": 0.7654430270195007,
|
|
"learning_rate": 6.416358455109695e-06,
|
|
"loss": 0.0933,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.47794117647058826,
|
|
"grad_norm": 0.7761212587356567,
|
|
"learning_rate": 6.261778652277565e-06,
|
|
"loss": 0.093,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.48713235294117646,
|
|
"grad_norm": 0.5924690365791321,
|
|
"learning_rate": 6.105899636710895e-06,
|
|
"loss": 0.0879,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.4963235294117647,
|
|
"grad_norm": 0.591857373714447,
|
|
"learning_rate": 5.948881911998572e-06,
|
|
"loss": 0.0904,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.5055147058823529,
|
|
"grad_norm": 0.7316629886627197,
|
|
"learning_rate": 5.790887154221521e-06,
|
|
"loss": 0.0883,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.5147058823529411,
|
|
"grad_norm": 0.8223432302474976,
|
|
"learning_rate": 5.632078045480065e-06,
|
|
"loss": 0.0883,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.5238970588235294,
|
|
"grad_norm": 0.7148771286010742,
|
|
"learning_rate": 5.472618106385415e-06,
|
|
"loss": 0.0814,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.5330882352941176,
|
|
"grad_norm": 0.6548059582710266,
|
|
"learning_rate": 5.31267152768779e-06,
|
|
"loss": 0.0884,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.5422794117647058,
|
|
"grad_norm": 0.7036318778991699,
|
|
"learning_rate": 5.152403001214483e-06,
|
|
"loss": 0.0882,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.5514705882352942,
|
|
"grad_norm": 0.8155568242073059,
|
|
"learning_rate": 4.991977550292028e-06,
|
|
"loss": 0.0873,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.5606617647058824,
|
|
"grad_norm": 0.654234766960144,
|
|
"learning_rate": 4.831560359826985e-06,
|
|
"loss": 0.086,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.5698529411764706,
|
|
"grad_norm": 0.8868378400802612,
|
|
"learning_rate": 4.671316606220394e-06,
|
|
"loss": 0.0905,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.5790441176470589,
|
|
"grad_norm": 0.5300816297531128,
|
|
"learning_rate": 4.511411287290964e-06,
|
|
"loss": 0.0881,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.9155387282371521,
|
|
"learning_rate": 4.35200905238214e-06,
|
|
"loss": 0.088,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.5974264705882353,
|
|
"grad_norm": 0.7309725880622864,
|
|
"learning_rate": 4.193274032828e-06,
|
|
"loss": 0.0873,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.6066176470588235,
|
|
"grad_norm": 0.5966044664382935,
|
|
"learning_rate": 4.035369672952516e-06,
|
|
"loss": 0.0878,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.6158088235294118,
|
|
"grad_norm": 0.924511194229126,
|
|
"learning_rate": 3.8784585617762084e-06,
|
|
"loss": 0.0849,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 1.1838688850402832,
|
|
"learning_rate": 3.7227022656034873e-06,
|
|
"loss": 0.0867,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.6341911764705882,
|
|
"grad_norm": 0.7663735747337341,
|
|
"learning_rate": 3.568261161663042e-06,
|
|
"loss": 0.0896,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.6433823529411765,
|
|
"grad_norm": 0.7709589600563049,
|
|
"learning_rate": 3.4152942729725896e-06,
|
|
"loss": 0.0893,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.6525735294117647,
|
|
"grad_norm": 0.7545693516731262,
|
|
"learning_rate": 3.263959104598009e-06,
|
|
"loss": 0.0861,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.6617647058823529,
|
|
"grad_norm": 0.6413795948028564,
|
|
"learning_rate": 3.114411481475455e-06,
|
|
"loss": 0.0776,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.6709558823529411,
|
|
"grad_norm": 0.7467756271362305,
|
|
"learning_rate": 2.966805387963463e-06,
|
|
"loss": 0.084,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.6801470588235294,
|
|
"grad_norm": 0.6672917008399963,
|
|
"learning_rate": 2.821292809290217e-06,
|
|
"loss": 0.0834,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.6893382352941176,
|
|
"grad_norm": 0.7798132300376892,
|
|
"learning_rate": 2.678023575059274e-06,
|
|
"loss": 0.083,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.6985294117647058,
|
|
"grad_norm": 0.729383647441864,
|
|
"learning_rate": 2.5371452049748603e-06,
|
|
"loss": 0.0842,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.7077205882352942,
|
|
"grad_norm": 0.8981873989105225,
|
|
"learning_rate": 2.3988027569455895e-06,
|
|
"loss": 0.0874,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.7169117647058824,
|
|
"grad_norm": 0.7742981314659119,
|
|
"learning_rate": 2.2631386777230248e-06,
|
|
"loss": 0.0815,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.7261029411764706,
|
|
"grad_norm": 0.6819539070129395,
|
|
"learning_rate": 2.130292656228856e-06,
|
|
"loss": 0.0814,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.7352941176470589,
|
|
"grad_norm": 0.7190724611282349,
|
|
"learning_rate": 2.0004014797217207e-06,
|
|
"loss": 0.0826,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.7444852941176471,
|
|
"grad_norm": 0.674633800983429,
|
|
"learning_rate": 1.873598892951795e-06,
|
|
"loss": 0.0851,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.7536764705882353,
|
|
"grad_norm": 0.7690839767456055,
|
|
"learning_rate": 1.7500154604481312e-06,
|
|
"loss": 0.0804,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.7628676470588235,
|
|
"grad_norm": 0.6933445930480957,
|
|
"learning_rate": 1.629778432080586e-06,
|
|
"loss": 0.084,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.7720588235294118,
|
|
"grad_norm": 0.944789707660675,
|
|
"learning_rate": 1.513011612034726e-06,
|
|
"loss": 0.0776,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.78125,
|
|
"grad_norm": 0.645876944065094,
|
|
"learning_rate": 1.3998352313346768e-06,
|
|
"loss": 0.0797,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.7904411764705882,
|
|
"grad_norm": 0.72840416431427,
|
|
"learning_rate": 1.2903658240450989e-06,
|
|
"loss": 0.0792,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.7996323529411765,
|
|
"grad_norm": 0.8772525191307068,
|
|
"learning_rate": 1.184716107279837e-06,
|
|
"loss": 0.0804,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.8088235294117647,
|
|
"grad_norm": 0.8419274091720581,
|
|
"learning_rate": 1.0829948651407374e-06,
|
|
"loss": 0.0817,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.8180147058823529,
|
|
"grad_norm": 0.6790868043899536,
|
|
"learning_rate": 9.85306836706184e-07,
|
|
"loss": 0.0798,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.8272058823529411,
|
|
"grad_norm": 0.7450520992279053,
|
|
"learning_rate": 8.917526081846411e-07,
|
|
"loss": 0.0772,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.8363970588235294,
|
|
"grad_norm": 0.9222913980484009,
|
|
"learning_rate": 8.024285093442874e-07,
|
|
"loss": 0.0796,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.8455882352941176,
|
|
"grad_norm": 0.5793068408966064,
|
|
"learning_rate": 7.17426514325359e-07,
|
|
"loss": 0.0754,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.8547794117647058,
|
|
"grad_norm": 0.8552721738815308,
|
|
"learning_rate": 6.36834146937354e-07,
|
|
"loss": 0.0816,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.8639705882352942,
|
|
"grad_norm": 0.6834543943405151,
|
|
"learning_rate": 5.607343905385898e-07,
|
|
"loss": 0.077,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.8731617647058824,
|
|
"grad_norm": 0.8566415309906006,
|
|
"learning_rate": 4.892056025909148e-07,
|
|
"loss": 0.0788,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 0.6394233107566833,
|
|
"learning_rate": 4.2232143397756607e-07,
|
|
"loss": 0.0761,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.8915441176470589,
|
|
"grad_norm": 0.6848531365394592,
|
|
"learning_rate": 3.6015075316722605e-07,
|
|
"loss": 0.0762,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.9007352941176471,
|
|
"grad_norm": 0.7035109996795654,
|
|
"learning_rate": 3.02757575302392e-07,
|
|
"loss": 0.0827,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.9099264705882353,
|
|
"grad_norm": 1.1025725603103638,
|
|
"learning_rate": 2.5020099628504603e-07,
|
|
"loss": 0.0786,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.9191176470588235,
|
|
"grad_norm": 0.9067655801773071,
|
|
"learning_rate": 2.0253513192751374e-07,
|
|
"loss": 0.0713,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.9283088235294118,
|
|
"grad_norm": 2.0745654106140137,
|
|
"learning_rate": 1.5980906223115933e-07,
|
|
"loss": 0.0823,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 0.77626633644104,
|
|
"learning_rate": 1.220667808502951e-07,
|
|
"loss": 0.0797,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.9466911764705882,
|
|
"grad_norm": 0.6542201042175293,
|
|
"learning_rate": 8.934714979333403e-08,
|
|
"loss": 0.0813,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.9558823529411765,
|
|
"grad_norm": 0.7697402238845825,
|
|
"learning_rate": 6.168385940783727e-08,
|
|
"loss": 0.0745,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.9650735294117647,
|
|
"grad_norm": 1.0100551843643188,
|
|
"learning_rate": 3.910539369064603e-08,
|
|
"loss": 0.0794,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.9742647058823529,
|
|
"grad_norm": 0.5885272026062012,
|
|
"learning_rate": 2.1635000958836748e-08,
|
|
"loss": 0.0788,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.9834558823529411,
|
|
"grad_norm": 0.6646335124969482,
|
|
"learning_rate": 9.290669911672934e-09,
|
|
"loss": 0.0792,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.9926470588235294,
|
|
"grad_norm": 0.5683983564376831,
|
|
"learning_rate": 2.085111108227067e-09,
|
|
"loss": 0.0762,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 1088,
|
|
"total_flos": 1.9483786471612088e+18,
|
|
"train_loss": 0.10519510776023655,
|
|
"train_runtime": 10705.8335,
|
|
"train_samples_per_second": 3.25,
|
|
"train_steps_per_second": 0.102
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1088,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.9483786471612088e+18,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|