1701 lines
44 KiB
JSON
1701 lines
44 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 3.0,
|
||
|
|
"eval_steps": 50.0,
|
||
|
|
"global_step": 1185,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0025324469768914213,
|
||
|
|
"grad_norm": 6.541903357166615,
|
||
|
|
"learning_rate": 1.6666666666666668e-07,
|
||
|
|
"loss": 1.6022449731826782,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.012662234884457106,
|
||
|
|
"grad_norm": 6.812644002932686,
|
||
|
|
"learning_rate": 8.333333333333333e-07,
|
||
|
|
"loss": 1.5844556093215942,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.025324469768914212,
|
||
|
|
"grad_norm": 5.463309643291768,
|
||
|
|
"learning_rate": 1.6666666666666667e-06,
|
||
|
|
"loss": 1.5758234024047852,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03798670465337132,
|
||
|
|
"grad_norm": 3.5947186062437324,
|
||
|
|
"learning_rate": 2.5e-06,
|
||
|
|
"loss": 1.5148856163024902,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.050648939537828425,
|
||
|
|
"grad_norm": 2.022183241828244,
|
||
|
|
"learning_rate": 3.3333333333333333e-06,
|
||
|
|
"loss": 1.4563226699829102,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06331117442228554,
|
||
|
|
"grad_norm": 2.568206156447999,
|
||
|
|
"learning_rate": 4.166666666666667e-06,
|
||
|
|
"loss": 1.4071508407592774,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07597340930674264,
|
||
|
|
"grad_norm": 1.3172558883152181,
|
||
|
|
"learning_rate": 5e-06,
|
||
|
|
"loss": 1.361931037902832,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08863564419119975,
|
||
|
|
"grad_norm": 1.289611427468001,
|
||
|
|
"learning_rate": 5.833333333333334e-06,
|
||
|
|
"loss": 1.3146369934082032,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10129787907565685,
|
||
|
|
"grad_norm": 1.0096819520657572,
|
||
|
|
"learning_rate": 6.666666666666667e-06,
|
||
|
|
"loss": 1.2890718460083008,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11396011396011396,
|
||
|
|
"grad_norm": 0.9312368002064222,
|
||
|
|
"learning_rate": 7.500000000000001e-06,
|
||
|
|
"loss": 1.262472152709961,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12662234884457108,
|
||
|
|
"grad_norm": 0.9372677018897868,
|
||
|
|
"learning_rate": 8.333333333333334e-06,
|
||
|
|
"loss": 1.2449541091918945,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13928458372902816,
|
||
|
|
"grad_norm": 1.156336182386965,
|
||
|
|
"learning_rate": 9.166666666666666e-06,
|
||
|
|
"loss": 1.23805570602417,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15194681861348527,
|
||
|
|
"grad_norm": 1.2881982404625736,
|
||
|
|
"learning_rate": 1e-05,
|
||
|
|
"loss": 1.2132294654846192,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1646090534979424,
|
||
|
|
"grad_norm": 1.0254220781070695,
|
||
|
|
"learning_rate": 9.999512620046523e-06,
|
||
|
|
"loss": 1.220973587036133,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1772712883823995,
|
||
|
|
"grad_norm": 0.9489950684909466,
|
||
|
|
"learning_rate": 9.998050575201772e-06,
|
||
|
|
"loss": 1.2019853591918945,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1899335232668566,
|
||
|
|
"grad_norm": 0.9316575478826806,
|
||
|
|
"learning_rate": 9.995614150494293e-06,
|
||
|
|
"loss": 1.2073640823364258,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2025957581513137,
|
||
|
|
"grad_norm": 1.0042469325935621,
|
||
|
|
"learning_rate": 9.992203820909906e-06,
|
||
|
|
"loss": 1.1844447135925293,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2152579930357708,
|
||
|
|
"grad_norm": 0.9710381702713043,
|
||
|
|
"learning_rate": 9.987820251299121e-06,
|
||
|
|
"loss": 1.1868626594543457,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22792022792022792,
|
||
|
|
"grad_norm": 0.9015713419278082,
|
||
|
|
"learning_rate": 9.982464296247523e-06,
|
||
|
|
"loss": 1.16792631149292,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24058246280468504,
|
||
|
|
"grad_norm": 0.9242205484551671,
|
||
|
|
"learning_rate": 9.976136999909156e-06,
|
||
|
|
"loss": 1.1806648254394532,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25324469768914215,
|
||
|
|
"grad_norm": 0.8421714973436404,
|
||
|
|
"learning_rate": 9.968839595802982e-06,
|
||
|
|
"loss": 1.1688653945922851,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26590693257359926,
|
||
|
|
"grad_norm": 0.9053511703432988,
|
||
|
|
"learning_rate": 9.960573506572391e-06,
|
||
|
|
"loss": 1.1603254318237304,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2785691674580563,
|
||
|
|
"grad_norm": 0.8815755237366663,
|
||
|
|
"learning_rate": 9.951340343707852e-06,
|
||
|
|
"loss": 1.1436431884765625,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29123140234251343,
|
||
|
|
"grad_norm": 0.9133167544949871,
|
||
|
|
"learning_rate": 9.941141907232766e-06,
|
||
|
|
"loss": 1.1711238861083983,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30389363722697055,
|
||
|
|
"grad_norm": 0.9280708661664501,
|
||
|
|
"learning_rate": 9.929980185352525e-06,
|
||
|
|
"loss": 1.1607641220092773,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31655587211142766,
|
||
|
|
"grad_norm": 0.8789051540869617,
|
||
|
|
"learning_rate": 9.91785735406693e-06,
|
||
|
|
"loss": 1.1655372619628905,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3292181069958848,
|
||
|
|
"grad_norm": 0.9387606971380588,
|
||
|
|
"learning_rate": 9.904775776745959e-06,
|
||
|
|
"loss": 1.1415754318237306,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3418803418803419,
|
||
|
|
"grad_norm": 0.8962535961715238,
|
||
|
|
"learning_rate": 9.890738003669029e-06,
|
||
|
|
"loss": 1.141004753112793,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.354542576764799,
|
||
|
|
"grad_norm": 0.8628618510513137,
|
||
|
|
"learning_rate": 9.875746771527817e-06,
|
||
|
|
"loss": 1.1703954696655274,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3672048116492561,
|
||
|
|
"grad_norm": 0.901181222022341,
|
||
|
|
"learning_rate": 9.859805002892733e-06,
|
||
|
|
"loss": 1.1528019905090332,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3798670465337132,
|
||
|
|
"grad_norm": 0.8630918009712893,
|
||
|
|
"learning_rate": 9.842915805643156e-06,
|
||
|
|
"loss": 1.1367189407348632,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3925292814181703,
|
||
|
|
"grad_norm": 0.8702195806012554,
|
||
|
|
"learning_rate": 9.825082472361558e-06,
|
||
|
|
"loss": 1.1533798217773437,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4051915163026274,
|
||
|
|
"grad_norm": 0.8708614692916694,
|
||
|
|
"learning_rate": 9.806308479691595e-06,
|
||
|
|
"loss": 1.158640480041504,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4178537511870845,
|
||
|
|
"grad_norm": 0.8793848769376316,
|
||
|
|
"learning_rate": 9.786597487660336e-06,
|
||
|
|
"loss": 1.1480545043945312,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4305159860715416,
|
||
|
|
"grad_norm": 0.8658001687150836,
|
||
|
|
"learning_rate": 9.765953338964736e-06,
|
||
|
|
"loss": 1.1336278915405273,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44317822095599874,
|
||
|
|
"grad_norm": 0.8569493052828222,
|
||
|
|
"learning_rate": 9.744380058222483e-06,
|
||
|
|
"loss": 1.1366922378540039,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45584045584045585,
|
||
|
|
"grad_norm": 0.8658238768368638,
|
||
|
|
"learning_rate": 9.721881851187406e-06,
|
||
|
|
"loss": 1.1221330642700196,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46850269072491296,
|
||
|
|
"grad_norm": 0.8315025062463812,
|
||
|
|
"learning_rate": 9.698463103929542e-06,
|
||
|
|
"loss": 1.137201690673828,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4811649256093701,
|
||
|
|
"grad_norm": 0.8646733066379476,
|
||
|
|
"learning_rate": 9.674128381980073e-06,
|
||
|
|
"loss": 1.1246437072753905,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49382716049382713,
|
||
|
|
"grad_norm": 0.9329613102085004,
|
||
|
|
"learning_rate": 9.648882429441258e-06,
|
||
|
|
"loss": 1.1196226119995116,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5064893953782843,
|
||
|
|
"grad_norm": 0.8893896484251661,
|
||
|
|
"learning_rate": 9.622730168061568e-06,
|
||
|
|
"loss": 1.1334550857543946,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5191516302627414,
|
||
|
|
"grad_norm": 0.912333387639604,
|
||
|
|
"learning_rate": 9.595676696276173e-06,
|
||
|
|
"loss": 1.1253994941711425,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5318138651471985,
|
||
|
|
"grad_norm": 0.982396926968246,
|
||
|
|
"learning_rate": 9.567727288213005e-06,
|
||
|
|
"loss": 1.1222535133361817,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5444761000316556,
|
||
|
|
"grad_norm": 0.885141191565451,
|
||
|
|
"learning_rate": 9.538887392664544e-06,
|
||
|
|
"loss": 1.1143704414367677,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5571383349161126,
|
||
|
|
"grad_norm": 0.840306231211871,
|
||
|
|
"learning_rate": 9.50916263202557e-06,
|
||
|
|
"loss": 1.1145578384399415,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5698005698005698,
|
||
|
|
"grad_norm": 0.873418768799577,
|
||
|
|
"learning_rate": 9.478558801197065e-06,
|
||
|
|
"loss": 1.1184951782226562,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5824628046850269,
|
||
|
|
"grad_norm": 0.8644731775393019,
|
||
|
|
"learning_rate": 9.44708186645649e-06,
|
||
|
|
"loss": 1.1118096351623534,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.595125039569484,
|
||
|
|
"grad_norm": 0.8383543019686877,
|
||
|
|
"learning_rate": 9.414737964294636e-06,
|
||
|
|
"loss": 1.1120855331420898,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6077872744539411,
|
||
|
|
"grad_norm": 0.8339381439594867,
|
||
|
|
"learning_rate": 9.381533400219319e-06,
|
||
|
|
"loss": 1.0976166725158691,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6204495093383983,
|
||
|
|
"grad_norm": 1.2527861157729694,
|
||
|
|
"learning_rate": 9.347474647526095e-06,
|
||
|
|
"loss": 1.1195283889770509,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6331117442228553,
|
||
|
|
"grad_norm": 0.896892265554505,
|
||
|
|
"learning_rate": 9.312568346036288e-06,
|
||
|
|
"loss": 1.1280832290649414,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6457739791073125,
|
||
|
|
"grad_norm": 0.863217024084411,
|
||
|
|
"learning_rate": 9.276821300802535e-06,
|
||
|
|
"loss": 1.1169985771179198,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6584362139917695,
|
||
|
|
"grad_norm": 0.8613522109819245,
|
||
|
|
"learning_rate": 9.24024048078213e-06,
|
||
|
|
"loss": 1.110457420349121,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6710984488762266,
|
||
|
|
"grad_norm": 0.8269568651408957,
|
||
|
|
"learning_rate": 9.202833017478421e-06,
|
||
|
|
"loss": 1.1079233169555665,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6837606837606838,
|
||
|
|
"grad_norm": 0.9106153459573166,
|
||
|
|
"learning_rate": 9.164606203550498e-06,
|
||
|
|
"loss": 1.115132713317871,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6964229186451408,
|
||
|
|
"grad_norm": 0.8475270076896408,
|
||
|
|
"learning_rate": 9.125567491391476e-06,
|
||
|
|
"loss": 1.114927101135254,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.709085153529598,
|
||
|
|
"grad_norm": 0.8419303390301319,
|
||
|
|
"learning_rate": 9.085724491675642e-06,
|
||
|
|
"loss": 1.1053291320800782,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7217473884140551,
|
||
|
|
"grad_norm": 0.8793202963091465,
|
||
|
|
"learning_rate": 9.045084971874738e-06,
|
||
|
|
"loss": 1.1043977737426758,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7344096232985122,
|
||
|
|
"grad_norm": 0.8844837887961337,
|
||
|
|
"learning_rate": 9.003656854743667e-06,
|
||
|
|
"loss": 1.0930152893066407,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7470718581829693,
|
||
|
|
"grad_norm": 0.8243068254935355,
|
||
|
|
"learning_rate": 8.961448216775955e-06,
|
||
|
|
"loss": 1.1083423614501953,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7597340930674265,
|
||
|
|
"grad_norm": 0.8231480643363308,
|
||
|
|
"learning_rate": 8.9184672866292e-06,
|
||
|
|
"loss": 1.093316650390625,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7723963279518835,
|
||
|
|
"grad_norm": 0.856487105562053,
|
||
|
|
"learning_rate": 8.874722443520898e-06,
|
||
|
|
"loss": 1.0935728073120117,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7850585628363406,
|
||
|
|
"grad_norm": 0.9069069891533103,
|
||
|
|
"learning_rate": 8.83022221559489e-06,
|
||
|
|
"loss": 1.085923957824707,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7977207977207977,
|
||
|
|
"grad_norm": 0.8415924258567633,
|
||
|
|
"learning_rate": 8.784975278258783e-06,
|
||
|
|
"loss": 1.1055352210998535,
|
||
|
|
"step": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8103830326052548,
|
||
|
|
"grad_norm": 0.8547842295217172,
|
||
|
|
"learning_rate": 8.73899045249266e-06,
|
||
|
|
"loss": 1.1053098678588866,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.823045267489712,
|
||
|
|
"grad_norm": 0.9042040663099864,
|
||
|
|
"learning_rate": 8.692276703129421e-06,
|
||
|
|
"loss": 1.100543212890625,
|
||
|
|
"step": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.835707502374169,
|
||
|
|
"grad_norm": 0.840156677605529,
|
||
|
|
"learning_rate": 8.644843137107058e-06,
|
||
|
|
"loss": 1.1007650375366211,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8483697372586262,
|
||
|
|
"grad_norm": 0.8554168041829401,
|
||
|
|
"learning_rate": 8.596699001693257e-06,
|
||
|
|
"loss": 1.095210647583008,
|
||
|
|
"step": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8610319721430832,
|
||
|
|
"grad_norm": 0.8378136162576828,
|
||
|
|
"learning_rate": 8.547853682682605e-06,
|
||
|
|
"loss": 1.0945035934448242,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8736942070275404,
|
||
|
|
"grad_norm": 0.8300982370825878,
|
||
|
|
"learning_rate": 8.498316702566828e-06,
|
||
|
|
"loss": 1.0824993133544922,
|
||
|
|
"step": 345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8863564419119975,
|
||
|
|
"grad_norm": 0.8879949006435145,
|
||
|
|
"learning_rate": 8.44809771867835e-06,
|
||
|
|
"loss": 1.0910042762756347,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8990186767964545,
|
||
|
|
"grad_norm": 0.8363110809635331,
|
||
|
|
"learning_rate": 8.397206521307584e-06,
|
||
|
|
"loss": 1.085635280609131,
|
||
|
|
"step": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9116809116809117,
|
||
|
|
"grad_norm": 0.8250978511317656,
|
||
|
|
"learning_rate": 8.345653031794292e-06,
|
||
|
|
"loss": 1.0832603454589844,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9243431465653688,
|
||
|
|
"grad_norm": 0.8250625494950978,
|
||
|
|
"learning_rate": 8.293447300593402e-06,
|
||
|
|
"loss": 1.0881545066833496,
|
||
|
|
"step": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9370053814498259,
|
||
|
|
"grad_norm": 0.9637417812174898,
|
||
|
|
"learning_rate": 8.240599505315656e-06,
|
||
|
|
"loss": 1.077590274810791,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.949667616334283,
|
||
|
|
"grad_norm": 0.938188486575515,
|
||
|
|
"learning_rate": 8.18711994874345e-06,
|
||
|
|
"loss": 1.0923616409301757,
|
||
|
|
"step": 375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9623298512187402,
|
||
|
|
"grad_norm": 0.829053167214024,
|
||
|
|
"learning_rate": 8.133019056822303e-06,
|
||
|
|
"loss": 1.0790325164794923,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9749920861031972,
|
||
|
|
"grad_norm": 0.8296874845053457,
|
||
|
|
"learning_rate": 8.078307376628292e-06,
|
||
|
|
"loss": 1.0690267562866211,
|
||
|
|
"step": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9876543209876543,
|
||
|
|
"grad_norm": 0.8248755231512207,
|
||
|
|
"learning_rate": 8.022995574311876e-06,
|
||
|
|
"loss": 1.0922147750854492,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.9123714875418006,
|
||
|
|
"learning_rate": 7.967094433018508e-06,
|
||
|
|
"loss": 1.0716293334960938,
|
||
|
|
"step": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0126622348844572,
|
||
|
|
"grad_norm": 0.8825626316822892,
|
||
|
|
"learning_rate": 7.910614850786448e-06,
|
||
|
|
"loss": 0.9421855926513671,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0253244697689141,
|
||
|
|
"grad_norm": 0.981129259243819,
|
||
|
|
"learning_rate": 7.85356783842216e-06,
|
||
|
|
"loss": 0.9680027008056641,
|
||
|
|
"step": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0379867046533713,
|
||
|
|
"grad_norm": 0.9490494582638624,
|
||
|
|
"learning_rate": 7.795964517353734e-06,
|
||
|
|
"loss": 0.9392026901245117,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0506489395378285,
|
||
|
|
"grad_norm": 1.0436527309713077,
|
||
|
|
"learning_rate": 7.737816117462752e-06,
|
||
|
|
"loss": 0.9481110572814941,
|
||
|
|
"step": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0633111744222856,
|
||
|
|
"grad_norm": 0.9193717140597131,
|
||
|
|
"learning_rate": 7.679133974894984e-06,
|
||
|
|
"loss": 0.9479268074035645,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0759734093067426,
|
||
|
|
"grad_norm": 0.9176846478769476,
|
||
|
|
"learning_rate": 7.619929529850397e-06,
|
||
|
|
"loss": 0.9510162353515625,
|
||
|
|
"step": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0886356441911997,
|
||
|
|
"grad_norm": 0.9263690784461404,
|
||
|
|
"learning_rate": 7.560214324352858e-06,
|
||
|
|
"loss": 0.9560428619384765,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.101297879075657,
|
||
|
|
"grad_norm": 0.8985018721390384,
|
||
|
|
"learning_rate": 7.500000000000001e-06,
|
||
|
|
"loss": 0.9549171447753906,
|
||
|
|
"step": 435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1139601139601139,
|
||
|
|
"grad_norm": 0.8383045792822509,
|
||
|
|
"learning_rate": 7.4392982956936644e-06,
|
||
|
|
"loss": 0.9572299957275391,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.126622348844571,
|
||
|
|
"grad_norm": 0.8693402459631241,
|
||
|
|
"learning_rate": 7.378121045351378e-06,
|
||
|
|
"loss": 0.9538370132446289,
|
||
|
|
"step": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1392845837290282,
|
||
|
|
"grad_norm": 0.8465948151936904,
|
||
|
|
"learning_rate": 7.31648017559931e-06,
|
||
|
|
"loss": 0.9445423126220703,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1519468186134854,
|
||
|
|
"grad_norm": 0.8993258971886791,
|
||
|
|
"learning_rate": 7.254387703447154e-06,
|
||
|
|
"loss": 0.9402847290039062,
|
||
|
|
"step": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1646090534979423,
|
||
|
|
"grad_norm": 0.8973654441260622,
|
||
|
|
"learning_rate": 7.191855733945388e-06,
|
||
|
|
"loss": 0.9458431243896485,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1772712883823995,
|
||
|
|
"grad_norm": 0.8975789539843146,
|
||
|
|
"learning_rate": 7.128896457825364e-06,
|
||
|
|
"loss": 0.9456979751586914,
|
||
|
|
"step": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1899335232668566,
|
||
|
|
"grad_norm": 0.9025883974896288,
|
||
|
|
"learning_rate": 7.06552214912271e-06,
|
||
|
|
"loss": 0.958702278137207,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2025957581513138,
|
||
|
|
"grad_norm": 0.8943619241590697,
|
||
|
|
"learning_rate": 7.0017451627844765e-06,
|
||
|
|
"loss": 0.9409778594970704,
|
||
|
|
"step": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2152579930357708,
|
||
|
|
"grad_norm": 0.8987697465779751,
|
||
|
|
"learning_rate": 6.9375779322605154e-06,
|
||
|
|
"loss": 0.952575397491455,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.227920227920228,
|
||
|
|
"grad_norm": 0.8957262384243423,
|
||
|
|
"learning_rate": 6.873032967079562e-06,
|
||
|
|
"loss": 0.9412460327148438,
|
||
|
|
"step": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.240582462804685,
|
||
|
|
"grad_norm": 0.9191287064439484,
|
||
|
|
"learning_rate": 6.808122850410461e-06,
|
||
|
|
"loss": 0.9442897796630859,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.253244697689142,
|
||
|
|
"grad_norm": 0.9120111224616239,
|
||
|
|
"learning_rate": 6.7428602366090764e-06,
|
||
|
|
"loss": 0.9721967697143554,
|
||
|
|
"step": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2659069325735992,
|
||
|
|
"grad_norm": 0.9297557344562997,
|
||
|
|
"learning_rate": 6.677257848751276e-06,
|
||
|
|
"loss": 0.9427990913391113,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2785691674580564,
|
||
|
|
"grad_norm": 0.9256360350131605,
|
||
|
|
"learning_rate": 6.611328476152557e-06,
|
||
|
|
"loss": 0.9448193550109864,
|
||
|
|
"step": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2912314023425133,
|
||
|
|
"grad_norm": 0.9178166712574457,
|
||
|
|
"learning_rate": 6.545084971874738e-06,
|
||
|
|
"loss": 0.9285225868225098,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3038936372269705,
|
||
|
|
"grad_norm": 0.8824737418151191,
|
||
|
|
"learning_rate": 6.4785402502202345e-06,
|
||
|
|
"loss": 0.9465466499328613,
|
||
|
|
"step": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3165558721114277,
|
||
|
|
"grad_norm": 0.8714305178817582,
|
||
|
|
"learning_rate": 6.411707284214384e-06,
|
||
|
|
"loss": 0.9558137893676758,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3292181069958848,
|
||
|
|
"grad_norm": 1.6420471551581535,
|
||
|
|
"learning_rate": 6.344599103076329e-06,
|
||
|
|
"loss": 0.9441043853759765,
|
||
|
|
"step": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.341880341880342,
|
||
|
|
"grad_norm": 0.8940534993249484,
|
||
|
|
"learning_rate": 6.277228789678953e-06,
|
||
|
|
"loss": 0.9406339645385742,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.354542576764799,
|
||
|
|
"grad_norm": 0.8657105103377609,
|
||
|
|
"learning_rate": 6.209609477998339e-06,
|
||
|
|
"loss": 0.9400988578796386,
|
||
|
|
"step": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3672048116492561,
|
||
|
|
"grad_norm": 0.8795303497602281,
|
||
|
|
"learning_rate": 6.141754350553279e-06,
|
||
|
|
"loss": 0.9375904083251954,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3798670465337133,
|
||
|
|
"grad_norm": 0.8778881000839949,
|
||
|
|
"learning_rate": 6.073676635835317e-06,
|
||
|
|
"loss": 0.9534420013427735,
|
||
|
|
"step": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3925292814181702,
|
||
|
|
"grad_norm": 0.8609329406866304,
|
||
|
|
"learning_rate": 6.005389605729824e-06,
|
||
|
|
"loss": 0.9435734748840332,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4051915163026274,
|
||
|
|
"grad_norm": 0.901450340070586,
|
||
|
|
"learning_rate": 5.936906572928625e-06,
|
||
|
|
"loss": 0.9454706192016602,
|
||
|
|
"step": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4178537511870846,
|
||
|
|
"grad_norm": 0.9056724009579911,
|
||
|
|
"learning_rate": 5.8682408883346535e-06,
|
||
|
|
"loss": 0.9358626365661621,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4305159860715415,
|
||
|
|
"grad_norm": 0.8767791922734569,
|
||
|
|
"learning_rate": 5.799405938459175e-06,
|
||
|
|
"loss": 0.9384665489196777,
|
||
|
|
"step": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4431782209559987,
|
||
|
|
"grad_norm": 0.9226108292554362,
|
||
|
|
"learning_rate": 5.730415142812059e-06,
|
||
|
|
"loss": 0.9389400482177734,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4558404558404558,
|
||
|
|
"grad_norm": 0.8635227126945888,
|
||
|
|
"learning_rate": 5.661281951285613e-06,
|
||
|
|
"loss": 0.9539518356323242,
|
||
|
|
"step": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.468502690724913,
|
||
|
|
"grad_norm": 0.8840260265705664,
|
||
|
|
"learning_rate": 5.592019841532507e-06,
|
||
|
|
"loss": 0.9480253219604492,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4811649256093702,
|
||
|
|
"grad_norm": 0.9151680057009149,
|
||
|
|
"learning_rate": 5.522642316338268e-06,
|
||
|
|
"loss": 0.9404661178588867,
|
||
|
|
"step": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4938271604938271,
|
||
|
|
"grad_norm": 0.9450262697016882,
|
||
|
|
"learning_rate": 5.453162900988902e-06,
|
||
|
|
"loss": 0.9321787834167481,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5064893953782843,
|
||
|
|
"grad_norm": 0.8402436559360018,
|
||
|
|
"learning_rate": 5.383595140634093e-06,
|
||
|
|
"loss": 0.9440553665161133,
|
||
|
|
"step": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5191516302627415,
|
||
|
|
"grad_norm": 0.8778976142471068,
|
||
|
|
"learning_rate": 5.3139525976465675e-06,
|
||
|
|
"loss": 0.9511254310607911,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5318138651471984,
|
||
|
|
"grad_norm": 0.8781843644707367,
|
||
|
|
"learning_rate": 5.244248848978067e-06,
|
||
|
|
"loss": 0.9387626647949219,
|
||
|
|
"step": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5444761000316556,
|
||
|
|
"grad_norm": 0.8642449781808372,
|
||
|
|
"learning_rate": 5.174497483512506e-06,
|
||
|
|
"loss": 0.956205177307129,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5571383349161128,
|
||
|
|
"grad_norm": 0.8846802147972775,
|
||
|
|
"learning_rate": 5.1047120994167855e-06,
|
||
|
|
"loss": 0.9363911628723145,
|
||
|
|
"step": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5698005698005697,
|
||
|
|
"grad_norm": 0.8739137758439613,
|
||
|
|
"learning_rate": 5.034906301489808e-06,
|
||
|
|
"loss": 0.9367790222167969,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5824628046850269,
|
||
|
|
"grad_norm": 0.8953494651595788,
|
||
|
|
"learning_rate": 4.965093698510192e-06,
|
||
|
|
"loss": 0.9425483703613281,
|
||
|
|
"step": 625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.595125039569484,
|
||
|
|
"grad_norm": 0.8615421639128288,
|
||
|
|
"learning_rate": 4.895287900583216e-06,
|
||
|
|
"loss": 0.9341062545776367,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.607787274453941,
|
||
|
|
"grad_norm": 0.8353360832306662,
|
||
|
|
"learning_rate": 4.825502516487497e-06,
|
||
|
|
"loss": 0.949849796295166,
|
||
|
|
"step": 635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6204495093383984,
|
||
|
|
"grad_norm": 0.8563998366304418,
|
||
|
|
"learning_rate": 4.755751151021934e-06,
|
||
|
|
"loss": 0.9409940719604493,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6331117442228553,
|
||
|
|
"grad_norm": 0.9360183967885729,
|
||
|
|
"learning_rate": 4.686047402353433e-06,
|
||
|
|
"loss": 0.939891242980957,
|
||
|
|
"step": 645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6457739791073125,
|
||
|
|
"grad_norm": 0.8806457976411894,
|
||
|
|
"learning_rate": 4.6164048593659076e-06,
|
||
|
|
"loss": 0.952726173400879,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6584362139917697,
|
||
|
|
"grad_norm": 0.8871650293826654,
|
||
|
|
"learning_rate": 4.546837099011101e-06,
|
||
|
|
"loss": 0.9440122604370117,
|
||
|
|
"step": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6710984488762266,
|
||
|
|
"grad_norm": 0.8543495337665787,
|
||
|
|
"learning_rate": 4.477357683661734e-06,
|
||
|
|
"loss": 0.9277559280395508,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6837606837606838,
|
||
|
|
"grad_norm": 0.8754310619944701,
|
||
|
|
"learning_rate": 4.4079801584674955e-06,
|
||
|
|
"loss": 0.9328133583068847,
|
||
|
|
"step": 665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.696422918645141,
|
||
|
|
"grad_norm": 0.846881206379322,
|
||
|
|
"learning_rate": 4.3387180487143875e-06,
|
||
|
|
"loss": 0.9440486907958985,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.709085153529598,
|
||
|
|
"grad_norm": 0.8123484252146217,
|
||
|
|
"learning_rate": 4.269584857187942e-06,
|
||
|
|
"loss": 0.9334369659423828,
|
||
|
|
"step": 675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.721747388414055,
|
||
|
|
"grad_norm": 0.8860941606484654,
|
||
|
|
"learning_rate": 4.200594061540827e-06,
|
||
|
|
"loss": 0.9386373519897461,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7344096232985122,
|
||
|
|
"grad_norm": 0.8710977899292981,
|
||
|
|
"learning_rate": 4.131759111665349e-06,
|
||
|
|
"loss": 0.9379000663757324,
|
||
|
|
"step": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7470718581829692,
|
||
|
|
"grad_norm": 0.8989668390644706,
|
||
|
|
"learning_rate": 4.063093427071376e-06,
|
||
|
|
"loss": 0.9351366043090821,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7597340930674266,
|
||
|
|
"grad_norm": 0.8426262295188102,
|
||
|
|
"learning_rate": 3.994610394270178e-06,
|
||
|
|
"loss": 0.9458501815795899,
|
||
|
|
"step": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7723963279518835,
|
||
|
|
"grad_norm": 0.8490556601435445,
|
||
|
|
"learning_rate": 3.926323364164684e-06,
|
||
|
|
"loss": 0.9344646453857421,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7850585628363405,
|
||
|
|
"grad_norm": 0.857013306646358,
|
||
|
|
"learning_rate": 3.8582456494467214e-06,
|
||
|
|
"loss": 0.9324585914611816,
|
||
|
|
"step": 705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7977207977207978,
|
||
|
|
"grad_norm": 0.8442075171060656,
|
||
|
|
"learning_rate": 3.790390522001662e-06,
|
||
|
|
"loss": 0.9345897674560547,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8103830326052548,
|
||
|
|
"grad_norm": 0.8635838902214552,
|
||
|
|
"learning_rate": 3.7227712103210485e-06,
|
||
|
|
"loss": 0.9480118751525879,
|
||
|
|
"step": 715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.823045267489712,
|
||
|
|
"grad_norm": 0.8701785787205291,
|
||
|
|
"learning_rate": 3.655400896923672e-06,
|
||
|
|
"loss": 0.9411863327026367,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8357075023741691,
|
||
|
|
"grad_norm": 0.9278897279843371,
|
||
|
|
"learning_rate": 3.5882927157856175e-06,
|
||
|
|
"loss": 0.9384016036987305,
|
||
|
|
"step": 725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.848369737258626,
|
||
|
|
"grad_norm": 0.8675201640437896,
|
||
|
|
"learning_rate": 3.521459749779769e-06,
|
||
|
|
"loss": 0.9388191223144531,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8610319721430832,
|
||
|
|
"grad_norm": 0.9047480946293855,
|
||
|
|
"learning_rate": 3.4549150281252635e-06,
|
||
|
|
"loss": 0.943515396118164,
|
||
|
|
"step": 735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8736942070275404,
|
||
|
|
"grad_norm": 0.9100256206799584,
|
||
|
|
"learning_rate": 3.3886715238474454e-06,
|
||
|
|
"loss": 0.9317167282104493,
|
||
|
|
"step": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8863564419119974,
|
||
|
|
"grad_norm": 0.9121240599713055,
|
||
|
|
"learning_rate": 3.322742151248726e-06,
|
||
|
|
"loss": 0.9298182487487793,
|
||
|
|
"step": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8990186767964545,
|
||
|
|
"grad_norm": 0.8360632961222116,
|
||
|
|
"learning_rate": 3.2571397633909252e-06,
|
||
|
|
"loss": 0.9383123397827149,
|
||
|
|
"step": 750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9116809116809117,
|
||
|
|
"grad_norm": 0.8449980062948027,
|
||
|
|
"learning_rate": 3.1918771495895395e-06,
|
||
|
|
"loss": 0.9380681991577149,
|
||
|
|
"step": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9243431465653686,
|
||
|
|
"grad_norm": 0.8358057866853585,
|
||
|
|
"learning_rate": 3.12696703292044e-06,
|
||
|
|
"loss": 0.9311031341552735,
|
||
|
|
"step": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.937005381449826,
|
||
|
|
"grad_norm": 0.8261214369483678,
|
||
|
|
"learning_rate": 3.0624220677394854e-06,
|
||
|
|
"loss": 0.9335260391235352,
|
||
|
|
"step": 765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.949667616334283,
|
||
|
|
"grad_norm": 0.8746978630306859,
|
||
|
|
"learning_rate": 2.9982548372155264e-06,
|
||
|
|
"loss": 0.9282594680786133,
|
||
|
|
"step": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9623298512187402,
|
||
|
|
"grad_norm": 0.8914685495920053,
|
||
|
|
"learning_rate": 2.934477850877292e-06,
|
||
|
|
"loss": 0.9267834663391114,
|
||
|
|
"step": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9749920861031973,
|
||
|
|
"grad_norm": 0.8730909900000534,
|
||
|
|
"learning_rate": 2.871103542174637e-06,
|
||
|
|
"loss": 0.9400104522705078,
|
||
|
|
"step": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9876543209876543,
|
||
|
|
"grad_norm": 0.9195388866817068,
|
||
|
|
"learning_rate": 2.8081442660546126e-06,
|
||
|
|
"loss": 0.9355339050292969,
|
||
|
|
"step": 785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.8941990040688051,
|
||
|
|
"learning_rate": 2.7456122965528475e-06,
|
||
|
|
"loss": 0.9464699745178222,
|
||
|
|
"step": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.012662234884457,
|
||
|
|
"grad_norm": 0.9551299167570609,
|
||
|
|
"learning_rate": 2.683519824400693e-06,
|
||
|
|
"loss": 0.8369241714477539,
|
||
|
|
"step": 795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0253244697689143,
|
||
|
|
"grad_norm": 0.9503417747763285,
|
||
|
|
"learning_rate": 2.6218789546486235e-06,
|
||
|
|
"loss": 0.8305461883544922,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0379867046533713,
|
||
|
|
"grad_norm": 0.9428708677587196,
|
||
|
|
"learning_rate": 2.560701704306336e-06,
|
||
|
|
"loss": 0.8380617141723633,
|
||
|
|
"step": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0506489395378282,
|
||
|
|
"grad_norm": 0.9141118129164282,
|
||
|
|
"learning_rate": 2.5000000000000015e-06,
|
||
|
|
"loss": 0.8350756645202637,
|
||
|
|
"step": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0633111744222856,
|
||
|
|
"grad_norm": 0.8925703133521584,
|
||
|
|
"learning_rate": 2.4397856756471435e-06,
|
||
|
|
"loss": 0.8253829956054688,
|
||
|
|
"step": 815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0759734093067426,
|
||
|
|
"grad_norm": 0.9010434641718755,
|
||
|
|
"learning_rate": 2.380070470149605e-06,
|
||
|
|
"loss": 0.8296566009521484,
|
||
|
|
"step": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0886356441912,
|
||
|
|
"grad_norm": 0.9108639104627096,
|
||
|
|
"learning_rate": 2.320866025105016e-06,
|
||
|
|
"loss": 0.8311027526855469,
|
||
|
|
"step": 825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.101297879075657,
|
||
|
|
"grad_norm": 0.8776111588691332,
|
||
|
|
"learning_rate": 2.2621838825372496e-06,
|
||
|
|
"loss": 0.8341006278991699,
|
||
|
|
"step": 830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.113960113960114,
|
||
|
|
"grad_norm": 0.9815553215946904,
|
||
|
|
"learning_rate": 2.204035482646267e-06,
|
||
|
|
"loss": 0.8500799179077149,
|
||
|
|
"step": 835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1266223488445712,
|
||
|
|
"grad_norm": 0.9142581836968815,
|
||
|
|
"learning_rate": 2.146432161577842e-06,
|
||
|
|
"loss": 0.8405605316162109,
|
||
|
|
"step": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.139284583729028,
|
||
|
|
"grad_norm": 1.145797436281295,
|
||
|
|
"learning_rate": 2.0893851492135536e-06,
|
||
|
|
"loss": 0.8333783149719238,
|
||
|
|
"step": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.151946818613485,
|
||
|
|
"grad_norm": 1.0132915822668673,
|
||
|
|
"learning_rate": 2.0329055669814936e-06,
|
||
|
|
"loss": 0.8394683837890625,
|
||
|
|
"step": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1646090534979425,
|
||
|
|
"grad_norm": 1.054834919380244,
|
||
|
|
"learning_rate": 1.977004425688126e-06,
|
||
|
|
"loss": 0.8199810028076172,
|
||
|
|
"step": 855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1772712883823995,
|
||
|
|
"grad_norm": 0.8659196804614238,
|
||
|
|
"learning_rate": 1.9216926233717087e-06,
|
||
|
|
"loss": 0.8200090408325196,
|
||
|
|
"step": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1899335232668564,
|
||
|
|
"grad_norm": 0.8936160842705467,
|
||
|
|
"learning_rate": 1.8669809431776991e-06,
|
||
|
|
"loss": 0.819823932647705,
|
||
|
|
"step": 865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.202595758151314,
|
||
|
|
"grad_norm": 0.8936829197489651,
|
||
|
|
"learning_rate": 1.8128800512565514e-06,
|
||
|
|
"loss": 0.8329672813415527,
|
||
|
|
"step": 870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2152579930357708,
|
||
|
|
"grad_norm": 0.865124596235692,
|
||
|
|
"learning_rate": 1.7594004946843458e-06,
|
||
|
|
"loss": 0.830903434753418,
|
||
|
|
"step": 875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2279202279202277,
|
||
|
|
"grad_norm": 1.0004908923945968,
|
||
|
|
"learning_rate": 1.7065526994065973e-06,
|
||
|
|
"loss": 0.8222661972045898,
|
||
|
|
"step": 880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.240582462804685,
|
||
|
|
"grad_norm": 0.9974019476784539,
|
||
|
|
"learning_rate": 1.6543469682057105e-06,
|
||
|
|
"loss": 0.8375696182250977,
|
||
|
|
"step": 885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.253244697689142,
|
||
|
|
"grad_norm": 0.9489943802555122,
|
||
|
|
"learning_rate": 1.6027934786924187e-06,
|
||
|
|
"loss": 0.8297539710998535,
|
||
|
|
"step": 890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2659069325735994,
|
||
|
|
"grad_norm": 0.8526558052313017,
|
||
|
|
"learning_rate": 1.551902281321651e-06,
|
||
|
|
"loss": 0.8450464248657227,
|
||
|
|
"step": 895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2785691674580564,
|
||
|
|
"grad_norm": 0.9095006101158244,
|
||
|
|
"learning_rate": 1.5016832974331725e-06,
|
||
|
|
"loss": 0.8434087753295898,
|
||
|
|
"step": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2912314023425133,
|
||
|
|
"grad_norm": 0.8941646461803728,
|
||
|
|
"learning_rate": 1.4521463173173966e-06,
|
||
|
|
"loss": 0.8199748992919922,
|
||
|
|
"step": 905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3038936372269707,
|
||
|
|
"grad_norm": 1.0151629908802393,
|
||
|
|
"learning_rate": 1.4033009983067454e-06,
|
||
|
|
"loss": 0.8257926940917969,
|
||
|
|
"step": 910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3165558721114277,
|
||
|
|
"grad_norm": 0.8688789281578927,
|
||
|
|
"learning_rate": 1.3551568628929434e-06,
|
||
|
|
"loss": 0.8222599029541016,
|
||
|
|
"step": 915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3292181069958846,
|
||
|
|
"grad_norm": 0.9256160248355862,
|
||
|
|
"learning_rate": 1.3077232968705805e-06,
|
||
|
|
"loss": 0.8179254531860352,
|
||
|
|
"step": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.341880341880342,
|
||
|
|
"grad_norm": 0.8992368646832662,
|
||
|
|
"learning_rate": 1.2610095475073415e-06,
|
||
|
|
"loss": 0.8351934432983399,
|
||
|
|
"step": 925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.354542576764799,
|
||
|
|
"grad_norm": 0.8950940935118609,
|
||
|
|
"learning_rate": 1.2150247217412186e-06,
|
||
|
|
"loss": 0.8317380905151367,
|
||
|
|
"step": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3672048116492563,
|
||
|
|
"grad_norm": 0.8832980241323902,
|
||
|
|
"learning_rate": 1.1697777844051105e-06,
|
||
|
|
"loss": 0.8315254211425781,
|
||
|
|
"step": 935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3798670465337133,
|
||
|
|
"grad_norm": 0.9169712891318174,
|
||
|
|
"learning_rate": 1.1252775564791023e-06,
|
||
|
|
"loss": 0.8270421981811523,
|
||
|
|
"step": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3925292814181702,
|
||
|
|
"grad_norm": 0.8820889479067028,
|
||
|
|
"learning_rate": 1.0815327133708015e-06,
|
||
|
|
"loss": 0.8373619079589844,
|
||
|
|
"step": 945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4051915163026276,
|
||
|
|
"grad_norm": 0.8633797543130399,
|
||
|
|
"learning_rate": 1.0385517832240472e-06,
|
||
|
|
"loss": 0.822084617614746,
|
||
|
|
"step": 950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4178537511870846,
|
||
|
|
"grad_norm": 0.883673978191503,
|
||
|
|
"learning_rate": 9.963431452563331e-07,
|
||
|
|
"loss": 0.8369743347167968,
|
||
|
|
"step": 955
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4305159860715415,
|
||
|
|
"grad_norm": 0.8729883350584554,
|
||
|
|
"learning_rate": 9.549150281252633e-07,
|
||
|
|
"loss": 0.8232148170471192,
|
||
|
|
"step": 960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.443178220955999,
|
||
|
|
"grad_norm": 0.9022268286398805,
|
||
|
|
"learning_rate": 9.142755083243577e-07,
|
||
|
|
"loss": 0.8386312484741211,
|
||
|
|
"step": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.455840455840456,
|
||
|
|
"grad_norm": 0.886835830092829,
|
||
|
|
"learning_rate": 8.744325086085248e-07,
|
||
|
|
"loss": 0.8283025741577148,
|
||
|
|
"step": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.468502690724913,
|
||
|
|
"grad_norm": 0.8893818705075751,
|
||
|
|
"learning_rate": 8.353937964495029e-07,
|
||
|
|
"loss": 0.8303911209106445,
|
||
|
|
"step": 975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.48116492560937,
|
||
|
|
"grad_norm": 0.922324297214932,
|
||
|
|
"learning_rate": 7.971669825215789e-07,
|
||
|
|
"loss": 0.836126708984375,
|
||
|
|
"step": 980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.493827160493827,
|
||
|
|
"grad_norm": 0.904255523759268,
|
||
|
|
"learning_rate": 7.597595192178702e-07,
|
||
|
|
"loss": 0.8196451187133789,
|
||
|
|
"step": 985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.506489395378284,
|
||
|
|
"grad_norm": 0.8636934543130641,
|
||
|
|
"learning_rate": 7.23178699197467e-07,
|
||
|
|
"loss": 0.8335494995117188,
|
||
|
|
"step": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5191516302627415,
|
||
|
|
"grad_norm": 0.897039019918768,
|
||
|
|
"learning_rate": 6.874316539637127e-07,
|
||
|
|
"loss": 0.8088079452514648,
|
||
|
|
"step": 995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5318138651471984,
|
||
|
|
"grad_norm": 0.8864019874531589,
|
||
|
|
"learning_rate": 6.52525352473905e-07,
|
||
|
|
"loss": 0.8233877182006836,
|
||
|
|
"step": 1000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5444761000316554,
|
||
|
|
"grad_norm": 0.9088661368290617,
|
||
|
|
"learning_rate": 6.184665997806832e-07,
|
||
|
|
"loss": 0.8182021141052246,
|
||
|
|
"step": 1005
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5571383349161128,
|
||
|
|
"grad_norm": 0.9072624944819723,
|
||
|
|
"learning_rate": 5.852620357053651e-07,
|
||
|
|
"loss": 0.835714054107666,
|
||
|
|
"step": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5698005698005697,
|
||
|
|
"grad_norm": 0.8731742132357438,
|
||
|
|
"learning_rate": 5.529181335435124e-07,
|
||
|
|
"loss": 0.8283638000488281,
|
||
|
|
"step": 1015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5824628046850266,
|
||
|
|
"grad_norm": 0.8761704900525855,
|
||
|
|
"learning_rate": 5.214411988029355e-07,
|
||
|
|
"loss": 0.828251838684082,
|
||
|
|
"step": 1020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.595125039569484,
|
||
|
|
"grad_norm": 0.8629998756845685,
|
||
|
|
"learning_rate": 4.908373679744316e-07,
|
||
|
|
"loss": 0.8239392280578614,
|
||
|
|
"step": 1025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.607787274453941,
|
||
|
|
"grad_norm": 0.9025713028894049,
|
||
|
|
"learning_rate": 4.6111260733545714e-07,
|
||
|
|
"loss": 0.8368805885314942,
|
||
|
|
"step": 1030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6204495093383984,
|
||
|
|
"grad_norm": 0.8791508721949534,
|
||
|
|
"learning_rate": 4.322727117869951e-07,
|
||
|
|
"loss": 0.8214786529541016,
|
||
|
|
"step": 1035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6331117442228553,
|
||
|
|
"grad_norm": 0.8747271828916487,
|
||
|
|
"learning_rate": 4.043233037238281e-07,
|
||
|
|
"loss": 0.8331809997558594,
|
||
|
|
"step": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6457739791073127,
|
||
|
|
"grad_norm": 0.9189023842289675,
|
||
|
|
"learning_rate": 3.772698319384349e-07,
|
||
|
|
"loss": 0.8299878120422364,
|
||
|
|
"step": 1045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6584362139917697,
|
||
|
|
"grad_norm": 0.9012554611673713,
|
||
|
|
"learning_rate": 3.511175705587433e-07,
|
||
|
|
"loss": 0.8398582458496093,
|
||
|
|
"step": 1050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6710984488762266,
|
||
|
|
"grad_norm": 0.8857980961838654,
|
||
|
|
"learning_rate": 3.258716180199278e-07,
|
||
|
|
"loss": 0.818387794494629,
|
||
|
|
"step": 1055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.683760683760684,
|
||
|
|
"grad_norm": 0.8627721513188427,
|
||
|
|
"learning_rate": 3.015368960704584e-07,
|
||
|
|
"loss": 0.8408231735229492,
|
||
|
|
"step": 1060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.696422918645141,
|
||
|
|
"grad_norm": 1.0338964144567573,
|
||
|
|
"learning_rate": 2.7811814881259503e-07,
|
||
|
|
"loss": 0.8292581558227539,
|
||
|
|
"step": 1065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.709085153529598,
|
||
|
|
"grad_norm": 0.8858535803633547,
|
||
|
|
"learning_rate": 2.556199417775174e-07,
|
||
|
|
"loss": 0.8229169845581055,
|
||
|
|
"step": 1070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7217473884140553,
|
||
|
|
"grad_norm": 0.8487724578022646,
|
||
|
|
"learning_rate": 2.3404666103526542e-07,
|
||
|
|
"loss": 0.8243260383605957,
|
||
|
|
"step": 1075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7344096232985122,
|
||
|
|
"grad_norm": 0.9142856277892702,
|
||
|
|
"learning_rate": 2.134025123396638e-07,
|
||
|
|
"loss": 0.8411771774291992,
|
||
|
|
"step": 1080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.747071858182969,
|
||
|
|
"grad_norm": 0.9044836277079623,
|
||
|
|
"learning_rate": 1.9369152030840553e-07,
|
||
|
|
"loss": 0.8182785034179687,
|
||
|
|
"step": 1085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7597340930674266,
|
||
|
|
"grad_norm": 0.8753811947010942,
|
||
|
|
"learning_rate": 1.7491752763844294e-07,
|
||
|
|
"loss": 0.8330059051513672,
|
||
|
|
"step": 1090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7723963279518835,
|
||
|
|
"grad_norm": 0.9127704993164228,
|
||
|
|
"learning_rate": 1.5708419435684463e-07,
|
||
|
|
"loss": 0.8270849227905274,
|
||
|
|
"step": 1095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7850585628363405,
|
||
|
|
"grad_norm": 0.8885180442416001,
|
||
|
|
"learning_rate": 1.4019499710726913e-07,
|
||
|
|
"loss": 0.8345333099365234,
|
||
|
|
"step": 1100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.797720797720798,
|
||
|
|
"grad_norm": 0.9121582122101339,
|
||
|
|
"learning_rate": 1.2425322847218368e-07,
|
||
|
|
"loss": 0.8229399681091308,
|
||
|
|
"step": 1105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.810383032605255,
|
||
|
|
"grad_norm": 0.9127878209504691,
|
||
|
|
"learning_rate": 1.0926199633097156e-07,
|
||
|
|
"loss": 0.82467041015625,
|
||
|
|
"step": 1110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8230452674897117,
|
||
|
|
"grad_norm": 1.744776544604879,
|
||
|
|
"learning_rate": 9.522422325404234e-08,
|
||
|
|
"loss": 0.8274450302124023,
|
||
|
|
"step": 1115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.835707502374169,
|
||
|
|
"grad_norm": 0.8982447409078338,
|
||
|
|
"learning_rate": 8.214264593307097e-08,
|
||
|
|
"loss": 0.8290293693542481,
|
||
|
|
"step": 1120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.848369737258626,
|
||
|
|
"grad_norm": 0.8866891213730514,
|
||
|
|
"learning_rate": 7.001981464747565e-08,
|
||
|
|
"loss": 0.8212656021118164,
|
||
|
|
"step": 1125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.861031972143083,
|
||
|
|
"grad_norm": 0.9114969683085143,
|
||
|
|
"learning_rate": 5.8858092767236084e-08,
|
||
|
|
"loss": 0.8231026649475097,
|
||
|
|
"step": 1130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8736942070275404,
|
||
|
|
"grad_norm": 0.8832450189167574,
|
||
|
|
"learning_rate": 4.865965629214819e-08,
|
||
|
|
"loss": 0.830931282043457,
|
||
|
|
"step": 1135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8863564419119974,
|
||
|
|
"grad_norm": 0.8487481576996411,
|
||
|
|
"learning_rate": 3.9426493427611177e-08,
|
||
|
|
"loss": 0.8255987167358398,
|
||
|
|
"step": 1140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8990186767964543,
|
||
|
|
"grad_norm": 0.879731012515658,
|
||
|
|
"learning_rate": 3.1160404197018155e-08,
|
||
|
|
"loss": 0.8359064102172852,
|
||
|
|
"step": 1145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9116809116809117,
|
||
|
|
"grad_norm": 0.9058610366933287,
|
||
|
|
"learning_rate": 2.386300009084408e-08,
|
||
|
|
"loss": 0.8246042251586914,
|
||
|
|
"step": 1150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9243431465653686,
|
||
|
|
"grad_norm": 0.898984738606933,
|
||
|
|
"learning_rate": 1.753570375247815e-08,
|
||
|
|
"loss": 0.8313743591308593,
|
||
|
|
"step": 1155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.937005381449826,
|
||
|
|
"grad_norm": 0.8885810042812119,
|
||
|
|
"learning_rate": 1.2179748700879013e-08,
|
||
|
|
"loss": 0.829072380065918,
|
||
|
|
"step": 1160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.949667616334283,
|
||
|
|
"grad_norm": 0.903649383117039,
|
||
|
|
"learning_rate": 7.796179090094891e-09,
|
||
|
|
"loss": 0.8449357986450196,
|
||
|
|
"step": 1165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9623298512187404,
|
||
|
|
"grad_norm": 0.9753427783203841,
|
||
|
|
"learning_rate": 4.385849505708084e-09,
|
||
|
|
"loss": 0.8176769256591797,
|
||
|
|
"step": 1170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9749920861031973,
|
||
|
|
"grad_norm": 0.8748130538451198,
|
||
|
|
"learning_rate": 1.9494247982282386e-09,
|
||
|
|
"loss": 0.8217670440673828,
|
||
|
|
"step": 1175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9876543209876543,
|
||
|
|
"grad_norm": 0.9286601621176515,
|
||
|
|
"learning_rate": 4.87379953478806e-10,
|
||
|
|
"loss": 0.8410984992980957,
|
||
|
|
"step": 1180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.9389103063767028,
|
||
|
|
"learning_rate": 0.0,
|
||
|
|
"loss": 0.8439925193786622,
|
||
|
|
"step": 1185
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 1185,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 3,
|
||
|
|
"save_steps": 100,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 2.9001299950934426e+17,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|