{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9995220557226798,
  "eval_steps": 500,
  "global_step": 1111,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00899659816132025,
      "grad_norm": 9.407082207126406,
      "learning_rate": 1.323529411764706e-06,
      "loss": 1.1112,
      "step": 10
    },
    {
      "epoch": 0.0179931963226405,
      "grad_norm": 1.6078560053885997,
      "learning_rate": 2.7941176470588237e-06,
      "loss": 0.9362,
      "step": 20
    },
    {
      "epoch": 0.02698979448396075,
      "grad_norm": 1.624171273563677,
      "learning_rate": 4.264705882352942e-06,
      "loss": 0.8806,
      "step": 30
    },
    {
      "epoch": 0.035986392645281,
      "grad_norm": 1.4554360159842772,
      "learning_rate": 4.9997341046993195e-06,
      "loss": 0.8947,
      "step": 40
    },
    {
      "epoch": 0.04498299080660125,
      "grad_norm": 1.4128774633791414,
      "learning_rate": 4.997607281643338e-06,
      "loss": 0.8764,
      "step": 50
    },
    {
      "epoch": 0.0539795889679215,
      "grad_norm": 1.4582318570520234,
      "learning_rate": 4.993355445074358e-06,
      "loss": 0.8758,
      "step": 60
    },
    {
      "epoch": 0.06297618712924176,
      "grad_norm": 1.241568669359911,
      "learning_rate": 4.986982212538754e-06,
      "loss": 0.8839,
      "step": 70
    },
    {
      "epoch": 0.071972785290562,
      "grad_norm": 1.4053494360834542,
      "learning_rate": 4.978493006508408e-06,
      "loss": 0.8741,
      "step": 80
    },
    {
      "epoch": 0.08096938345188226,
      "grad_norm": 1.4737910013351594,
      "learning_rate": 4.967895049767168e-06,
      "loss": 0.8859,
      "step": 90
    },
    {
      "epoch": 0.0899659816132025,
      "grad_norm": 1.3138356543765093,
      "learning_rate": 4.9551973592655565e-06,
      "loss": 0.8506,
      "step": 100
    },
    {
      "epoch": 0.09896257977452276,
      "grad_norm": 1.3758428201688702,
      "learning_rate": 4.940410738448974e-06,
      "loss": 0.9021,
      "step": 110
    },
    {
      "epoch": 0.107959177935843,
      "grad_norm": 1.4334286224262116,
      "learning_rate": 4.923547768065916e-06,
      "loss": 0.8752,
      "step": 120
    },
    {
      "epoch": 0.11695577609716326,
      "grad_norm": 1.3787955824383673,
      "learning_rate": 4.904622795464018e-06,
      "loss": 0.8824,
      "step": 130
    },
    {
      "epoch": 0.12595237425848352,
      "grad_norm": 1.3627983383165039,
      "learning_rate": 4.883651922383059e-06,
      "loss": 0.8519,
      "step": 140
    },
    {
      "epoch": 0.13494897241980378,
      "grad_norm": 1.3424633268190644,
      "learning_rate": 4.860652991255274e-06,
      "loss": 0.8565,
      "step": 150
    },
    {
      "epoch": 0.143945570581124,
      "grad_norm": 1.4227597124829214,
      "learning_rate": 4.835645570024666e-06,
      "loss": 0.8884,
      "step": 160
    },
    {
      "epoch": 0.15294216874244426,
      "grad_norm": 1.3745439024877604,
      "learning_rate": 4.808650935498216e-06,
      "loss": 0.8394,
      "step": 170
    },
    {
      "epoch": 0.16193876690376452,
      "grad_norm": 1.1913982454389247,
      "learning_rate": 4.779692055243149e-06,
      "loss": 0.8542,
      "step": 180
    },
    {
      "epoch": 0.17093536506508478,
      "grad_norm": 1.3183554482150048,
      "learning_rate": 4.748793568045682e-06,
      "loss": 0.8633,
      "step": 190
    },
    {
      "epoch": 0.179931963226405,
      "grad_norm": 1.4397694519607327,
      "learning_rate": 4.715981762947854e-06,
      "loss": 0.8532,
      "step": 200
    },
    {
      "epoch": 0.18892856138772526,
      "grad_norm": 1.2038269967391835,
      "learning_rate": 4.681284556880294e-06,
      "loss": 0.8544,
      "step": 210
    },
    {
      "epoch": 0.19792515954904552,
      "grad_norm": 1.2701132880820947,
      "learning_rate": 4.6447314709099436e-06,
      "loss": 0.8463,
      "step": 220
    },
    {
      "epoch": 0.20692175771036578,
      "grad_norm": 1.3356581457961003,
      "learning_rate": 4.606353605122954e-06,
      "loss": 0.8652,
      "step": 230
    },
    {
      "epoch": 0.215918355871686,
      "grad_norm": 1.3140257601689191,
      "learning_rate": 4.566183612164116e-06,
      "loss": 0.8737,
      "step": 240
    },
    {
      "epoch": 0.22491495403300626,
      "grad_norm": 1.2150684493886468,
      "learning_rate": 4.52425566945535e-06,
      "loss": 0.8448,
      "step": 250
    },
    {
      "epoch": 0.23391155219432652,
      "grad_norm": 1.3356045800197514,
      "learning_rate": 4.480605450116879e-06,
      "loss": 0.8531,
      "step": 260
    },
    {
      "epoch": 0.24290815035564678,
      "grad_norm": 1.3621827029728837,
      "learning_rate": 4.435270092615835e-06,
      "loss": 0.8569,
      "step": 270
    },
    {
      "epoch": 0.25190474851696704,
      "grad_norm": 1.2958362488368202,
      "learning_rate": 4.388288169168121e-06,
      "loss": 0.8559,
      "step": 280
    },
    {
      "epoch": 0.2609013466782873,
      "grad_norm": 1.315904667574477,
      "learning_rate": 4.339699652920407e-06,
      "loss": 0.8487,
      "step": 290
    },
    {
      "epoch": 0.26989794483960755,
      "grad_norm": 1.192500405781859,
      "learning_rate": 4.28954588394019e-06,
      "loss": 0.8427,
      "step": 300
    },
    {
      "epoch": 0.27889454300092775,
      "grad_norm": 1.3463647117462414,
      "learning_rate": 4.237869534042848e-06,
      "loss": 0.8645,
      "step": 310
    },
    {
      "epoch": 0.287891141162248,
      "grad_norm": 1.302446463869839,
      "learning_rate": 4.184714570485619e-06,
      "loss": 0.8739,
      "step": 320
    },
    {
      "epoch": 0.29688773932356827,
      "grad_norm": 1.2998957392856052,
      "learning_rate": 4.130126218559396e-06,
      "loss": 0.8388,
      "step": 330
    },
    {
      "epoch": 0.3058843374848885,
      "grad_norm": 1.1895515530331358,
      "learning_rate": 4.074150923110149e-06,
      "loss": 0.8344,
      "step": 340
    },
    {
      "epoch": 0.3148809356462088,
      "grad_norm": 1.2291112546963492,
      "learning_rate": 4.0168363090227425e-06,
      "loss": 0.8489,
      "step": 350
    },
    {
      "epoch": 0.32387753380752904,
      "grad_norm": 1.2187350361306888,
      "learning_rate": 3.958231140700742e-06,
      "loss": 0.8827,
      "step": 360
    },
    {
      "epoch": 0.3328741319688493,
      "grad_norm": 1.2251085843679796,
      "learning_rate": 3.898385280576696e-06,
      "loss": 0.8257,
      "step": 370
    },
    {
      "epoch": 0.34187073013016955,
      "grad_norm": 1.2708814495416274,
      "learning_rate": 3.8373496466881986e-06,
      "loss": 0.8586,
      "step": 380
    },
    {
      "epoch": 0.35086732829148976,
      "grad_norm": 1.3212728062854284,
      "learning_rate": 3.775176169355816e-06,
      "loss": 0.8633,
      "step": 390
    },
    {
      "epoch": 0.35986392645281,
      "grad_norm": 1.3153556245818046,
      "learning_rate": 3.7119177469997506e-06,
      "loss": 0.8385,
      "step": 400
    },
    {
      "epoch": 0.36886052461413027,
      "grad_norm": 1.2072741564966885,
      "learning_rate": 3.647628201132818e-06,
      "loss": 0.8167,
      "step": 410
    },
    {
      "epoch": 0.3778571227754505,
      "grad_norm": 1.2379563543143086,
      "learning_rate": 3.582362230568044e-06,
      "loss": 0.8331,
      "step": 420
    },
    {
      "epoch": 0.3868537209367708,
      "grad_norm": 1.2879168901856428,
      "learning_rate": 3.5161753648798367e-06,
      "loss": 0.8337,
      "step": 430
    },
    {
      "epoch": 0.39585031909809104,
      "grad_norm": 1.29505065565257,
      "learning_rate": 3.449123917158331e-06,
      "loss": 0.8371,
      "step": 440
    },
    {
      "epoch": 0.4048469172594113,
      "grad_norm": 1.2295083553559263,
      "learning_rate": 3.3812649360970988e-06,
      "loss": 0.8368,
      "step": 450
    },
    {
      "epoch": 0.41384351542073156,
      "grad_norm": 1.3554176223589234,
      "learning_rate": 3.3126561574549975e-06,
      "loss": 0.8322,
      "step": 460
    },
    {
      "epoch": 0.4228401135820518,
      "grad_norm": 1.3079414538272678,
      "learning_rate": 3.2433559549334475e-06,
      "loss": 0.8295,
      "step": 470
    },
    {
      "epoch": 0.431836711743372,
      "grad_norm": 1.2635231121274326,
      "learning_rate": 3.173423290510937e-06,
      "loss": 0.8343,
      "step": 480
    },
    {
      "epoch": 0.44083330990469227,
      "grad_norm": 1.3150644499613324,
      "learning_rate": 3.102917664277007e-06,
      "loss": 0.8548,
      "step": 490
    },
    {
      "epoch": 0.44982990806601253,
      "grad_norm": 1.366414847286227,
      "learning_rate": 3.0318990638084055e-06,
      "loss": 0.8558,
      "step": 500
    },
    {
      "epoch": 0.4588265062273328,
      "grad_norm": 1.2798875567653052,
      "learning_rate": 2.9604279131304685e-06,
      "loss": 0.8441,
      "step": 510
    },
    {
      "epoch": 0.46782310438865304,
      "grad_norm": 1.2205525459157436,
      "learning_rate": 2.8885650213071746e-06,
      "loss": 0.8346,
      "step": 520
    },
    {
      "epoch": 0.4768197025499733,
      "grad_norm": 1.3131391162470558,
      "learning_rate": 2.8163715307035897e-06,
      "loss": 0.8363,
      "step": 530
    },
    {
      "epoch": 0.48581630071129356,
      "grad_norm": 1.2817420417534302,
      "learning_rate": 2.743908864964741e-06,
      "loss": 0.8637,
      "step": 540
    },
    {
      "epoch": 0.4948128988726138,
      "grad_norm": 1.4117777334581971,
      "learning_rate": 2.6712386767551663e-06,
      "loss": 0.8439,
      "step": 550
    },
    {
      "epoch": 0.5038094970339341,
      "grad_norm": 1.3316023582639032,
      "learning_rate": 2.5984227953036124e-06,
      "loss": 0.8485,
      "step": 560
    },
    {
      "epoch": 0.5128060951952543,
      "grad_norm": 1.220300819321901,
      "learning_rate": 2.52552317379751e-06,
      "loss": 0.8279,
      "step": 570
    },
    {
      "epoch": 0.5218026933565746,
      "grad_norm": 1.2405242839005477,
      "learning_rate": 2.452601836671977e-06,
      "loss": 0.8744,
      "step": 580
    },
    {
      "epoch": 0.5307992915178948,
      "grad_norm": 1.160142751072927,
      "learning_rate": 2.3797208268382096e-06,
      "loss": 0.817,
      "step": 590
    },
    {
      "epoch": 0.5397958896792151,
      "grad_norm": 1.2558513559088742,
      "learning_rate": 2.3069421528961493e-06,
      "loss": 0.8454,
      "step": 600
    },
    {
      "epoch": 0.5487924878405352,
      "grad_norm": 1.2495369982031077,
      "learning_rate": 2.2343277363763437e-06,
      "loss": 0.821,
      "step": 610
    },
    {
      "epoch": 0.5577890860018555,
      "grad_norm": 1.2645404439096872,
      "learning_rate": 2.1619393590558857e-06,
      "loss": 0.8444,
      "step": 620
    },
    {
      "epoch": 0.5667856841631758,
      "grad_norm": 1.2701388082173177,
      "learning_rate": 2.0898386103932634e-06,
      "loss": 0.8296,
      "step": 630
    },
    {
      "epoch": 0.575782282324496,
      "grad_norm": 1.2620244980098876,
      "learning_rate": 2.018086835126831e-06,
      "loss": 0.8425,
      "step": 640
    },
    {
      "epoch": 0.5847788804858163,
      "grad_norm": 1.1594661341735868,
      "learning_rate": 1.9467450810814984e-06,
      "loss": 0.8356,
      "step": 650
    },
    {
      "epoch": 0.5937754786471365,
      "grad_norm": 1.2189222560662132,
      "learning_rate": 1.8758740472280372e-06,
      "loss": 0.8123,
      "step": 660
    },
    {
      "epoch": 0.6027720768084568,
      "grad_norm": 1.2444702547045237,
      "learning_rate": 1.8055340320392002e-06,
      "loss": 0.8124,
      "step": 670
    },
    {
      "epoch": 0.611768674969777,
      "grad_norm": 1.2315293692576161,
      "learning_rate": 1.7357848821865914e-06,
      "loss": 0.8469,
      "step": 680
    },
    {
      "epoch": 0.6207652731310973,
      "grad_norm": 1.2995895579660914,
      "learning_rate": 1.6666859416219384e-06,
      "loss": 0.8171,
      "step": 690
    },
    {
      "epoch": 0.6297618712924176,
      "grad_norm": 1.2091823608533048,
      "learning_rate": 1.5982960010860882e-06,
      "loss": 0.8105,
      "step": 700
    },
    {
      "epoch": 0.6387584694537378,
      "grad_norm": 1.2705740309640616,
      "learning_rate": 1.530673248088687e-06,
      "loss": 0.8342,
      "step": 710
    },
    {
      "epoch": 0.6477550676150581,
      "grad_norm": 1.2981521802148184,
      "learning_rate": 1.463875217401099e-06,
      "loss": 0.8715,
      "step": 720
    },
    {
      "epoch": 0.6567516657763783,
      "grad_norm": 1.1925093116586027,
      "learning_rate": 1.397958742104687e-06,
      "loss": 0.8355,
      "step": 730
    },
    {
      "epoch": 0.6657482639376986,
      "grad_norm": 1.1985043813628813,
      "learning_rate": 1.3329799052361094e-06,
      "loss": 0.8148,
      "step": 740
    },
    {
      "epoch": 0.6747448620990188,
      "grad_norm": 1.1010299588672359,
      "learning_rate": 1.2689939920707667e-06,
      "loss": 0.8229,
      "step": 750
    },
    {
      "epoch": 0.6837414602603391,
      "grad_norm": 1.2430718303897152,
      "learning_rate": 1.2060554430849951e-06,
      "loss": 0.8409,
      "step": 760
    },
    {
      "epoch": 0.6927380584216594,
      "grad_norm": 1.269761061806062,
      "learning_rate": 1.1442178076370426e-06,
      "loss": 0.8258,
      "step": 770
    },
    {
      "epoch": 0.7017346565829795,
      "grad_norm": 1.157246567841852,
      "learning_rate": 1.0835336984062122e-06,
      "loss": 0.8162,
      "step": 780
    },
    {
      "epoch": 0.7107312547442998,
      "grad_norm": 1.2172574409120116,
      "learning_rate": 1.0240547466289646e-06,
      "loss": 0.808,
      "step": 790
    },
    {
      "epoch": 0.71972785290562,
      "grad_norm": 1.2870129001272486,
      "learning_rate": 9.65831558170037e-07,
      "loss": 0.8314,
      "step": 800
    },
    {
      "epoch": 0.7287244510669403,
      "grad_norm": 1.2970054704276492,
      "learning_rate": 9.089136704659823e-07,
      "loss": 0.8437,
      "step": 810
    },
    {
      "epoch": 0.7377210492282605,
      "grad_norm": 1.1999856118846624,
      "learning_rate": 8.533495103777367e-07,
      "loss": 0.8052,
      "step": 820
    },
    {
      "epoch": 0.7467176473895808,
      "grad_norm": 1.233946813798178,
      "learning_rate": 7.991863529880936e-07,
      "loss": 0.7951,
      "step": 830
    },
    {
      "epoch": 0.755714245550901,
      "grad_norm": 1.2668438696715465,
      "learning_rate": 7.464702813791308e-07,
      "loss": 0.8461,
      "step": 840
    },
    {
      "epoch": 0.7647108437122213,
      "grad_norm": 1.2263597703226994,
      "learning_rate": 6.952461474238182e-07,
      "loss": 0.8045,
      "step": 850
    },
    {
      "epoch": 0.7737074418735416,
      "grad_norm": 1.2325614523967854,
      "learning_rate": 6.455575336251549e-07,
      "loss": 0.8562,
      "step": 860
    },
    {
      "epoch": 0.7827040400348618,
      "grad_norm": 1.243498384623771,
      "learning_rate": 5.974467160353226e-07,
      "loss": 0.8345,
      "step": 870
    },
    {
      "epoch": 0.7917006381961821,
      "grad_norm": 1.158311860753632,
      "learning_rate": 5.509546282863806e-07,
      "loss": 0.8344,
      "step": 880
    },
    {
      "epoch": 0.8006972363575023,
      "grad_norm": 1.2163583553306607,
      "learning_rate": 5.061208267631315e-07,
      "loss": 0.8256,
      "step": 890
    },
    {
      "epoch": 0.8096938345188226,
      "grad_norm": 1.2102463242463388,
      "learning_rate": 4.629834569477626e-07,
      "loss": 0.8427,
      "step": 900
    },
    {
      "epoch": 0.8186904326801429,
      "grad_norm": 1.1969681545587425,
      "learning_rate": 4.21579220964923e-07,
      "loss": 0.8195,
      "step": 910
    },
    {
      "epoch": 0.8276870308414631,
      "grad_norm": 1.2573696493635405,
      "learning_rate": 3.8194334635482777e-07,
      "loss": 0.8277,
      "step": 920
    },
    {
      "epoch": 0.8366836290027834,
      "grad_norm": 1.2979421497920385,
      "learning_rate": 3.4410955610097745e-07,
      "loss": 0.811,
      "step": 930
    },
    {
      "epoch": 0.8456802271641036,
      "grad_norm": 1.1946033424208522,
      "learning_rate": 3.0811003993797327e-07,
      "loss": 0.8187,
      "step": 940
    },
    {
      "epoch": 0.8546768253254238,
      "grad_norm": 1.1959972915578303,
      "learning_rate": 2.7397542696386145e-07,
      "loss": 0.8352,
      "step": 950
    },
    {
      "epoch": 0.863673423486744,
      "grad_norm": 1.2612864041438945,
      "learning_rate": 2.4173475958028855e-07,
      "loss": 0.8076,
      "step": 960
    },
    {
      "epoch": 0.8726700216480643,
      "grad_norm": 1.2393073900072233,
      "learning_rate": 2.1141546878265696e-07,
      "loss": 0.8357,
      "step": 970
    },
    {
      "epoch": 0.8816666198093845,
      "grad_norm": 1.3961098525762,
      "learning_rate": 1.8304335082129032e-07,
      "loss": 0.8375,
      "step": 980
    },
    {
      "epoch": 0.8906632179707048,
      "grad_norm": 1.2304674879812914,
      "learning_rate": 1.566425452534784e-07,
      "loss": 0.8482,
      "step": 990
    },
    {
      "epoch": 0.8996598161320251,
      "grad_norm": 1.1275632452975384,
      "learning_rate": 1.3223551440506244e-07,
      "loss": 0.8334,
      "step": 1000
    },
    {
      "epoch": 0.9086564142933453,
      "grad_norm": 1.1697390212239938,
      "learning_rate": 1.0984302425904869e-07,
      "loss": 0.8218,
      "step": 1010
    },
    {
      "epoch": 0.9176530124546656,
      "grad_norm": 1.1549729040350385,
      "learning_rate": 8.94841267874974e-08,
      "loss": 0.8242,
      "step": 1020
    },
    {
      "epoch": 0.9266496106159858,
      "grad_norm": 1.2550852449415948,
      "learning_rate": 7.117614374173353e-08,
      "loss": 0.8276,
      "step": 1030
    },
    {
      "epoch": 0.9356462087773061,
      "grad_norm": 1.1789618223129013,
      "learning_rate": 5.493465191465458e-08,
      "loss": 0.8399,
      "step": 1040
    },
    {
      "epoch": 0.9446428069386263,
      "grad_norm": 1.308555979964644,
      "learning_rate": 4.0773469887692154e-08,
      "loss": 0.8231,
      "step": 1050
    },
    {
      "epoch": 0.9536394050999466,
      "grad_norm": 1.1941396262140982,
      "learning_rate": 2.8704646273687298e-08,
      "loss": 0.8089,
      "step": 1060
    },
    {
      "epoch": 0.9626360032612669,
      "grad_norm": 1.2336678582691885,
      "learning_rate": 1.873844946569614e-08,
      "loss": 0.8546,
      "step": 1070
    },
    {
      "epoch": 0.9716326014225871,
      "grad_norm": 1.2123027368688872,
      "learning_rate": 1.0883358900435626e-08,
      "loss": 0.8213,
      "step": 1080
    },
    {
      "epoch": 0.9806291995839074,
      "grad_norm": 1.101205104982958,
      "learning_rate": 5.146057843814223e-09,
      "loss": 0.8261,
      "step": 1090
    },
    {
      "epoch": 0.9896257977452276,
      "grad_norm": 1.138150004760349,
      "learning_rate": 1.531427704675459e-09,
      "loss": 0.8431,
      "step": 1100
    },
    {
      "epoch": 0.9986223959065479,
      "grad_norm": 1.1694006998873614,
      "learning_rate": 4.25438816009649e-11,
      "loss": 0.8082,
      "step": 1110
    }
  ],
  "logging_steps": 10,
  "max_steps": 1111,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 600,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 145966987345920.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}