4432 lines
98 KiB
JSON
4432 lines
98 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.032734074612906575,
|
|
"eval_steps": 500,
|
|
"global_step": 630,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.967684234392304,
|
|
"learning_rate": 8.722043470761813e-09,
|
|
"loss": 1.4217,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0271119924702425,
|
|
"learning_rate": 1.7444086941523626e-08,
|
|
"loss": 1.3854,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0212962641045844,
|
|
"learning_rate": 2.6166130412285438e-08,
|
|
"loss": 1.421,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.969098210999342,
|
|
"learning_rate": 3.488817388304725e-08,
|
|
"loss": 1.3894,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9600462061407877,
|
|
"learning_rate": 4.361021735380907e-08,
|
|
"loss": 1.3803,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.074991458239833,
|
|
"learning_rate": 5.2332260824570876e-08,
|
|
"loss": 1.3815,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.026445479098331,
|
|
"learning_rate": 6.105430429533269e-08,
|
|
"loss": 1.3684,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9268992001978744,
|
|
"learning_rate": 6.97763477660945e-08,
|
|
"loss": 1.4307,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.020642302186043,
|
|
"learning_rate": 7.849839123685631e-08,
|
|
"loss": 1.3318,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0765469986494853,
|
|
"learning_rate": 8.722043470761814e-08,
|
|
"loss": 1.3942,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.081729945762728,
|
|
"learning_rate": 9.594247817837994e-08,
|
|
"loss": 1.3555,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0021774146670808,
|
|
"learning_rate": 1.0466452164914175e-07,
|
|
"loss": 1.4366,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.059550099782956,
|
|
"learning_rate": 1.1338656511990357e-07,
|
|
"loss": 1.3999,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9198920843423073,
|
|
"learning_rate": 1.2210860859066538e-07,
|
|
"loss": 1.3949,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.013793892067568,
|
|
"learning_rate": 1.308306520614272e-07,
|
|
"loss": 1.4062,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9189606946894022,
|
|
"learning_rate": 1.39552695532189e-07,
|
|
"loss": 1.401,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9989048069702475,
|
|
"learning_rate": 1.4827473900295083e-07,
|
|
"loss": 1.39,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8873329349365613,
|
|
"learning_rate": 1.5699678247371262e-07,
|
|
"loss": 1.4061,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8867416403445345,
|
|
"learning_rate": 1.6571882594447446e-07,
|
|
"loss": 1.4032,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9725069440958127,
|
|
"learning_rate": 1.7444086941523627e-07,
|
|
"loss": 1.3772,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0049844241627675,
|
|
"learning_rate": 1.8316291288599806e-07,
|
|
"loss": 1.3907,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.878905929699835,
|
|
"learning_rate": 1.9188495635675987e-07,
|
|
"loss": 1.4029,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8262523355216014,
|
|
"learning_rate": 2.006069998275217e-07,
|
|
"loss": 1.3488,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.913459302454502,
|
|
"learning_rate": 2.093290432982835e-07,
|
|
"loss": 1.3888,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.924777837323026,
|
|
"learning_rate": 2.1805108676904532e-07,
|
|
"loss": 1.3423,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0238436977422207,
|
|
"learning_rate": 2.2677313023980713e-07,
|
|
"loss": 1.4083,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9404632380821036,
|
|
"learning_rate": 2.3549517371056895e-07,
|
|
"loss": 1.3325,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9624670572200222,
|
|
"learning_rate": 2.4421721718133076e-07,
|
|
"loss": 1.4221,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9307501329723236,
|
|
"learning_rate": 2.5293926065209255e-07,
|
|
"loss": 1.3827,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8568182093611365,
|
|
"learning_rate": 2.616613041228544e-07,
|
|
"loss": 1.394,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0056868600386544,
|
|
"learning_rate": 2.703833475936162e-07,
|
|
"loss": 1.3585,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.928843591728527,
|
|
"learning_rate": 2.79105391064378e-07,
|
|
"loss": 1.3927,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.979059280785437,
|
|
"learning_rate": 2.878274345351398e-07,
|
|
"loss": 1.4622,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.948156874136048,
|
|
"learning_rate": 2.9654947800590165e-07,
|
|
"loss": 1.3583,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.883247676830032,
|
|
"learning_rate": 3.0527152147666344e-07,
|
|
"loss": 1.3533,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.058107134456641,
|
|
"learning_rate": 3.1399356494742523e-07,
|
|
"loss": 1.3904,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0535192707251237,
|
|
"learning_rate": 3.2271560841818707e-07,
|
|
"loss": 1.3978,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.897725634603183,
|
|
"learning_rate": 3.314376518889489e-07,
|
|
"loss": 1.3422,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.869420148217502,
|
|
"learning_rate": 3.401596953597107e-07,
|
|
"loss": 1.4465,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9815636403050676,
|
|
"learning_rate": 3.4888173883047254e-07,
|
|
"loss": 1.407,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9486391625075123,
|
|
"learning_rate": 3.576037823012343e-07,
|
|
"loss": 1.391,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9396534742524283,
|
|
"learning_rate": 3.663258257719961e-07,
|
|
"loss": 1.4191,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.954105365539368,
|
|
"learning_rate": 3.7504786924275796e-07,
|
|
"loss": 1.4039,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9899210526299482,
|
|
"learning_rate": 3.8376991271351975e-07,
|
|
"loss": 1.3606,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9715202250605515,
|
|
"learning_rate": 3.924919561842816e-07,
|
|
"loss": 1.3561,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8726950910953133,
|
|
"learning_rate": 4.012139996550434e-07,
|
|
"loss": 1.4004,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9356806631744568,
|
|
"learning_rate": 4.0993604312580517e-07,
|
|
"loss": 1.3626,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.828385414972167,
|
|
"learning_rate": 4.18658086596567e-07,
|
|
"loss": 1.416,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.860775941749999,
|
|
"learning_rate": 4.273801300673288e-07,
|
|
"loss": 1.4232,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.908555437519401,
|
|
"learning_rate": 4.3610217353809064e-07,
|
|
"loss": 1.371,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0110809579629283,
|
|
"learning_rate": 4.448242170088525e-07,
|
|
"loss": 1.3886,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8879956038976062,
|
|
"learning_rate": 4.5354626047961427e-07,
|
|
"loss": 1.3677,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9475494022582405,
|
|
"learning_rate": 4.622683039503761e-07,
|
|
"loss": 1.3596,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9208201499436677,
|
|
"learning_rate": 4.709903474211379e-07,
|
|
"loss": 1.3795,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9250691956165293,
|
|
"learning_rate": 4.797123908918997e-07,
|
|
"loss": 1.3521,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0059604294844795,
|
|
"learning_rate": 4.884344343626615e-07,
|
|
"loss": 1.3872,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9408348770709347,
|
|
"learning_rate": 4.971564778334233e-07,
|
|
"loss": 1.41,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9941841393676247,
|
|
"learning_rate": 5.058785213041851e-07,
|
|
"loss": 1.4293,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9287843738148744,
|
|
"learning_rate": 5.146005647749469e-07,
|
|
"loss": 1.411,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9468093301597533,
|
|
"learning_rate": 5.233226082457088e-07,
|
|
"loss": 1.3508,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.854893968299532,
|
|
"learning_rate": 5.320446517164706e-07,
|
|
"loss": 1.4042,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9224674242946387,
|
|
"learning_rate": 5.407666951872324e-07,
|
|
"loss": 1.4182,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.025591867750147,
|
|
"learning_rate": 5.494887386579943e-07,
|
|
"loss": 1.4162,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9351339151935045,
|
|
"learning_rate": 5.58210782128756e-07,
|
|
"loss": 1.2956,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9740598113404677,
|
|
"learning_rate": 5.669328255995178e-07,
|
|
"loss": 1.4181,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8698658052840464,
|
|
"learning_rate": 5.756548690702796e-07,
|
|
"loss": 1.3885,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.847745090454755,
|
|
"learning_rate": 5.843769125410415e-07,
|
|
"loss": 1.4268,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.53006793293518,
|
|
"learning_rate": 5.930989560118033e-07,
|
|
"loss": 1.4369,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.02012038605618,
|
|
"learning_rate": 6.01820999482565e-07,
|
|
"loss": 1.406,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8502665195139074,
|
|
"learning_rate": 6.105430429533269e-07,
|
|
"loss": 1.4273,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.966110454182966,
|
|
"learning_rate": 6.192650864240887e-07,
|
|
"loss": 1.3822,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9117409152319604,
|
|
"learning_rate": 6.279871298948505e-07,
|
|
"loss": 1.3758,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9583103749551247,
|
|
"learning_rate": 6.367091733656124e-07,
|
|
"loss": 1.3821,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9976941225678524,
|
|
"learning_rate": 6.454312168363741e-07,
|
|
"loss": 1.3818,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9208562033277237,
|
|
"learning_rate": 6.541532603071359e-07,
|
|
"loss": 1.4249,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9753607287344868,
|
|
"learning_rate": 6.628753037778978e-07,
|
|
"loss": 1.394,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.852059681146882,
|
|
"learning_rate": 6.715973472486596e-07,
|
|
"loss": 1.3735,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9721541467116075,
|
|
"learning_rate": 6.803193907194214e-07,
|
|
"loss": 1.3284,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.959460844473033,
|
|
"learning_rate": 6.890414341901832e-07,
|
|
"loss": 1.3998,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8741209861248747,
|
|
"learning_rate": 6.977634776609451e-07,
|
|
"loss": 1.3696,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.916879772765294,
|
|
"learning_rate": 7.064855211317069e-07,
|
|
"loss": 1.4736,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.069300514821944,
|
|
"learning_rate": 7.152075646024686e-07,
|
|
"loss": 1.3341,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.966078167553101,
|
|
"learning_rate": 7.239296080732305e-07,
|
|
"loss": 1.3705,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.891366947828864,
|
|
"learning_rate": 7.326516515439922e-07,
|
|
"loss": 1.4036,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0015624379292687,
|
|
"learning_rate": 7.41373695014754e-07,
|
|
"loss": 1.3548,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9762714989519297,
|
|
"learning_rate": 7.500957384855159e-07,
|
|
"loss": 1.4038,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0417489896152325,
|
|
"learning_rate": 7.588177819562777e-07,
|
|
"loss": 1.3972,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.957367413809754,
|
|
"learning_rate": 7.675398254270395e-07,
|
|
"loss": 1.376,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9811178726545466,
|
|
"learning_rate": 7.762618688978014e-07,
|
|
"loss": 1.366,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9882512869731994,
|
|
"learning_rate": 7.849839123685632e-07,
|
|
"loss": 1.3835,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.878782548871192,
|
|
"learning_rate": 7.93705955839325e-07,
|
|
"loss": 1.3372,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.9971417051306344,
|
|
"learning_rate": 8.024279993100868e-07,
|
|
"loss": 1.4037,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 3.0231933357399066,
|
|
"learning_rate": 8.111500427808487e-07,
|
|
"loss": 1.3692,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.8777305685815024,
|
|
"learning_rate": 8.198720862516103e-07,
|
|
"loss": 1.3493,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.990027107011572,
|
|
"learning_rate": 8.285941297223721e-07,
|
|
"loss": 1.3586,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"grad_norm": 2.818072292309908,
|
|
"learning_rate": 8.37316173193134e-07,
|
|
"loss": 1.3612,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.1271514088993144,
|
|
"learning_rate": 8.460382166638958e-07,
|
|
"loss": 1.336,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0169265133833396,
|
|
"learning_rate": 8.547602601346576e-07,
|
|
"loss": 1.3665,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9482660829507994,
|
|
"learning_rate": 8.634823036054195e-07,
|
|
"loss": 1.3551,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.087617940804514,
|
|
"learning_rate": 8.722043470761813e-07,
|
|
"loss": 1.3659,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.894747860556493,
|
|
"learning_rate": 8.72204341205319e-07,
|
|
"loss": 1.4032,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9567796592966546,
|
|
"learning_rate": 8.722043235927325e-07,
|
|
"loss": 1.3568,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.130564519854559,
|
|
"learning_rate": 8.72204294238422e-07,
|
|
"loss": 1.3567,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.98891915499642,
|
|
"learning_rate": 8.722042531423884e-07,
|
|
"loss": 1.3865,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.036577692929289,
|
|
"learning_rate": 8.722042003046327e-07,
|
|
"loss": 1.3901,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.001761214468399,
|
|
"learning_rate": 8.722041357251567e-07,
|
|
"loss": 1.4117,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.006505553779288,
|
|
"learning_rate": 8.722040594039618e-07,
|
|
"loss": 1.4083,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.970081989296889,
|
|
"learning_rate": 8.722039713410501e-07,
|
|
"loss": 1.3163,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9452680507051863,
|
|
"learning_rate": 8.72203871536424e-07,
|
|
"loss": 1.3367,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0057027079906216,
|
|
"learning_rate": 8.722037599900863e-07,
|
|
"loss": 1.3893,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9731127424958697,
|
|
"learning_rate": 8.722036367020397e-07,
|
|
"loss": 1.3604,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.884218184477182,
|
|
"learning_rate": 8.722035016722879e-07,
|
|
"loss": 1.4651,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.8763866239897644,
|
|
"learning_rate": 8.722033549008343e-07,
|
|
"loss": 1.3947,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.927176134008426,
|
|
"learning_rate": 8.722031963876829e-07,
|
|
"loss": 1.3667,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9924621440798664,
|
|
"learning_rate": 8.72203026132838e-07,
|
|
"loss": 1.3632,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0499504528400943,
|
|
"learning_rate": 8.72202844136304e-07,
|
|
"loss": 1.4176,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9684475725772392,
|
|
"learning_rate": 8.722026503980863e-07,
|
|
"loss": 1.4146,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9148264494394662,
|
|
"learning_rate": 8.722024449181895e-07,
|
|
"loss": 1.4205,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.947431519938494,
|
|
"learning_rate": 8.722022276966194e-07,
|
|
"loss": 1.3281,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.955626479582277,
|
|
"learning_rate": 8.72201998733382e-07,
|
|
"loss": 1.3465,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.028540598737732,
|
|
"learning_rate": 8.722017580284832e-07,
|
|
"loss": 1.3472,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9197378030040753,
|
|
"learning_rate": 8.722015055819296e-07,
|
|
"loss": 1.381,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0574320249257227,
|
|
"learning_rate": 8.722012413937282e-07,
|
|
"loss": 1.4225,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.913538682906452,
|
|
"learning_rate": 8.722009654638856e-07,
|
|
"loss": 1.3536,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.886066575609779,
|
|
"learning_rate": 8.722006777924096e-07,
|
|
"loss": 1.3736,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.957758024407401,
|
|
"learning_rate": 8.722003783793081e-07,
|
|
"loss": 1.3973,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.958995782934072,
|
|
"learning_rate": 8.722000672245888e-07,
|
|
"loss": 1.3954,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.987991679162308,
|
|
"learning_rate": 8.721997443282602e-07,
|
|
"loss": 1.3757,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0909686056252434,
|
|
"learning_rate": 8.721994096903311e-07,
|
|
"loss": 1.3462,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9477825764652494,
|
|
"learning_rate": 8.721990633108104e-07,
|
|
"loss": 1.4295,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.947921751933276,
|
|
"learning_rate": 8.721987051897074e-07,
|
|
"loss": 1.3854,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.8440480288328427,
|
|
"learning_rate": 8.721983353270319e-07,
|
|
"loss": 1.4106,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9792049006251906,
|
|
"learning_rate": 8.721979537227935e-07,
|
|
"loss": 1.3913,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9238731192373746,
|
|
"learning_rate": 8.721975603770031e-07,
|
|
"loss": 1.3695,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0773774260583155,
|
|
"learning_rate": 8.721971552896706e-07,
|
|
"loss": 1.3629,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9679219250269044,
|
|
"learning_rate": 8.721967384608074e-07,
|
|
"loss": 1.4205,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.030565370577699,
|
|
"learning_rate": 8.721963098904246e-07,
|
|
"loss": 1.4311,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0375097512582503,
|
|
"learning_rate": 8.721958695785336e-07,
|
|
"loss": 1.4069,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.92214551378445,
|
|
"learning_rate": 8.721954175251462e-07,
|
|
"loss": 1.422,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.1283303266405578,
|
|
"learning_rate": 8.721949537302749e-07,
|
|
"loss": 1.432,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.1049340924381705,
|
|
"learning_rate": 8.72194478193932e-07,
|
|
"loss": 1.3815,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.869253522521714,
|
|
"learning_rate": 8.721939909161303e-07,
|
|
"loss": 1.391,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.8752461370622306,
|
|
"learning_rate": 8.721934918968828e-07,
|
|
"loss": 1.3769,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.909751532098998,
|
|
"learning_rate": 8.721929811362032e-07,
|
|
"loss": 1.3995,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0462324191538572,
|
|
"learning_rate": 8.72192458634105e-07,
|
|
"loss": 1.3689,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9750194133891363,
|
|
"learning_rate": 8.721919243906024e-07,
|
|
"loss": 1.3707,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.932835851287147,
|
|
"learning_rate": 8.721913784057099e-07,
|
|
"loss": 1.3676,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9429516387295926,
|
|
"learning_rate": 8.721908206794419e-07,
|
|
"loss": 1.3731,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.994182650351975,
|
|
"learning_rate": 8.721902512118136e-07,
|
|
"loss": 1.3542,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.876833046636617,
|
|
"learning_rate": 8.721896700028404e-07,
|
|
"loss": 1.4124,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.945327518176284,
|
|
"learning_rate": 8.721890770525377e-07,
|
|
"loss": 1.4137,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 21.047240653653276,
|
|
"learning_rate": 8.721884723609218e-07,
|
|
"loss": 1.4264,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.029952956834553,
|
|
"learning_rate": 8.721878559280086e-07,
|
|
"loss": 1.4372,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.137230528895406,
|
|
"learning_rate": 8.721872277538151e-07,
|
|
"loss": 1.4019,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9696146910825694,
|
|
"learning_rate": 8.72186587838358e-07,
|
|
"loss": 1.4515,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.980760336638325,
|
|
"learning_rate": 8.721859361816546e-07,
|
|
"loss": 1.4203,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.086006727040003,
|
|
"learning_rate": 8.721852727837222e-07,
|
|
"loss": 1.3712,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.003419403761712,
|
|
"learning_rate": 8.72184597644579e-07,
|
|
"loss": 1.4107,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0463864266769773,
|
|
"learning_rate": 8.72183910764243e-07,
|
|
"loss": 1.4082,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.950362004991645,
|
|
"learning_rate": 8.721832121427326e-07,
|
|
"loss": 1.352,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.8779668562920815,
|
|
"learning_rate": 8.721825017800669e-07,
|
|
"loss": 1.4236,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0172810234945455,
|
|
"learning_rate": 8.721817796762648e-07,
|
|
"loss": 1.3871,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9726094865888224,
|
|
"learning_rate": 8.721810458313457e-07,
|
|
"loss": 1.349,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0322153773349334,
|
|
"learning_rate": 8.721803002453297e-07,
|
|
"loss": 1.3935,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0249194383352283,
|
|
"learning_rate": 8.721795429182364e-07,
|
|
"loss": 1.3849,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9938901642866718,
|
|
"learning_rate": 8.721787738500866e-07,
|
|
"loss": 1.4267,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.923274256584191,
|
|
"learning_rate": 8.721779930409007e-07,
|
|
"loss": 1.4283,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.8466301519873785,
|
|
"learning_rate": 8.721772004906999e-07,
|
|
"loss": 1.3842,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0052783960700165,
|
|
"learning_rate": 8.721763961995056e-07,
|
|
"loss": 1.4335,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0935980229307614,
|
|
"learning_rate": 8.721755801673391e-07,
|
|
"loss": 1.3751,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0201926532008505,
|
|
"learning_rate": 8.721747523942229e-07,
|
|
"loss": 1.383,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.964731945306275,
|
|
"learning_rate": 8.721739128801788e-07,
|
|
"loss": 1.3359,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.928301273992501,
|
|
"learning_rate": 8.721730616252297e-07,
|
|
"loss": 1.3461,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9759904501938617,
|
|
"learning_rate": 8.721721986293985e-07,
|
|
"loss": 1.3644,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.040389211247673,
|
|
"learning_rate": 8.721713238927082e-07,
|
|
"loss": 1.4341,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.005645814777094,
|
|
"learning_rate": 8.721704374151826e-07,
|
|
"loss": 1.3967,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.1272949994089823,
|
|
"learning_rate": 8.721695391968456e-07,
|
|
"loss": 1.3796,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0684527006439533,
|
|
"learning_rate": 8.721686292377211e-07,
|
|
"loss": 1.3905,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9348716760076603,
|
|
"learning_rate": 8.721677075378338e-07,
|
|
"loss": 1.3905,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9873597802095304,
|
|
"learning_rate": 8.721667740972085e-07,
|
|
"loss": 1.4103,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.979793648840201,
|
|
"learning_rate": 8.721658289158703e-07,
|
|
"loss": 1.3622,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.937468774579186,
|
|
"learning_rate": 8.721648719938447e-07,
|
|
"loss": 1.414,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.879104091071243,
|
|
"learning_rate": 8.721639033311573e-07,
|
|
"loss": 1.3108,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0663878291218203,
|
|
"learning_rate": 8.721629229278344e-07,
|
|
"loss": 1.3543,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9407287447315826,
|
|
"learning_rate": 8.721619307839025e-07,
|
|
"loss": 1.3753,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.967538550932994,
|
|
"learning_rate": 8.721609268993879e-07,
|
|
"loss": 1.3973,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.057519293009879,
|
|
"learning_rate": 8.721599112743179e-07,
|
|
"loss": 1.4036,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.936392616519391,
|
|
"learning_rate": 8.721588839087197e-07,
|
|
"loss": 1.4852,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9670464594249197,
|
|
"learning_rate": 8.721578448026212e-07,
|
|
"loss": 1.3643,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0273720809079663,
|
|
"learning_rate": 8.721567939560502e-07,
|
|
"loss": 1.4109,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0651462806238854,
|
|
"learning_rate": 8.721557313690349e-07,
|
|
"loss": 1.3599,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9487059919929326,
|
|
"learning_rate": 8.721546570416042e-07,
|
|
"loss": 1.3377,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0998938976371146,
|
|
"learning_rate": 8.721535709737867e-07,
|
|
"loss": 1.3685,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.940826121224176,
|
|
"learning_rate": 8.721524731656118e-07,
|
|
"loss": 1.4174,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.014412763659776,
|
|
"learning_rate": 8.721513636171093e-07,
|
|
"loss": 1.3758,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0608004542672678,
|
|
"learning_rate": 8.721502423283086e-07,
|
|
"loss": 1.3716,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.94244083669587,
|
|
"learning_rate": 8.721491092992403e-07,
|
|
"loss": 1.3937,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9911802591493144,
|
|
"learning_rate": 8.721479645299345e-07,
|
|
"loss": 1.4164,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0344694715702065,
|
|
"learning_rate": 8.721468080204223e-07,
|
|
"loss": 1.4167,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.865667879330454,
|
|
"learning_rate": 8.72145639770735e-07,
|
|
"loss": 1.4041,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.072739743995507,
|
|
"learning_rate": 8.721444597809037e-07,
|
|
"loss": 1.4133,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0276333439793843,
|
|
"learning_rate": 8.721432680509603e-07,
|
|
"loss": 1.3605,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0465572110487686,
|
|
"learning_rate": 8.721420645809369e-07,
|
|
"loss": 1.3134,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9542599421921376,
|
|
"learning_rate": 8.721408493708659e-07,
|
|
"loss": 1.4148,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.90720227559915,
|
|
"learning_rate": 8.721396224207801e-07,
|
|
"loss": 1.3997,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0246259249156617,
|
|
"learning_rate": 8.721383837307123e-07,
|
|
"loss": 1.4238,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9975853807488453,
|
|
"learning_rate": 8.721371333006962e-07,
|
|
"loss": 1.3879,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0021383888830258,
|
|
"learning_rate": 8.721358711307651e-07,
|
|
"loss": 1.3349,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0201205392426296,
|
|
"learning_rate": 8.721345972209533e-07,
|
|
"loss": 1.3692,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.972139743981842,
|
|
"learning_rate": 8.721333115712948e-07,
|
|
"loss": 1.3856,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9209251468195276,
|
|
"learning_rate": 8.721320141818245e-07,
|
|
"loss": 1.3726,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9320196507721277,
|
|
"learning_rate": 8.721307050525772e-07,
|
|
"loss": 1.4143,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0002549106343337,
|
|
"learning_rate": 8.72129384183588e-07,
|
|
"loss": 1.3897,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9582570275362206,
|
|
"learning_rate": 8.721280515748928e-07,
|
|
"loss": 1.3756,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0145408653891526,
|
|
"learning_rate": 8.721267072265271e-07,
|
|
"loss": 1.3929,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0495215598431553,
|
|
"learning_rate": 8.721253511385274e-07,
|
|
"loss": 1.4061,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9254365712957613,
|
|
"learning_rate": 8.721239833109302e-07,
|
|
"loss": 1.3903,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.997455071778174,
|
|
"learning_rate": 8.72122603743772e-07,
|
|
"loss": 1.4246,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9195114563849627,
|
|
"learning_rate": 8.721212124370902e-07,
|
|
"loss": 1.3968,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0160422542520706,
|
|
"learning_rate": 8.721198093909225e-07,
|
|
"loss": 1.4347,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.01404121750226,
|
|
"learning_rate": 8.721183946053062e-07,
|
|
"loss": 1.3945,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0611502119276692,
|
|
"learning_rate": 8.721169680802796e-07,
|
|
"loss": 1.3975,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0684020412598727,
|
|
"learning_rate": 8.721155298158811e-07,
|
|
"loss": 1.373,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9440206694677027,
|
|
"learning_rate": 8.721140798121494e-07,
|
|
"loss": 1.3432,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.974581850771121,
|
|
"learning_rate": 8.721126180691237e-07,
|
|
"loss": 1.3095,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9933489105960844,
|
|
"learning_rate": 8.721111445868431e-07,
|
|
"loss": 1.3885,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9831661987665528,
|
|
"learning_rate": 8.721096593653475e-07,
|
|
"loss": 1.3126,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.100080295310524,
|
|
"learning_rate": 8.721081624046766e-07,
|
|
"loss": 1.3567,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.989885190608965,
|
|
"learning_rate": 8.72106653704871e-07,
|
|
"loss": 1.3899,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0712176271885023,
|
|
"learning_rate": 8.721051332659713e-07,
|
|
"loss": 1.4208,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.062312373029536,
|
|
"learning_rate": 8.721036010880183e-07,
|
|
"loss": 1.4147,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9701616634317083,
|
|
"learning_rate": 8.721020571710533e-07,
|
|
"loss": 1.434,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9572643731393646,
|
|
"learning_rate": 8.721005015151179e-07,
|
|
"loss": 1.3795,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.997406682050713,
|
|
"learning_rate": 8.720989341202539e-07,
|
|
"loss": 1.4501,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9106126480356522,
|
|
"learning_rate": 8.720973549865035e-07,
|
|
"loss": 1.3684,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9444070042644817,
|
|
"learning_rate": 8.720957641139094e-07,
|
|
"loss": 1.4213,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.8728037311842822,
|
|
"learning_rate": 8.720941615025142e-07,
|
|
"loss": 1.3519,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.015889105815668,
|
|
"learning_rate": 8.720925471523613e-07,
|
|
"loss": 1.4162,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9419377055914744,
|
|
"learning_rate": 8.72090921063494e-07,
|
|
"loss": 1.3357,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.89008663153287,
|
|
"learning_rate": 8.720892832359559e-07,
|
|
"loss": 1.3647,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9219232048658736,
|
|
"learning_rate": 8.720876336697914e-07,
|
|
"loss": 1.4069,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.96537590149616,
|
|
"learning_rate": 8.72085972365045e-07,
|
|
"loss": 1.4118,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.883655732971505,
|
|
"learning_rate": 8.720842993217609e-07,
|
|
"loss": 1.4136,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9788747864733764,
|
|
"learning_rate": 8.720826145399848e-07,
|
|
"loss": 1.3976,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.875570982035785,
|
|
"learning_rate": 8.720809180197616e-07,
|
|
"loss": 1.426,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9926412719867304,
|
|
"learning_rate": 8.720792097611372e-07,
|
|
"loss": 1.3629,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.958723584893194,
|
|
"learning_rate": 8.720774897641574e-07,
|
|
"loss": 1.3918,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.968992238648431,
|
|
"learning_rate": 8.720757580288688e-07,
|
|
"loss": 1.4241,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.889688463405204,
|
|
"learning_rate": 8.720740145553177e-07,
|
|
"loss": 1.4101,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9993006762652312,
|
|
"learning_rate": 8.720722593435512e-07,
|
|
"loss": 1.3857,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.980847240255761,
|
|
"learning_rate": 8.720704923936167e-07,
|
|
"loss": 1.4077,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.884007977441845,
|
|
"learning_rate": 8.720687137055615e-07,
|
|
"loss": 1.3822,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9646728227580645,
|
|
"learning_rate": 8.720669232794336e-07,
|
|
"loss": 1.3737,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.893382783809774,
|
|
"learning_rate": 8.720651211152813e-07,
|
|
"loss": 1.3762,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.024418707419624,
|
|
"learning_rate": 8.72063307213153e-07,
|
|
"loss": 1.3546,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0252657870696495,
|
|
"learning_rate": 8.720614815730977e-07,
|
|
"loss": 1.3661,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0208601885030606,
|
|
"learning_rate": 8.720596441951642e-07,
|
|
"loss": 1.4182,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0552975630549954,
|
|
"learning_rate": 8.720577950794024e-07,
|
|
"loss": 1.38,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.916749346833794,
|
|
"learning_rate": 8.720559342258619e-07,
|
|
"loss": 1.4049,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.035247531851327,
|
|
"learning_rate": 8.720540616345928e-07,
|
|
"loss": 1.4256,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9902303644665205,
|
|
"learning_rate": 8.720521773056454e-07,
|
|
"loss": 1.3356,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.119044393884763,
|
|
"learning_rate": 8.720502812390706e-07,
|
|
"loss": 1.4103,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9552269954583803,
|
|
"learning_rate": 8.720483734349194e-07,
|
|
"loss": 1.3855,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.017213443982555,
|
|
"learning_rate": 8.720464538932433e-07,
|
|
"loss": 1.3902,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.925567645830183,
|
|
"learning_rate": 8.720445226140937e-07,
|
|
"loss": 1.4519,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.983266195022755,
|
|
"learning_rate": 8.720425795975228e-07,
|
|
"loss": 1.3971,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.992218221530755,
|
|
"learning_rate": 8.720406248435828e-07,
|
|
"loss": 1.4231,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9250759809857882,
|
|
"learning_rate": 8.720386583523264e-07,
|
|
"loss": 1.3877,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.022334104434051,
|
|
"learning_rate": 8.720366801238065e-07,
|
|
"loss": 1.4133,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.90248743689466,
|
|
"learning_rate": 8.720346901580765e-07,
|
|
"loss": 1.3889,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.006981215139682,
|
|
"learning_rate": 8.720326884551899e-07,
|
|
"loss": 1.3657,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.996511837199606,
|
|
"learning_rate": 8.720306750152005e-07,
|
|
"loss": 1.3918,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9853146230235317,
|
|
"learning_rate": 8.720286498381625e-07,
|
|
"loss": 1.3983,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.936322205558776,
|
|
"learning_rate": 8.720266129241307e-07,
|
|
"loss": 1.3549,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0336499138604984,
|
|
"learning_rate": 8.720245642731596e-07,
|
|
"loss": 1.3614,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9761367095195514,
|
|
"learning_rate": 8.720225038853046e-07,
|
|
"loss": 1.4223,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.8383029378391256,
|
|
"learning_rate": 8.72020431760621e-07,
|
|
"loss": 1.3027,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0012216332284964,
|
|
"learning_rate": 8.720183478991647e-07,
|
|
"loss": 1.307,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.202220665410359,
|
|
"learning_rate": 8.720162523009919e-07,
|
|
"loss": 1.3495,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.926537743004413,
|
|
"learning_rate": 8.720141449661587e-07,
|
|
"loss": 1.346,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.025411005245412,
|
|
"learning_rate": 8.720120258947223e-07,
|
|
"loss": 1.3581,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.010956090798263,
|
|
"learning_rate": 8.720098950867392e-07,
|
|
"loss": 1.3634,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.0383205174055727,
|
|
"learning_rate": 8.720077525422671e-07,
|
|
"loss": 1.3642,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.967895550740301,
|
|
"learning_rate": 8.720055982613638e-07,
|
|
"loss": 1.3841,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.9365373503076246,
|
|
"learning_rate": 8.720034322440872e-07,
|
|
"loss": 1.3527,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 3.1104370711279214,
|
|
"learning_rate": 8.720012544904955e-07,
|
|
"loss": 1.3483,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.952339289555111,
|
|
"learning_rate": 8.719990650006473e-07,
|
|
"loss": 1.3956,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"grad_norm": 2.942959986729864,
|
|
"learning_rate": 8.719968637746018e-07,
|
|
"loss": 1.4256,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1019407497257507,
|
|
"learning_rate": 8.71994650812418e-07,
|
|
"loss": 1.3786,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0501476908245984,
|
|
"learning_rate": 8.719924261141557e-07,
|
|
"loss": 1.4158,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9781394711393507,
|
|
"learning_rate": 8.719901896798748e-07,
|
|
"loss": 1.427,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0411329565229646,
|
|
"learning_rate": 8.719879415096352e-07,
|
|
"loss": 1.4281,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.022154009359811,
|
|
"learning_rate": 8.719856816034978e-07,
|
|
"loss": 1.435,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.965921869395771,
|
|
"learning_rate": 8.719834099615232e-07,
|
|
"loss": 1.3766,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.026938014636579,
|
|
"learning_rate": 8.719811265837728e-07,
|
|
"loss": 1.3612,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9969914810093115,
|
|
"learning_rate": 8.719788314703078e-07,
|
|
"loss": 1.3371,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.8906070169866545,
|
|
"learning_rate": 8.719765246211902e-07,
|
|
"loss": 1.3826,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0301378229830593,
|
|
"learning_rate": 8.71974206036482e-07,
|
|
"loss": 1.3937,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9673359121672145,
|
|
"learning_rate": 8.719718757162457e-07,
|
|
"loss": 1.3838,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.972360304451488,
|
|
"learning_rate": 8.719695336605439e-07,
|
|
"loss": 1.4382,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9493767886841242,
|
|
"learning_rate": 8.7196717986944e-07,
|
|
"loss": 1.3651,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.031661286320472,
|
|
"learning_rate": 8.719648143429969e-07,
|
|
"loss": 1.3482,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.920995409830151,
|
|
"learning_rate": 8.719624370812787e-07,
|
|
"loss": 1.4115,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.920727312220773,
|
|
"learning_rate": 8.719600480843491e-07,
|
|
"loss": 1.396,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.945684201201641,
|
|
"learning_rate": 8.719576473522726e-07,
|
|
"loss": 1.3557,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0651768014119956,
|
|
"learning_rate": 8.719552348851139e-07,
|
|
"loss": 1.389,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1250149616393577,
|
|
"learning_rate": 8.719528106829378e-07,
|
|
"loss": 1.469,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.977539941978143,
|
|
"learning_rate": 8.719503747458096e-07,
|
|
"loss": 1.3536,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0745479693463924,
|
|
"learning_rate": 8.71947927073795e-07,
|
|
"loss": 1.3877,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.2160266553797667,
|
|
"learning_rate": 8.719454676669596e-07,
|
|
"loss": 1.3988,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.134783336833123,
|
|
"learning_rate": 8.719429965253698e-07,
|
|
"loss": 1.4104,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.908012731710042,
|
|
"learning_rate": 8.719405136490924e-07,
|
|
"loss": 1.4186,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9868078254055934,
|
|
"learning_rate": 8.71938019038194e-07,
|
|
"loss": 1.2836,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 9.747982306246115,
|
|
"learning_rate": 8.719355126927416e-07,
|
|
"loss": 1.3331,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9503527741257476,
|
|
"learning_rate": 8.719329946128029e-07,
|
|
"loss": 1.3993,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.000724848973538,
|
|
"learning_rate": 8.719304647984458e-07,
|
|
"loss": 1.3621,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0259025600243032,
|
|
"learning_rate": 8.719279232497381e-07,
|
|
"loss": 1.4128,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.95272448132235,
|
|
"learning_rate": 8.719253699667485e-07,
|
|
"loss": 1.4239,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.7978451838553986,
|
|
"learning_rate": 8.719228049495456e-07,
|
|
"loss": 1.3694,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.976459149812301,
|
|
"learning_rate": 8.719202281981985e-07,
|
|
"loss": 1.3413,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9711797841726377,
|
|
"learning_rate": 8.719176397127765e-07,
|
|
"loss": 1.3616,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0204333297285815,
|
|
"learning_rate": 8.719150394933495e-07,
|
|
"loss": 1.4126,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0181766925849556,
|
|
"learning_rate": 8.719124275399874e-07,
|
|
"loss": 1.4323,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9167139561107893,
|
|
"learning_rate": 8.719098038527604e-07,
|
|
"loss": 1.4484,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9378575085429013,
|
|
"learning_rate": 8.719071684317393e-07,
|
|
"loss": 1.3775,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.94943978740792,
|
|
"learning_rate": 8.719045212769951e-07,
|
|
"loss": 1.3897,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.958505276332381,
|
|
"learning_rate": 8.719018623885988e-07,
|
|
"loss": 1.394,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.145867503995205,
|
|
"learning_rate": 8.718991917666222e-07,
|
|
"loss": 1.379,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9734480635815728,
|
|
"learning_rate": 8.718965094111372e-07,
|
|
"loss": 1.3953,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9049260797132335,
|
|
"learning_rate": 8.71893815322216e-07,
|
|
"loss": 1.3827,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.98953226106279,
|
|
"learning_rate": 8.718911094999311e-07,
|
|
"loss": 1.3862,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.898375435464832,
|
|
"learning_rate": 8.718883919443554e-07,
|
|
"loss": 1.4134,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9986915176885027,
|
|
"learning_rate": 8.718856626555621e-07,
|
|
"loss": 1.391,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.054114308668177,
|
|
"learning_rate": 8.718829216336246e-07,
|
|
"loss": 1.414,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.966556344517333,
|
|
"learning_rate": 8.718801688786166e-07,
|
|
"loss": 1.4188,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1040321426982134,
|
|
"learning_rate": 8.718774043906126e-07,
|
|
"loss": 1.3538,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0055709258768832,
|
|
"learning_rate": 8.718746281696866e-07,
|
|
"loss": 1.4413,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9449143169829277,
|
|
"learning_rate": 8.718718402159136e-07,
|
|
"loss": 1.3449,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9706428099571305,
|
|
"learning_rate": 8.718690405293686e-07,
|
|
"loss": 1.4158,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0814964982203112,
|
|
"learning_rate": 8.718662291101268e-07,
|
|
"loss": 1.3981,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.036501892302947,
|
|
"learning_rate": 8.718634059582641e-07,
|
|
"loss": 1.4047,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0667912243139535,
|
|
"learning_rate": 8.718605710738567e-07,
|
|
"loss": 1.4436,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.096788465549673,
|
|
"learning_rate": 8.718577244569806e-07,
|
|
"loss": 1.4332,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.031370455846918,
|
|
"learning_rate": 8.718548661077125e-07,
|
|
"loss": 1.3962,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9672083000240344,
|
|
"learning_rate": 8.718519960261294e-07,
|
|
"loss": 1.4205,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.007883947100492,
|
|
"learning_rate": 8.718491142123086e-07,
|
|
"loss": 1.3446,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9748300479515253,
|
|
"learning_rate": 8.718462206663277e-07,
|
|
"loss": 1.3854,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0397247689440396,
|
|
"learning_rate": 8.718433153882645e-07,
|
|
"loss": 1.4125,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.001960481680682,
|
|
"learning_rate": 8.718403983781974e-07,
|
|
"loss": 1.3947,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0664806351413088,
|
|
"learning_rate": 8.718374696362047e-07,
|
|
"loss": 1.3624,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0210504760194175,
|
|
"learning_rate": 8.718345291623656e-07,
|
|
"loss": 1.4671,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.971388652881855,
|
|
"learning_rate": 8.718315769567588e-07,
|
|
"loss": 1.3472,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.952431051174851,
|
|
"learning_rate": 8.718286130194643e-07,
|
|
"loss": 1.3779,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.12691241920494,
|
|
"learning_rate": 8.718256373505615e-07,
|
|
"loss": 1.4117,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0148285597590796,
|
|
"learning_rate": 8.718226499501307e-07,
|
|
"loss": 1.3676,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.98672263886442,
|
|
"learning_rate": 8.718196508182523e-07,
|
|
"loss": 1.4435,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.968930206941385,
|
|
"learning_rate": 8.718166399550071e-07,
|
|
"loss": 1.4378,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0425445228617187,
|
|
"learning_rate": 8.718136173604761e-07,
|
|
"loss": 1.3597,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9276879829658107,
|
|
"learning_rate": 8.718105830347405e-07,
|
|
"loss": 1.3689,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9317936344250413,
|
|
"learning_rate": 8.718075369778825e-07,
|
|
"loss": 1.3721,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9645490048095637,
|
|
"learning_rate": 8.718044791899837e-07,
|
|
"loss": 1.3987,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0365564821005977,
|
|
"learning_rate": 8.718014096711265e-07,
|
|
"loss": 1.3868,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9682770215203553,
|
|
"learning_rate": 8.717983284213936e-07,
|
|
"loss": 1.3415,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.134935849661195,
|
|
"learning_rate": 8.717952354408679e-07,
|
|
"loss": 1.3293,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.8124045479008384,
|
|
"learning_rate": 8.717921307296327e-07,
|
|
"loss": 1.4101,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.870855459457384,
|
|
"learning_rate": 8.717890142877717e-07,
|
|
"loss": 1.4129,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.996734500928963,
|
|
"learning_rate": 8.717858861153686e-07,
|
|
"loss": 1.4188,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.905301300393811,
|
|
"learning_rate": 8.717827462125079e-07,
|
|
"loss": 1.3503,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.963215198908182,
|
|
"learning_rate": 8.717795945792739e-07,
|
|
"loss": 1.3539,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.058342559604312,
|
|
"learning_rate": 8.717764312157515e-07,
|
|
"loss": 1.3911,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9650165681938128,
|
|
"learning_rate": 8.717732561220258e-07,
|
|
"loss": 1.4207,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9793800118049454,
|
|
"learning_rate": 8.717700692981826e-07,
|
|
"loss": 1.3691,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9162869272769556,
|
|
"learning_rate": 8.717668707443075e-07,
|
|
"loss": 1.395,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9636673086391485,
|
|
"learning_rate": 8.717636604604865e-07,
|
|
"loss": 1.4023,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.959298293762338,
|
|
"learning_rate": 8.717604384468061e-07,
|
|
"loss": 1.4328,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.965235704696395,
|
|
"learning_rate": 8.717572047033532e-07,
|
|
"loss": 1.4354,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.127226354296215,
|
|
"learning_rate": 8.717539592302147e-07,
|
|
"loss": 1.3904,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9772007199986428,
|
|
"learning_rate": 8.717507020274781e-07,
|
|
"loss": 1.3997,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1654089044177103,
|
|
"learning_rate": 8.717474330952311e-07,
|
|
"loss": 1.3664,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.897024330171534,
|
|
"learning_rate": 8.717441524335616e-07,
|
|
"loss": 1.3815,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9908064118392947,
|
|
"learning_rate": 8.717408600425579e-07,
|
|
"loss": 1.4008,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1674379998791475,
|
|
"learning_rate": 8.717375559223089e-07,
|
|
"loss": 1.4134,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.810265806813017,
|
|
"learning_rate": 8.717342400729033e-07,
|
|
"loss": 1.4046,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9789538191272626,
|
|
"learning_rate": 8.717309124944306e-07,
|
|
"loss": 1.3957,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.995888914357539,
|
|
"learning_rate": 8.717275731869801e-07,
|
|
"loss": 1.3823,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.94123727534938,
|
|
"learning_rate": 8.71724222150642e-07,
|
|
"loss": 1.3577,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.907211393678648,
|
|
"learning_rate": 8.717208593855062e-07,
|
|
"loss": 1.4016,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9549644446432546,
|
|
"learning_rate": 8.717174848916635e-07,
|
|
"loss": 1.3554,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.047404295254929,
|
|
"learning_rate": 8.717140986692047e-07,
|
|
"loss": 1.3977,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0015613969735218,
|
|
"learning_rate": 8.717107007182211e-07,
|
|
"loss": 1.4159,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.99276033713888,
|
|
"learning_rate": 8.71707291038804e-07,
|
|
"loss": 1.4194,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9613959661694427,
|
|
"learning_rate": 8.717038696310452e-07,
|
|
"loss": 1.4072,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.984569722219675,
|
|
"learning_rate": 8.717004364950369e-07,
|
|
"loss": 1.4018,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.927152138759416,
|
|
"learning_rate": 8.716969916308715e-07,
|
|
"loss": 1.4038,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0771539333400764,
|
|
"learning_rate": 8.716935350386416e-07,
|
|
"loss": 1.3754,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.970985940726889,
|
|
"learning_rate": 8.716900667184406e-07,
|
|
"loss": 1.4458,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0928065462412633,
|
|
"learning_rate": 8.716865866703617e-07,
|
|
"loss": 1.371,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9309896563133697,
|
|
"learning_rate": 8.716830948944986e-07,
|
|
"loss": 1.3509,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0455988834094736,
|
|
"learning_rate": 8.716795913909452e-07,
|
|
"loss": 1.3827,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.061462286190086,
|
|
"learning_rate": 8.716760761597961e-07,
|
|
"loss": 1.3926,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9080714574645516,
|
|
"learning_rate": 8.716725492011458e-07,
|
|
"loss": 1.4101,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9046604352395207,
|
|
"learning_rate": 8.716690105150891e-07,
|
|
"loss": 1.335,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.946411297505849,
|
|
"learning_rate": 8.716654601017216e-07,
|
|
"loss": 1.4109,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9811491538335915,
|
|
"learning_rate": 8.716618979611386e-07,
|
|
"loss": 1.4007,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.828391151750033,
|
|
"learning_rate": 8.716583240934361e-07,
|
|
"loss": 1.4194,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.861846008744537,
|
|
"learning_rate": 8.716547384987104e-07,
|
|
"loss": 1.3164,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.8751261295501274,
|
|
"learning_rate": 8.716511411770581e-07,
|
|
"loss": 1.4447,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.085208227054159,
|
|
"learning_rate": 8.716475321285758e-07,
|
|
"loss": 1.3732,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.926763955103524,
|
|
"learning_rate": 8.716439113533609e-07,
|
|
"loss": 1.427,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.021150407067775,
|
|
"learning_rate": 8.716402788515107e-07,
|
|
"loss": 1.4123,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0835953078832476,
|
|
"learning_rate": 8.716366346231232e-07,
|
|
"loss": 1.3225,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.084681723058033,
|
|
"learning_rate": 8.716329786682964e-07,
|
|
"loss": 1.4007,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9354891039496507,
|
|
"learning_rate": 8.716293109871288e-07,
|
|
"loss": 1.374,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.072363269165642,
|
|
"learning_rate": 8.71625631579719e-07,
|
|
"loss": 1.355,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9000514864569373,
|
|
"learning_rate": 8.716219404461663e-07,
|
|
"loss": 1.3718,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0562397719571766,
|
|
"learning_rate": 8.716182375865698e-07,
|
|
"loss": 1.3814,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0526646692685717,
|
|
"learning_rate": 8.716145230010296e-07,
|
|
"loss": 1.3772,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0966573543083538,
|
|
"learning_rate": 8.716107966896452e-07,
|
|
"loss": 1.4287,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0880361698977263,
|
|
"learning_rate": 8.716070586525174e-07,
|
|
"loss": 1.3751,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.832576232775376,
|
|
"learning_rate": 8.716033088897465e-07,
|
|
"loss": 1.416,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.965237748755546,
|
|
"learning_rate": 8.715995474014337e-07,
|
|
"loss": 1.3621,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0463957419921783,
|
|
"learning_rate": 8.7159577418768e-07,
|
|
"loss": 1.357,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9739015917876723,
|
|
"learning_rate": 8.715919892485873e-07,
|
|
"loss": 1.3873,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.8676852163892037,
|
|
"learning_rate": 8.715881925842573e-07,
|
|
"loss": 1.4051,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.088087320089484,
|
|
"learning_rate": 8.715843841947923e-07,
|
|
"loss": 1.3151,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0270665713855367,
|
|
"learning_rate": 8.715805640802949e-07,
|
|
"loss": 1.3679,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9869536543983366,
|
|
"learning_rate": 8.715767322408678e-07,
|
|
"loss": 1.3528,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9112845523257675,
|
|
"learning_rate": 8.715728886766143e-07,
|
|
"loss": 1.442,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0049960527344544,
|
|
"learning_rate": 8.715690333876378e-07,
|
|
"loss": 1.3681,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9921307040597664,
|
|
"learning_rate": 8.715651663740421e-07,
|
|
"loss": 1.4314,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0109768676656605,
|
|
"learning_rate": 8.715612876359315e-07,
|
|
"loss": 1.3847,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.039247694352697,
|
|
"learning_rate": 8.715573971734103e-07,
|
|
"loss": 1.4317,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.019898918932762,
|
|
"learning_rate": 8.71553494986583e-07,
|
|
"loss": 1.3623,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.00522079476457,
|
|
"learning_rate": 8.71549581075555e-07,
|
|
"loss": 1.3884,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9610972813683794,
|
|
"learning_rate": 8.715456554404316e-07,
|
|
"loss": 1.3315,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.967170476790787,
|
|
"learning_rate": 8.715417180813185e-07,
|
|
"loss": 1.4207,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9522241028635285,
|
|
"learning_rate": 8.715377689983216e-07,
|
|
"loss": 1.4012,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.928961724018662,
|
|
"learning_rate": 8.715338081915475e-07,
|
|
"loss": 1.3869,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.952221930130988,
|
|
"learning_rate": 8.715298356611025e-07,
|
|
"loss": 1.3703,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0536781428675335,
|
|
"learning_rate": 8.715258514070937e-07,
|
|
"loss": 1.3682,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.128913913582799,
|
|
"learning_rate": 8.715218554296284e-07,
|
|
"loss": 1.3435,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0301910736766318,
|
|
"learning_rate": 8.715178477288141e-07,
|
|
"loss": 1.3975,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.962217823506329,
|
|
"learning_rate": 8.715138283047589e-07,
|
|
"loss": 1.3488,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0039607070469647,
|
|
"learning_rate": 8.715097971575708e-07,
|
|
"loss": 1.378,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0602288102993938,
|
|
"learning_rate": 8.715057542873585e-07,
|
|
"loss": 1.3572,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.969434960629606,
|
|
"learning_rate": 8.715016996942307e-07,
|
|
"loss": 1.3713,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9917667276430477,
|
|
"learning_rate": 8.714976333782967e-07,
|
|
"loss": 1.4607,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.2046134996002436,
|
|
"learning_rate": 8.714935553396659e-07,
|
|
"loss": 1.3853,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1097764355868733,
|
|
"learning_rate": 8.714894655784481e-07,
|
|
"loss": 1.371,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9715612348659124,
|
|
"learning_rate": 8.714853640947534e-07,
|
|
"loss": 1.392,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9321597700386333,
|
|
"learning_rate": 8.714812508886925e-07,
|
|
"loss": 1.4051,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.949095796893927,
|
|
"learning_rate": 8.714771259603758e-07,
|
|
"loss": 1.3469,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9290487074046756,
|
|
"learning_rate": 8.714729893099144e-07,
|
|
"loss": 1.3629,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0295532590400196,
|
|
"learning_rate": 8.714688409374198e-07,
|
|
"loss": 1.3689,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.965897130998238,
|
|
"learning_rate": 8.714646808430036e-07,
|
|
"loss": 1.3619,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0935335333535336,
|
|
"learning_rate": 8.714605090267779e-07,
|
|
"loss": 1.3781,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.986770714363214,
|
|
"learning_rate": 8.71456325488855e-07,
|
|
"loss": 1.364,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.950553960144967,
|
|
"learning_rate": 8.714521302293475e-07,
|
|
"loss": 1.4063,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.014174300238565,
|
|
"learning_rate": 8.714479232483683e-07,
|
|
"loss": 1.3676,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.24671603526281,
|
|
"learning_rate": 8.714437045460308e-07,
|
|
"loss": 1.3578,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1623556544347724,
|
|
"learning_rate": 8.714394741224484e-07,
|
|
"loss": 1.3645,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0025864567325122,
|
|
"learning_rate": 8.714352319777354e-07,
|
|
"loss": 1.3607,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.94717894620833,
|
|
"learning_rate": 8.714309781120056e-07,
|
|
"loss": 1.3498,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.8964926477515935,
|
|
"learning_rate": 8.714267125253735e-07,
|
|
"loss": 1.3728,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1656664213505823,
|
|
"learning_rate": 8.714224352179544e-07,
|
|
"loss": 1.4176,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.020188270295796,
|
|
"learning_rate": 8.71418146189863e-07,
|
|
"loss": 1.4194,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.978569270321128,
|
|
"learning_rate": 8.71413845441215e-07,
|
|
"loss": 1.3749,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9396855197827283,
|
|
"learning_rate": 8.714095329721261e-07,
|
|
"loss": 1.3795,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9422669465457387,
|
|
"learning_rate": 8.714052087827125e-07,
|
|
"loss": 1.3834,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.010282715520298,
|
|
"learning_rate": 8.714008728730907e-07,
|
|
"loss": 1.3531,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9717500175741116,
|
|
"learning_rate": 8.713965252433773e-07,
|
|
"loss": 1.3667,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.052704318228861,
|
|
"learning_rate": 8.713921658936892e-07,
|
|
"loss": 1.3456,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.528710625831412,
|
|
"learning_rate": 8.713877948241442e-07,
|
|
"loss": 1.3936,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.9466834730933833,
|
|
"learning_rate": 8.713834120348596e-07,
|
|
"loss": 1.3217,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0996541898765226,
|
|
"learning_rate": 8.713790175259536e-07,
|
|
"loss": 1.3855,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 2.982252067970699,
|
|
"learning_rate": 8.713746112975446e-07,
|
|
"loss": 1.384,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.1109077626663844,
|
|
"learning_rate": 8.713701933497509e-07,
|
|
"loss": 1.3561,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0394061264038115,
|
|
"learning_rate": 8.713657636826918e-07,
|
|
"loss": 1.468,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0969706383479,
|
|
"learning_rate": 8.713613222964863e-07,
|
|
"loss": 1.3993,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0348133446475662,
|
|
"learning_rate": 8.713568691912542e-07,
|
|
"loss": 1.387,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 3.0273826285615195,
|
|
"learning_rate": 8.713524043671153e-07,
|
|
"loss": 1.3959,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.141130816615921,
|
|
"learning_rate": 8.713479278241898e-07,
|
|
"loss": 1.4479,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0490824173681945,
|
|
"learning_rate": 8.713434395625983e-07,
|
|
"loss": 1.3583,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9474474845957404,
|
|
"learning_rate": 8.713389395824614e-07,
|
|
"loss": 1.3344,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9486918699061118,
|
|
"learning_rate": 8.713344278839005e-07,
|
|
"loss": 1.4022,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9302492343255344,
|
|
"learning_rate": 8.71329904467037e-07,
|
|
"loss": 1.3344,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9531126631866753,
|
|
"learning_rate": 8.713253693319929e-07,
|
|
"loss": 1.3451,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8555050387157213,
|
|
"learning_rate": 8.713208224788899e-07,
|
|
"loss": 1.3287,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0850474903019305,
|
|
"learning_rate": 8.713162639078507e-07,
|
|
"loss": 1.4153,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9916349483264004,
|
|
"learning_rate": 8.71311693618998e-07,
|
|
"loss": 1.4025,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0448173115664545,
|
|
"learning_rate": 8.713071116124549e-07,
|
|
"loss": 1.4129,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9652773442800022,
|
|
"learning_rate": 8.713025178883445e-07,
|
|
"loss": 1.3688,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8707509231071127,
|
|
"learning_rate": 8.712979124467906e-07,
|
|
"loss": 1.3714,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.968740384281211,
|
|
"learning_rate": 8.712932952879176e-07,
|
|
"loss": 1.4012,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.938361663169202,
|
|
"learning_rate": 8.712886664118492e-07,
|
|
"loss": 1.406,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1217977871072775,
|
|
"learning_rate": 8.712840258187104e-07,
|
|
"loss": 1.3822,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9353035115177915,
|
|
"learning_rate": 8.71279373508626e-07,
|
|
"loss": 1.3578,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.014072001165337,
|
|
"learning_rate": 8.712747094817213e-07,
|
|
"loss": 1.4454,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.969346395207918,
|
|
"learning_rate": 8.71270033738122e-07,
|
|
"loss": 1.3705,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.050484611788353,
|
|
"learning_rate": 8.712653462779539e-07,
|
|
"loss": 1.3551,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.940761479764165,
|
|
"learning_rate": 8.71260647101343e-07,
|
|
"loss": 1.4147,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9928956127151944,
|
|
"learning_rate": 8.712559362084161e-07,
|
|
"loss": 1.3604,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0125472864930805,
|
|
"learning_rate": 8.712512135993e-07,
|
|
"loss": 1.4183,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.940381713821346,
|
|
"learning_rate": 8.712464792741218e-07,
|
|
"loss": 1.4414,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0279511535103336,
|
|
"learning_rate": 8.712417332330089e-07,
|
|
"loss": 1.3505,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9922191158837954,
|
|
"learning_rate": 8.712369754760892e-07,
|
|
"loss": 1.4028,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0184990677208834,
|
|
"learning_rate": 8.712322060034907e-07,
|
|
"loss": 1.3465,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.05423073796875,
|
|
"learning_rate": 8.712274248153418e-07,
|
|
"loss": 1.3416,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1326601737410025,
|
|
"learning_rate": 8.712226319117715e-07,
|
|
"loss": 1.3924,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.995785462842688,
|
|
"learning_rate": 8.712178272929084e-07,
|
|
"loss": 1.3895,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9760799944825806,
|
|
"learning_rate": 8.712130109588823e-07,
|
|
"loss": 1.4104,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9654696172148896,
|
|
"learning_rate": 8.712081829098225e-07,
|
|
"loss": 1.378,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9968608921090265,
|
|
"learning_rate": 8.712033431458593e-07,
|
|
"loss": 1.4264,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.969211803711655,
|
|
"learning_rate": 8.711984916671229e-07,
|
|
"loss": 1.3607,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.942118444739339,
|
|
"learning_rate": 8.711936284737438e-07,
|
|
"loss": 1.3899,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.98791974032871,
|
|
"learning_rate": 8.711887535658529e-07,
|
|
"loss": 1.3459,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9216444622419537,
|
|
"learning_rate": 8.711838669435818e-07,
|
|
"loss": 1.4116,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9987312946412206,
|
|
"learning_rate": 8.711789686070618e-07,
|
|
"loss": 1.4126,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9997602427201926,
|
|
"learning_rate": 8.711740585564249e-07,
|
|
"loss": 1.3392,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9736815652309683,
|
|
"learning_rate": 8.711691367918032e-07,
|
|
"loss": 1.3483,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0854886077878487,
|
|
"learning_rate": 8.711642033133292e-07,
|
|
"loss": 1.3842,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.061838785099914,
|
|
"learning_rate": 8.711592581211358e-07,
|
|
"loss": 1.3658,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.023168820723632,
|
|
"learning_rate": 8.711543012153561e-07,
|
|
"loss": 1.3929,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.970076769155023,
|
|
"learning_rate": 8.711493325961236e-07,
|
|
"loss": 1.3587,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.002070690376611,
|
|
"learning_rate": 8.71144352263572e-07,
|
|
"loss": 1.367,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.995354883459173,
|
|
"learning_rate": 8.711393602178357e-07,
|
|
"loss": 1.4185,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 24.26995863156332,
|
|
"learning_rate": 8.711343564590487e-07,
|
|
"loss": 1.3982,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.065387181865347,
|
|
"learning_rate": 8.711293409873459e-07,
|
|
"loss": 1.3668,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.965733738900826,
|
|
"learning_rate": 8.711243138028624e-07,
|
|
"loss": 1.3989,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.909922562005101,
|
|
"learning_rate": 8.711192749057334e-07,
|
|
"loss": 1.3548,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9485719715840824,
|
|
"learning_rate": 8.711142242960946e-07,
|
|
"loss": 1.3982,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0587521051463598,
|
|
"learning_rate": 8.711091619740822e-07,
|
|
"loss": 1.4208,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.016741791642564,
|
|
"learning_rate": 8.711040879398322e-07,
|
|
"loss": 1.3776,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.087201787399773,
|
|
"learning_rate": 8.710990021934814e-07,
|
|
"loss": 1.3509,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1398002317055873,
|
|
"learning_rate": 8.710939047351665e-07,
|
|
"loss": 1.4054,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.003141522348306,
|
|
"learning_rate": 8.710887955650252e-07,
|
|
"loss": 1.3895,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0461324715788862,
|
|
"learning_rate": 8.710836746831946e-07,
|
|
"loss": 1.4143,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.956952816424475,
|
|
"learning_rate": 8.710785420898127e-07,
|
|
"loss": 1.4008,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8987555743390168,
|
|
"learning_rate": 8.710733977850179e-07,
|
|
"loss": 1.3823,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0536968376300284,
|
|
"learning_rate": 8.710682417689485e-07,
|
|
"loss": 1.4578,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.969066979149947,
|
|
"learning_rate": 8.710630740417435e-07,
|
|
"loss": 1.428,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1272335317584905,
|
|
"learning_rate": 8.710578946035417e-07,
|
|
"loss": 1.3562,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9753498641377742,
|
|
"learning_rate": 8.710527034544828e-07,
|
|
"loss": 1.3953,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.943913278878306,
|
|
"learning_rate": 8.710475005947067e-07,
|
|
"loss": 1.3626,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9513572324834727,
|
|
"learning_rate": 8.710422860243531e-07,
|
|
"loss": 1.3461,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0849634901753284,
|
|
"learning_rate": 8.710370597435629e-07,
|
|
"loss": 1.3663,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.138354533416878,
|
|
"learning_rate": 8.710318217524763e-07,
|
|
"loss": 1.3141,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.972312260895404,
|
|
"learning_rate": 8.710265720512346e-07,
|
|
"loss": 1.3633,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0591517440498097,
|
|
"learning_rate": 8.710213106399791e-07,
|
|
"loss": 1.3557,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8524186182545987,
|
|
"learning_rate": 8.710160375188516e-07,
|
|
"loss": 1.3676,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9822890833732956,
|
|
"learning_rate": 8.710107526879938e-07,
|
|
"loss": 1.4331,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9176351551822868,
|
|
"learning_rate": 8.710054561475481e-07,
|
|
"loss": 1.371,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.94388048856757,
|
|
"learning_rate": 8.71000147897657e-07,
|
|
"loss": 1.393,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.085541520730895,
|
|
"learning_rate": 8.709948279384639e-07,
|
|
"loss": 1.3937,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9491352117439757,
|
|
"learning_rate": 8.709894962701115e-07,
|
|
"loss": 1.3526,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9933004115427444,
|
|
"learning_rate": 8.709841528927436e-07,
|
|
"loss": 1.3916,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.959458221957766,
|
|
"learning_rate": 8.70978797806504e-07,
|
|
"loss": 1.3462,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.963203467802987,
|
|
"learning_rate": 8.709734310115368e-07,
|
|
"loss": 1.3783,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9902951999179312,
|
|
"learning_rate": 8.709680525079866e-07,
|
|
"loss": 1.362,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.948654339418917,
|
|
"learning_rate": 8.709626622959983e-07,
|
|
"loss": 1.3841,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.001564005849718,
|
|
"learning_rate": 8.709572603757169e-07,
|
|
"loss": 1.3572,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0380595324448416,
|
|
"learning_rate": 8.709518467472878e-07,
|
|
"loss": 1.4171,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.896631790474921,
|
|
"learning_rate": 8.709464214108568e-07,
|
|
"loss": 1.3448,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9319635944784475,
|
|
"learning_rate": 8.709409843665701e-07,
|
|
"loss": 1.3917,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.963100470234962,
|
|
"learning_rate": 8.709355356145739e-07,
|
|
"loss": 1.3655,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.030553118915969,
|
|
"learning_rate": 8.709300751550151e-07,
|
|
"loss": 1.3365,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.4928530382016887,
|
|
"learning_rate": 8.709246029880405e-07,
|
|
"loss": 1.3662,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.030669221337684,
|
|
"learning_rate": 8.709191191137976e-07,
|
|
"loss": 1.4529,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.155457882933603,
|
|
"learning_rate": 8.70913623532434e-07,
|
|
"loss": 1.4022,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9856956704586115,
|
|
"learning_rate": 8.709081162440975e-07,
|
|
"loss": 1.3989,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9343339295028015,
|
|
"learning_rate": 8.709025972489367e-07,
|
|
"loss": 1.4212,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1952129991940543,
|
|
"learning_rate": 8.708970665471e-07,
|
|
"loss": 1.4436,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.015106319776789,
|
|
"learning_rate": 8.708915241387364e-07,
|
|
"loss": 1.4422,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.022781702862628,
|
|
"learning_rate": 8.708859700239951e-07,
|
|
"loss": 1.3789,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.932454535389442,
|
|
"learning_rate": 8.708804042030254e-07,
|
|
"loss": 1.3783,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1075106961875165,
|
|
"learning_rate": 8.708748266759774e-07,
|
|
"loss": 1.3904,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8613959925148356,
|
|
"learning_rate": 8.708692374430014e-07,
|
|
"loss": 1.3701,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9916282178733447,
|
|
"learning_rate": 8.708636365042476e-07,
|
|
"loss": 1.3517,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8578706799379554,
|
|
"learning_rate": 8.70858023859867e-07,
|
|
"loss": 1.4124,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0055644119433533,
|
|
"learning_rate": 8.708523995100105e-07,
|
|
"loss": 1.3869,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.027942807639001,
|
|
"learning_rate": 8.708467634548298e-07,
|
|
"loss": 1.3703,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1584010302430836,
|
|
"learning_rate": 8.708411156944765e-07,
|
|
"loss": 1.3852,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.08434624814754,
|
|
"learning_rate": 8.708354562291027e-07,
|
|
"loss": 1.4008,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.937949504985518,
|
|
"learning_rate": 8.708297850588607e-07,
|
|
"loss": 1.4026,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0080974250898116,
|
|
"learning_rate": 8.708241021839032e-07,
|
|
"loss": 1.3992,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.986930909094618,
|
|
"learning_rate": 8.708184076043833e-07,
|
|
"loss": 1.3504,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1912323292885363,
|
|
"learning_rate": 8.708127013204543e-07,
|
|
"loss": 1.3919,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.203030731079236,
|
|
"learning_rate": 8.708069833322698e-07,
|
|
"loss": 1.3601,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.097916109715531,
|
|
"learning_rate": 8.708012536399837e-07,
|
|
"loss": 1.3619,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0656903032705327,
|
|
"learning_rate": 8.707955122437504e-07,
|
|
"loss": 1.3162,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.002875260439292,
|
|
"learning_rate": 8.707897591437243e-07,
|
|
"loss": 1.389,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0143271490705077,
|
|
"learning_rate": 8.707839943400606e-07,
|
|
"loss": 1.3323,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.096313689982386,
|
|
"learning_rate": 8.707782178329142e-07,
|
|
"loss": 1.3813,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.960471861335116,
|
|
"learning_rate": 8.707724296224408e-07,
|
|
"loss": 1.3472,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0072247729027115,
|
|
"learning_rate": 8.707666297087963e-07,
|
|
"loss": 1.3522,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.949063388714746,
|
|
"learning_rate": 8.707608180921366e-07,
|
|
"loss": 1.3928,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.1121870350751064,
|
|
"learning_rate": 8.707549947726183e-07,
|
|
"loss": 1.4399,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9609559500955003,
|
|
"learning_rate": 8.707491597503982e-07,
|
|
"loss": 1.3898,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9585328248619587,
|
|
"learning_rate": 8.707433130256336e-07,
|
|
"loss": 1.379,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.918687510312111,
|
|
"learning_rate": 8.707374545984816e-07,
|
|
"loss": 1.4176,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.15488138702356,
|
|
"learning_rate": 8.707315844691002e-07,
|
|
"loss": 1.3706,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9610415881399677,
|
|
"learning_rate": 8.707257026376471e-07,
|
|
"loss": 1.3641,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0862098441133234,
|
|
"learning_rate": 8.707198091042811e-07,
|
|
"loss": 1.3893,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9328888264242106,
|
|
"learning_rate": 8.707139038691606e-07,
|
|
"loss": 1.333,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.97580891347495,
|
|
"learning_rate": 8.707079869324446e-07,
|
|
"loss": 1.3607,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.000962296933563,
|
|
"learning_rate": 8.707020582942925e-07,
|
|
"loss": 1.424,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.984700606909975,
|
|
"learning_rate": 8.706961179548639e-07,
|
|
"loss": 1.3912,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.998907818017932,
|
|
"learning_rate": 8.706901659143189e-07,
|
|
"loss": 1.4241,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.95254670370234,
|
|
"learning_rate": 8.706842021728173e-07,
|
|
"loss": 1.3759,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.134652976665768,
|
|
"learning_rate": 8.706782267305202e-07,
|
|
"loss": 1.3767,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.972318633221704,
|
|
"learning_rate": 8.706722395875881e-07,
|
|
"loss": 1.3648,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8607560362869124,
|
|
"learning_rate": 8.706662407441824e-07,
|
|
"loss": 1.3946,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8921206621422653,
|
|
"learning_rate": 8.706602302004645e-07,
|
|
"loss": 1.4396,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.949264928829496,
|
|
"learning_rate": 8.706542079565962e-07,
|
|
"loss": 1.3475,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.996499503605724,
|
|
"learning_rate": 8.706481740127399e-07,
|
|
"loss": 1.37,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9235932803770868,
|
|
"learning_rate": 8.706421283690578e-07,
|
|
"loss": 1.2987,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0655234851680824,
|
|
"learning_rate": 8.706360710257128e-07,
|
|
"loss": 1.3903,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.968020759665533,
|
|
"learning_rate": 8.706300019828679e-07,
|
|
"loss": 1.4227,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.116571254583397,
|
|
"learning_rate": 8.706239212406866e-07,
|
|
"loss": 1.4153,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0578559650775383,
|
|
"learning_rate": 8.706178287993326e-07,
|
|
"loss": 1.4168,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9399476742786907,
|
|
"learning_rate": 8.706117246589699e-07,
|
|
"loss": 1.3448,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.045715774895227,
|
|
"learning_rate": 8.706056088197628e-07,
|
|
"loss": 1.4323,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.8892619986413655,
|
|
"learning_rate": 8.705994812818759e-07,
|
|
"loss": 1.3688,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0537421060657257,
|
|
"learning_rate": 8.705933420454745e-07,
|
|
"loss": 1.2805,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.02315805498333,
|
|
"learning_rate": 8.705871911107236e-07,
|
|
"loss": 1.3664,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0968669538104705,
|
|
"learning_rate": 8.70581028477789e-07,
|
|
"loss": 1.4156,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.958411205891844,
|
|
"learning_rate": 8.705748541468365e-07,
|
|
"loss": 1.3879,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0644896141269213,
|
|
"learning_rate": 8.705686681180324e-07,
|
|
"loss": 1.406,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 2.9355330340666947,
|
|
"learning_rate": 8.705624703915431e-07,
|
|
"loss": 1.4157,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"grad_norm": 3.0295134625264732,
|
|
"learning_rate": 8.705562609675357e-07,
|
|
"loss": 1.3595,
|
|
"step": 630
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 19246,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 105,
|
|
"total_flos": 164886478848000.0,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|