6407 lines
166 KiB
JSON
6407 lines
166 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 3.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 909,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0033003300330033004,
|
||
|
|
"grad_norm": 10.81499361768409,
|
||
|
|
"learning_rate": 0.0,
|
||
|
|
"loss": 1.2079360485076904,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006600660066006601,
|
||
|
|
"grad_norm": 10.226770877445293,
|
||
|
|
"learning_rate": 4.395604395604396e-07,
|
||
|
|
"loss": 1.123347520828247,
|
||
|
|
"step": 2
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.009900990099009901,
|
||
|
|
"grad_norm": 11.292644267807786,
|
||
|
|
"learning_rate": 8.791208791208792e-07,
|
||
|
|
"loss": 1.261695384979248,
|
||
|
|
"step": 3
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.013201320132013201,
|
||
|
|
"grad_norm": 10.504638106263508,
|
||
|
|
"learning_rate": 1.3186813186813187e-06,
|
||
|
|
"loss": 1.1276888847351074,
|
||
|
|
"step": 4
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0165016501650165,
|
||
|
|
"grad_norm": 10.822100601159539,
|
||
|
|
"learning_rate": 1.7582417582417585e-06,
|
||
|
|
"loss": 1.2254480123519897,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.019801980198019802,
|
||
|
|
"grad_norm": 9.905516433474448,
|
||
|
|
"learning_rate": 2.197802197802198e-06,
|
||
|
|
"loss": 1.1809396743774414,
|
||
|
|
"step": 6
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0231023102310231,
|
||
|
|
"grad_norm": 9.323364829402967,
|
||
|
|
"learning_rate": 2.6373626373626375e-06,
|
||
|
|
"loss": 1.2000095844268799,
|
||
|
|
"step": 7
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.026402640264026403,
|
||
|
|
"grad_norm": 6.706098746162178,
|
||
|
|
"learning_rate": 3.0769230769230774e-06,
|
||
|
|
"loss": 1.0248074531555176,
|
||
|
|
"step": 8
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0297029702970297,
|
||
|
|
"grad_norm": 5.761138380327878,
|
||
|
|
"learning_rate": 3.516483516483517e-06,
|
||
|
|
"loss": 1.0840561389923096,
|
||
|
|
"step": 9
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.033003300330033,
|
||
|
|
"grad_norm": 2.7364343552329315,
|
||
|
|
"learning_rate": 3.9560439560439565e-06,
|
||
|
|
"loss": 0.955639123916626,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.036303630363036306,
|
||
|
|
"grad_norm": 2.113810438625661,
|
||
|
|
"learning_rate": 4.395604395604396e-06,
|
||
|
|
"loss": 0.9281604290008545,
|
||
|
|
"step": 11
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.039603960396039604,
|
||
|
|
"grad_norm": 1.849238684536393,
|
||
|
|
"learning_rate": 4.8351648351648355e-06,
|
||
|
|
"loss": 0.9079018831253052,
|
||
|
|
"step": 12
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0429042904290429,
|
||
|
|
"grad_norm": 1.6747171029255208,
|
||
|
|
"learning_rate": 5.274725274725275e-06,
|
||
|
|
"loss": 0.9039217233657837,
|
||
|
|
"step": 13
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0462046204620462,
|
||
|
|
"grad_norm": 2.0121666555693416,
|
||
|
|
"learning_rate": 5.7142857142857145e-06,
|
||
|
|
"loss": 0.8910936117172241,
|
||
|
|
"step": 14
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04950495049504951,
|
||
|
|
"grad_norm": 2.0600124028897526,
|
||
|
|
"learning_rate": 6.153846153846155e-06,
|
||
|
|
"loss": 0.895532488822937,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.052805280528052806,
|
||
|
|
"grad_norm": 2.0613449368510044,
|
||
|
|
"learning_rate": 6.5934065934065935e-06,
|
||
|
|
"loss": 0.8889240622520447,
|
||
|
|
"step": 16
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.056105610561056105,
|
||
|
|
"grad_norm": 1.785450637059245,
|
||
|
|
"learning_rate": 7.032967032967034e-06,
|
||
|
|
"loss": 0.8499570488929749,
|
||
|
|
"step": 17
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0594059405940594,
|
||
|
|
"grad_norm": 1.5894161631201256,
|
||
|
|
"learning_rate": 7.472527472527473e-06,
|
||
|
|
"loss": 0.839992105960846,
|
||
|
|
"step": 18
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0627062706270627,
|
||
|
|
"grad_norm": 1.1904834264503976,
|
||
|
|
"learning_rate": 7.912087912087913e-06,
|
||
|
|
"loss": 0.7718420028686523,
|
||
|
|
"step": 19
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.066006600660066,
|
||
|
|
"grad_norm": 1.0397335564670163,
|
||
|
|
"learning_rate": 8.351648351648353e-06,
|
||
|
|
"loss": 0.7865867614746094,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06930693069306931,
|
||
|
|
"grad_norm": 0.8314739102256958,
|
||
|
|
"learning_rate": 8.791208791208792e-06,
|
||
|
|
"loss": 0.7982739806175232,
|
||
|
|
"step": 21
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07260726072607261,
|
||
|
|
"grad_norm": 0.6542597896181986,
|
||
|
|
"learning_rate": 9.230769230769232e-06,
|
||
|
|
"loss": 0.7846421599388123,
|
||
|
|
"step": 22
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07590759075907591,
|
||
|
|
"grad_norm": 0.6269389928815381,
|
||
|
|
"learning_rate": 9.670329670329671e-06,
|
||
|
|
"loss": 0.7005743980407715,
|
||
|
|
"step": 23
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07920792079207921,
|
||
|
|
"grad_norm": 0.6603922634859757,
|
||
|
|
"learning_rate": 1.010989010989011e-05,
|
||
|
|
"loss": 0.7084314227104187,
|
||
|
|
"step": 24
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08250825082508251,
|
||
|
|
"grad_norm": 0.6856248928818359,
|
||
|
|
"learning_rate": 1.054945054945055e-05,
|
||
|
|
"loss": 0.7310304641723633,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0858085808580858,
|
||
|
|
"grad_norm": 0.5728331825854258,
|
||
|
|
"learning_rate": 1.098901098901099e-05,
|
||
|
|
"loss": 0.7056888341903687,
|
||
|
|
"step": 26
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0891089108910891,
|
||
|
|
"grad_norm": 0.47956485465857923,
|
||
|
|
"learning_rate": 1.1428571428571429e-05,
|
||
|
|
"loss": 0.6987950205802917,
|
||
|
|
"step": 27
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0924092409240924,
|
||
|
|
"grad_norm": 0.47407141179043555,
|
||
|
|
"learning_rate": 1.186813186813187e-05,
|
||
|
|
"loss": 0.7319807410240173,
|
||
|
|
"step": 28
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09570957095709572,
|
||
|
|
"grad_norm": 0.4856924244101555,
|
||
|
|
"learning_rate": 1.230769230769231e-05,
|
||
|
|
"loss": 0.6983063220977783,
|
||
|
|
"step": 29
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09900990099009901,
|
||
|
|
"grad_norm": 0.49122925908544063,
|
||
|
|
"learning_rate": 1.2747252747252747e-05,
|
||
|
|
"loss": 0.70492023229599,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10231023102310231,
|
||
|
|
"grad_norm": 0.4556788168903923,
|
||
|
|
"learning_rate": 1.3186813186813187e-05,
|
||
|
|
"loss": 0.7376629114151001,
|
||
|
|
"step": 31
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10561056105610561,
|
||
|
|
"grad_norm": 0.4272838300827657,
|
||
|
|
"learning_rate": 1.3626373626373627e-05,
|
||
|
|
"loss": 0.6623936295509338,
|
||
|
|
"step": 32
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10891089108910891,
|
||
|
|
"grad_norm": 0.40886227927218277,
|
||
|
|
"learning_rate": 1.4065934065934068e-05,
|
||
|
|
"loss": 0.7136330604553223,
|
||
|
|
"step": 33
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11221122112211221,
|
||
|
|
"grad_norm": 0.37821179606418975,
|
||
|
|
"learning_rate": 1.4505494505494506e-05,
|
||
|
|
"loss": 0.7113747596740723,
|
||
|
|
"step": 34
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11551155115511551,
|
||
|
|
"grad_norm": 0.4538557716923258,
|
||
|
|
"learning_rate": 1.4945054945054947e-05,
|
||
|
|
"loss": 0.8252867460250854,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1188118811881188,
|
||
|
|
"grad_norm": 0.3875808052898815,
|
||
|
|
"learning_rate": 1.5384615384615387e-05,
|
||
|
|
"loss": 0.7406599521636963,
|
||
|
|
"step": 36
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12211221122112212,
|
||
|
|
"grad_norm": 0.3503240143986989,
|
||
|
|
"learning_rate": 1.5824175824175826e-05,
|
||
|
|
"loss": 0.6572297811508179,
|
||
|
|
"step": 37
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1254125412541254,
|
||
|
|
"grad_norm": 0.3779655372487014,
|
||
|
|
"learning_rate": 1.6263736263736265e-05,
|
||
|
|
"loss": 0.7520949840545654,
|
||
|
|
"step": 38
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12871287128712872,
|
||
|
|
"grad_norm": 0.36968690038350466,
|
||
|
|
"learning_rate": 1.6703296703296707e-05,
|
||
|
|
"loss": 0.6861323118209839,
|
||
|
|
"step": 39
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.132013201320132,
|
||
|
|
"grad_norm": 0.3724328241107235,
|
||
|
|
"learning_rate": 1.7142857142857142e-05,
|
||
|
|
"loss": 0.6818518042564392,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1353135313531353,
|
||
|
|
"grad_norm": 0.35542054984937593,
|
||
|
|
"learning_rate": 1.7582417582417584e-05,
|
||
|
|
"loss": 0.6663186550140381,
|
||
|
|
"step": 41
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13861386138613863,
|
||
|
|
"grad_norm": 0.3441266617586836,
|
||
|
|
"learning_rate": 1.8021978021978023e-05,
|
||
|
|
"loss": 0.6492191553115845,
|
||
|
|
"step": 42
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1419141914191419,
|
||
|
|
"grad_norm": 0.3478448092762331,
|
||
|
|
"learning_rate": 1.8461538461538465e-05,
|
||
|
|
"loss": 0.6444741487503052,
|
||
|
|
"step": 43
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14521452145214522,
|
||
|
|
"grad_norm": 0.34951148057960574,
|
||
|
|
"learning_rate": 1.8901098901098903e-05,
|
||
|
|
"loss": 0.6476814150810242,
|
||
|
|
"step": 44
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1485148514851485,
|
||
|
|
"grad_norm": 0.3356672452160599,
|
||
|
|
"learning_rate": 1.9340659340659342e-05,
|
||
|
|
"loss": 0.6660827994346619,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15181518151815182,
|
||
|
|
"grad_norm": 0.30809956365723695,
|
||
|
|
"learning_rate": 1.9780219780219784e-05,
|
||
|
|
"loss": 0.6924091577529907,
|
||
|
|
"step": 46
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1551155115511551,
|
||
|
|
"grad_norm": 0.9030699054312887,
|
||
|
|
"learning_rate": 2.021978021978022e-05,
|
||
|
|
"loss": 0.6899605989456177,
|
||
|
|
"step": 47
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15841584158415842,
|
||
|
|
"grad_norm": 0.35784060194946976,
|
||
|
|
"learning_rate": 2.0659340659340665e-05,
|
||
|
|
"loss": 0.7242028713226318,
|
||
|
|
"step": 48
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1617161716171617,
|
||
|
|
"grad_norm": 0.3093966721093651,
|
||
|
|
"learning_rate": 2.10989010989011e-05,
|
||
|
|
"loss": 0.6203902959823608,
|
||
|
|
"step": 49
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16501650165016502,
|
||
|
|
"grad_norm": 0.4242705872636108,
|
||
|
|
"learning_rate": 2.153846153846154e-05,
|
||
|
|
"loss": 0.6420010328292847,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16831683168316833,
|
||
|
|
"grad_norm": 0.35079960590346965,
|
||
|
|
"learning_rate": 2.197802197802198e-05,
|
||
|
|
"loss": 0.7517598867416382,
|
||
|
|
"step": 51
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1716171617161716,
|
||
|
|
"grad_norm": 0.3078803790362521,
|
||
|
|
"learning_rate": 2.241758241758242e-05,
|
||
|
|
"loss": 0.6568161249160767,
|
||
|
|
"step": 52
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17491749174917492,
|
||
|
|
"grad_norm": 0.34666662805484005,
|
||
|
|
"learning_rate": 2.2857142857142858e-05,
|
||
|
|
"loss": 0.7348504662513733,
|
||
|
|
"step": 53
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1782178217821782,
|
||
|
|
"grad_norm": 0.302791415801781,
|
||
|
|
"learning_rate": 2.32967032967033e-05,
|
||
|
|
"loss": 0.6164949536323547,
|
||
|
|
"step": 54
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18151815181518152,
|
||
|
|
"grad_norm": 0.33732756727763136,
|
||
|
|
"learning_rate": 2.373626373626374e-05,
|
||
|
|
"loss": 0.6505363583564758,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1848184818481848,
|
||
|
|
"grad_norm": 0.34780152362496847,
|
||
|
|
"learning_rate": 2.4175824175824177e-05,
|
||
|
|
"loss": 0.7562520503997803,
|
||
|
|
"step": 56
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18811881188118812,
|
||
|
|
"grad_norm": 0.3310895358869482,
|
||
|
|
"learning_rate": 2.461538461538462e-05,
|
||
|
|
"loss": 0.6943148374557495,
|
||
|
|
"step": 57
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19141914191419143,
|
||
|
|
"grad_norm": 0.3367877938063833,
|
||
|
|
"learning_rate": 2.5054945054945058e-05,
|
||
|
|
"loss": 0.6571655869483948,
|
||
|
|
"step": 58
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19471947194719472,
|
||
|
|
"grad_norm": 0.32103256018771714,
|
||
|
|
"learning_rate": 2.5494505494505493e-05,
|
||
|
|
"loss": 0.7229321002960205,
|
||
|
|
"step": 59
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19801980198019803,
|
||
|
|
"grad_norm": 0.30468399230672144,
|
||
|
|
"learning_rate": 2.593406593406594e-05,
|
||
|
|
"loss": 0.6307672262191772,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20132013201320131,
|
||
|
|
"grad_norm": 0.3282635121595526,
|
||
|
|
"learning_rate": 2.6373626373626374e-05,
|
||
|
|
"loss": 0.6336506009101868,
|
||
|
|
"step": 61
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20462046204620463,
|
||
|
|
"grad_norm": 0.3280360563022675,
|
||
|
|
"learning_rate": 2.6813186813186813e-05,
|
||
|
|
"loss": 0.6492213010787964,
|
||
|
|
"step": 62
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2079207920792079,
|
||
|
|
"grad_norm": 0.3292430577817229,
|
||
|
|
"learning_rate": 2.7252747252747255e-05,
|
||
|
|
"loss": 0.6763280034065247,
|
||
|
|
"step": 63
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21122112211221122,
|
||
|
|
"grad_norm": 0.47832355846700536,
|
||
|
|
"learning_rate": 2.7692307692307694e-05,
|
||
|
|
"loss": 0.7322396039962769,
|
||
|
|
"step": 64
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2145214521452145,
|
||
|
|
"grad_norm": 0.31915340164178446,
|
||
|
|
"learning_rate": 2.8131868131868136e-05,
|
||
|
|
"loss": 0.7080870270729065,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21782178217821782,
|
||
|
|
"grad_norm": 0.3227571040968621,
|
||
|
|
"learning_rate": 2.8571428571428574e-05,
|
||
|
|
"loss": 0.6054466962814331,
|
||
|
|
"step": 66
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22112211221122113,
|
||
|
|
"grad_norm": 0.33375713186655664,
|
||
|
|
"learning_rate": 2.9010989010989013e-05,
|
||
|
|
"loss": 0.6782290935516357,
|
||
|
|
"step": 67
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22442244224422442,
|
||
|
|
"grad_norm": 0.3437770801965916,
|
||
|
|
"learning_rate": 2.9450549450549455e-05,
|
||
|
|
"loss": 0.6804753541946411,
|
||
|
|
"step": 68
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22772277227722773,
|
||
|
|
"grad_norm": 0.3228427319313703,
|
||
|
|
"learning_rate": 2.9890109890109894e-05,
|
||
|
|
"loss": 0.6493992805480957,
|
||
|
|
"step": 69
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23102310231023102,
|
||
|
|
"grad_norm": 0.3540211756840673,
|
||
|
|
"learning_rate": 3.0329670329670332e-05,
|
||
|
|
"loss": 0.6263789534568787,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23432343234323433,
|
||
|
|
"grad_norm": 0.34989089824503405,
|
||
|
|
"learning_rate": 3.0769230769230774e-05,
|
||
|
|
"loss": 0.6960322856903076,
|
||
|
|
"step": 71
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2376237623762376,
|
||
|
|
"grad_norm": 0.33624443163866324,
|
||
|
|
"learning_rate": 3.120879120879121e-05,
|
||
|
|
"loss": 0.6146604418754578,
|
||
|
|
"step": 72
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24092409240924093,
|
||
|
|
"grad_norm": 0.39618402867027047,
|
||
|
|
"learning_rate": 3.164835164835165e-05,
|
||
|
|
"loss": 0.6361377239227295,
|
||
|
|
"step": 73
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24422442244224424,
|
||
|
|
"grad_norm": 0.361603087273114,
|
||
|
|
"learning_rate": 3.2087912087912094e-05,
|
||
|
|
"loss": 0.636134147644043,
|
||
|
|
"step": 74
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24752475247524752,
|
||
|
|
"grad_norm": 0.37985663132790304,
|
||
|
|
"learning_rate": 3.252747252747253e-05,
|
||
|
|
"loss": 0.5936564803123474,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2508250825082508,
|
||
|
|
"grad_norm": 0.35883234873646996,
|
||
|
|
"learning_rate": 3.296703296703297e-05,
|
||
|
|
"loss": 0.6001103520393372,
|
||
|
|
"step": 76
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25412541254125415,
|
||
|
|
"grad_norm": 0.35227803701073973,
|
||
|
|
"learning_rate": 3.340659340659341e-05,
|
||
|
|
"loss": 0.6254594326019287,
|
||
|
|
"step": 77
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25742574257425743,
|
||
|
|
"grad_norm": 0.3563257650896171,
|
||
|
|
"learning_rate": 3.384615384615385e-05,
|
||
|
|
"loss": 0.6457959413528442,
|
||
|
|
"step": 78
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2607260726072607,
|
||
|
|
"grad_norm": 0.37234316340556584,
|
||
|
|
"learning_rate": 3.4285714285714284e-05,
|
||
|
|
"loss": 0.6186954975128174,
|
||
|
|
"step": 79
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.264026402640264,
|
||
|
|
"grad_norm": 0.35352748449766547,
|
||
|
|
"learning_rate": 3.4725274725274726e-05,
|
||
|
|
"loss": 0.6175529956817627,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26732673267326734,
|
||
|
|
"grad_norm": 0.35441369709658355,
|
||
|
|
"learning_rate": 3.516483516483517e-05,
|
||
|
|
"loss": 0.6694468259811401,
|
||
|
|
"step": 81
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2706270627062706,
|
||
|
|
"grad_norm": 0.39955400784840756,
|
||
|
|
"learning_rate": 3.56043956043956e-05,
|
||
|
|
"loss": 0.627490222454071,
|
||
|
|
"step": 82
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2739273927392739,
|
||
|
|
"grad_norm": 0.38314031523497477,
|
||
|
|
"learning_rate": 3.6043956043956045e-05,
|
||
|
|
"loss": 0.6410495638847351,
|
||
|
|
"step": 83
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27722772277227725,
|
||
|
|
"grad_norm": 0.36926215386141575,
|
||
|
|
"learning_rate": 3.648351648351649e-05,
|
||
|
|
"loss": 0.6305102109909058,
|
||
|
|
"step": 84
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28052805280528054,
|
||
|
|
"grad_norm": 0.38364118080284076,
|
||
|
|
"learning_rate": 3.692307692307693e-05,
|
||
|
|
"loss": 0.6558895111083984,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2838283828382838,
|
||
|
|
"grad_norm": 0.3370292682974053,
|
||
|
|
"learning_rate": 3.7362637362637365e-05,
|
||
|
|
"loss": 0.6029388308525085,
|
||
|
|
"step": 86
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2871287128712871,
|
||
|
|
"grad_norm": 0.39541874871701704,
|
||
|
|
"learning_rate": 3.7802197802197807e-05,
|
||
|
|
"loss": 0.6551017761230469,
|
||
|
|
"step": 87
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29042904290429045,
|
||
|
|
"grad_norm": 0.3629036550044273,
|
||
|
|
"learning_rate": 3.824175824175825e-05,
|
||
|
|
"loss": 0.6588809490203857,
|
||
|
|
"step": 88
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29372937293729373,
|
||
|
|
"grad_norm": 0.37786447228212183,
|
||
|
|
"learning_rate": 3.8681318681318684e-05,
|
||
|
|
"loss": 0.614648699760437,
|
||
|
|
"step": 89
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.297029702970297,
|
||
|
|
"grad_norm": 0.42911861803278684,
|
||
|
|
"learning_rate": 3.9120879120879126e-05,
|
||
|
|
"loss": 0.7034356594085693,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30033003300330036,
|
||
|
|
"grad_norm": 0.3707184094312094,
|
||
|
|
"learning_rate": 3.956043956043957e-05,
|
||
|
|
"loss": 0.6908263564109802,
|
||
|
|
"step": 91
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30363036303630364,
|
||
|
|
"grad_norm": 0.38262186656216063,
|
||
|
|
"learning_rate": 4e-05,
|
||
|
|
"loss": 0.6882215738296509,
|
||
|
|
"step": 92
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3069306930693069,
|
||
|
|
"grad_norm": 0.3709464296309744,
|
||
|
|
"learning_rate": 3.999985249980169e-05,
|
||
|
|
"loss": 0.6377270221710205,
|
||
|
|
"step": 93
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3102310231023102,
|
||
|
|
"grad_norm": 0.3412837406106036,
|
||
|
|
"learning_rate": 3.999941000138238e-05,
|
||
|
|
"loss": 0.6735270619392395,
|
||
|
|
"step": 94
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31353135313531355,
|
||
|
|
"grad_norm": 0.40165192879996064,
|
||
|
|
"learning_rate": 3.999867251126893e-05,
|
||
|
|
"loss": 0.6934541463851929,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31683168316831684,
|
||
|
|
"grad_norm": 0.34707128601816045,
|
||
|
|
"learning_rate": 3.9997640040339335e-05,
|
||
|
|
"loss": 0.6367039084434509,
|
||
|
|
"step": 96
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3201320132013201,
|
||
|
|
"grad_norm": 0.4268828113970776,
|
||
|
|
"learning_rate": 3.999631260382257e-05,
|
||
|
|
"loss": 0.6274522542953491,
|
||
|
|
"step": 97
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3234323432343234,
|
||
|
|
"grad_norm": 0.454428833020686,
|
||
|
|
"learning_rate": 3.999469022129834e-05,
|
||
|
|
"loss": 0.5874066352844238,
|
||
|
|
"step": 98
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32673267326732675,
|
||
|
|
"grad_norm": 0.4200675840489775,
|
||
|
|
"learning_rate": 3.9992772916696824e-05,
|
||
|
|
"loss": 0.6175942420959473,
|
||
|
|
"step": 99
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33003300330033003,
|
||
|
|
"grad_norm": 0.3796321080056305,
|
||
|
|
"learning_rate": 3.99905607182983e-05,
|
||
|
|
"loss": 0.5625832080841064,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3333333333333333,
|
||
|
|
"grad_norm": 0.39108856096759403,
|
||
|
|
"learning_rate": 3.998805365873274e-05,
|
||
|
|
"loss": 0.6153020262718201,
|
||
|
|
"step": 101
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33663366336633666,
|
||
|
|
"grad_norm": 0.3873560194436071,
|
||
|
|
"learning_rate": 3.998525177497932e-05,
|
||
|
|
"loss": 0.5585426092147827,
|
||
|
|
"step": 102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33993399339933994,
|
||
|
|
"grad_norm": 0.4084712106325698,
|
||
|
|
"learning_rate": 3.998215510836589e-05,
|
||
|
|
"loss": 0.6586359739303589,
|
||
|
|
"step": 103
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3432343234323432,
|
||
|
|
"grad_norm": 0.4383246876899704,
|
||
|
|
"learning_rate": 3.997876370456833e-05,
|
||
|
|
"loss": 0.62096107006073,
|
||
|
|
"step": 104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3465346534653465,
|
||
|
|
"grad_norm": 0.4026893562706946,
|
||
|
|
"learning_rate": 3.997507761360993e-05,
|
||
|
|
"loss": 0.6059336066246033,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34983498349834985,
|
||
|
|
"grad_norm": 0.46586240044914223,
|
||
|
|
"learning_rate": 3.997109688986059e-05,
|
||
|
|
"loss": 0.617970883846283,
|
||
|
|
"step": 106
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35313531353135313,
|
||
|
|
"grad_norm": 0.44949199032710474,
|
||
|
|
"learning_rate": 3.9966821592036066e-05,
|
||
|
|
"loss": 0.6453397274017334,
|
||
|
|
"step": 107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3564356435643564,
|
||
|
|
"grad_norm": 0.4794978158156406,
|
||
|
|
"learning_rate": 3.996225178319709e-05,
|
||
|
|
"loss": 0.6371763348579407,
|
||
|
|
"step": 108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35973597359735976,
|
||
|
|
"grad_norm": 0.4463512391721941,
|
||
|
|
"learning_rate": 3.9957387530748435e-05,
|
||
|
|
"loss": 0.5971124172210693,
|
||
|
|
"step": 109
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36303630363036304,
|
||
|
|
"grad_norm": 0.368079413354641,
|
||
|
|
"learning_rate": 3.995222890643792e-05,
|
||
|
|
"loss": 0.5679532289505005,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36633663366336633,
|
||
|
|
"grad_norm": 0.43733705586285254,
|
||
|
|
"learning_rate": 3.9946775986355346e-05,
|
||
|
|
"loss": 0.5988069772720337,
|
||
|
|
"step": 111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3696369636963696,
|
||
|
|
"grad_norm": 0.38235582844960775,
|
||
|
|
"learning_rate": 3.994102885093141e-05,
|
||
|
|
"loss": 0.6352983713150024,
|
||
|
|
"step": 112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37293729372937295,
|
||
|
|
"grad_norm": 0.389837871286893,
|
||
|
|
"learning_rate": 3.993498758493646e-05,
|
||
|
|
"loss": 0.58957839012146,
|
||
|
|
"step": 113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37623762376237624,
|
||
|
|
"grad_norm": 0.40399856168911097,
|
||
|
|
"learning_rate": 3.992865227747929e-05,
|
||
|
|
"loss": 0.6396822929382324,
|
||
|
|
"step": 114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3795379537953795,
|
||
|
|
"grad_norm": 0.38891668976227123,
|
||
|
|
"learning_rate": 3.992202302200582e-05,
|
||
|
|
"loss": 0.6314754486083984,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38283828382838286,
|
||
|
|
"grad_norm": 0.4087528543828922,
|
||
|
|
"learning_rate": 3.991509991629769e-05,
|
||
|
|
"loss": 0.673650860786438,
|
||
|
|
"step": 116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38613861386138615,
|
||
|
|
"grad_norm": 0.36330054292020786,
|
||
|
|
"learning_rate": 3.990788306247085e-05,
|
||
|
|
"loss": 0.5813701152801514,
|
||
|
|
"step": 117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38943894389438943,
|
||
|
|
"grad_norm": 0.4247110332678589,
|
||
|
|
"learning_rate": 3.990037256697404e-05,
|
||
|
|
"loss": 0.6419334411621094,
|
||
|
|
"step": 118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3927392739273927,
|
||
|
|
"grad_norm": 0.4244126002071751,
|
||
|
|
"learning_rate": 3.989256854058721e-05,
|
||
|
|
"loss": 0.6319208145141602,
|
||
|
|
"step": 119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39603960396039606,
|
||
|
|
"grad_norm": 0.3651632933942853,
|
||
|
|
"learning_rate": 3.988447109841991e-05,
|
||
|
|
"loss": 0.5989845991134644,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39933993399339934,
|
||
|
|
"grad_norm": 0.393158353074077,
|
||
|
|
"learning_rate": 3.987608035990957e-05,
|
||
|
|
"loss": 0.5853303670883179,
|
||
|
|
"step": 121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40264026402640263,
|
||
|
|
"grad_norm": 0.35965233332276103,
|
||
|
|
"learning_rate": 3.986739644881975e-05,
|
||
|
|
"loss": 0.6115257143974304,
|
||
|
|
"step": 122
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40594059405940597,
|
||
|
|
"grad_norm": 0.4252711474203845,
|
||
|
|
"learning_rate": 3.985841949323831e-05,
|
||
|
|
"loss": 0.6440504789352417,
|
||
|
|
"step": 123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40924092409240925,
|
||
|
|
"grad_norm": 0.5578797297271848,
|
||
|
|
"learning_rate": 3.984914962557553e-05,
|
||
|
|
"loss": 0.5765030384063721,
|
||
|
|
"step": 124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41254125412541254,
|
||
|
|
"grad_norm": 0.4362455029468141,
|
||
|
|
"learning_rate": 3.983958698256214e-05,
|
||
|
|
"loss": 0.6387556791305542,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4158415841584158,
|
||
|
|
"grad_norm": 0.39274811063076087,
|
||
|
|
"learning_rate": 3.98297317052473e-05,
|
||
|
|
"loss": 0.6263147592544556,
|
||
|
|
"step": 126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41914191419141916,
|
||
|
|
"grad_norm": 0.42682589637163704,
|
||
|
|
"learning_rate": 3.981958393899656e-05,
|
||
|
|
"loss": 0.6091845035552979,
|
||
|
|
"step": 127
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42244224422442245,
|
||
|
|
"grad_norm": 0.4033131171538041,
|
||
|
|
"learning_rate": 3.980914383348967e-05,
|
||
|
|
"loss": 0.6458015441894531,
|
||
|
|
"step": 128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42574257425742573,
|
||
|
|
"grad_norm": 0.3881606915462862,
|
||
|
|
"learning_rate": 3.9798411542718395e-05,
|
||
|
|
"loss": 0.6115552186965942,
|
||
|
|
"step": 129
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.429042904290429,
|
||
|
|
"grad_norm": 0.38910317938225847,
|
||
|
|
"learning_rate": 3.978738722498423e-05,
|
||
|
|
"loss": 0.6427993774414062,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43234323432343236,
|
||
|
|
"grad_norm": 0.36836380096259913,
|
||
|
|
"learning_rate": 3.977607104289609e-05,
|
||
|
|
"loss": 0.6121467351913452,
|
||
|
|
"step": 131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43564356435643564,
|
||
|
|
"grad_norm": 0.3743062201629088,
|
||
|
|
"learning_rate": 3.9764463163367875e-05,
|
||
|
|
"loss": 0.5951442718505859,
|
||
|
|
"step": 132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4389438943894389,
|
||
|
|
"grad_norm": 0.3699746655092952,
|
||
|
|
"learning_rate": 3.9752563757616045e-05,
|
||
|
|
"loss": 0.6639472842216492,
|
||
|
|
"step": 133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44224422442244227,
|
||
|
|
"grad_norm": 0.37398919831188604,
|
||
|
|
"learning_rate": 3.974037300115706e-05,
|
||
|
|
"loss": 0.6084764003753662,
|
||
|
|
"step": 134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44554455445544555,
|
||
|
|
"grad_norm": 0.37043195153646374,
|
||
|
|
"learning_rate": 3.972789107380484e-05,
|
||
|
|
"loss": 0.6211085915565491,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44884488448844884,
|
||
|
|
"grad_norm": 0.3509837417375981,
|
||
|
|
"learning_rate": 3.9715118159668046e-05,
|
||
|
|
"loss": 0.6098147034645081,
|
||
|
|
"step": 136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4521452145214521,
|
||
|
|
"grad_norm": 0.3350785925775803,
|
||
|
|
"learning_rate": 3.970205444714742e-05,
|
||
|
|
"loss": 0.6155884861946106,
|
||
|
|
"step": 137
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45544554455445546,
|
||
|
|
"grad_norm": 0.38529379761335925,
|
||
|
|
"learning_rate": 3.9688700128932975e-05,
|
||
|
|
"loss": 0.5984665155410767,
|
||
|
|
"step": 138
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45874587458745875,
|
||
|
|
"grad_norm": 0.45130397769476205,
|
||
|
|
"learning_rate": 3.967505540200117e-05,
|
||
|
|
"loss": 0.6656880378723145,
|
||
|
|
"step": 139
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46204620462046203,
|
||
|
|
"grad_norm": 0.3277874952439621,
|
||
|
|
"learning_rate": 3.966112046761201e-05,
|
||
|
|
"loss": 0.6607398390769958,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46534653465346537,
|
||
|
|
"grad_norm": 2.6727599644732267,
|
||
|
|
"learning_rate": 3.9646895531306046e-05,
|
||
|
|
"loss": 0.6578342914581299,
|
||
|
|
"step": 141
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46864686468646866,
|
||
|
|
"grad_norm": 0.47429126269764676,
|
||
|
|
"learning_rate": 3.963238080290136e-05,
|
||
|
|
"loss": 0.6103699803352356,
|
||
|
|
"step": 142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47194719471947194,
|
||
|
|
"grad_norm": 0.32652590291724093,
|
||
|
|
"learning_rate": 3.96175764964905e-05,
|
||
|
|
"loss": 0.5484676957130432,
|
||
|
|
"step": 143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4752475247524752,
|
||
|
|
"grad_norm": 0.4531372955951849,
|
||
|
|
"learning_rate": 3.960248283043727e-05,
|
||
|
|
"loss": 0.578776478767395,
|
||
|
|
"step": 144
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47854785478547857,
|
||
|
|
"grad_norm": 0.3685580706465372,
|
||
|
|
"learning_rate": 3.958710002737355e-05,
|
||
|
|
"loss": 0.6184446811676025,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48184818481848185,
|
||
|
|
"grad_norm": 0.3584005630962511,
|
||
|
|
"learning_rate": 3.9571428314195984e-05,
|
||
|
|
"loss": 0.6307916045188904,
|
||
|
|
"step": 146
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48514851485148514,
|
||
|
|
"grad_norm": 0.4049679254542765,
|
||
|
|
"learning_rate": 3.955546792206265e-05,
|
||
|
|
"loss": 0.6064697504043579,
|
||
|
|
"step": 147
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4884488448844885,
|
||
|
|
"grad_norm": 0.3846258995775384,
|
||
|
|
"learning_rate": 3.953921908638966e-05,
|
||
|
|
"loss": 0.6055655479431152,
|
||
|
|
"step": 148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49174917491749176,
|
||
|
|
"grad_norm": 0.3643318343315678,
|
||
|
|
"learning_rate": 3.952268204684765e-05,
|
||
|
|
"loss": 0.5856431126594543,
|
||
|
|
"step": 149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49504950495049505,
|
||
|
|
"grad_norm": 0.3854715521866927,
|
||
|
|
"learning_rate": 3.950585704735829e-05,
|
||
|
|
"loss": 0.6634635925292969,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49834983498349833,
|
||
|
|
"grad_norm": 0.34338835592304534,
|
||
|
|
"learning_rate": 3.948874433609065e-05,
|
||
|
|
"loss": 0.5880753397941589,
|
||
|
|
"step": 151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5016501650165016,
|
||
|
|
"grad_norm": 0.3481018111538647,
|
||
|
|
"learning_rate": 3.947134416545757e-05,
|
||
|
|
"loss": 0.5594221949577332,
|
||
|
|
"step": 152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.504950495049505,
|
||
|
|
"grad_norm": 0.6570220882473125,
|
||
|
|
"learning_rate": 3.94536567921119e-05,
|
||
|
|
"loss": 0.664652407169342,
|
||
|
|
"step": 153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5082508250825083,
|
||
|
|
"grad_norm": 0.340048306266198,
|
||
|
|
"learning_rate": 3.9435682476942755e-05,
|
||
|
|
"loss": 0.6002815961837769,
|
||
|
|
"step": 154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5115511551155115,
|
||
|
|
"grad_norm": 0.3488682381523364,
|
||
|
|
"learning_rate": 3.941742148507163e-05,
|
||
|
|
"loss": 0.5905177593231201,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5148514851485149,
|
||
|
|
"grad_norm": 0.33062666453941425,
|
||
|
|
"learning_rate": 3.939887408584853e-05,
|
||
|
|
"loss": 0.5636795163154602,
|
||
|
|
"step": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5181518151815182,
|
||
|
|
"grad_norm": 0.35862086331061066,
|
||
|
|
"learning_rate": 3.938004055284796e-05,
|
||
|
|
"loss": 0.5639582276344299,
|
||
|
|
"step": 157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5214521452145214,
|
||
|
|
"grad_norm": 0.31769111173717246,
|
||
|
|
"learning_rate": 3.9360921163864895e-05,
|
||
|
|
"loss": 0.6515591144561768,
|
||
|
|
"step": 158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5247524752475248,
|
||
|
|
"grad_norm": 0.38401455820073427,
|
||
|
|
"learning_rate": 3.934151620091071e-05,
|
||
|
|
"loss": 0.5721683502197266,
|
||
|
|
"step": 159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.528052805280528,
|
||
|
|
"grad_norm": 0.3284331200684813,
|
||
|
|
"learning_rate": 3.9321825950209e-05,
|
||
|
|
"loss": 0.5801802277565002,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5313531353135313,
|
||
|
|
"grad_norm": 0.3493998878359796,
|
||
|
|
"learning_rate": 3.9301850702191344e-05,
|
||
|
|
"loss": 0.603084921836853,
|
||
|
|
"step": 161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5346534653465347,
|
||
|
|
"grad_norm": 0.32233519110844616,
|
||
|
|
"learning_rate": 3.928159075149304e-05,
|
||
|
|
"loss": 0.6376925110816956,
|
||
|
|
"step": 162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5379537953795379,
|
||
|
|
"grad_norm": 0.35833134197704153,
|
||
|
|
"learning_rate": 3.926104639694877e-05,
|
||
|
|
"loss": 0.5764102935791016,
|
||
|
|
"step": 163
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5412541254125413,
|
||
|
|
"grad_norm": 0.3523567199445224,
|
||
|
|
"learning_rate": 3.924021794158818e-05,
|
||
|
|
"loss": 0.6102188229560852,
|
||
|
|
"step": 164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5445544554455446,
|
||
|
|
"grad_norm": 0.36694222553878597,
|
||
|
|
"learning_rate": 3.921910569263139e-05,
|
||
|
|
"loss": 0.5833287835121155,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5478547854785478,
|
||
|
|
"grad_norm": 0.37179813198977807,
|
||
|
|
"learning_rate": 3.919770996148448e-05,
|
||
|
|
"loss": 0.5891385078430176,
|
||
|
|
"step": 166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5511551155115512,
|
||
|
|
"grad_norm": 0.3507301680001106,
|
||
|
|
"learning_rate": 3.917603106373493e-05,
|
||
|
|
"loss": 0.5838547348976135,
|
||
|
|
"step": 167
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5544554455445545,
|
||
|
|
"grad_norm": 0.3134001311174479,
|
||
|
|
"learning_rate": 3.9154069319146904e-05,
|
||
|
|
"loss": 0.5727800726890564,
|
||
|
|
"step": 168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5577557755775577,
|
||
|
|
"grad_norm": 0.33531781904204605,
|
||
|
|
"learning_rate": 3.913182505165656e-05,
|
||
|
|
"loss": 0.6102641224861145,
|
||
|
|
"step": 169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5610561056105611,
|
||
|
|
"grad_norm": 0.35178976522027133,
|
||
|
|
"learning_rate": 3.91092985893673e-05,
|
||
|
|
"loss": 0.5718260407447815,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5643564356435643,
|
||
|
|
"grad_norm": 0.47006108726602863,
|
||
|
|
"learning_rate": 3.908649026454488e-05,
|
||
|
|
"loss": 0.6308504939079285,
|
||
|
|
"step": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5676567656765676,
|
||
|
|
"grad_norm": 0.3687514240026255,
|
||
|
|
"learning_rate": 3.906340041361255e-05,
|
||
|
|
"loss": 0.6089432835578918,
|
||
|
|
"step": 172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.570957095709571,
|
||
|
|
"grad_norm": 0.3586674884704593,
|
||
|
|
"learning_rate": 3.904002937714606e-05,
|
||
|
|
"loss": 0.6583501696586609,
|
||
|
|
"step": 173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5742574257425742,
|
||
|
|
"grad_norm": 0.3399808047240735,
|
||
|
|
"learning_rate": 3.9016377499868666e-05,
|
||
|
|
"loss": 0.6108609437942505,
|
||
|
|
"step": 174
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5775577557755776,
|
||
|
|
"grad_norm": 0.3840880337988826,
|
||
|
|
"learning_rate": 3.899244513064603e-05,
|
||
|
|
"loss": 0.63509202003479,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5808580858085809,
|
||
|
|
"grad_norm": 0.3725541644477348,
|
||
|
|
"learning_rate": 3.896823262248107e-05,
|
||
|
|
"loss": 0.5759241580963135,
|
||
|
|
"step": 176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5841584158415841,
|
||
|
|
"grad_norm": 0.30755721985114126,
|
||
|
|
"learning_rate": 3.8943740332508754e-05,
|
||
|
|
"loss": 0.6148169040679932,
|
||
|
|
"step": 177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5874587458745875,
|
||
|
|
"grad_norm": 0.3916756097057637,
|
||
|
|
"learning_rate": 3.891896862199086e-05,
|
||
|
|
"loss": 0.5266364216804504,
|
||
|
|
"step": 178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5907590759075908,
|
||
|
|
"grad_norm": 0.3417854779376455,
|
||
|
|
"learning_rate": 3.88939178563106e-05,
|
||
|
|
"loss": 0.5626640319824219,
|
||
|
|
"step": 179
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.594059405940594,
|
||
|
|
"grad_norm": 0.33526488525207704,
|
||
|
|
"learning_rate": 3.886858840496727e-05,
|
||
|
|
"loss": 0.6063880920410156,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5973597359735974,
|
||
|
|
"grad_norm": 0.37344333250119977,
|
||
|
|
"learning_rate": 3.884298064157077e-05,
|
||
|
|
"loss": 0.5979235768318176,
|
||
|
|
"step": 181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6006600660066007,
|
||
|
|
"grad_norm": 0.3835133271197793,
|
||
|
|
"learning_rate": 3.881709494383612e-05,
|
||
|
|
"loss": 0.6628611087799072,
|
||
|
|
"step": 182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6039603960396039,
|
||
|
|
"grad_norm": 0.4344526004756121,
|
||
|
|
"learning_rate": 3.879093169357789e-05,
|
||
|
|
"loss": 0.6215270757675171,
|
||
|
|
"step": 183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6072607260726073,
|
||
|
|
"grad_norm": 0.3644174435488244,
|
||
|
|
"learning_rate": 3.876449127670452e-05,
|
||
|
|
"loss": 0.6148592233657837,
|
||
|
|
"step": 184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6105610561056105,
|
||
|
|
"grad_norm": 0.3619226265536735,
|
||
|
|
"learning_rate": 3.87377740832127e-05,
|
||
|
|
"loss": 0.6254778504371643,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6138613861386139,
|
||
|
|
"grad_norm": 0.3492162593840536,
|
||
|
|
"learning_rate": 3.871078050718155e-05,
|
||
|
|
"loss": 0.6025378704071045,
|
||
|
|
"step": 186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6171617161716172,
|
||
|
|
"grad_norm": 0.3866924759539626,
|
||
|
|
"learning_rate": 3.8683510946766866e-05,
|
||
|
|
"loss": 0.5887518525123596,
|
||
|
|
"step": 187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6204620462046204,
|
||
|
|
"grad_norm": 0.3357229513721586,
|
||
|
|
"learning_rate": 3.865596580419519e-05,
|
||
|
|
"loss": 0.6180317401885986,
|
||
|
|
"step": 188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6237623762376238,
|
||
|
|
"grad_norm": 0.3594949077768003,
|
||
|
|
"learning_rate": 3.8628145485757925e-05,
|
||
|
|
"loss": 0.5970651507377625,
|
||
|
|
"step": 189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6270627062706271,
|
||
|
|
"grad_norm": 0.3496234009951303,
|
||
|
|
"learning_rate": 3.860005040180533e-05,
|
||
|
|
"loss": 0.6027296781539917,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6303630363036303,
|
||
|
|
"grad_norm": 0.3830042583584045,
|
||
|
|
"learning_rate": 3.857168096674044e-05,
|
||
|
|
"loss": 0.6326305270195007,
|
||
|
|
"step": 191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6336633663366337,
|
||
|
|
"grad_norm": 0.333508477943962,
|
||
|
|
"learning_rate": 3.854303759901299e-05,
|
||
|
|
"loss": 0.6508482694625854,
|
||
|
|
"step": 192
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.636963696369637,
|
||
|
|
"grad_norm": 0.352327105927571,
|
||
|
|
"learning_rate": 3.851412072111322e-05,
|
||
|
|
"loss": 0.6088548302650452,
|
||
|
|
"step": 193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6402640264026402,
|
||
|
|
"grad_norm": 0.36196379228138037,
|
||
|
|
"learning_rate": 3.8484930759565645e-05,
|
||
|
|
"loss": 0.5975607633590698,
|
||
|
|
"step": 194
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6435643564356436,
|
||
|
|
"grad_norm": 0.3231664855297077,
|
||
|
|
"learning_rate": 3.845546814492279e-05,
|
||
|
|
"loss": 0.5467930436134338,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6468646864686468,
|
||
|
|
"grad_norm": 0.35556526722817444,
|
||
|
|
"learning_rate": 3.8425733311758795e-05,
|
||
|
|
"loss": 0.583969235420227,
|
||
|
|
"step": 196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6501650165016502,
|
||
|
|
"grad_norm": 0.331073543443887,
|
||
|
|
"learning_rate": 3.8395726698663045e-05,
|
||
|
|
"loss": 0.6007376909255981,
|
||
|
|
"step": 197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6534653465346535,
|
||
|
|
"grad_norm": 0.34786293006180385,
|
||
|
|
"learning_rate": 3.836544874823368e-05,
|
||
|
|
"loss": 0.5971908569335938,
|
||
|
|
"step": 198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6567656765676567,
|
||
|
|
"grad_norm": 0.3128647628132879,
|
||
|
|
"learning_rate": 3.8334899907071064e-05,
|
||
|
|
"loss": 0.592069685459137,
|
||
|
|
"step": 199
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6600660066006601,
|
||
|
|
"grad_norm": 0.3308125796746202,
|
||
|
|
"learning_rate": 3.830408062577121e-05,
|
||
|
|
"loss": 0.6188071966171265,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6633663366336634,
|
||
|
|
"grad_norm": 0.34889077565364124,
|
||
|
|
"learning_rate": 3.827299135891913e-05,
|
||
|
|
"loss": 0.5976923704147339,
|
||
|
|
"step": 201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6666666666666666,
|
||
|
|
"grad_norm": 0.33443153994631497,
|
||
|
|
"learning_rate": 3.8241632565082124e-05,
|
||
|
|
"loss": 0.6120954155921936,
|
||
|
|
"step": 202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.66996699669967,
|
||
|
|
"grad_norm": 0.3573334503206899,
|
||
|
|
"learning_rate": 3.821000470680303e-05,
|
||
|
|
"loss": 0.6661979556083679,
|
||
|
|
"step": 203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6732673267326733,
|
||
|
|
"grad_norm": 0.34662331225184934,
|
||
|
|
"learning_rate": 3.8178108250593384e-05,
|
||
|
|
"loss": 0.5853559970855713,
|
||
|
|
"step": 204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6765676567656765,
|
||
|
|
"grad_norm": 0.33823171869993424,
|
||
|
|
"learning_rate": 3.814594366692654e-05,
|
||
|
|
"loss": 0.6648768186569214,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6798679867986799,
|
||
|
|
"grad_norm": 0.4178878629038068,
|
||
|
|
"learning_rate": 3.8113511430230745e-05,
|
||
|
|
"loss": 0.5893838405609131,
|
||
|
|
"step": 206
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6831683168316832,
|
||
|
|
"grad_norm": 0.36858896529016355,
|
||
|
|
"learning_rate": 3.808081201888214e-05,
|
||
|
|
"loss": 0.6177140474319458,
|
||
|
|
"step": 207
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6864686468646864,
|
||
|
|
"grad_norm": 0.38061402245158527,
|
||
|
|
"learning_rate": 3.8047845915197695e-05,
|
||
|
|
"loss": 0.5793695449829102,
|
||
|
|
"step": 208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6897689768976898,
|
||
|
|
"grad_norm": 0.3591315376932048,
|
||
|
|
"learning_rate": 3.8014613605428084e-05,
|
||
|
|
"loss": 0.5571605563163757,
|
||
|
|
"step": 209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.693069306930693,
|
||
|
|
"grad_norm": 0.33319862057164595,
|
||
|
|
"learning_rate": 3.798111557975053e-05,
|
||
|
|
"loss": 0.5945760011672974,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6963696369636964,
|
||
|
|
"grad_norm": 0.3495679574237745,
|
||
|
|
"learning_rate": 3.7947352332261586e-05,
|
||
|
|
"loss": 0.600873589515686,
|
||
|
|
"step": 211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6996699669966997,
|
||
|
|
"grad_norm": 0.37390147639764304,
|
||
|
|
"learning_rate": 3.791332436096983e-05,
|
||
|
|
"loss": 0.6234852075576782,
|
||
|
|
"step": 212
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7029702970297029,
|
||
|
|
"grad_norm": 0.3571653694610809,
|
||
|
|
"learning_rate": 3.7879032167788494e-05,
|
||
|
|
"loss": 0.6129578948020935,
|
||
|
|
"step": 213
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7062706270627063,
|
||
|
|
"grad_norm": 0.48971881906384135,
|
||
|
|
"learning_rate": 3.784447625852812e-05,
|
||
|
|
"loss": 0.6204475164413452,
|
||
|
|
"step": 214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7095709570957096,
|
||
|
|
"grad_norm": 0.3610294548812676,
|
||
|
|
"learning_rate": 3.780965714288905e-05,
|
||
|
|
"loss": 0.6734122037887573,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7128712871287128,
|
||
|
|
"grad_norm": 0.35396639697907356,
|
||
|
|
"learning_rate": 3.777457533445393e-05,
|
||
|
|
"loss": 0.5678560137748718,
|
||
|
|
"step": 216
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7161716171617162,
|
||
|
|
"grad_norm": 0.3232076597831296,
|
||
|
|
"learning_rate": 3.7739231350680135e-05,
|
||
|
|
"loss": 0.5784683227539062,
|
||
|
|
"step": 217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7194719471947195,
|
||
|
|
"grad_norm": 0.3540897506756201,
|
||
|
|
"learning_rate": 3.7703625712892125e-05,
|
||
|
|
"loss": 0.6060354113578796,
|
||
|
|
"step": 218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7227722772277227,
|
||
|
|
"grad_norm": 0.35008278157890194,
|
||
|
|
"learning_rate": 3.766775894627376e-05,
|
||
|
|
"loss": 0.6248741745948792,
|
||
|
|
"step": 219
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7260726072607261,
|
||
|
|
"grad_norm": 0.32018676747331787,
|
||
|
|
"learning_rate": 3.7631631579860553e-05,
|
||
|
|
"loss": 0.6014479398727417,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7293729372937293,
|
||
|
|
"grad_norm": 0.32068744744726313,
|
||
|
|
"learning_rate": 3.759524414653189e-05,
|
||
|
|
"loss": 0.6283233761787415,
|
||
|
|
"step": 221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7326732673267327,
|
||
|
|
"grad_norm": 0.3047460979670785,
|
||
|
|
"learning_rate": 3.755859718300313e-05,
|
||
|
|
"loss": 0.5710185766220093,
|
||
|
|
"step": 222
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.735973597359736,
|
||
|
|
"grad_norm": 0.34698489216212486,
|
||
|
|
"learning_rate": 3.75216912298177e-05,
|
||
|
|
"loss": 0.6007407903671265,
|
||
|
|
"step": 223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7392739273927392,
|
||
|
|
"grad_norm": 0.4952362221345831,
|
||
|
|
"learning_rate": 3.748452683133916e-05,
|
||
|
|
"loss": 0.6852575540542603,
|
||
|
|
"step": 224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7425742574257426,
|
||
|
|
"grad_norm": 0.32106680253004655,
|
||
|
|
"learning_rate": 3.7447104535743115e-05,
|
||
|
|
"loss": 0.6270833611488342,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7458745874587459,
|
||
|
|
"grad_norm": 0.30214814189665545,
|
||
|
|
"learning_rate": 3.740942489500916e-05,
|
||
|
|
"loss": 0.5925471782684326,
|
||
|
|
"step": 226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7491749174917491,
|
||
|
|
"grad_norm": 0.3171932777170319,
|
||
|
|
"learning_rate": 3.737148846491275e-05,
|
||
|
|
"loss": 0.573570728302002,
|
||
|
|
"step": 227
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7524752475247525,
|
||
|
|
"grad_norm": 0.31480815810804524,
|
||
|
|
"learning_rate": 3.7333295805016986e-05,
|
||
|
|
"loss": 0.6088368892669678,
|
||
|
|
"step": 228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7557755775577558,
|
||
|
|
"grad_norm": 0.3103068539492526,
|
||
|
|
"learning_rate": 3.729484747866435e-05,
|
||
|
|
"loss": 0.5496470332145691,
|
||
|
|
"step": 229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.759075907590759,
|
||
|
|
"grad_norm": 0.3007603199811456,
|
||
|
|
"learning_rate": 3.725614405296843e-05,
|
||
|
|
"loss": 0.6008220314979553,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7623762376237624,
|
||
|
|
"grad_norm": 0.3007492168191884,
|
||
|
|
"learning_rate": 3.721718609880551e-05,
|
||
|
|
"loss": 0.5982120037078857,
|
||
|
|
"step": 231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7656765676567657,
|
||
|
|
"grad_norm": 0.3010002181490163,
|
||
|
|
"learning_rate": 3.717797419080618e-05,
|
||
|
|
"loss": 0.6404559016227722,
|
||
|
|
"step": 232
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.768976897689769,
|
||
|
|
"grad_norm": 0.35604106645956024,
|
||
|
|
"learning_rate": 3.713850890734689e-05,
|
||
|
|
"loss": 0.5875239372253418,
|
||
|
|
"step": 233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7722772277227723,
|
||
|
|
"grad_norm": 0.33191901009333297,
|
||
|
|
"learning_rate": 3.709879083054133e-05,
|
||
|
|
"loss": 0.5962772369384766,
|
||
|
|
"step": 234
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7755775577557755,
|
||
|
|
"grad_norm": 0.29418628627284477,
|
||
|
|
"learning_rate": 3.705882054623192e-05,
|
||
|
|
"loss": 0.5764110684394836,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7788778877887789,
|
||
|
|
"grad_norm": 0.30409612807603364,
|
||
|
|
"learning_rate": 3.7018598643981165e-05,
|
||
|
|
"loss": 0.5635858178138733,
|
||
|
|
"step": 236
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7821782178217822,
|
||
|
|
"grad_norm": 0.3039645238556037,
|
||
|
|
"learning_rate": 3.69781257170629e-05,
|
||
|
|
"loss": 0.5880881547927856,
|
||
|
|
"step": 237
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7854785478547854,
|
||
|
|
"grad_norm": 0.30606246597511416,
|
||
|
|
"learning_rate": 3.6937402362453606e-05,
|
||
|
|
"loss": 0.5644733905792236,
|
||
|
|
"step": 238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7887788778877888,
|
||
|
|
"grad_norm": 0.328325214152846,
|
||
|
|
"learning_rate": 3.689642918082358e-05,
|
||
|
|
"loss": 0.6431151032447815,
|
||
|
|
"step": 239
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7920792079207921,
|
||
|
|
"grad_norm": 0.2863869456911102,
|
||
|
|
"learning_rate": 3.6855206776528055e-05,
|
||
|
|
"loss": 0.5848085880279541,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7953795379537953,
|
||
|
|
"grad_norm": 0.3169795193025283,
|
||
|
|
"learning_rate": 3.681373575759831e-05,
|
||
|
|
"loss": 0.590021550655365,
|
||
|
|
"step": 241
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7986798679867987,
|
||
|
|
"grad_norm": 0.3630216059086489,
|
||
|
|
"learning_rate": 3.67720167357327e-05,
|
||
|
|
"loss": 0.6217919588088989,
|
||
|
|
"step": 242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.801980198019802,
|
||
|
|
"grad_norm": 0.2999270957223198,
|
||
|
|
"learning_rate": 3.673005032628763e-05,
|
||
|
|
"loss": 0.6075180172920227,
|
||
|
|
"step": 243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8052805280528053,
|
||
|
|
"grad_norm": 0.35145967135780704,
|
||
|
|
"learning_rate": 3.668783714826846e-05,
|
||
|
|
"loss": 0.6078404188156128,
|
||
|
|
"step": 244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8085808580858086,
|
||
|
|
"grad_norm": 0.32650805345047657,
|
||
|
|
"learning_rate": 3.664537782432042e-05,
|
||
|
|
"loss": 0.6297526955604553,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8118811881188119,
|
||
|
|
"grad_norm": 0.32461322862254094,
|
||
|
|
"learning_rate": 3.660267298071936e-05,
|
||
|
|
"loss": 0.5684514045715332,
|
||
|
|
"step": 246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8151815181518152,
|
||
|
|
"grad_norm": 0.32171296221654416,
|
||
|
|
"learning_rate": 3.655972324736259e-05,
|
||
|
|
"loss": 0.6192148327827454,
|
||
|
|
"step": 247
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8184818481848185,
|
||
|
|
"grad_norm": 0.3322336621503604,
|
||
|
|
"learning_rate": 3.6516529257759506e-05,
|
||
|
|
"loss": 0.5900243520736694,
|
||
|
|
"step": 248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8217821782178217,
|
||
|
|
"grad_norm": 0.35183312055445004,
|
||
|
|
"learning_rate": 3.6473091649022337e-05,
|
||
|
|
"loss": 0.5941751599311829,
|
||
|
|
"step": 249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8250825082508251,
|
||
|
|
"grad_norm": 0.31255833045908565,
|
||
|
|
"learning_rate": 3.6429411061856645e-05,
|
||
|
|
"loss": 0.5744310021400452,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8283828382838284,
|
||
|
|
"grad_norm": 0.3266269251233177,
|
||
|
|
"learning_rate": 3.6385488140551985e-05,
|
||
|
|
"loss": 0.5985124707221985,
|
||
|
|
"step": 251
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8316831683168316,
|
||
|
|
"grad_norm": 0.30426711611593643,
|
||
|
|
"learning_rate": 3.6341323532972294e-05,
|
||
|
|
"loss": 0.581912636756897,
|
||
|
|
"step": 252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.834983498349835,
|
||
|
|
"grad_norm": 0.3297819735063718,
|
||
|
|
"learning_rate": 3.629691789054643e-05,
|
||
|
|
"loss": 0.586786150932312,
|
||
|
|
"step": 253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8382838283828383,
|
||
|
|
"grad_norm": 0.3074133078124695,
|
||
|
|
"learning_rate": 3.625227186825848e-05,
|
||
|
|
"loss": 0.6312603950500488,
|
||
|
|
"step": 254
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8415841584158416,
|
||
|
|
"grad_norm": 0.33007753969064285,
|
||
|
|
"learning_rate": 3.620738612463818e-05,
|
||
|
|
"loss": 0.5886626243591309,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8448844884488449,
|
||
|
|
"grad_norm": 0.31334340596765187,
|
||
|
|
"learning_rate": 3.6162261321751114e-05,
|
||
|
|
"loss": 0.5892266035079956,
|
||
|
|
"step": 256
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8481848184818482,
|
||
|
|
"grad_norm": 0.31784442826893616,
|
||
|
|
"learning_rate": 3.6116898125189045e-05,
|
||
|
|
"loss": 0.5472115278244019,
|
||
|
|
"step": 257
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8514851485148515,
|
||
|
|
"grad_norm": 0.3456330158902343,
|
||
|
|
"learning_rate": 3.6071297204059995e-05,
|
||
|
|
"loss": 0.5981796383857727,
|
||
|
|
"step": 258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8547854785478548,
|
||
|
|
"grad_norm": 0.3377124553034101,
|
||
|
|
"learning_rate": 3.6025459230978475e-05,
|
||
|
|
"loss": 0.6708342432975769,
|
||
|
|
"step": 259
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.858085808580858,
|
||
|
|
"grad_norm": 0.3081391395426973,
|
||
|
|
"learning_rate": 3.597938488205549e-05,
|
||
|
|
"loss": 0.6306079626083374,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8613861386138614,
|
||
|
|
"grad_norm": 0.3398583824115319,
|
||
|
|
"learning_rate": 3.59330748368886e-05,
|
||
|
|
"loss": 0.6098329424858093,
|
||
|
|
"step": 261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8646864686468647,
|
||
|
|
"grad_norm": 0.32878067719138626,
|
||
|
|
"learning_rate": 3.588652977855189e-05,
|
||
|
|
"loss": 0.5617724061012268,
|
||
|
|
"step": 262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8679867986798679,
|
||
|
|
"grad_norm": 0.34962664282188816,
|
||
|
|
"learning_rate": 3.58397503935859e-05,
|
||
|
|
"loss": 0.5780894756317139,
|
||
|
|
"step": 263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8712871287128713,
|
||
|
|
"grad_norm": 0.32665214019362204,
|
||
|
|
"learning_rate": 3.5792737371987477e-05,
|
||
|
|
"loss": 0.578921377658844,
|
||
|
|
"step": 264
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8745874587458746,
|
||
|
|
"grad_norm": 0.36673188949709323,
|
||
|
|
"learning_rate": 3.574549140719962e-05,
|
||
|
|
"loss": 0.614944577217102,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8778877887788779,
|
||
|
|
"grad_norm": 0.3248666143164946,
|
||
|
|
"learning_rate": 3.569801319610125e-05,
|
||
|
|
"loss": 0.6269869208335876,
|
||
|
|
"step": 266
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8811881188118812,
|
||
|
|
"grad_norm": 0.3338123662452596,
|
||
|
|
"learning_rate": 3.565030343899693e-05,
|
||
|
|
"loss": 0.6045581102371216,
|
||
|
|
"step": 267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8844884488448845,
|
||
|
|
"grad_norm": 0.31011600887091817,
|
||
|
|
"learning_rate": 3.5602362839606514e-05,
|
||
|
|
"loss": 0.5872907638549805,
|
||
|
|
"step": 268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8877887788778878,
|
||
|
|
"grad_norm": 0.31857062779594814,
|
||
|
|
"learning_rate": 3.55541921050548e-05,
|
||
|
|
"loss": 0.6283375024795532,
|
||
|
|
"step": 269
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8910891089108911,
|
||
|
|
"grad_norm": 0.32445751859048455,
|
||
|
|
"learning_rate": 3.5505791945861076e-05,
|
||
|
|
"loss": 0.5747002363204956,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8943894389438944,
|
||
|
|
"grad_norm": 0.2923309334474062,
|
||
|
|
"learning_rate": 3.545716307592864e-05,
|
||
|
|
"loss": 0.6205827593803406,
|
||
|
|
"step": 271
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8976897689768977,
|
||
|
|
"grad_norm": 0.43972579907455317,
|
||
|
|
"learning_rate": 3.54083062125343e-05,
|
||
|
|
"loss": 0.5987251400947571,
|
||
|
|
"step": 272
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.900990099009901,
|
||
|
|
"grad_norm": 0.33194286352506225,
|
||
|
|
"learning_rate": 3.535922207631776e-05,
|
||
|
|
"loss": 0.6275356411933899,
|
||
|
|
"step": 273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9042904290429042,
|
||
|
|
"grad_norm": 0.3408278730793354,
|
||
|
|
"learning_rate": 3.5309911391270996e-05,
|
||
|
|
"loss": 0.6097655892372131,
|
||
|
|
"step": 274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9075907590759076,
|
||
|
|
"grad_norm": 0.3441995699777348,
|
||
|
|
"learning_rate": 3.52603748847276e-05,
|
||
|
|
"loss": 0.544170618057251,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9108910891089109,
|
||
|
|
"grad_norm": 0.3034867763949278,
|
||
|
|
"learning_rate": 3.521061328735202e-05,
|
||
|
|
"loss": 0.5723366141319275,
|
||
|
|
"step": 276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9141914191419142,
|
||
|
|
"grad_norm": 0.3091145609625042,
|
||
|
|
"learning_rate": 3.516062733312879e-05,
|
||
|
|
"loss": 0.5801889896392822,
|
||
|
|
"step": 277
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9174917491749175,
|
||
|
|
"grad_norm": 0.3532845546992122,
|
||
|
|
"learning_rate": 3.511041775935175e-05,
|
||
|
|
"loss": 0.5942766666412354,
|
||
|
|
"step": 278
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9207920792079208,
|
||
|
|
"grad_norm": 0.3192035342587887,
|
||
|
|
"learning_rate": 3.50599853066131e-05,
|
||
|
|
"loss": 0.5604017972946167,
|
||
|
|
"step": 279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9240924092409241,
|
||
|
|
"grad_norm": 0.4475571406552253,
|
||
|
|
"learning_rate": 3.500933071879251e-05,
|
||
|
|
"loss": 0.6151460409164429,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9273927392739274,
|
||
|
|
"grad_norm": 0.30946498453996385,
|
||
|
|
"learning_rate": 3.495845474304616e-05,
|
||
|
|
"loss": 0.5854936838150024,
|
||
|
|
"step": 281
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9306930693069307,
|
||
|
|
"grad_norm": 0.3188531409769719,
|
||
|
|
"learning_rate": 3.490735812979572e-05,
|
||
|
|
"loss": 0.5586672425270081,
|
||
|
|
"step": 282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.933993399339934,
|
||
|
|
"grad_norm": 0.3250546549981712,
|
||
|
|
"learning_rate": 3.485604163271721e-05,
|
||
|
|
"loss": 0.578475832939148,
|
||
|
|
"step": 283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9372937293729373,
|
||
|
|
"grad_norm": 0.45030229248281484,
|
||
|
|
"learning_rate": 3.4804506008730015e-05,
|
||
|
|
"loss": 0.5236382484436035,
|
||
|
|
"step": 284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9405940594059405,
|
||
|
|
"grad_norm": 0.31677157675280776,
|
||
|
|
"learning_rate": 3.475275201798559e-05,
|
||
|
|
"loss": 0.5964822769165039,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9438943894389439,
|
||
|
|
"grad_norm": 0.3221519247617692,
|
||
|
|
"learning_rate": 3.4700780423856334e-05,
|
||
|
|
"loss": 0.5551598072052002,
|
||
|
|
"step": 286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9471947194719472,
|
||
|
|
"grad_norm": 0.31322506983838,
|
||
|
|
"learning_rate": 3.464859199292429e-05,
|
||
|
|
"loss": 0.6095103621482849,
|
||
|
|
"step": 287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9504950495049505,
|
||
|
|
"grad_norm": 0.33333701342858213,
|
||
|
|
"learning_rate": 3.4596187494969846e-05,
|
||
|
|
"loss": 0.5893416404724121,
|
||
|
|
"step": 288
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9537953795379538,
|
||
|
|
"grad_norm": 0.31167002926986764,
|
||
|
|
"learning_rate": 3.454356770296039e-05,
|
||
|
|
"loss": 0.5992231965065002,
|
||
|
|
"step": 289
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9570957095709571,
|
||
|
|
"grad_norm": 0.3407826991036566,
|
||
|
|
"learning_rate": 3.4490733393038895e-05,
|
||
|
|
"loss": 0.6071972250938416,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9603960396039604,
|
||
|
|
"grad_norm": 0.321397588262469,
|
||
|
|
"learning_rate": 3.443768534451248e-05,
|
||
|
|
"loss": 0.5836942195892334,
|
||
|
|
"step": 291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9636963696369637,
|
||
|
|
"grad_norm": 0.3596023570145339,
|
||
|
|
"learning_rate": 3.4384424339840916e-05,
|
||
|
|
"loss": 0.5707553625106812,
|
||
|
|
"step": 292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.966996699669967,
|
||
|
|
"grad_norm": 0.326365753033755,
|
||
|
|
"learning_rate": 3.4330951164625075e-05,
|
||
|
|
"loss": 0.5883970260620117,
|
||
|
|
"step": 293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9702970297029703,
|
||
|
|
"grad_norm": 0.3276030981345682,
|
||
|
|
"learning_rate": 3.427726660759535e-05,
|
||
|
|
"loss": 0.6281589269638062,
|
||
|
|
"step": 294
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9735973597359736,
|
||
|
|
"grad_norm": 0.3559560269123216,
|
||
|
|
"learning_rate": 3.422337146060003e-05,
|
||
|
|
"loss": 0.6641702651977539,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.976897689768977,
|
||
|
|
"grad_norm": 0.34661891319338206,
|
||
|
|
"learning_rate": 3.4169266518593596e-05,
|
||
|
|
"loss": 0.6398966312408447,
|
||
|
|
"step": 296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9801980198019802,
|
||
|
|
"grad_norm": 0.3392015122860613,
|
||
|
|
"learning_rate": 3.411495257962501e-05,
|
||
|
|
"loss": 0.6376276016235352,
|
||
|
|
"step": 297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9834983498349835,
|
||
|
|
"grad_norm": 0.3454832175281825,
|
||
|
|
"learning_rate": 3.406043044482596e-05,
|
||
|
|
"loss": 0.648975133895874,
|
||
|
|
"step": 298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9867986798679867,
|
||
|
|
"grad_norm": 0.3284679145456545,
|
||
|
|
"learning_rate": 3.4005700918399016e-05,
|
||
|
|
"loss": 0.6201390624046326,
|
||
|
|
"step": 299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9900990099009901,
|
||
|
|
"grad_norm": 0.33000362479964457,
|
||
|
|
"learning_rate": 3.395076480760576e-05,
|
||
|
|
"loss": 0.6103875637054443,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9933993399339934,
|
||
|
|
"grad_norm": 0.31707924192462417,
|
||
|
|
"learning_rate": 3.3895622922754936e-05,
|
||
|
|
"loss": 0.5486876368522644,
|
||
|
|
"step": 301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9966996699669967,
|
||
|
|
"grad_norm": 0.3094164003933957,
|
||
|
|
"learning_rate": 3.384027607719043e-05,
|
||
|
|
"loss": 0.5980846285820007,
|
||
|
|
"step": 302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.33089398879681,
|
||
|
|
"learning_rate": 3.378472508727931e-05,
|
||
|
|
"loss": 0.5986801385879517,
|
||
|
|
"step": 303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0033003300330032,
|
||
|
|
"grad_norm": 0.4690060258405477,
|
||
|
|
"learning_rate": 3.372897077239979e-05,
|
||
|
|
"loss": 0.5586727857589722,
|
||
|
|
"step": 304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0066006600660067,
|
||
|
|
"grad_norm": 0.34686786747213394,
|
||
|
|
"learning_rate": 3.36730139549291e-05,
|
||
|
|
"loss": 0.5393255949020386,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.00990099009901,
|
||
|
|
"grad_norm": 0.4023568892604613,
|
||
|
|
"learning_rate": 3.361685546023143e-05,
|
||
|
|
"loss": 0.5377227067947388,
|
||
|
|
"step": 306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0132013201320131,
|
||
|
|
"grad_norm": 0.39915820884177944,
|
||
|
|
"learning_rate": 3.356049611664568e-05,
|
||
|
|
"loss": 0.5223784446716309,
|
||
|
|
"step": 307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0165016501650166,
|
||
|
|
"grad_norm": 0.3654265250846575,
|
||
|
|
"learning_rate": 3.350393675547328e-05,
|
||
|
|
"loss": 0.5502469539642334,
|
||
|
|
"step": 308
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0198019801980198,
|
||
|
|
"grad_norm": 0.42079557297663883,
|
||
|
|
"learning_rate": 3.3447178210965936e-05,
|
||
|
|
"loss": 0.5626603960990906,
|
||
|
|
"step": 309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.023102310231023,
|
||
|
|
"grad_norm": 0.3684084639129366,
|
||
|
|
"learning_rate": 3.3390221320313303e-05,
|
||
|
|
"loss": 0.48262274265289307,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0264026402640265,
|
||
|
|
"grad_norm": 0.39908786063309193,
|
||
|
|
"learning_rate": 3.333306692363065e-05,
|
||
|
|
"loss": 0.5850967168807983,
|
||
|
|
"step": 311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0297029702970297,
|
||
|
|
"grad_norm": 0.44262876970078274,
|
||
|
|
"learning_rate": 3.3275715863946466e-05,
|
||
|
|
"loss": 0.5444281697273254,
|
||
|
|
"step": 312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.033003300330033,
|
||
|
|
"grad_norm": 0.35239079669120155,
|
||
|
|
"learning_rate": 3.3218168987190004e-05,
|
||
|
|
"loss": 0.5329654216766357,
|
||
|
|
"step": 313
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0363036303630364,
|
||
|
|
"grad_norm": 0.38499730860339404,
|
||
|
|
"learning_rate": 3.316042714217885e-05,
|
||
|
|
"loss": 0.5276832580566406,
|
||
|
|
"step": 314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0396039603960396,
|
||
|
|
"grad_norm": 0.3928937531164494,
|
||
|
|
"learning_rate": 3.310249118060636e-05,
|
||
|
|
"loss": 0.5344791412353516,
|
||
|
|
"step": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0429042904290429,
|
||
|
|
"grad_norm": 0.3466589226743573,
|
||
|
|
"learning_rate": 3.304436195702911e-05,
|
||
|
|
"loss": 0.5479785203933716,
|
||
|
|
"step": 316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.046204620462046,
|
||
|
|
"grad_norm": 0.370325309360066,
|
||
|
|
"learning_rate": 3.298604032885431e-05,
|
||
|
|
"loss": 0.5223082900047302,
|
||
|
|
"step": 317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0495049504950495,
|
||
|
|
"grad_norm": 0.4271803134046634,
|
||
|
|
"learning_rate": 3.292752715632713e-05,
|
||
|
|
"loss": 0.5667799711227417,
|
||
|
|
"step": 318
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0528052805280528,
|
||
|
|
"grad_norm": 0.33752277032768196,
|
||
|
|
"learning_rate": 3.2868823302518016e-05,
|
||
|
|
"loss": 0.5194317698478699,
|
||
|
|
"step": 319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.056105610561056,
|
||
|
|
"grad_norm": 0.35801795115870316,
|
||
|
|
"learning_rate": 3.2809929633309985e-05,
|
||
|
|
"loss": 0.4911007285118103,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0594059405940595,
|
||
|
|
"grad_norm": 0.33819516112787196,
|
||
|
|
"learning_rate": 3.2750847017385826e-05,
|
||
|
|
"loss": 0.5269002914428711,
|
||
|
|
"step": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0627062706270627,
|
||
|
|
"grad_norm": 0.3280280196094967,
|
||
|
|
"learning_rate": 3.269157632621529e-05,
|
||
|
|
"loss": 0.5124789476394653,
|
||
|
|
"step": 322
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.066006600660066,
|
||
|
|
"grad_norm": 0.3841029677303286,
|
||
|
|
"learning_rate": 3.263211843404225e-05,
|
||
|
|
"loss": 0.5483890771865845,
|
||
|
|
"step": 323
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0693069306930694,
|
||
|
|
"grad_norm": 0.348752311292252,
|
||
|
|
"learning_rate": 3.25724742178718e-05,
|
||
|
|
"loss": 0.5582579374313354,
|
||
|
|
"step": 324
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0726072607260726,
|
||
|
|
"grad_norm": 0.3672218653955236,
|
||
|
|
"learning_rate": 3.2512644557457304e-05,
|
||
|
|
"loss": 0.5662975907325745,
|
||
|
|
"step": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0759075907590758,
|
||
|
|
"grad_norm": 0.339133227284404,
|
||
|
|
"learning_rate": 3.2452630335287445e-05,
|
||
|
|
"loss": 0.5502511858940125,
|
||
|
|
"step": 326
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0792079207920793,
|
||
|
|
"grad_norm": 0.3607463939055526,
|
||
|
|
"learning_rate": 3.239243243657318e-05,
|
||
|
|
"loss": 0.5614978075027466,
|
||
|
|
"step": 327
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0825082508250825,
|
||
|
|
"grad_norm": 0.3354690532522152,
|
||
|
|
"learning_rate": 3.233205174923472e-05,
|
||
|
|
"loss": 0.4828110635280609,
|
||
|
|
"step": 328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0858085808580857,
|
||
|
|
"grad_norm": 0.3296040603044689,
|
||
|
|
"learning_rate": 3.22714891638884e-05,
|
||
|
|
"loss": 0.5437847971916199,
|
||
|
|
"step": 329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0891089108910892,
|
||
|
|
"grad_norm": 0.3295415767468974,
|
||
|
|
"learning_rate": 3.221074557383355e-05,
|
||
|
|
"loss": 0.6240063309669495,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0924092409240924,
|
||
|
|
"grad_norm": 0.3032628226796708,
|
||
|
|
"learning_rate": 3.2149821875039325e-05,
|
||
|
|
"loss": 0.5435442328453064,
|
||
|
|
"step": 331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0957095709570956,
|
||
|
|
"grad_norm": 0.30875440813945676,
|
||
|
|
"learning_rate": 3.20887189661315e-05,
|
||
|
|
"loss": 0.5240401029586792,
|
||
|
|
"step": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.099009900990099,
|
||
|
|
"grad_norm": 0.3043121620505056,
|
||
|
|
"learning_rate": 3.202743774837919e-05,
|
||
|
|
"loss": 0.5227692127227783,
|
||
|
|
"step": 333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1023102310231023,
|
||
|
|
"grad_norm": 0.3439754692795775,
|
||
|
|
"learning_rate": 3.196597912568157e-05,
|
||
|
|
"loss": 0.5607417821884155,
|
||
|
|
"step": 334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1056105610561056,
|
||
|
|
"grad_norm": 0.29691798670137787,
|
||
|
|
"learning_rate": 3.1904344004554536e-05,
|
||
|
|
"loss": 0.5607600808143616,
|
||
|
|
"step": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.108910891089109,
|
||
|
|
"grad_norm": 0.32493088910689055,
|
||
|
|
"learning_rate": 3.184253329411737e-05,
|
||
|
|
"loss": 0.47135430574417114,
|
||
|
|
"step": 336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1122112211221122,
|
||
|
|
"grad_norm": 0.3202945703052858,
|
||
|
|
"learning_rate": 3.178054790607924e-05,
|
||
|
|
"loss": 0.5708764791488647,
|
||
|
|
"step": 337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1155115511551155,
|
||
|
|
"grad_norm": 0.3164605548495645,
|
||
|
|
"learning_rate": 3.1718388754725883e-05,
|
||
|
|
"loss": 0.5522497296333313,
|
||
|
|
"step": 338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.118811881188119,
|
||
|
|
"grad_norm": 0.3449586600316318,
|
||
|
|
"learning_rate": 3.1656056756906e-05,
|
||
|
|
"loss": 0.5556532144546509,
|
||
|
|
"step": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1221122112211221,
|
||
|
|
"grad_norm": 0.3130025484639745,
|
||
|
|
"learning_rate": 3.1593552832017795e-05,
|
||
|
|
"loss": 0.5727676153182983,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1254125412541254,
|
||
|
|
"grad_norm": 0.3195703179740936,
|
||
|
|
"learning_rate": 3.153087790199541e-05,
|
||
|
|
"loss": 0.5131651759147644,
|
||
|
|
"step": 341
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1287128712871288,
|
||
|
|
"grad_norm": 0.3191177264656739,
|
||
|
|
"learning_rate": 3.146803289129528e-05,
|
||
|
|
"loss": 0.5143063068389893,
|
||
|
|
"step": 342
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.132013201320132,
|
||
|
|
"grad_norm": 0.33398757419035885,
|
||
|
|
"learning_rate": 3.1405018726882595e-05,
|
||
|
|
"loss": 0.509161114692688,
|
||
|
|
"step": 343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1353135313531353,
|
||
|
|
"grad_norm": 0.33058725446313514,
|
||
|
|
"learning_rate": 3.13418363382175e-05,
|
||
|
|
"loss": 0.5213526487350464,
|
||
|
|
"step": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1386138613861387,
|
||
|
|
"grad_norm": 0.3226863318187914,
|
||
|
|
"learning_rate": 3.127848665724149e-05,
|
||
|
|
"loss": 0.5465434789657593,
|
||
|
|
"step": 345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.141914191419142,
|
||
|
|
"grad_norm": 0.6179658385179007,
|
||
|
|
"learning_rate": 3.1214970618363626e-05,
|
||
|
|
"loss": 0.5342190265655518,
|
||
|
|
"step": 346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1452145214521452,
|
||
|
|
"grad_norm": 0.47777163001134637,
|
||
|
|
"learning_rate": 3.115128915844672e-05,
|
||
|
|
"loss": 0.541754424571991,
|
||
|
|
"step": 347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1485148514851484,
|
||
|
|
"grad_norm": 0.33931974771490697,
|
||
|
|
"learning_rate": 3.10874432167936e-05,
|
||
|
|
"loss": 0.5318331122398376,
|
||
|
|
"step": 348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1518151815181519,
|
||
|
|
"grad_norm": 0.32111740987941506,
|
||
|
|
"learning_rate": 3.1023433735133134e-05,
|
||
|
|
"loss": 0.4972509741783142,
|
||
|
|
"step": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.155115511551155,
|
||
|
|
"grad_norm": 0.30074948382432587,
|
||
|
|
"learning_rate": 3.095926165760647e-05,
|
||
|
|
"loss": 0.5417294502258301,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1584158415841583,
|
||
|
|
"grad_norm": 0.3410522798436207,
|
||
|
|
"learning_rate": 3.089492793075302e-05,
|
||
|
|
"loss": 0.554945707321167,
|
||
|
|
"step": 351
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1617161716171618,
|
||
|
|
"grad_norm": 0.3254774061643724,
|
||
|
|
"learning_rate": 3.083043350349653e-05,
|
||
|
|
"loss": 0.5204564929008484,
|
||
|
|
"step": 352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.165016501650165,
|
||
|
|
"grad_norm": 0.3088402728006412,
|
||
|
|
"learning_rate": 3.076577932713108e-05,
|
||
|
|
"loss": 0.4856947064399719,
|
||
|
|
"step": 353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1683168316831682,
|
||
|
|
"grad_norm": 0.2896918095760776,
|
||
|
|
"learning_rate": 3.0700966355307055e-05,
|
||
|
|
"loss": 0.5269368886947632,
|
||
|
|
"step": 354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1716171617161717,
|
||
|
|
"grad_norm": 0.32747543865706225,
|
||
|
|
"learning_rate": 3.063599554401708e-05,
|
||
|
|
"loss": 0.5811939239501953,
|
||
|
|
"step": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.174917491749175,
|
||
|
|
"grad_norm": 0.29324577597304957,
|
||
|
|
"learning_rate": 3.057086785158189e-05,
|
||
|
|
"loss": 0.5636904239654541,
|
||
|
|
"step": 356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1782178217821782,
|
||
|
|
"grad_norm": 0.31779620334412045,
|
||
|
|
"learning_rate": 3.050558423863626e-05,
|
||
|
|
"loss": 0.546089768409729,
|
||
|
|
"step": 357
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1815181518151816,
|
||
|
|
"grad_norm": 0.3093045991582328,
|
||
|
|
"learning_rate": 3.0440145668114774e-05,
|
||
|
|
"loss": 0.5239901542663574,
|
||
|
|
"step": 358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1848184818481848,
|
||
|
|
"grad_norm": 0.31848934088179354,
|
||
|
|
"learning_rate": 3.0374553105237637e-05,
|
||
|
|
"loss": 0.5833466053009033,
|
||
|
|
"step": 359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.188118811881188,
|
||
|
|
"grad_norm": 0.33803859097620154,
|
||
|
|
"learning_rate": 3.0308807517496456e-05,
|
||
|
|
"loss": 0.5060774087905884,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1914191419141915,
|
||
|
|
"grad_norm": 0.31145081064149094,
|
||
|
|
"learning_rate": 3.0242909874639953e-05,
|
||
|
|
"loss": 0.5164307355880737,
|
||
|
|
"step": 361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1947194719471947,
|
||
|
|
"grad_norm": 0.29765085452905116,
|
||
|
|
"learning_rate": 3.0176861148659672e-05,
|
||
|
|
"loss": 0.49949395656585693,
|
||
|
|
"step": 362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.198019801980198,
|
||
|
|
"grad_norm": 0.3296486034239661,
|
||
|
|
"learning_rate": 3.0110662313775623e-05,
|
||
|
|
"loss": 0.5581181049346924,
|
||
|
|
"step": 363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2013201320132012,
|
||
|
|
"grad_norm": 0.3116631729941006,
|
||
|
|
"learning_rate": 3.0044314346421938e-05,
|
||
|
|
"loss": 0.5657376646995544,
|
||
|
|
"step": 364
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2046204620462047,
|
||
|
|
"grad_norm": 0.33012695180790946,
|
||
|
|
"learning_rate": 2.9977818225232443e-05,
|
||
|
|
"loss": 0.5269935131072998,
|
||
|
|
"step": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2079207920792079,
|
||
|
|
"grad_norm": 0.31869984664933465,
|
||
|
|
"learning_rate": 2.991117493102626e-05,
|
||
|
|
"loss": 0.5385931730270386,
|
||
|
|
"step": 366
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2112211221122111,
|
||
|
|
"grad_norm": 0.30491226427581125,
|
||
|
|
"learning_rate": 2.984438544679329e-05,
|
||
|
|
"loss": 0.5615143179893494,
|
||
|
|
"step": 367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2145214521452146,
|
||
|
|
"grad_norm": 0.32195999076013593,
|
||
|
|
"learning_rate": 2.9777450757679754e-05,
|
||
|
|
"loss": 0.5175333023071289,
|
||
|
|
"step": 368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2178217821782178,
|
||
|
|
"grad_norm": 0.30930257180361886,
|
||
|
|
"learning_rate": 2.971037185097364e-05,
|
||
|
|
"loss": 0.565494179725647,
|
||
|
|
"step": 369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.221122112211221,
|
||
|
|
"grad_norm": 0.34237830645177886,
|
||
|
|
"learning_rate": 2.9643149716090146e-05,
|
||
|
|
"loss": 0.5519120693206787,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2244224422442245,
|
||
|
|
"grad_norm": 0.30959351563618437,
|
||
|
|
"learning_rate": 2.9575785344557114e-05,
|
||
|
|
"loss": 0.49374374747276306,
|
||
|
|
"step": 371
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2277227722772277,
|
||
|
|
"grad_norm": 0.31310768619122714,
|
||
|
|
"learning_rate": 2.950827973000034e-05,
|
||
|
|
"loss": 0.5608875751495361,
|
||
|
|
"step": 372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.231023102310231,
|
||
|
|
"grad_norm": 0.31986895424613543,
|
||
|
|
"learning_rate": 2.944063386812899e-05,
|
||
|
|
"loss": 0.5866271257400513,
|
||
|
|
"step": 373
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2343234323432344,
|
||
|
|
"grad_norm": 0.3359900469491975,
|
||
|
|
"learning_rate": 2.9372848756720867e-05,
|
||
|
|
"loss": 0.5342913269996643,
|
||
|
|
"step": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2376237623762376,
|
||
|
|
"grad_norm": 0.2956484140793021,
|
||
|
|
"learning_rate": 2.9304925395607696e-05,
|
||
|
|
"loss": 0.5539537668228149,
|
||
|
|
"step": 375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2409240924092408,
|
||
|
|
"grad_norm": 0.3239136306261367,
|
||
|
|
"learning_rate": 2.9236864786660423e-05,
|
||
|
|
"loss": 0.5614147186279297,
|
||
|
|
"step": 376
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2442244224422443,
|
||
|
|
"grad_norm": 0.3311932744032855,
|
||
|
|
"learning_rate": 2.9168667933774356e-05,
|
||
|
|
"loss": 0.46689367294311523,
|
||
|
|
"step": 377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2475247524752475,
|
||
|
|
"grad_norm": 0.3291299090174619,
|
||
|
|
"learning_rate": 2.910033584285444e-05,
|
||
|
|
"loss": 0.5383083820343018,
|
||
|
|
"step": 378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2508250825082508,
|
||
|
|
"grad_norm": 0.3013900588246958,
|
||
|
|
"learning_rate": 2.903186952180037e-05,
|
||
|
|
"loss": 0.5349752902984619,
|
||
|
|
"step": 379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2541254125412542,
|
||
|
|
"grad_norm": 0.3219145450840317,
|
||
|
|
"learning_rate": 2.8963269980491743e-05,
|
||
|
|
"loss": 0.5792303681373596,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2574257425742574,
|
||
|
|
"grad_norm": 0.2840550960191948,
|
||
|
|
"learning_rate": 2.8894538230773147e-05,
|
||
|
|
"loss": 0.524924099445343,
|
||
|
|
"step": 381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2607260726072607,
|
||
|
|
"grad_norm": 0.3172399675943548,
|
||
|
|
"learning_rate": 2.882567528643925e-05,
|
||
|
|
"loss": 0.5137406587600708,
|
||
|
|
"step": 382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2640264026402641,
|
||
|
|
"grad_norm": 0.2893676822687234,
|
||
|
|
"learning_rate": 2.8756682163219857e-05,
|
||
|
|
"loss": 0.5196574926376343,
|
||
|
|
"step": 383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2673267326732673,
|
||
|
|
"grad_norm": 0.31363904787626334,
|
||
|
|
"learning_rate": 2.8687559878764903e-05,
|
||
|
|
"loss": 0.585644006729126,
|
||
|
|
"step": 384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2706270627062706,
|
||
|
|
"grad_norm": 0.3310272877884813,
|
||
|
|
"learning_rate": 2.8618309452629445e-05,
|
||
|
|
"loss": 0.5973786115646362,
|
||
|
|
"step": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.273927392739274,
|
||
|
|
"grad_norm": 0.3201222210217655,
|
||
|
|
"learning_rate": 2.854893190625865e-05,
|
||
|
|
"loss": 0.5909825563430786,
|
||
|
|
"step": 386
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2772277227722773,
|
||
|
|
"grad_norm": 0.3507731714316878,
|
||
|
|
"learning_rate": 2.84794282629727e-05,
|
||
|
|
"loss": 0.5903690457344055,
|
||
|
|
"step": 387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2805280528052805,
|
||
|
|
"grad_norm": 0.31011243056320775,
|
||
|
|
"learning_rate": 2.840979954795171e-05,
|
||
|
|
"loss": 0.5316457152366638,
|
||
|
|
"step": 388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.283828382838284,
|
||
|
|
"grad_norm": 0.32950464198309637,
|
||
|
|
"learning_rate": 2.8340046788220613e-05,
|
||
|
|
"loss": 0.5080389976501465,
|
||
|
|
"step": 389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2871287128712872,
|
||
|
|
"grad_norm": 0.37769184930606736,
|
||
|
|
"learning_rate": 2.8270171012633994e-05,
|
||
|
|
"loss": 0.6137889623641968,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2904290429042904,
|
||
|
|
"grad_norm": 0.34430823745531935,
|
||
|
|
"learning_rate": 2.8200173251860928e-05,
|
||
|
|
"loss": 0.5433805584907532,
|
||
|
|
"step": 391
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2937293729372938,
|
||
|
|
"grad_norm": 0.356563736773021,
|
||
|
|
"learning_rate": 2.8130054538369775e-05,
|
||
|
|
"loss": 0.4965590834617615,
|
||
|
|
"step": 392
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.297029702970297,
|
||
|
|
"grad_norm": 0.29380923244218154,
|
||
|
|
"learning_rate": 2.805981590641295e-05,
|
||
|
|
"loss": 0.5361340045928955,
|
||
|
|
"step": 393
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3003300330033003,
|
||
|
|
"grad_norm": 0.31403525376793245,
|
||
|
|
"learning_rate": 2.7989458392011678e-05,
|
||
|
|
"loss": 0.47011327743530273,
|
||
|
|
"step": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3036303630363038,
|
||
|
|
"grad_norm": 0.30710914438533876,
|
||
|
|
"learning_rate": 2.7918983032940666e-05,
|
||
|
|
"loss": 0.5893687605857849,
|
||
|
|
"step": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.306930693069307,
|
||
|
|
"grad_norm": 0.3126943781985397,
|
||
|
|
"learning_rate": 2.7848390868712886e-05,
|
||
|
|
"loss": 0.5219327211380005,
|
||
|
|
"step": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3102310231023102,
|
||
|
|
"grad_norm": 0.35585146532127665,
|
||
|
|
"learning_rate": 2.7777682940564142e-05,
|
||
|
|
"loss": 0.5652155876159668,
|
||
|
|
"step": 397
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3135313531353137,
|
||
|
|
"grad_norm": 0.41906023992763497,
|
||
|
|
"learning_rate": 2.7706860291437784e-05,
|
||
|
|
"loss": 0.5361950397491455,
|
||
|
|
"step": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.316831683168317,
|
||
|
|
"grad_norm": 0.29071400108766793,
|
||
|
|
"learning_rate": 2.763592396596929e-05,
|
||
|
|
"loss": 0.5355206727981567,
|
||
|
|
"step": 399
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3201320132013201,
|
||
|
|
"grad_norm": 0.298123677847084,
|
||
|
|
"learning_rate": 2.756487501047086e-05,
|
||
|
|
"loss": 0.5082858800888062,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3234323432343233,
|
||
|
|
"grad_norm": 0.3144050740212562,
|
||
|
|
"learning_rate": 2.7493714472916013e-05,
|
||
|
|
"loss": 0.5282934904098511,
|
||
|
|
"step": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3267326732673268,
|
||
|
|
"grad_norm": 0.29396121691648713,
|
||
|
|
"learning_rate": 2.7422443402924074e-05,
|
||
|
|
"loss": 0.5502887964248657,
|
||
|
|
"step": 402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.33003300330033,
|
||
|
|
"grad_norm": 0.2854429234726643,
|
||
|
|
"learning_rate": 2.7351062851744747e-05,
|
||
|
|
"loss": 0.5374204516410828,
|
||
|
|
"step": 403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3333333333333333,
|
||
|
|
"grad_norm": 0.30308752538818784,
|
||
|
|
"learning_rate": 2.7279573872242574e-05,
|
||
|
|
"loss": 0.5602293014526367,
|
||
|
|
"step": 404
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3366336633663367,
|
||
|
|
"grad_norm": 0.30975657746221447,
|
||
|
|
"learning_rate": 2.7207977518881418e-05,
|
||
|
|
"loss": 0.5321286916732788,
|
||
|
|
"step": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.33993399339934,
|
||
|
|
"grad_norm": 0.28965457921713383,
|
||
|
|
"learning_rate": 2.713627484770892e-05,
|
||
|
|
"loss": 0.5523560047149658,
|
||
|
|
"step": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3432343234323432,
|
||
|
|
"grad_norm": 0.30598816879566076,
|
||
|
|
"learning_rate": 2.706446691634089e-05,
|
||
|
|
"loss": 0.47019705176353455,
|
||
|
|
"step": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3465346534653464,
|
||
|
|
"grad_norm": 0.2977261513860205,
|
||
|
|
"learning_rate": 2.6992554783945748e-05,
|
||
|
|
"loss": 0.540359616279602,
|
||
|
|
"step": 408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3498349834983498,
|
||
|
|
"grad_norm": 0.2845048826043699,
|
||
|
|
"learning_rate": 2.6920539511228874e-05,
|
||
|
|
"loss": 0.561464786529541,
|
||
|
|
"step": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.353135313531353,
|
||
|
|
"grad_norm": 0.2939741197740927,
|
||
|
|
"learning_rate": 2.6848422160416956e-05,
|
||
|
|
"loss": 0.5429259538650513,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3564356435643563,
|
||
|
|
"grad_norm": 0.2968609589915083,
|
||
|
|
"learning_rate": 2.677620379524237e-05,
|
||
|
|
"loss": 0.5452640652656555,
|
||
|
|
"step": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3597359735973598,
|
||
|
|
"grad_norm": 0.28949363661635646,
|
||
|
|
"learning_rate": 2.670388548092741e-05,
|
||
|
|
"loss": 0.49627864360809326,
|
||
|
|
"step": 412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.363036303630363,
|
||
|
|
"grad_norm": 0.328169978832012,
|
||
|
|
"learning_rate": 2.663146828416867e-05,
|
||
|
|
"loss": 0.5331633687019348,
|
||
|
|
"step": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3663366336633662,
|
||
|
|
"grad_norm": 0.2926434963884909,
|
||
|
|
"learning_rate": 2.6558953273121216e-05,
|
||
|
|
"loss": 0.5447151064872742,
|
||
|
|
"step": 414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3696369636963697,
|
||
|
|
"grad_norm": 0.2863360845432002,
|
||
|
|
"learning_rate": 2.648634151738292e-05,
|
||
|
|
"loss": 0.5467007160186768,
|
||
|
|
"step": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.372937293729373,
|
||
|
|
"grad_norm": 0.33044933855099695,
|
||
|
|
"learning_rate": 2.6413634087978602e-05,
|
||
|
|
"loss": 0.5804279446601868,
|
||
|
|
"step": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3762376237623761,
|
||
|
|
"grad_norm": 0.29168904019746145,
|
||
|
|
"learning_rate": 2.63408320573443e-05,
|
||
|
|
"loss": 0.5323517322540283,
|
||
|
|
"step": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3795379537953796,
|
||
|
|
"grad_norm": 0.3046417110987717,
|
||
|
|
"learning_rate": 2.6267936499311402e-05,
|
||
|
|
"loss": 0.5452409982681274,
|
||
|
|
"step": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3828382838283828,
|
||
|
|
"grad_norm": 0.2878853361033164,
|
||
|
|
"learning_rate": 2.619494848909084e-05,
|
||
|
|
"loss": 0.4622665047645569,
|
||
|
|
"step": 419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.386138613861386,
|
||
|
|
"grad_norm": 0.3129938954769346,
|
||
|
|
"learning_rate": 2.6121869103257206e-05,
|
||
|
|
"loss": 0.531772255897522,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3894389438943895,
|
||
|
|
"grad_norm": 0.3044320552061303,
|
||
|
|
"learning_rate": 2.6048699419732897e-05,
|
||
|
|
"loss": 0.519554853439331,
|
||
|
|
"step": 421
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3927392739273927,
|
||
|
|
"grad_norm": 0.32616258357306027,
|
||
|
|
"learning_rate": 2.5975440517772187e-05,
|
||
|
|
"loss": 0.545585572719574,
|
||
|
|
"step": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.396039603960396,
|
||
|
|
"grad_norm": 0.297995845019565,
|
||
|
|
"learning_rate": 2.5902093477945345e-05,
|
||
|
|
"loss": 0.5641547441482544,
|
||
|
|
"step": 423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3993399339933994,
|
||
|
|
"grad_norm": 0.28406971495281874,
|
||
|
|
"learning_rate": 2.5828659382122655e-05,
|
||
|
|
"loss": 0.5578028559684753,
|
||
|
|
"step": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4026402640264026,
|
||
|
|
"grad_norm": 0.35618435421860006,
|
||
|
|
"learning_rate": 2.5755139313458484e-05,
|
||
|
|
"loss": 0.5931404232978821,
|
||
|
|
"step": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4059405940594059,
|
||
|
|
"grad_norm": 0.3227282264542969,
|
||
|
|
"learning_rate": 2.5681534356375314e-05,
|
||
|
|
"loss": 0.5486891865730286,
|
||
|
|
"step": 426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4092409240924093,
|
||
|
|
"grad_norm": 0.31220449886262164,
|
||
|
|
"learning_rate": 2.5607845596547706e-05,
|
||
|
|
"loss": 0.5007671117782593,
|
||
|
|
"step": 427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4125412541254125,
|
||
|
|
"grad_norm": 0.2970377848116104,
|
||
|
|
"learning_rate": 2.5534074120886346e-05,
|
||
|
|
"loss": 0.5044519901275635,
|
||
|
|
"step": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4158415841584158,
|
||
|
|
"grad_norm": 0.30667327850480125,
|
||
|
|
"learning_rate": 2.5460221017521952e-05,
|
||
|
|
"loss": 0.5227789878845215,
|
||
|
|
"step": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4191419141914192,
|
||
|
|
"grad_norm": 0.2902458759439887,
|
||
|
|
"learning_rate": 2.538628737578926e-05,
|
||
|
|
"loss": 0.5530189871788025,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4224422442244224,
|
||
|
|
"grad_norm": 0.3114416510328153,
|
||
|
|
"learning_rate": 2.5312274286210966e-05,
|
||
|
|
"loss": 0.508142352104187,
|
||
|
|
"step": 431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4257425742574257,
|
||
|
|
"grad_norm": 0.30284970816559353,
|
||
|
|
"learning_rate": 2.523818284048159e-05,
|
||
|
|
"loss": 0.5497263669967651,
|
||
|
|
"step": 432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4290429042904291,
|
||
|
|
"grad_norm": 0.3619418905679721,
|
||
|
|
"learning_rate": 2.5164014131451443e-05,
|
||
|
|
"loss": 0.5477034449577332,
|
||
|
|
"step": 433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4323432343234324,
|
||
|
|
"grad_norm": 0.28668741491270383,
|
||
|
|
"learning_rate": 2.508976925311045e-05,
|
||
|
|
"loss": 0.5091728568077087,
|
||
|
|
"step": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4356435643564356,
|
||
|
|
"grad_norm": 0.2922234358135184,
|
||
|
|
"learning_rate": 2.501544930057203e-05,
|
||
|
|
"loss": 0.5022713541984558,
|
||
|
|
"step": 435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.438943894389439,
|
||
|
|
"grad_norm": 0.29994035273286174,
|
||
|
|
"learning_rate": 2.494105537005697e-05,
|
||
|
|
"loss": 0.5401599407196045,
|
||
|
|
"step": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4422442244224423,
|
||
|
|
"grad_norm": 0.27863085551634303,
|
||
|
|
"learning_rate": 2.4866588558877208e-05,
|
||
|
|
"loss": 0.5632063150405884,
|
||
|
|
"step": 437
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4455445544554455,
|
||
|
|
"grad_norm": 0.2968792338733857,
|
||
|
|
"learning_rate": 2.479204996541969e-05,
|
||
|
|
"loss": 0.552355170249939,
|
||
|
|
"step": 438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.448844884488449,
|
||
|
|
"grad_norm": 0.3222205976590156,
|
||
|
|
"learning_rate": 2.4717440689130154e-05,
|
||
|
|
"loss": 0.5604996681213379,
|
||
|
|
"step": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4521452145214522,
|
||
|
|
"grad_norm": 0.2781451863798608,
|
||
|
|
"learning_rate": 2.4642761830496893e-05,
|
||
|
|
"loss": 0.4961245656013489,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4554455445544554,
|
||
|
|
"grad_norm": 0.3327533816855903,
|
||
|
|
"learning_rate": 2.4568014491034565e-05,
|
||
|
|
"loss": 0.5403590202331543,
|
||
|
|
"step": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4587458745874589,
|
||
|
|
"grad_norm": 0.2944499869326328,
|
||
|
|
"learning_rate": 2.4493199773267902e-05,
|
||
|
|
"loss": 0.4753378629684448,
|
||
|
|
"step": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.462046204620462,
|
||
|
|
"grad_norm": 0.30936599048377306,
|
||
|
|
"learning_rate": 2.4418318780715477e-05,
|
||
|
|
"loss": 0.5125438570976257,
|
||
|
|
"step": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4653465346534653,
|
||
|
|
"grad_norm": 0.3047486735791836,
|
||
|
|
"learning_rate": 2.434337261787342e-05,
|
||
|
|
"loss": 0.5670269727706909,
|
||
|
|
"step": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4686468646864688,
|
||
|
|
"grad_norm": 0.3348418102837006,
|
||
|
|
"learning_rate": 2.426836239019911e-05,
|
||
|
|
"loss": 0.5538198947906494,
|
||
|
|
"step": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.471947194719472,
|
||
|
|
"grad_norm": 0.2790312641462961,
|
||
|
|
"learning_rate": 2.4193289204094893e-05,
|
||
|
|
"loss": 0.5012328028678894,
|
||
|
|
"step": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4752475247524752,
|
||
|
|
"grad_norm": 0.30485310749783334,
|
||
|
|
"learning_rate": 2.4118154166891762e-05,
|
||
|
|
"loss": 0.538119912147522,
|
||
|
|
"step": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4785478547854787,
|
||
|
|
"grad_norm": 0.32398781026753815,
|
||
|
|
"learning_rate": 2.4042958386833003e-05,
|
||
|
|
"loss": 0.5252339839935303,
|
||
|
|
"step": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.481848184818482,
|
||
|
|
"grad_norm": 0.326928536480608,
|
||
|
|
"learning_rate": 2.3967702973057853e-05,
|
||
|
|
"loss": 0.5367081761360168,
|
||
|
|
"step": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4851485148514851,
|
||
|
|
"grad_norm": 0.3044938562463835,
|
||
|
|
"learning_rate": 2.3892389035585167e-05,
|
||
|
|
"loss": 0.5091884136199951,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4884488448844886,
|
||
|
|
"grad_norm": 0.2897824690201277,
|
||
|
|
"learning_rate": 2.3817017685297016e-05,
|
||
|
|
"loss": 0.5079891681671143,
|
||
|
|
"step": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4917491749174918,
|
||
|
|
"grad_norm": 0.2966882318097961,
|
||
|
|
"learning_rate": 2.3741590033922313e-05,
|
||
|
|
"loss": 0.511939287185669,
|
||
|
|
"step": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.495049504950495,
|
||
|
|
"grad_norm": 0.28797637565211376,
|
||
|
|
"learning_rate": 2.3666107194020404e-05,
|
||
|
|
"loss": 0.5070478916168213,
|
||
|
|
"step": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4983498349834983,
|
||
|
|
"grad_norm": 0.29050652670321586,
|
||
|
|
"learning_rate": 2.3590570278964682e-05,
|
||
|
|
"loss": 0.547492504119873,
|
||
|
|
"step": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5016501650165015,
|
||
|
|
"grad_norm": 0.311874965448668,
|
||
|
|
"learning_rate": 2.3514980402926132e-05,
|
||
|
|
"loss": 0.5386558771133423,
|
||
|
|
"step": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.504950495049505,
|
||
|
|
"grad_norm": 0.26980126113979913,
|
||
|
|
"learning_rate": 2.3439338680856943e-05,
|
||
|
|
"loss": 0.48668172955513,
|
||
|
|
"step": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5082508250825084,
|
||
|
|
"grad_norm": 0.31689121328788056,
|
||
|
|
"learning_rate": 2.3363646228474002e-05,
|
||
|
|
"loss": 0.5497942566871643,
|
||
|
|
"step": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5115511551155114,
|
||
|
|
"grad_norm": 0.3648919358675907,
|
||
|
|
"learning_rate": 2.328790416224248e-05,
|
||
|
|
"loss": 0.5267748832702637,
|
||
|
|
"step": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5148514851485149,
|
||
|
|
"grad_norm": 0.3191029117024018,
|
||
|
|
"learning_rate": 2.3212113599359368e-05,
|
||
|
|
"loss": 0.5578982830047607,
|
||
|
|
"step": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5181518151815183,
|
||
|
|
"grad_norm": 0.30610891906133464,
|
||
|
|
"learning_rate": 2.3136275657736956e-05,
|
||
|
|
"loss": 0.5136545896530151,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5214521452145213,
|
||
|
|
"grad_norm": 0.28466532575384307,
|
||
|
|
"learning_rate": 2.3060391455986403e-05,
|
||
|
|
"loss": 0.5718669891357422,
|
||
|
|
"step": 461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5247524752475248,
|
||
|
|
"grad_norm": 0.3064265170567389,
|
||
|
|
"learning_rate": 2.2984462113401184e-05,
|
||
|
|
"loss": 0.5427108407020569,
|
||
|
|
"step": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.528052805280528,
|
||
|
|
"grad_norm": 0.28495826208338726,
|
||
|
|
"learning_rate": 2.2908488749940596e-05,
|
||
|
|
"loss": 0.5293564200401306,
|
||
|
|
"step": 463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5313531353135312,
|
||
|
|
"grad_norm": 0.3073240786964915,
|
||
|
|
"learning_rate": 2.2832472486213275e-05,
|
||
|
|
"loss": 0.550743579864502,
|
||
|
|
"step": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5346534653465347,
|
||
|
|
"grad_norm": 0.30789089349395116,
|
||
|
|
"learning_rate": 2.2756414443460602e-05,
|
||
|
|
"loss": 0.5957387685775757,
|
||
|
|
"step": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.537953795379538,
|
||
|
|
"grad_norm": 0.2840660845057486,
|
||
|
|
"learning_rate": 2.2680315743540234e-05,
|
||
|
|
"loss": 0.4994407892227173,
|
||
|
|
"step": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5412541254125411,
|
||
|
|
"grad_norm": 0.2912314912557071,
|
||
|
|
"learning_rate": 2.260417750890949e-05,
|
||
|
|
"loss": 0.5120857954025269,
|
||
|
|
"step": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5445544554455446,
|
||
|
|
"grad_norm": 0.3024618438133355,
|
||
|
|
"learning_rate": 2.2528000862608845e-05,
|
||
|
|
"loss": 0.5727359056472778,
|
||
|
|
"step": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5478547854785478,
|
||
|
|
"grad_norm": 0.30379584493476613,
|
||
|
|
"learning_rate": 2.2451786928245344e-05,
|
||
|
|
"loss": 0.584964394569397,
|
||
|
|
"step": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.551155115511551,
|
||
|
|
"grad_norm": 0.2782374360382863,
|
||
|
|
"learning_rate": 2.237553682997603e-05,
|
||
|
|
"loss": 0.5507112741470337,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5544554455445545,
|
||
|
|
"grad_norm": 0.26333814455393634,
|
||
|
|
"learning_rate": 2.2299251692491364e-05,
|
||
|
|
"loss": 0.49136701226234436,
|
||
|
|
"step": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5577557755775577,
|
||
|
|
"grad_norm": 0.31673569076077385,
|
||
|
|
"learning_rate": 2.2222932640998635e-05,
|
||
|
|
"loss": 0.5374805927276611,
|
||
|
|
"step": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.561056105610561,
|
||
|
|
"grad_norm": 0.29370656251116817,
|
||
|
|
"learning_rate": 2.2146580801205362e-05,
|
||
|
|
"loss": 0.523996114730835,
|
||
|
|
"step": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5643564356435644,
|
||
|
|
"grad_norm": 0.27277397989040114,
|
||
|
|
"learning_rate": 2.207019729930271e-05,
|
||
|
|
"loss": 0.48198428750038147,
|
||
|
|
"step": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5676567656765676,
|
||
|
|
"grad_norm": 0.2861287068823064,
|
||
|
|
"learning_rate": 2.199378326194883e-05,
|
||
|
|
"loss": 0.5148699879646301,
|
||
|
|
"step": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5709570957095709,
|
||
|
|
"grad_norm": 0.2981231032466442,
|
||
|
|
"learning_rate": 2.1917339816252303e-05,
|
||
|
|
"loss": 0.5297671556472778,
|
||
|
|
"step": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5742574257425743,
|
||
|
|
"grad_norm": 0.2775943923870632,
|
||
|
|
"learning_rate": 2.1840868089755465e-05,
|
||
|
|
"loss": 0.5082278847694397,
|
||
|
|
"step": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5775577557755776,
|
||
|
|
"grad_norm": 0.2988631140370514,
|
||
|
|
"learning_rate": 2.176436921041779e-05,
|
||
|
|
"loss": 0.4755392372608185,
|
||
|
|
"step": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5808580858085808,
|
||
|
|
"grad_norm": 0.28707182004966697,
|
||
|
|
"learning_rate": 2.1687844306599275e-05,
|
||
|
|
"loss": 0.5249454975128174,
|
||
|
|
"step": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5841584158415842,
|
||
|
|
"grad_norm": 0.3023499942723386,
|
||
|
|
"learning_rate": 2.161129450704376e-05,
|
||
|
|
"loss": 0.5626166462898254,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5874587458745875,
|
||
|
|
"grad_norm": 0.28182475866947054,
|
||
|
|
"learning_rate": 2.1534720940862318e-05,
|
||
|
|
"loss": 0.5590533018112183,
|
||
|
|
"step": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5907590759075907,
|
||
|
|
"grad_norm": 0.2724331542693392,
|
||
|
|
"learning_rate": 2.1458124737516557e-05,
|
||
|
|
"loss": 0.5146170854568481,
|
||
|
|
"step": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5940594059405941,
|
||
|
|
"grad_norm": 0.28834268248771533,
|
||
|
|
"learning_rate": 2.1381507026802007e-05,
|
||
|
|
"loss": 0.5633066296577454,
|
||
|
|
"step": 483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5973597359735974,
|
||
|
|
"grad_norm": 0.29376551657635425,
|
||
|
|
"learning_rate": 2.130486893883141e-05,
|
||
|
|
"loss": 0.5273865461349487,
|
||
|
|
"step": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6006600660066006,
|
||
|
|
"grad_norm": 0.277893471974935,
|
||
|
|
"learning_rate": 2.1228211604018088e-05,
|
||
|
|
"loss": 0.5040723085403442,
|
||
|
|
"step": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.603960396039604,
|
||
|
|
"grad_norm": 0.2901419412347278,
|
||
|
|
"learning_rate": 2.1151536153059254e-05,
|
||
|
|
"loss": 0.5254411697387695,
|
||
|
|
"step": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6072607260726073,
|
||
|
|
"grad_norm": 0.29340041503520936,
|
||
|
|
"learning_rate": 2.1074843716919323e-05,
|
||
|
|
"loss": 0.5789728760719299,
|
||
|
|
"step": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6105610561056105,
|
||
|
|
"grad_norm": 0.2858502686555999,
|
||
|
|
"learning_rate": 2.0998135426813245e-05,
|
||
|
|
"loss": 0.5521235466003418,
|
||
|
|
"step": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.613861386138614,
|
||
|
|
"grad_norm": 0.2770947277408911,
|
||
|
|
"learning_rate": 2.092141241418984e-05,
|
||
|
|
"loss": 0.4702959954738617,
|
||
|
|
"step": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6171617161716172,
|
||
|
|
"grad_norm": 0.29713285242144816,
|
||
|
|
"learning_rate": 2.0844675810715046e-05,
|
||
|
|
"loss": 0.4960707128047943,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6204620462046204,
|
||
|
|
"grad_norm": 0.2800759957297699,
|
||
|
|
"learning_rate": 2.076792674825529e-05,
|
||
|
|
"loss": 0.5334826111793518,
|
||
|
|
"step": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6237623762376239,
|
||
|
|
"grad_norm": 0.4465546145157964,
|
||
|
|
"learning_rate": 2.0691166358860775e-05,
|
||
|
|
"loss": 0.5604894161224365,
|
||
|
|
"step": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.627062706270627,
|
||
|
|
"grad_norm": 0.2895889767199155,
|
||
|
|
"learning_rate": 2.061439577474875e-05,
|
||
|
|
"loss": 0.5565654635429382,
|
||
|
|
"step": 493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6303630363036303,
|
||
|
|
"grad_norm": 0.2663082120203026,
|
||
|
|
"learning_rate": 2.0537616128286875e-05,
|
||
|
|
"loss": 0.541640043258667,
|
||
|
|
"step": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6336633663366338,
|
||
|
|
"grad_norm": 0.27975047407467746,
|
||
|
|
"learning_rate": 2.0460828551976436e-05,
|
||
|
|
"loss": 0.5247132182121277,
|
||
|
|
"step": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.636963696369637,
|
||
|
|
"grad_norm": 0.30554958978585,
|
||
|
|
"learning_rate": 2.0384034178435727e-05,
|
||
|
|
"loss": 0.533937394618988,
|
||
|
|
"step": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6402640264026402,
|
||
|
|
"grad_norm": 0.29094539458240765,
|
||
|
|
"learning_rate": 2.0307234140383264e-05,
|
||
|
|
"loss": 0.5857927799224854,
|
||
|
|
"step": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6435643564356437,
|
||
|
|
"grad_norm": 0.2718482098386275,
|
||
|
|
"learning_rate": 2.0230429570621134e-05,
|
||
|
|
"loss": 0.5191807746887207,
|
||
|
|
"step": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6468646864686467,
|
||
|
|
"grad_norm": 0.28523897670587156,
|
||
|
|
"learning_rate": 2.0153621602018276e-05,
|
||
|
|
"loss": 0.5255881547927856,
|
||
|
|
"step": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6501650165016502,
|
||
|
|
"grad_norm": 0.27057309315143646,
|
||
|
|
"learning_rate": 2.0076811367493736e-05,
|
||
|
|
"loss": 0.5134017467498779,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6534653465346536,
|
||
|
|
"grad_norm": 0.2603322919481828,
|
||
|
|
"learning_rate": 2e-05,
|
||
|
|
"loss": 0.4548872113227844,
|
||
|
|
"step": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6567656765676566,
|
||
|
|
"grad_norm": 0.2841830282558966,
|
||
|
|
"learning_rate": 1.9923188632506268e-05,
|
||
|
|
"loss": 0.4879235625267029,
|
||
|
|
"step": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.66006600660066,
|
||
|
|
"grad_norm": 0.2718072353452213,
|
||
|
|
"learning_rate": 1.9846378397981737e-05,
|
||
|
|
"loss": 0.5488070249557495,
|
||
|
|
"step": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6633663366336635,
|
||
|
|
"grad_norm": 0.26980717544426264,
|
||
|
|
"learning_rate": 1.976957042937887e-05,
|
||
|
|
"loss": 0.474858820438385,
|
||
|
|
"step": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6666666666666665,
|
||
|
|
"grad_norm": 0.2729038695715346,
|
||
|
|
"learning_rate": 1.969276585961674e-05,
|
||
|
|
"loss": 0.573983907699585,
|
||
|
|
"step": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.66996699669967,
|
||
|
|
"grad_norm": 0.2754435399081945,
|
||
|
|
"learning_rate": 1.9615965821564284e-05,
|
||
|
|
"loss": 0.5299487709999084,
|
||
|
|
"step": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6732673267326734,
|
||
|
|
"grad_norm": 0.28078214205826996,
|
||
|
|
"learning_rate": 1.9539171448023568e-05,
|
||
|
|
"loss": 0.580963134765625,
|
||
|
|
"step": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6765676567656764,
|
||
|
|
"grad_norm": 0.28056872169008745,
|
||
|
|
"learning_rate": 1.946238387171313e-05,
|
||
|
|
"loss": 0.5240850448608398,
|
||
|
|
"step": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6798679867986799,
|
||
|
|
"grad_norm": 0.27579932032687055,
|
||
|
|
"learning_rate": 1.9385604225251245e-05,
|
||
|
|
"loss": 0.5397930145263672,
|
||
|
|
"step": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6831683168316833,
|
||
|
|
"grad_norm": 0.2649239844230271,
|
||
|
|
"learning_rate": 1.9308833641139235e-05,
|
||
|
|
"loss": 0.4949077367782593,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6864686468646863,
|
||
|
|
"grad_norm": 0.26821293718742795,
|
||
|
|
"learning_rate": 1.9232073251744715e-05,
|
||
|
|
"loss": 0.4906027913093567,
|
||
|
|
"step": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6897689768976898,
|
||
|
|
"grad_norm": 0.30180544906142204,
|
||
|
|
"learning_rate": 1.9155324189284957e-05,
|
||
|
|
"loss": 0.562363862991333,
|
||
|
|
"step": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.693069306930693,
|
||
|
|
"grad_norm": 0.26560887539548794,
|
||
|
|
"learning_rate": 1.9078587585810167e-05,
|
||
|
|
"loss": 0.5347090363502502,
|
||
|
|
"step": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6963696369636962,
|
||
|
|
"grad_norm": 0.28206984650870465,
|
||
|
|
"learning_rate": 1.900186457318676e-05,
|
||
|
|
"loss": 0.5554836988449097,
|
||
|
|
"step": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6996699669966997,
|
||
|
|
"grad_norm": 0.2667791650009087,
|
||
|
|
"learning_rate": 1.8925156283080684e-05,
|
||
|
|
"loss": 0.5179104208946228,
|
||
|
|
"step": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.702970297029703,
|
||
|
|
"grad_norm": 0.2759730227945326,
|
||
|
|
"learning_rate": 1.8848463846940756e-05,
|
||
|
|
"loss": 0.552240252494812,
|
||
|
|
"step": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7062706270627062,
|
||
|
|
"grad_norm": 0.34634391778922186,
|
||
|
|
"learning_rate": 1.8771788395981915e-05,
|
||
|
|
"loss": 0.534430980682373,
|
||
|
|
"step": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7095709570957096,
|
||
|
|
"grad_norm": 0.26711110641337843,
|
||
|
|
"learning_rate": 1.8695131061168598e-05,
|
||
|
|
"loss": 0.5601803064346313,
|
||
|
|
"step": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7128712871287128,
|
||
|
|
"grad_norm": 0.3479876576460715,
|
||
|
|
"learning_rate": 1.8618492973198e-05,
|
||
|
|
"loss": 0.5119711756706238,
|
||
|
|
"step": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.716171617161716,
|
||
|
|
"grad_norm": 0.32608510378908223,
|
||
|
|
"learning_rate": 1.8541875262483446e-05,
|
||
|
|
"loss": 0.5632577538490295,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7194719471947195,
|
||
|
|
"grad_norm": 0.2744236737297373,
|
||
|
|
"learning_rate": 1.8465279059137686e-05,
|
||
|
|
"loss": 0.5499478578567505,
|
||
|
|
"step": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7227722772277227,
|
||
|
|
"grad_norm": 0.2835433030263243,
|
||
|
|
"learning_rate": 1.8388705492956244e-05,
|
||
|
|
"loss": 0.5176683664321899,
|
||
|
|
"step": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.726072607260726,
|
||
|
|
"grad_norm": 0.30494439216544983,
|
||
|
|
"learning_rate": 1.8312155693400735e-05,
|
||
|
|
"loss": 0.49528205394744873,
|
||
|
|
"step": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7293729372937294,
|
||
|
|
"grad_norm": 0.26710805184601655,
|
||
|
|
"learning_rate": 1.8235630789582213e-05,
|
||
|
|
"loss": 0.5684216022491455,
|
||
|
|
"step": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7326732673267327,
|
||
|
|
"grad_norm": 0.3852411183060649,
|
||
|
|
"learning_rate": 1.815913191024454e-05,
|
||
|
|
"loss": 0.5375942587852478,
|
||
|
|
"step": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7359735973597359,
|
||
|
|
"grad_norm": 0.33262500157086355,
|
||
|
|
"learning_rate": 1.8082660183747704e-05,
|
||
|
|
"loss": 0.5541956424713135,
|
||
|
|
"step": 526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7392739273927393,
|
||
|
|
"grad_norm": 0.28642691265671333,
|
||
|
|
"learning_rate": 1.8006216738051175e-05,
|
||
|
|
"loss": 0.5304872393608093,
|
||
|
|
"step": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7425742574257426,
|
||
|
|
"grad_norm": 0.2734388390360432,
|
||
|
|
"learning_rate": 1.7929802700697297e-05,
|
||
|
|
"loss": 0.48648735880851746,
|
||
|
|
"step": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7458745874587458,
|
||
|
|
"grad_norm": 0.28617564742207474,
|
||
|
|
"learning_rate": 1.7853419198794638e-05,
|
||
|
|
"loss": 0.49221059679985046,
|
||
|
|
"step": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7491749174917492,
|
||
|
|
"grad_norm": 0.2790947673251484,
|
||
|
|
"learning_rate": 1.7777067359001375e-05,
|
||
|
|
"loss": 0.5652948021888733,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7524752475247525,
|
||
|
|
"grad_norm": 0.2853703561489374,
|
||
|
|
"learning_rate": 1.7700748307508643e-05,
|
||
|
|
"loss": 0.5187686681747437,
|
||
|
|
"step": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7557755775577557,
|
||
|
|
"grad_norm": 0.2853976224574607,
|
||
|
|
"learning_rate": 1.7624463170023974e-05,
|
||
|
|
"loss": 0.5013114809989929,
|
||
|
|
"step": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7590759075907592,
|
||
|
|
"grad_norm": 0.2619757068753479,
|
||
|
|
"learning_rate": 1.7548213071754663e-05,
|
||
|
|
"loss": 0.47477245330810547,
|
||
|
|
"step": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7623762376237624,
|
||
|
|
"grad_norm": 0.29220608585061886,
|
||
|
|
"learning_rate": 1.7471999137391162e-05,
|
||
|
|
"loss": 0.5600515007972717,
|
||
|
|
"step": 534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7656765676567656,
|
||
|
|
"grad_norm": 0.2680464906367101,
|
||
|
|
"learning_rate": 1.7395822491090513e-05,
|
||
|
|
"loss": 0.5017521381378174,
|
||
|
|
"step": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.768976897689769,
|
||
|
|
"grad_norm": 0.3164936697237469,
|
||
|
|
"learning_rate": 1.7319684256459773e-05,
|
||
|
|
"loss": 0.48718830943107605,
|
||
|
|
"step": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7722772277227723,
|
||
|
|
"grad_norm": 0.26576630911317906,
|
||
|
|
"learning_rate": 1.72435855565394e-05,
|
||
|
|
"loss": 0.5348131060600281,
|
||
|
|
"step": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7755775577557755,
|
||
|
|
"grad_norm": 0.3785718389935733,
|
||
|
|
"learning_rate": 1.716752751378673e-05,
|
||
|
|
"loss": 0.5132070183753967,
|
||
|
|
"step": 538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.778877887788779,
|
||
|
|
"grad_norm": 0.2912227396538846,
|
||
|
|
"learning_rate": 1.7091511250059407e-05,
|
||
|
|
"loss": 0.5194598436355591,
|
||
|
|
"step": 539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7821782178217822,
|
||
|
|
"grad_norm": 0.25340183641995817,
|
||
|
|
"learning_rate": 1.701553788659883e-05,
|
||
|
|
"loss": 0.4950656294822693,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7854785478547854,
|
||
|
|
"grad_norm": 0.32993048381725726,
|
||
|
|
"learning_rate": 1.6939608544013603e-05,
|
||
|
|
"loss": 0.5465744137763977,
|
||
|
|
"step": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7887788778877889,
|
||
|
|
"grad_norm": 0.33326548174687204,
|
||
|
|
"learning_rate": 1.6863724342263047e-05,
|
||
|
|
"loss": 0.5328625440597534,
|
||
|
|
"step": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7920792079207921,
|
||
|
|
"grad_norm": 0.2747817812302539,
|
||
|
|
"learning_rate": 1.6787886400640645e-05,
|
||
|
|
"loss": 0.483689546585083,
|
||
|
|
"step": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7953795379537953,
|
||
|
|
"grad_norm": 0.2619017709081145,
|
||
|
|
"learning_rate": 1.6712095837757525e-05,
|
||
|
|
"loss": 0.5225390195846558,
|
||
|
|
"step": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7986798679867988,
|
||
|
|
"grad_norm": 0.2718453161830156,
|
||
|
|
"learning_rate": 1.6636353771526005e-05,
|
||
|
|
"loss": 0.5168595314025879,
|
||
|
|
"step": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.801980198019802,
|
||
|
|
"grad_norm": 0.2915579523683445,
|
||
|
|
"learning_rate": 1.6560661319143064e-05,
|
||
|
|
"loss": 0.5257725119590759,
|
||
|
|
"step": 546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8052805280528053,
|
||
|
|
"grad_norm": 0.2767711815305055,
|
||
|
|
"learning_rate": 1.648501959707387e-05,
|
||
|
|
"loss": 0.5023485422134399,
|
||
|
|
"step": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8085808580858087,
|
||
|
|
"grad_norm": 0.267570701584644,
|
||
|
|
"learning_rate": 1.6409429721035324e-05,
|
||
|
|
"loss": 0.48897239565849304,
|
||
|
|
"step": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.811881188118812,
|
||
|
|
"grad_norm": 0.28714006005114934,
|
||
|
|
"learning_rate": 1.63338928059796e-05,
|
||
|
|
"loss": 0.5318676829338074,
|
||
|
|
"step": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8151815181518152,
|
||
|
|
"grad_norm": 0.2802563301473015,
|
||
|
|
"learning_rate": 1.6258409966077693e-05,
|
||
|
|
"loss": 0.4996787905693054,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8184818481848186,
|
||
|
|
"grad_norm": 0.28354713397276166,
|
||
|
|
"learning_rate": 1.6182982314702987e-05,
|
||
|
|
"loss": 0.4833434820175171,
|
||
|
|
"step": 551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8217821782178216,
|
||
|
|
"grad_norm": 0.2904168234412241,
|
||
|
|
"learning_rate": 1.6107610964414836e-05,
|
||
|
|
"loss": 0.5050291419029236,
|
||
|
|
"step": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.825082508250825,
|
||
|
|
"grad_norm": 0.2859100119195952,
|
||
|
|
"learning_rate": 1.6032297026942154e-05,
|
||
|
|
"loss": 0.5423529148101807,
|
||
|
|
"step": 553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8283828382838285,
|
||
|
|
"grad_norm": 0.2700093369793658,
|
||
|
|
"learning_rate": 1.5957041613167007e-05,
|
||
|
|
"loss": 0.5670536756515503,
|
||
|
|
"step": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8316831683168315,
|
||
|
|
"grad_norm": 0.2784484594925466,
|
||
|
|
"learning_rate": 1.5881845833108245e-05,
|
||
|
|
"loss": 0.5148528814315796,
|
||
|
|
"step": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.834983498349835,
|
||
|
|
"grad_norm": 0.2795083034807244,
|
||
|
|
"learning_rate": 1.5806710795905113e-05,
|
||
|
|
"loss": 0.5441350340843201,
|
||
|
|
"step": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8382838283828384,
|
||
|
|
"grad_norm": 0.27706485047893287,
|
||
|
|
"learning_rate": 1.5731637609800897e-05,
|
||
|
|
"loss": 0.5338016748428345,
|
||
|
|
"step": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8415841584158414,
|
||
|
|
"grad_norm": 0.281671337152691,
|
||
|
|
"learning_rate": 1.5656627382126587e-05,
|
||
|
|
"loss": 0.522803783416748,
|
||
|
|
"step": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.844884488448845,
|
||
|
|
"grad_norm": 0.2867314215651197,
|
||
|
|
"learning_rate": 1.5581681219284523e-05,
|
||
|
|
"loss": 0.5079183578491211,
|
||
|
|
"step": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8481848184818483,
|
||
|
|
"grad_norm": 0.2880604655799914,
|
||
|
|
"learning_rate": 1.5506800226732104e-05,
|
||
|
|
"loss": 0.5360547304153442,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8514851485148514,
|
||
|
|
"grad_norm": 0.276328956502413,
|
||
|
|
"learning_rate": 1.5431985508965438e-05,
|
||
|
|
"loss": 0.5137909650802612,
|
||
|
|
"step": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8547854785478548,
|
||
|
|
"grad_norm": 0.26198432963654783,
|
||
|
|
"learning_rate": 1.5357238169503107e-05,
|
||
|
|
"loss": 0.513020396232605,
|
||
|
|
"step": 562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.858085808580858,
|
||
|
|
"grad_norm": 0.3155751914603546,
|
||
|
|
"learning_rate": 1.5282559310869856e-05,
|
||
|
|
"loss": 0.5015939474105835,
|
||
|
|
"step": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8613861386138613,
|
||
|
|
"grad_norm": 0.2654102353913447,
|
||
|
|
"learning_rate": 1.5207950034580317e-05,
|
||
|
|
"loss": 0.5012743473052979,
|
||
|
|
"step": 564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8646864686468647,
|
||
|
|
"grad_norm": 0.27309132142690246,
|
||
|
|
"learning_rate": 1.5133411441122799e-05,
|
||
|
|
"loss": 0.48864254355430603,
|
||
|
|
"step": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.867986798679868,
|
||
|
|
"grad_norm": 0.6058665885379618,
|
||
|
|
"learning_rate": 1.5058944629943044e-05,
|
||
|
|
"loss": 0.437102347612381,
|
||
|
|
"step": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8712871287128712,
|
||
|
|
"grad_norm": 0.2718164602566872,
|
||
|
|
"learning_rate": 1.4984550699427978e-05,
|
||
|
|
"loss": 0.5518525838851929,
|
||
|
|
"step": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8745874587458746,
|
||
|
|
"grad_norm": 0.2832474093938169,
|
||
|
|
"learning_rate": 1.4910230746889559e-05,
|
||
|
|
"loss": 0.5618141889572144,
|
||
|
|
"step": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8778877887788779,
|
||
|
|
"grad_norm": 0.2790138686096534,
|
||
|
|
"learning_rate": 1.4835985868548557e-05,
|
||
|
|
"loss": 0.4990406632423401,
|
||
|
|
"step": 569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.881188118811881,
|
||
|
|
"grad_norm": 0.26198363334655667,
|
||
|
|
"learning_rate": 1.4761817159518415e-05,
|
||
|
|
"loss": 0.5004926919937134,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8844884488448845,
|
||
|
|
"grad_norm": 0.28233065536105734,
|
||
|
|
"learning_rate": 1.4687725713789042e-05,
|
||
|
|
"loss": 0.5166051983833313,
|
||
|
|
"step": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8877887788778878,
|
||
|
|
"grad_norm": 0.2654807250852616,
|
||
|
|
"learning_rate": 1.461371262421074e-05,
|
||
|
|
"loss": 0.5510391592979431,
|
||
|
|
"step": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.891089108910891,
|
||
|
|
"grad_norm": 0.2766439695892797,
|
||
|
|
"learning_rate": 1.4539778982478061e-05,
|
||
|
|
"loss": 0.5305938720703125,
|
||
|
|
"step": 573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8943894389438944,
|
||
|
|
"grad_norm": 0.35617765802983586,
|
||
|
|
"learning_rate": 1.4465925879113663e-05,
|
||
|
|
"loss": 0.562718391418457,
|
||
|
|
"step": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8976897689768977,
|
||
|
|
"grad_norm": 0.26373250902859363,
|
||
|
|
"learning_rate": 1.4392154403452294e-05,
|
||
|
|
"loss": 0.541257381439209,
|
||
|
|
"step": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.900990099009901,
|
||
|
|
"grad_norm": 0.2584596806712207,
|
||
|
|
"learning_rate": 1.4318465643624696e-05,
|
||
|
|
"loss": 0.556663990020752,
|
||
|
|
"step": 576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9042904290429044,
|
||
|
|
"grad_norm": 0.2655751613308258,
|
||
|
|
"learning_rate": 1.4244860686541522e-05,
|
||
|
|
"loss": 0.5691581964492798,
|
||
|
|
"step": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9075907590759076,
|
||
|
|
"grad_norm": 0.3146864569567829,
|
||
|
|
"learning_rate": 1.4171340617877349e-05,
|
||
|
|
"loss": 0.513170063495636,
|
||
|
|
"step": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9108910891089108,
|
||
|
|
"grad_norm": 0.288458498752148,
|
||
|
|
"learning_rate": 1.4097906522054656e-05,
|
||
|
|
"loss": 0.5679588317871094,
|
||
|
|
"step": 579
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9141914191419143,
|
||
|
|
"grad_norm": 0.2858005511149637,
|
||
|
|
"learning_rate": 1.4024559482227818e-05,
|
||
|
|
"loss": 0.513796329498291,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9174917491749175,
|
||
|
|
"grad_norm": 0.25543101337641916,
|
||
|
|
"learning_rate": 1.3951300580267108e-05,
|
||
|
|
"loss": 0.4618416428565979,
|
||
|
|
"step": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9207920792079207,
|
||
|
|
"grad_norm": 0.2670194314216259,
|
||
|
|
"learning_rate": 1.3878130896742796e-05,
|
||
|
|
"loss": 0.5491312742233276,
|
||
|
|
"step": 582
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9240924092409242,
|
||
|
|
"grad_norm": 0.24204031552297342,
|
||
|
|
"learning_rate": 1.3805051510909164e-05,
|
||
|
|
"loss": 0.5524745583534241,
|
||
|
|
"step": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9273927392739274,
|
||
|
|
"grad_norm": 0.25091865473771396,
|
||
|
|
"learning_rate": 1.3732063500688604e-05,
|
||
|
|
"loss": 0.5232075452804565,
|
||
|
|
"step": 584
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9306930693069306,
|
||
|
|
"grad_norm": 0.26059464209400784,
|
||
|
|
"learning_rate": 1.3659167942655702e-05,
|
||
|
|
"loss": 0.5257346034049988,
|
||
|
|
"step": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.933993399339934,
|
||
|
|
"grad_norm": 0.2814401591736557,
|
||
|
|
"learning_rate": 1.35863659120214e-05,
|
||
|
|
"loss": 0.5196455717086792,
|
||
|
|
"step": 586
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9372937293729373,
|
||
|
|
"grad_norm": 0.2624714306516865,
|
||
|
|
"learning_rate": 1.3513658482617085e-05,
|
||
|
|
"loss": 0.5122568011283875,
|
||
|
|
"step": 587
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9405940594059405,
|
||
|
|
"grad_norm": 0.2644911414307543,
|
||
|
|
"learning_rate": 1.3441046726878786e-05,
|
||
|
|
"loss": 0.5236790180206299,
|
||
|
|
"step": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.943894389438944,
|
||
|
|
"grad_norm": 0.2699458396883844,
|
||
|
|
"learning_rate": 1.3368531715831337e-05,
|
||
|
|
"loss": 0.5508555173873901,
|
||
|
|
"step": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9471947194719472,
|
||
|
|
"grad_norm": 0.26005129022694123,
|
||
|
|
"learning_rate": 1.3296114519072594e-05,
|
||
|
|
"loss": 0.4742932617664337,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9504950495049505,
|
||
|
|
"grad_norm": 0.2530711129220065,
|
||
|
|
"learning_rate": 1.3223796204757638e-05,
|
||
|
|
"loss": 0.5406354665756226,
|
||
|
|
"step": 591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.953795379537954,
|
||
|
|
"grad_norm": 0.26847075280504556,
|
||
|
|
"learning_rate": 1.3151577839583043e-05,
|
||
|
|
"loss": 0.508262038230896,
|
||
|
|
"step": 592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9570957095709571,
|
||
|
|
"grad_norm": 0.2601716190776577,
|
||
|
|
"learning_rate": 1.3079460488771136e-05,
|
||
|
|
"loss": 0.5260204672813416,
|
||
|
|
"step": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9603960396039604,
|
||
|
|
"grad_norm": 0.2597900374740898,
|
||
|
|
"learning_rate": 1.3007445216054257e-05,
|
||
|
|
"loss": 0.522408127784729,
|
||
|
|
"step": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9636963696369638,
|
||
|
|
"grad_norm": 0.23858694591096777,
|
||
|
|
"learning_rate": 1.2935533083659114e-05,
|
||
|
|
"loss": 0.4849371910095215,
|
||
|
|
"step": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.966996699669967,
|
||
|
|
"grad_norm": 0.26399518807159883,
|
||
|
|
"learning_rate": 1.2863725152291091e-05,
|
||
|
|
"loss": 0.5319019556045532,
|
||
|
|
"step": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9702970297029703,
|
||
|
|
"grad_norm": 0.2797422170192374,
|
||
|
|
"learning_rate": 1.2792022481118587e-05,
|
||
|
|
"loss": 0.5562412738800049,
|
||
|
|
"step": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9735973597359737,
|
||
|
|
"grad_norm": 0.2537907416959109,
|
||
|
|
"learning_rate": 1.2720426127757431e-05,
|
||
|
|
"loss": 0.49608999490737915,
|
||
|
|
"step": 598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.976897689768977,
|
||
|
|
"grad_norm": 0.2521690484869479,
|
||
|
|
"learning_rate": 1.2648937148255253e-05,
|
||
|
|
"loss": 0.5082768201828003,
|
||
|
|
"step": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9801980198019802,
|
||
|
|
"grad_norm": 0.2572245668654862,
|
||
|
|
"learning_rate": 1.2577556597075933e-05,
|
||
|
|
"loss": 0.5706614255905151,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9834983498349836,
|
||
|
|
"grad_norm": 0.2697883750179181,
|
||
|
|
"learning_rate": 1.2506285527083991e-05,
|
||
|
|
"loss": 0.5366507768630981,
|
||
|
|
"step": 601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9867986798679866,
|
||
|
|
"grad_norm": 0.26402819852563175,
|
||
|
|
"learning_rate": 1.2435124989529139e-05,
|
||
|
|
"loss": 0.5462816953659058,
|
||
|
|
"step": 602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.99009900990099,
|
||
|
|
"grad_norm": 0.246894878071046,
|
||
|
|
"learning_rate": 1.236407603403072e-05,
|
||
|
|
"loss": 0.5050650238990784,
|
||
|
|
"step": 603
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9933993399339935,
|
||
|
|
"grad_norm": 0.477370357077484,
|
||
|
|
"learning_rate": 1.2293139708562221e-05,
|
||
|
|
"loss": 0.4915675222873688,
|
||
|
|
"step": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9966996699669965,
|
||
|
|
"grad_norm": 0.2657795870076786,
|
||
|
|
"learning_rate": 1.2222317059435863e-05,
|
||
|
|
"loss": 0.5807889103889465,
|
||
|
|
"step": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.2770967943671612,
|
||
|
|
"learning_rate": 1.2151609131287124e-05,
|
||
|
|
"loss": 0.49173152446746826,
|
||
|
|
"step": 606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0033003300330035,
|
||
|
|
"grad_norm": 0.7014931959992592,
|
||
|
|
"learning_rate": 1.2081016967059336e-05,
|
||
|
|
"loss": 0.4426806569099426,
|
||
|
|
"step": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0066006600660065,
|
||
|
|
"grad_norm": 0.3040348249510974,
|
||
|
|
"learning_rate": 1.201054160798833e-05,
|
||
|
|
"loss": 0.45669305324554443,
|
||
|
|
"step": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.00990099009901,
|
||
|
|
"grad_norm": 0.31030490189011145,
|
||
|
|
"learning_rate": 1.1940184093587047e-05,
|
||
|
|
"loss": 0.4638911783695221,
|
||
|
|
"step": 609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0132013201320134,
|
||
|
|
"grad_norm": 0.36234285165121427,
|
||
|
|
"learning_rate": 1.186994546163023e-05,
|
||
|
|
"loss": 0.4541138708591461,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0165016501650164,
|
||
|
|
"grad_norm": 0.38564024677228226,
|
||
|
|
"learning_rate": 1.1799826748139079e-05,
|
||
|
|
"loss": 0.49081191420555115,
|
||
|
|
"step": 611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.01980198019802,
|
||
|
|
"grad_norm": 0.3266656962672454,
|
||
|
|
"learning_rate": 1.1729828987366009e-05,
|
||
|
|
"loss": 0.4794033169746399,
|
||
|
|
"step": 612
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0231023102310233,
|
||
|
|
"grad_norm": 0.291304204290645,
|
||
|
|
"learning_rate": 1.165995321177939e-05,
|
||
|
|
"loss": 0.4142993688583374,
|
||
|
|
"step": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0264026402640263,
|
||
|
|
"grad_norm": 0.33294658416576944,
|
||
|
|
"learning_rate": 1.159020045204829e-05,
|
||
|
|
"loss": 0.47322210669517517,
|
||
|
|
"step": 614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0297029702970297,
|
||
|
|
"grad_norm": 0.3539618583487969,
|
||
|
|
"learning_rate": 1.15205717370273e-05,
|
||
|
|
"loss": 0.4899124503135681,
|
||
|
|
"step": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.033003300330033,
|
||
|
|
"grad_norm": 0.2952110750729378,
|
||
|
|
"learning_rate": 1.1451068093741355e-05,
|
||
|
|
"loss": 0.4857853055000305,
|
||
|
|
"step": 616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.036303630363036,
|
||
|
|
"grad_norm": 0.28290377247578213,
|
||
|
|
"learning_rate": 1.1381690547370559e-05,
|
||
|
|
"loss": 0.4790021479129791,
|
||
|
|
"step": 617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0396039603960396,
|
||
|
|
"grad_norm": 0.2902876717109542,
|
||
|
|
"learning_rate": 1.13124401212351e-05,
|
||
|
|
"loss": 0.4519282281398773,
|
||
|
|
"step": 618
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.042904290429043,
|
||
|
|
"grad_norm": 0.32584221310071065,
|
||
|
|
"learning_rate": 1.1243317836780138e-05,
|
||
|
|
"loss": 0.4738570749759674,
|
||
|
|
"step": 619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.046204620462046,
|
||
|
|
"grad_norm": 0.3093985088780693,
|
||
|
|
"learning_rate": 1.1174324713560751e-05,
|
||
|
|
"loss": 0.5111795663833618,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0495049504950495,
|
||
|
|
"grad_norm": 0.2707360386310654,
|
||
|
|
"learning_rate": 1.1105461769226858e-05,
|
||
|
|
"loss": 0.4750926196575165,
|
||
|
|
"step": 621
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.052805280528053,
|
||
|
|
"grad_norm": 0.3107814822051771,
|
||
|
|
"learning_rate": 1.1036730019508259e-05,
|
||
|
|
"loss": 0.4580341577529907,
|
||
|
|
"step": 622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.056105610561056,
|
||
|
|
"grad_norm": 0.28803288143665157,
|
||
|
|
"learning_rate": 1.0968130478199635e-05,
|
||
|
|
"loss": 0.43322116136550903,
|
||
|
|
"step": 623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0594059405940595,
|
||
|
|
"grad_norm": 0.2810686637672446,
|
||
|
|
"learning_rate": 1.0899664157145562e-05,
|
||
|
|
"loss": 0.5015532374382019,
|
||
|
|
"step": 624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.062706270627063,
|
||
|
|
"grad_norm": 0.28464578766110366,
|
||
|
|
"learning_rate": 1.0831332066225645e-05,
|
||
|
|
"loss": 0.4508541226387024,
|
||
|
|
"step": 625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.066006600660066,
|
||
|
|
"grad_norm": 0.2904901154874499,
|
||
|
|
"learning_rate": 1.0763135213339589e-05,
|
||
|
|
"loss": 0.49554720520973206,
|
||
|
|
"step": 626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0693069306930694,
|
||
|
|
"grad_norm": 0.27820378239401394,
|
||
|
|
"learning_rate": 1.0695074604392305e-05,
|
||
|
|
"loss": 0.4523652493953705,
|
||
|
|
"step": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.072607260726073,
|
||
|
|
"grad_norm": 0.2794675014886217,
|
||
|
|
"learning_rate": 1.0627151243279136e-05,
|
||
|
|
"loss": 0.44413498044013977,
|
||
|
|
"step": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.075907590759076,
|
||
|
|
"grad_norm": 0.30159300158430347,
|
||
|
|
"learning_rate": 1.055936613187101e-05,
|
||
|
|
"loss": 0.4645534157752991,
|
||
|
|
"step": 629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0792079207920793,
|
||
|
|
"grad_norm": 0.26698861915138783,
|
||
|
|
"learning_rate": 1.0491720269999663e-05,
|
||
|
|
"loss": 0.44823265075683594,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0825082508250823,
|
||
|
|
"grad_norm": 0.2813791646704669,
|
||
|
|
"learning_rate": 1.0424214655442891e-05,
|
||
|
|
"loss": 0.45181727409362793,
|
||
|
|
"step": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0858085808580857,
|
||
|
|
"grad_norm": 0.28721240697359884,
|
||
|
|
"learning_rate": 1.0356850283909852e-05,
|
||
|
|
"loss": 0.5371145009994507,
|
||
|
|
"step": 632
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.089108910891089,
|
||
|
|
"grad_norm": 0.26030729348418064,
|
||
|
|
"learning_rate": 1.0289628149026369e-05,
|
||
|
|
"loss": 0.4564274847507477,
|
||
|
|
"step": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.092409240924092,
|
||
|
|
"grad_norm": 0.3008427259435641,
|
||
|
|
"learning_rate": 1.0222549242320254e-05,
|
||
|
|
"loss": 0.4490276873111725,
|
||
|
|
"step": 634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0957095709570956,
|
||
|
|
"grad_norm": 0.27241405218961473,
|
||
|
|
"learning_rate": 1.0155614553206715e-05,
|
||
|
|
"loss": 0.4663650095462799,
|
||
|
|
"step": 635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.099009900990099,
|
||
|
|
"grad_norm": 0.2814271376941218,
|
||
|
|
"learning_rate": 1.0088825068973746e-05,
|
||
|
|
"loss": 0.46265488862991333,
|
||
|
|
"step": 636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.102310231023102,
|
||
|
|
"grad_norm": 0.27083223857822414,
|
||
|
|
"learning_rate": 1.002218177476756e-05,
|
||
|
|
"loss": 0.45717963576316833,
|
||
|
|
"step": 637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1056105610561056,
|
||
|
|
"grad_norm": 0.27321625989679976,
|
||
|
|
"learning_rate": 9.955685653578068e-06,
|
||
|
|
"loss": 0.47119495272636414,
|
||
|
|
"step": 638
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.108910891089109,
|
||
|
|
"grad_norm": 0.2756031623165562,
|
||
|
|
"learning_rate": 9.88933768622439e-06,
|
||
|
|
"loss": 0.46565738320350647,
|
||
|
|
"step": 639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.112211221122112,
|
||
|
|
"grad_norm": 0.26745369116167694,
|
||
|
|
"learning_rate": 9.823138851340337e-06,
|
||
|
|
"loss": 0.45610398054122925,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1155115511551155,
|
||
|
|
"grad_norm": 0.2722722292829376,
|
||
|
|
"learning_rate": 9.75709012536005e-06,
|
||
|
|
"loss": 0.4907280206680298,
|
||
|
|
"step": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.118811881188119,
|
||
|
|
"grad_norm": 0.3111977337695957,
|
||
|
|
"learning_rate": 9.691192482503546e-06,
|
||
|
|
"loss": 0.500091552734375,
|
||
|
|
"step": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.122112211221122,
|
||
|
|
"grad_norm": 0.2648612882642695,
|
||
|
|
"learning_rate": 9.625446894762371e-06,
|
||
|
|
"loss": 0.4330231547355652,
|
||
|
|
"step": 643
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1254125412541254,
|
||
|
|
"grad_norm": 0.2809597353379975,
|
||
|
|
"learning_rate": 9.559854331885233e-06,
|
||
|
|
"loss": 0.4750261902809143,
|
||
|
|
"step": 644
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.128712871287129,
|
||
|
|
"grad_norm": 0.28201431758911444,
|
||
|
|
"learning_rate": 9.49441576136374e-06,
|
||
|
|
"loss": 0.4567373991012573,
|
||
|
|
"step": 645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.132013201320132,
|
||
|
|
"grad_norm": 0.2901654659031683,
|
||
|
|
"learning_rate": 9.429132148418116e-06,
|
||
|
|
"loss": 0.4601932168006897,
|
||
|
|
"step": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1353135313531353,
|
||
|
|
"grad_norm": 0.2792782648133288,
|
||
|
|
"learning_rate": 9.364004455982931e-06,
|
||
|
|
"loss": 0.4909035265445709,
|
||
|
|
"step": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1386138613861387,
|
||
|
|
"grad_norm": 0.2531215125004539,
|
||
|
|
"learning_rate": 9.299033644692948e-06,
|
||
|
|
"loss": 0.4443170428276062,
|
||
|
|
"step": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1419141914191417,
|
||
|
|
"grad_norm": 0.2676386529649011,
|
||
|
|
"learning_rate": 9.234220672868928e-06,
|
||
|
|
"loss": 0.46534985303878784,
|
||
|
|
"step": 649
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.145214521452145,
|
||
|
|
"grad_norm": 0.2667778492620529,
|
||
|
|
"learning_rate": 9.169566496503476e-06,
|
||
|
|
"loss": 0.4351472854614258,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1485148514851486,
|
||
|
|
"grad_norm": 0.26819623679400084,
|
||
|
|
"learning_rate": 9.105072069246983e-06,
|
||
|
|
"loss": 0.41445475816726685,
|
||
|
|
"step": 651
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1518151815181517,
|
||
|
|
"grad_norm": 0.2627848025641513,
|
||
|
|
"learning_rate": 9.040738342393532e-06,
|
||
|
|
"loss": 0.475847989320755,
|
||
|
|
"step": 652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.155115511551155,
|
||
|
|
"grad_norm": 0.26883146792086515,
|
||
|
|
"learning_rate": 8.976566264866876e-06,
|
||
|
|
"loss": 0.48487618565559387,
|
||
|
|
"step": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1584158415841586,
|
||
|
|
"grad_norm": 0.2373773636564882,
|
||
|
|
"learning_rate": 8.912556783206414e-06,
|
||
|
|
"loss": 0.4661785364151001,
|
||
|
|
"step": 654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1617161716171616,
|
||
|
|
"grad_norm": 0.25939800378632233,
|
||
|
|
"learning_rate": 8.84871084155328e-06,
|
||
|
|
"loss": 0.48009538650512695,
|
||
|
|
"step": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.165016501650165,
|
||
|
|
"grad_norm": 0.26858346089342566,
|
||
|
|
"learning_rate": 8.785029381636387e-06,
|
||
|
|
"loss": 0.45644935965538025,
|
||
|
|
"step": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1683168316831685,
|
||
|
|
"grad_norm": 0.25509808532967904,
|
||
|
|
"learning_rate": 8.721513342758516e-06,
|
||
|
|
"loss": 0.4896699786186218,
|
||
|
|
"step": 657
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1716171617161715,
|
||
|
|
"grad_norm": 0.2678040151014407,
|
||
|
|
"learning_rate": 8.658163661782507e-06,
|
||
|
|
"loss": 0.4286258816719055,
|
||
|
|
"step": 658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.174917491749175,
|
||
|
|
"grad_norm": 0.25541690613787077,
|
||
|
|
"learning_rate": 8.59498127311742e-06,
|
||
|
|
"loss": 0.42029869556427,
|
||
|
|
"step": 659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1782178217821784,
|
||
|
|
"grad_norm": 0.2748486648157056,
|
||
|
|
"learning_rate": 8.531967108704722e-06,
|
||
|
|
"loss": 0.48522356152534485,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1815181518151814,
|
||
|
|
"grad_norm": 0.37918495336042346,
|
||
|
|
"learning_rate": 8.4691220980046e-06,
|
||
|
|
"loss": 0.461814284324646,
|
||
|
|
"step": 661
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.184818481848185,
|
||
|
|
"grad_norm": 0.2581277433441387,
|
||
|
|
"learning_rate": 8.406447167982205e-06,
|
||
|
|
"loss": 0.49913299083709717,
|
||
|
|
"step": 662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1881188118811883,
|
||
|
|
"grad_norm": 0.2804949954645611,
|
||
|
|
"learning_rate": 8.343943243094008e-06,
|
||
|
|
"loss": 0.4936009645462036,
|
||
|
|
"step": 663
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1914191419141913,
|
||
|
|
"grad_norm": 0.2621319196989517,
|
||
|
|
"learning_rate": 8.281611245274123e-06,
|
||
|
|
"loss": 0.44817712903022766,
|
||
|
|
"step": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1947194719471947,
|
||
|
|
"grad_norm": 0.26441078845804705,
|
||
|
|
"learning_rate": 8.219452093920763e-06,
|
||
|
|
"loss": 0.482817143201828,
|
||
|
|
"step": 665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.198019801980198,
|
||
|
|
"grad_norm": 0.25954690482303255,
|
||
|
|
"learning_rate": 8.157466705882645e-06,
|
||
|
|
"loss": 0.4643383026123047,
|
||
|
|
"step": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.201320132013201,
|
||
|
|
"grad_norm": 0.26531559844936237,
|
||
|
|
"learning_rate": 8.095655995445472e-06,
|
||
|
|
"loss": 0.4797602593898773,
|
||
|
|
"step": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2046204620462047,
|
||
|
|
"grad_norm": 0.26505896756203806,
|
||
|
|
"learning_rate": 8.03402087431844e-06,
|
||
|
|
"loss": 0.44109994173049927,
|
||
|
|
"step": 668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.207920792079208,
|
||
|
|
"grad_norm": 0.24679836702691405,
|
||
|
|
"learning_rate": 7.972562251620817e-06,
|
||
|
|
"loss": 0.46359869837760925,
|
||
|
|
"step": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.211221122112211,
|
||
|
|
"grad_norm": 0.23925371744802634,
|
||
|
|
"learning_rate": 7.9112810338685e-06,
|
||
|
|
"loss": 0.4576035141944885,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2145214521452146,
|
||
|
|
"grad_norm": 0.2854541383231889,
|
||
|
|
"learning_rate": 7.850178124960678e-06,
|
||
|
|
"loss": 0.40902045369148254,
|
||
|
|
"step": 671
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.217821782178218,
|
||
|
|
"grad_norm": 0.2726752140080075,
|
||
|
|
"learning_rate": 7.789254426166454e-06,
|
||
|
|
"loss": 0.45797932147979736,
|
||
|
|
"step": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.221122112211221,
|
||
|
|
"grad_norm": 0.2463208855251595,
|
||
|
|
"learning_rate": 7.728510836111602e-06,
|
||
|
|
"loss": 0.43204474449157715,
|
||
|
|
"step": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2244224422442245,
|
||
|
|
"grad_norm": 0.2632084235311744,
|
||
|
|
"learning_rate": 7.667948250765278e-06,
|
||
|
|
"loss": 0.46007901430130005,
|
||
|
|
"step": 674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.227722772277228,
|
||
|
|
"grad_norm": 0.2508043419515415,
|
||
|
|
"learning_rate": 7.607567563426823e-06,
|
||
|
|
"loss": 0.46342402696609497,
|
||
|
|
"step": 675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.231023102310231,
|
||
|
|
"grad_norm": 0.25728063807342477,
|
||
|
|
"learning_rate": 7.5473696647125605e-06,
|
||
|
|
"loss": 0.48953354358673096,
|
||
|
|
"step": 676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2343234323432344,
|
||
|
|
"grad_norm": 0.2667124077929822,
|
||
|
|
"learning_rate": 7.487355442542696e-06,
|
||
|
|
"loss": 0.5022163391113281,
|
||
|
|
"step": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.237623762376238,
|
||
|
|
"grad_norm": 0.2666199657154719,
|
||
|
|
"learning_rate": 7.4275257821281995e-06,
|
||
|
|
"loss": 0.5144001245498657,
|
||
|
|
"step": 678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.240924092409241,
|
||
|
|
"grad_norm": 0.2598091753134079,
|
||
|
|
"learning_rate": 7.3678815659577505e-06,
|
||
|
|
"loss": 0.489937961101532,
|
||
|
|
"step": 679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2442244224422443,
|
||
|
|
"grad_norm": 0.25000738365352393,
|
||
|
|
"learning_rate": 7.3084236737847125e-06,
|
||
|
|
"loss": 0.48842746019363403,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2475247524752477,
|
||
|
|
"grad_norm": 0.2672754249714767,
|
||
|
|
"learning_rate": 7.249152982614176e-06,
|
||
|
|
"loss": 0.5024458765983582,
|
||
|
|
"step": 681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2508250825082508,
|
||
|
|
"grad_norm": 0.25558161311007577,
|
||
|
|
"learning_rate": 7.190070366690014e-06,
|
||
|
|
"loss": 0.46162086725234985,
|
||
|
|
"step": 682
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.254125412541254,
|
||
|
|
"grad_norm": 0.24807827286497117,
|
||
|
|
"learning_rate": 7.13117669748199e-06,
|
||
|
|
"loss": 0.44991785287857056,
|
||
|
|
"step": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2574257425742577,
|
||
|
|
"grad_norm": 0.24635539567650763,
|
||
|
|
"learning_rate": 7.072472843672877e-06,
|
||
|
|
"loss": 0.43738633394241333,
|
||
|
|
"step": 684
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2607260726072607,
|
||
|
|
"grad_norm": 0.25605350464823584,
|
||
|
|
"learning_rate": 7.013959671145691e-06,
|
||
|
|
"loss": 0.46122169494628906,
|
||
|
|
"step": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.264026402640264,
|
||
|
|
"grad_norm": 0.24205320356251103,
|
||
|
|
"learning_rate": 6.955638042970896e-06,
|
||
|
|
"loss": 0.4504377841949463,
|
||
|
|
"step": 686
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2673267326732676,
|
||
|
|
"grad_norm": 0.2570116198268661,
|
||
|
|
"learning_rate": 6.897508819393645e-06,
|
||
|
|
"loss": 0.4620972275733948,
|
||
|
|
"step": 687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2706270627062706,
|
||
|
|
"grad_norm": 0.2629731642768507,
|
||
|
|
"learning_rate": 6.8395728578211525e-06,
|
||
|
|
"loss": 0.5271490216255188,
|
||
|
|
"step": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.273927392739274,
|
||
|
|
"grad_norm": 1.9898738742816064,
|
||
|
|
"learning_rate": 6.781831012810001e-06,
|
||
|
|
"loss": 0.4448450803756714,
|
||
|
|
"step": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2772277227722775,
|
||
|
|
"grad_norm": 0.3213733503923664,
|
||
|
|
"learning_rate": 6.72428413605354e-06,
|
||
|
|
"loss": 0.4602925181388855,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2805280528052805,
|
||
|
|
"grad_norm": 0.26788259096559774,
|
||
|
|
"learning_rate": 6.6669330763693485e-06,
|
||
|
|
"loss": 0.4722862243652344,
|
||
|
|
"step": 691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.283828382838284,
|
||
|
|
"grad_norm": 0.25272077157298134,
|
||
|
|
"learning_rate": 6.609778679686694e-06,
|
||
|
|
"loss": 0.47454553842544556,
|
||
|
|
"step": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.287128712871287,
|
||
|
|
"grad_norm": 0.24015565864939845,
|
||
|
|
"learning_rate": 6.552821789034067e-06,
|
||
|
|
"loss": 0.4750802516937256,
|
||
|
|
"step": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2904290429042904,
|
||
|
|
"grad_norm": 0.2559036200154721,
|
||
|
|
"learning_rate": 6.496063244526723e-06,
|
||
|
|
"loss": 0.4640570282936096,
|
||
|
|
"step": 694
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.293729372937294,
|
||
|
|
"grad_norm": 0.25061879602537984,
|
||
|
|
"learning_rate": 6.439503883354323e-06,
|
||
|
|
"loss": 0.47181540727615356,
|
||
|
|
"step": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.297029702970297,
|
||
|
|
"grad_norm": 0.24588968301020392,
|
||
|
|
"learning_rate": 6.3831445397685755e-06,
|
||
|
|
"loss": 0.4335097372531891,
|
||
|
|
"step": 696
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3003300330033003,
|
||
|
|
"grad_norm": 0.26057507812572134,
|
||
|
|
"learning_rate": 6.3269860450709016e-06,
|
||
|
|
"loss": 0.5158364772796631,
|
||
|
|
"step": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3036303630363038,
|
||
|
|
"grad_norm": 0.24767301357183136,
|
||
|
|
"learning_rate": 6.271029227600216e-06,
|
||
|
|
"loss": 0.497075617313385,
|
||
|
|
"step": 698
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3069306930693068,
|
||
|
|
"grad_norm": 0.2612680212099097,
|
||
|
|
"learning_rate": 6.215274912720697e-06,
|
||
|
|
"loss": 0.4946526288986206,
|
||
|
|
"step": 699
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.31023102310231,
|
||
|
|
"grad_norm": 0.25694731286364175,
|
||
|
|
"learning_rate": 6.159723922809577e-06,
|
||
|
|
"loss": 0.4632418155670166,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3135313531353137,
|
||
|
|
"grad_norm": 0.26826842519558464,
|
||
|
|
"learning_rate": 6.10437707724507e-06,
|
||
|
|
"loss": 0.4936927258968353,
|
||
|
|
"step": 701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3168316831683167,
|
||
|
|
"grad_norm": 0.3039451981089408,
|
||
|
|
"learning_rate": 6.049235192394242e-06,
|
||
|
|
"loss": 0.4373137056827545,
|
||
|
|
"step": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.32013201320132,
|
||
|
|
"grad_norm": 0.2502753739217944,
|
||
|
|
"learning_rate": 5.994299081600996e-06,
|
||
|
|
"loss": 0.49224400520324707,
|
||
|
|
"step": 703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3234323432343236,
|
||
|
|
"grad_norm": 0.25232784831466315,
|
||
|
|
"learning_rate": 5.939569555174045e-06,
|
||
|
|
"loss": 0.453000545501709,
|
||
|
|
"step": 704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3267326732673266,
|
||
|
|
"grad_norm": 0.2443845287083898,
|
||
|
|
"learning_rate": 5.885047420374992e-06,
|
||
|
|
"loss": 0.4201410114765167,
|
||
|
|
"step": 705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.33003300330033,
|
||
|
|
"grad_norm": 0.2757856931959748,
|
||
|
|
"learning_rate": 5.830733481406415e-06,
|
||
|
|
"loss": 0.4817071557044983,
|
||
|
|
"step": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3333333333333335,
|
||
|
|
"grad_norm": 0.23548633980687703,
|
||
|
|
"learning_rate": 5.776628539399975e-06,
|
||
|
|
"loss": 0.42609190940856934,
|
||
|
|
"step": 707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3366336633663365,
|
||
|
|
"grad_norm": 0.2484780532867763,
|
||
|
|
"learning_rate": 5.722733392404652e-06,
|
||
|
|
"loss": 0.46225881576538086,
|
||
|
|
"step": 708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.33993399339934,
|
||
|
|
"grad_norm": 0.28677279656296756,
|
||
|
|
"learning_rate": 5.669048835374933e-06,
|
||
|
|
"loss": 0.49061962962150574,
|
||
|
|
"step": 709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3432343234323434,
|
||
|
|
"grad_norm": 0.25600200089074804,
|
||
|
|
"learning_rate": 5.615575660159089e-06,
|
||
|
|
"loss": 0.4506024122238159,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3465346534653464,
|
||
|
|
"grad_norm": 0.23921559671813297,
|
||
|
|
"learning_rate": 5.562314655487522e-06,
|
||
|
|
"loss": 0.4433022141456604,
|
||
|
|
"step": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.34983498349835,
|
||
|
|
"grad_norm": 0.26708565402858225,
|
||
|
|
"learning_rate": 5.5092666069611055e-06,
|
||
|
|
"loss": 0.45988917350769043,
|
||
|
|
"step": 712
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3531353135313533,
|
||
|
|
"grad_norm": 0.2294068192725238,
|
||
|
|
"learning_rate": 5.4564322970396154e-06,
|
||
|
|
"loss": 0.44675180315971375,
|
||
|
|
"step": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3564356435643563,
|
||
|
|
"grad_norm": 0.2431380886271115,
|
||
|
|
"learning_rate": 5.403812505030157e-06,
|
||
|
|
"loss": 0.46991807222366333,
|
||
|
|
"step": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3597359735973598,
|
||
|
|
"grad_norm": 0.2412850801003648,
|
||
|
|
"learning_rate": 5.351408007075714e-06,
|
||
|
|
"loss": 0.49208664894104004,
|
||
|
|
"step": 715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.363036303630363,
|
||
|
|
"grad_norm": 2.760535806072788,
|
||
|
|
"learning_rate": 5.299219576143673e-06,
|
||
|
|
"loss": 0.48280128836631775,
|
||
|
|
"step": 716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.366336633663366,
|
||
|
|
"grad_norm": 0.24609236023763137,
|
||
|
|
"learning_rate": 5.247247982014414e-06,
|
||
|
|
"loss": 0.4491961896419525,
|
||
|
|
"step": 717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3696369636963697,
|
||
|
|
"grad_norm": 0.24672380739006747,
|
||
|
|
"learning_rate": 5.195493991269991e-06,
|
||
|
|
"loss": 0.4943190813064575,
|
||
|
|
"step": 718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.372937293729373,
|
||
|
|
"grad_norm": 0.27378763646010795,
|
||
|
|
"learning_rate": 5.143958367282795e-06,
|
||
|
|
"loss": 0.4586840867996216,
|
||
|
|
"step": 719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.376237623762376,
|
||
|
|
"grad_norm": 0.2422334792581867,
|
||
|
|
"learning_rate": 5.0926418702042914e-06,
|
||
|
|
"loss": 0.46227943897247314,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3795379537953796,
|
||
|
|
"grad_norm": 0.23796137337817433,
|
||
|
|
"learning_rate": 5.041545256953839e-06,
|
||
|
|
"loss": 0.45386868715286255,
|
||
|
|
"step": 721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.382838283828383,
|
||
|
|
"grad_norm": 0.24415832537414764,
|
||
|
|
"learning_rate": 4.990669281207492e-06,
|
||
|
|
"loss": 0.5026980042457581,
|
||
|
|
"step": 722
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.386138613861386,
|
||
|
|
"grad_norm": 0.247792875546048,
|
||
|
|
"learning_rate": 4.940014693386909e-06,
|
||
|
|
"loss": 0.4834757447242737,
|
||
|
|
"step": 723
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3894389438943895,
|
||
|
|
"grad_norm": 0.43027345510854853,
|
||
|
|
"learning_rate": 4.889582240648254e-06,
|
||
|
|
"loss": 0.44382545351982117,
|
||
|
|
"step": 724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3927392739273925,
|
||
|
|
"grad_norm": 0.2519737312346543,
|
||
|
|
"learning_rate": 4.839372666871212e-06,
|
||
|
|
"loss": 0.45313894748687744,
|
||
|
|
"step": 725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.396039603960396,
|
||
|
|
"grad_norm": 0.23932824454201898,
|
||
|
|
"learning_rate": 4.789386712647994e-06,
|
||
|
|
"loss": 0.4597586393356323,
|
||
|
|
"step": 726
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3993399339933994,
|
||
|
|
"grad_norm": 0.23075224453442636,
|
||
|
|
"learning_rate": 4.739625115272408e-06,
|
||
|
|
"loss": 0.4427994191646576,
|
||
|
|
"step": 727
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4026402640264024,
|
||
|
|
"grad_norm": 0.24450312969705348,
|
||
|
|
"learning_rate": 4.690088608729007e-06,
|
||
|
|
"loss": 0.4459637403488159,
|
||
|
|
"step": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.405940594059406,
|
||
|
|
"grad_norm": 0.2516039358654293,
|
||
|
|
"learning_rate": 4.640777923682247e-06,
|
||
|
|
"loss": 0.5043150186538696,
|
||
|
|
"step": 729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4092409240924093,
|
||
|
|
"grad_norm": 0.26743057517217783,
|
||
|
|
"learning_rate": 4.5916937874657055e-06,
|
||
|
|
"loss": 0.4942860007286072,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4125412541254123,
|
||
|
|
"grad_norm": 0.25489023032736696,
|
||
|
|
"learning_rate": 4.5428369240713655e-06,
|
||
|
|
"loss": 0.4572402834892273,
|
||
|
|
"step": 731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4158415841584158,
|
||
|
|
"grad_norm": 0.24954926782274506,
|
||
|
|
"learning_rate": 4.494208054138934e-06,
|
||
|
|
"loss": 0.44927412271499634,
|
||
|
|
"step": 732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.419141914191419,
|
||
|
|
"grad_norm": 0.24684795220524788,
|
||
|
|
"learning_rate": 4.445807894945211e-06,
|
||
|
|
"loss": 0.461928129196167,
|
||
|
|
"step": 733
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4224422442244222,
|
||
|
|
"grad_norm": 0.2375757440633774,
|
||
|
|
"learning_rate": 4.397637160393493e-06,
|
||
|
|
"loss": 0.46279191970825195,
|
||
|
|
"step": 734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4257425742574257,
|
||
|
|
"grad_norm": 0.24407488686385456,
|
||
|
|
"learning_rate": 4.349696561003076e-06,
|
||
|
|
"loss": 0.48653045296669006,
|
||
|
|
"step": 735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.429042904290429,
|
||
|
|
"grad_norm": 0.2443771510662661,
|
||
|
|
"learning_rate": 4.301986803898752e-06,
|
||
|
|
"loss": 0.4587661027908325,
|
||
|
|
"step": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.432343234323432,
|
||
|
|
"grad_norm": 0.25142970699984885,
|
||
|
|
"learning_rate": 4.2545085928003906e-06,
|
||
|
|
"loss": 0.4946083426475525,
|
||
|
|
"step": 737
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4356435643564356,
|
||
|
|
"grad_norm": 0.2446760243354809,
|
||
|
|
"learning_rate": 4.207262628012534e-06,
|
||
|
|
"loss": 0.4614926278591156,
|
||
|
|
"step": 738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.438943894389439,
|
||
|
|
"grad_norm": 0.24323846273380414,
|
||
|
|
"learning_rate": 4.160249606414109e-06,
|
||
|
|
"loss": 0.46377992630004883,
|
||
|
|
"step": 739
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.442244224422442,
|
||
|
|
"grad_norm": 0.2554844227936452,
|
||
|
|
"learning_rate": 4.1134702214481126e-06,
|
||
|
|
"loss": 0.4217844009399414,
|
||
|
|
"step": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4455445544554455,
|
||
|
|
"grad_norm": 0.40365970056175393,
|
||
|
|
"learning_rate": 4.066925163111406e-06,
|
||
|
|
"loss": 0.4616321325302124,
|
||
|
|
"step": 741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.448844884488449,
|
||
|
|
"grad_norm": 0.23727547629912737,
|
||
|
|
"learning_rate": 4.020615117944515e-06,
|
||
|
|
"loss": 0.48755043745040894,
|
||
|
|
"step": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.452145214521452,
|
||
|
|
"grad_norm": 0.2636488971277773,
|
||
|
|
"learning_rate": 3.974540769021529e-06,
|
||
|
|
"loss": 0.47338151931762695,
|
||
|
|
"step": 743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4554455445544554,
|
||
|
|
"grad_norm": 0.26687939105998304,
|
||
|
|
"learning_rate": 3.928702795940007e-06,
|
||
|
|
"loss": 0.47220849990844727,
|
||
|
|
"step": 744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.458745874587459,
|
||
|
|
"grad_norm": 0.23440870124340746,
|
||
|
|
"learning_rate": 3.883101874810966e-06,
|
||
|
|
"loss": 0.4117845296859741,
|
||
|
|
"step": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.462046204620462,
|
||
|
|
"grad_norm": 0.2389531188545627,
|
||
|
|
"learning_rate": 3.8377386782488875e-06,
|
||
|
|
"loss": 0.44338276982307434,
|
||
|
|
"step": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4653465346534653,
|
||
|
|
"grad_norm": 0.28253943840492757,
|
||
|
|
"learning_rate": 3.7926138753618257e-06,
|
||
|
|
"loss": 0.470272958278656,
|
||
|
|
"step": 747
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4686468646864688,
|
||
|
|
"grad_norm": 0.2533414456878978,
|
||
|
|
"learning_rate": 3.747728131741517e-06,
|
||
|
|
"loss": 0.4825139045715332,
|
||
|
|
"step": 748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4719471947194718,
|
||
|
|
"grad_norm": 0.22813621303002277,
|
||
|
|
"learning_rate": 3.703082109453575e-06,
|
||
|
|
"loss": 0.43612140417099,
|
||
|
|
"step": 749
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4752475247524752,
|
||
|
|
"grad_norm": 0.22709733679425215,
|
||
|
|
"learning_rate": 3.6586764670277065e-06,
|
||
|
|
"loss": 0.4573146402835846,
|
||
|
|
"step": 750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4785478547854787,
|
||
|
|
"grad_norm": 0.24807030489347143,
|
||
|
|
"learning_rate": 3.61451185944802e-06,
|
||
|
|
"loss": 0.4419093430042267,
|
||
|
|
"step": 751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4818481848184817,
|
||
|
|
"grad_norm": 0.23735191741997233,
|
||
|
|
"learning_rate": 3.570588938143353e-06,
|
||
|
|
"loss": 0.440906822681427,
|
||
|
|
"step": 752
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.485148514851485,
|
||
|
|
"grad_norm": 0.24792760735437452,
|
||
|
|
"learning_rate": 3.5269083509776735e-06,
|
||
|
|
"loss": 0.432383269071579,
|
||
|
|
"step": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4884488448844886,
|
||
|
|
"grad_norm": 0.24788857238042053,
|
||
|
|
"learning_rate": 3.4834707422404957e-06,
|
||
|
|
"loss": 0.4615401029586792,
|
||
|
|
"step": 754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4917491749174916,
|
||
|
|
"grad_norm": 0.29288725170403773,
|
||
|
|
"learning_rate": 3.440276752637417e-06,
|
||
|
|
"loss": 0.43933019042015076,
|
||
|
|
"step": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.495049504950495,
|
||
|
|
"grad_norm": 0.24422605775888084,
|
||
|
|
"learning_rate": 3.3973270192806427e-06,
|
||
|
|
"loss": 0.4651945233345032,
|
||
|
|
"step": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4983498349834985,
|
||
|
|
"grad_norm": 0.3408455968625333,
|
||
|
|
"learning_rate": 3.3546221756795874e-06,
|
||
|
|
"loss": 0.4423069953918457,
|
||
|
|
"step": 757
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5016501650165015,
|
||
|
|
"grad_norm": 0.32517130275625505,
|
||
|
|
"learning_rate": 3.3121628517315373e-06,
|
||
|
|
"loss": 0.4905679225921631,
|
||
|
|
"step": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.504950495049505,
|
||
|
|
"grad_norm": 0.24015956320352147,
|
||
|
|
"learning_rate": 3.2699496737123758e-06,
|
||
|
|
"loss": 0.46989548206329346,
|
||
|
|
"step": 759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5082508250825084,
|
||
|
|
"grad_norm": 0.24393784259324253,
|
||
|
|
"learning_rate": 3.2279832642673025e-06,
|
||
|
|
"loss": 0.5168344378471375,
|
||
|
|
"step": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5115511551155114,
|
||
|
|
"grad_norm": 0.2446798962745333,
|
||
|
|
"learning_rate": 3.186264242401693e-06,
|
||
|
|
"loss": 0.46055924892425537,
|
||
|
|
"step": 761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.514851485148515,
|
||
|
|
"grad_norm": 0.2561165095643357,
|
||
|
|
"learning_rate": 3.144793223471949e-06,
|
||
|
|
"loss": 0.5135318040847778,
|
||
|
|
"step": 762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5181518151815183,
|
||
|
|
"grad_norm": 1.1234233736547772,
|
||
|
|
"learning_rate": 3.1035708191764246e-06,
|
||
|
|
"loss": 0.5026534199714661,
|
||
|
|
"step": 763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5214521452145213,
|
||
|
|
"grad_norm": 0.23866674349332329,
|
||
|
|
"learning_rate": 3.0625976375463938e-06,
|
||
|
|
"loss": 0.43348389863967896,
|
||
|
|
"step": 764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5247524752475248,
|
||
|
|
"grad_norm": 0.2295043927466033,
|
||
|
|
"learning_rate": 3.021874282937103e-06,
|
||
|
|
"loss": 0.4620594382286072,
|
||
|
|
"step": 765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5280528052805282,
|
||
|
|
"grad_norm": 0.25250691113798673,
|
||
|
|
"learning_rate": 2.9814013560188425e-06,
|
||
|
|
"loss": 0.4646865725517273,
|
||
|
|
"step": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5313531353135312,
|
||
|
|
"grad_norm": 0.2396511266141401,
|
||
|
|
"learning_rate": 2.9411794537680795e-06,
|
||
|
|
"loss": 0.46846333146095276,
|
||
|
|
"step": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5346534653465347,
|
||
|
|
"grad_norm": 0.24818691561244743,
|
||
|
|
"learning_rate": 2.901209169458672e-06,
|
||
|
|
"loss": 0.487953782081604,
|
||
|
|
"step": 768
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.537953795379538,
|
||
|
|
"grad_norm": 0.24296952409375147,
|
||
|
|
"learning_rate": 2.861491092653115e-06,
|
||
|
|
"loss": 0.4543481469154358,
|
||
|
|
"step": 769
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.541254125412541,
|
||
|
|
"grad_norm": 0.24368208278529027,
|
||
|
|
"learning_rate": 2.822025809193818e-06,
|
||
|
|
"loss": 0.4961584806442261,
|
||
|
|
"step": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5445544554455446,
|
||
|
|
"grad_norm": 0.2377375055697493,
|
||
|
|
"learning_rate": 2.7828139011944967e-06,
|
||
|
|
"loss": 0.44123750925064087,
|
||
|
|
"step": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.547854785478548,
|
||
|
|
"grad_norm": 0.2301227484744363,
|
||
|
|
"learning_rate": 2.743855947031575e-06,
|
||
|
|
"loss": 0.43014320731163025,
|
||
|
|
"step": 772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.551155115511551,
|
||
|
|
"grad_norm": 0.2250422650499226,
|
||
|
|
"learning_rate": 2.7051525213356546e-06,
|
||
|
|
"loss": 0.4774499535560608,
|
||
|
|
"step": 773
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5544554455445545,
|
||
|
|
"grad_norm": 0.23823454905644054,
|
||
|
|
"learning_rate": 2.6667041949830186e-06,
|
||
|
|
"loss": 0.44963133335113525,
|
||
|
|
"step": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.557755775577558,
|
||
|
|
"grad_norm": 0.2554981481850554,
|
||
|
|
"learning_rate": 2.6285115350872524e-06,
|
||
|
|
"loss": 0.4840245842933655,
|
||
|
|
"step": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.561056105610561,
|
||
|
|
"grad_norm": 0.2589754738757413,
|
||
|
|
"learning_rate": 2.5905751049908466e-06,
|
||
|
|
"loss": 0.5490096807479858,
|
||
|
|
"step": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5643564356435644,
|
||
|
|
"grad_norm": 0.30754095371590884,
|
||
|
|
"learning_rate": 2.5528954642568947e-06,
|
||
|
|
"loss": 0.4965711832046509,
|
||
|
|
"step": 777
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.567656765676568,
|
||
|
|
"grad_norm": 0.2261872478084121,
|
||
|
|
"learning_rate": 2.5154731686608424e-06,
|
||
|
|
"loss": 0.4518459439277649,
|
||
|
|
"step": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.570957095709571,
|
||
|
|
"grad_norm": 0.24374764034742216,
|
||
|
|
"learning_rate": 2.4783087701823026e-06,
|
||
|
|
"loss": 0.5022287964820862,
|
||
|
|
"step": 779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5742574257425743,
|
||
|
|
"grad_norm": 0.2531412256958666,
|
||
|
|
"learning_rate": 2.441402816996876e-06,
|
||
|
|
"loss": 0.47195330262184143,
|
||
|
|
"step": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5775577557755778,
|
||
|
|
"grad_norm": 0.25588546327446415,
|
||
|
|
"learning_rate": 2.4047558534681124e-06,
|
||
|
|
"loss": 0.5155715346336365,
|
||
|
|
"step": 781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.580858085808581,
|
||
|
|
"grad_norm": 0.26863032492519423,
|
||
|
|
"learning_rate": 2.3683684201394507e-06,
|
||
|
|
"loss": 0.46963661909103394,
|
||
|
|
"step": 782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5841584158415842,
|
||
|
|
"grad_norm": 0.2303264290466175,
|
||
|
|
"learning_rate": 2.3322410537262495e-06,
|
||
|
|
"loss": 0.4279938340187073,
|
||
|
|
"step": 783
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5874587458745877,
|
||
|
|
"grad_norm": 0.24160002325917174,
|
||
|
|
"learning_rate": 2.296374287107883e-06,
|
||
|
|
"loss": 0.47818487882614136,
|
||
|
|
"step": 784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5907590759075907,
|
||
|
|
"grad_norm": 0.23493031875502465,
|
||
|
|
"learning_rate": 2.260768649319869e-06,
|
||
|
|
"loss": 0.4445609152317047,
|
||
|
|
"step": 785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.594059405940594,
|
||
|
|
"grad_norm": 0.2545526596288379,
|
||
|
|
"learning_rate": 2.2254246655460765e-06,
|
||
|
|
"loss": 0.4838835895061493,
|
||
|
|
"step": 786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5973597359735976,
|
||
|
|
"grad_norm": 0.24631479441885146,
|
||
|
|
"learning_rate": 2.1903428571109566e-06,
|
||
|
|
"loss": 0.4454101324081421,
|
||
|
|
"step": 787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6006600660066006,
|
||
|
|
"grad_norm": 0.2399303225290425,
|
||
|
|
"learning_rate": 2.1555237414718854e-06,
|
||
|
|
"loss": 0.46468472480773926,
|
||
|
|
"step": 788
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.603960396039604,
|
||
|
|
"grad_norm": 0.24533578787784271,
|
||
|
|
"learning_rate": 2.1209678322115133e-06,
|
||
|
|
"loss": 0.508684515953064,
|
||
|
|
"step": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6072607260726075,
|
||
|
|
"grad_norm": 0.23699012050293838,
|
||
|
|
"learning_rate": 2.0866756390301778e-06,
|
||
|
|
"loss": 0.46998751163482666,
|
||
|
|
"step": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6105610561056105,
|
||
|
|
"grad_norm": 0.22442653448303418,
|
||
|
|
"learning_rate": 2.0526476677384123e-06,
|
||
|
|
"loss": 0.41589513421058655,
|
||
|
|
"step": 791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.613861386138614,
|
||
|
|
"grad_norm": 0.23870429201603713,
|
||
|
|
"learning_rate": 2.018884420249474e-06,
|
||
|
|
"loss": 0.4948643445968628,
|
||
|
|
"step": 792
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6171617161716174,
|
||
|
|
"grad_norm": 0.23103305184303033,
|
||
|
|
"learning_rate": 1.9853863945719243e-06,
|
||
|
|
"loss": 0.4494874179363251,
|
||
|
|
"step": 793
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6204620462046204,
|
||
|
|
"grad_norm": 0.23980252076908543,
|
||
|
|
"learning_rate": 1.9521540848023113e-06,
|
||
|
|
"loss": 0.42173343896865845,
|
||
|
|
"step": 794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.623762376237624,
|
||
|
|
"grad_norm": 0.24254851053091633,
|
||
|
|
"learning_rate": 1.9191879811178605e-06,
|
||
|
|
"loss": 0.4319555461406708,
|
||
|
|
"step": 795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6270627062706273,
|
||
|
|
"grad_norm": 0.21769714480169441,
|
||
|
|
"learning_rate": 1.8864885697692582e-06,
|
||
|
|
"loss": 0.40467706322669983,
|
||
|
|
"step": 796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6303630363036303,
|
||
|
|
"grad_norm": 0.23815188307796767,
|
||
|
|
"learning_rate": 1.8540563330734662e-06,
|
||
|
|
"loss": 0.5141273736953735,
|
||
|
|
"step": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.633663366336634,
|
||
|
|
"grad_norm": 0.23237959155910853,
|
||
|
|
"learning_rate": 1.8218917494066212e-06,
|
||
|
|
"loss": 0.44990289211273193,
|
||
|
|
"step": 798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6369636963696372,
|
||
|
|
"grad_norm": 0.2393948822814923,
|
||
|
|
"learning_rate": 1.7899952931969756e-06,
|
||
|
|
"loss": 0.4878673553466797,
|
||
|
|
"step": 799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6402640264026402,
|
||
|
|
"grad_norm": 0.22595932266177446,
|
||
|
|
"learning_rate": 1.7583674349178803e-06,
|
||
|
|
"loss": 0.46406376361846924,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6435643564356437,
|
||
|
|
"grad_norm": 0.22163499847677615,
|
||
|
|
"learning_rate": 1.7270086410808762e-06,
|
||
|
|
"loss": 0.44470641016960144,
|
||
|
|
"step": 801
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6468646864686467,
|
||
|
|
"grad_norm": 0.23461158504190754,
|
||
|
|
"learning_rate": 1.695919374228796e-06,
|
||
|
|
"loss": 0.5306479930877686,
|
||
|
|
"step": 802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.65016501650165,
|
||
|
|
"grad_norm": 0.23844670077139818,
|
||
|
|
"learning_rate": 1.6651000929289462e-06,
|
||
|
|
"loss": 0.4570600390434265,
|
||
|
|
"step": 803
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6534653465346536,
|
||
|
|
"grad_norm": 0.24202990025785212,
|
||
|
|
"learning_rate": 1.6345512517663275e-06,
|
||
|
|
"loss": 0.48561781644821167,
|
||
|
|
"step": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6567656765676566,
|
||
|
|
"grad_norm": 0.23785932147050265,
|
||
|
|
"learning_rate": 1.6042733013369604e-06,
|
||
|
|
"loss": 0.4666748642921448,
|
||
|
|
"step": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.66006600660066,
|
||
|
|
"grad_norm": 0.2420529385568233,
|
||
|
|
"learning_rate": 1.5742666882412106e-06,
|
||
|
|
"loss": 0.4761434495449066,
|
||
|
|
"step": 806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6633663366336635,
|
||
|
|
"grad_norm": 0.23716960917200494,
|
||
|
|
"learning_rate": 1.5445318550772204e-06,
|
||
|
|
"loss": 0.4475252628326416,
|
||
|
|
"step": 807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6666666666666665,
|
||
|
|
"grad_norm": 0.2477540352529907,
|
||
|
|
"learning_rate": 1.5150692404343637e-06,
|
||
|
|
"loss": 0.5299564599990845,
|
||
|
|
"step": 808
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.66996699669967,
|
||
|
|
"grad_norm": 0.23933028255710986,
|
||
|
|
"learning_rate": 1.4858792788867904e-06,
|
||
|
|
"loss": 0.518581748008728,
|
||
|
|
"step": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6732673267326734,
|
||
|
|
"grad_norm": 0.2332077440459636,
|
||
|
|
"learning_rate": 1.4569624009870165e-06,
|
||
|
|
"loss": 0.5162506103515625,
|
||
|
|
"step": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6765676567656764,
|
||
|
|
"grad_norm": 0.23396257763770162,
|
||
|
|
"learning_rate": 1.4283190332595665e-06,
|
||
|
|
"loss": 0.4762595593929291,
|
||
|
|
"step": 811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.67986798679868,
|
||
|
|
"grad_norm": 0.24891326451914347,
|
||
|
|
"learning_rate": 1.3999495981946764e-06,
|
||
|
|
"loss": 0.44347697496414185,
|
||
|
|
"step": 812
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6831683168316833,
|
||
|
|
"grad_norm": 0.22951918904681498,
|
||
|
|
"learning_rate": 1.3718545142420768e-06,
|
||
|
|
"loss": 0.4344146251678467,
|
||
|
|
"step": 813
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6864686468646863,
|
||
|
|
"grad_norm": 0.23863686607461265,
|
||
|
|
"learning_rate": 1.344034195804813e-06,
|
||
|
|
"loss": 0.4936307668685913,
|
||
|
|
"step": 814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.68976897689769,
|
||
|
|
"grad_norm": 0.23758007083024585,
|
||
|
|
"learning_rate": 1.3164890532331386e-06,
|
||
|
|
"loss": 0.43635520339012146,
|
||
|
|
"step": 815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.693069306930693,
|
||
|
|
"grad_norm": 0.24550816708533926,
|
||
|
|
"learning_rate": 1.2892194928184499e-06,
|
||
|
|
"loss": 0.48006054759025574,
|
||
|
|
"step": 816
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6963696369636962,
|
||
|
|
"grad_norm": 0.22610358677951214,
|
||
|
|
"learning_rate": 1.2622259167873008e-06,
|
||
|
|
"loss": 0.4296647906303406,
|
||
|
|
"step": 817
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6996699669966997,
|
||
|
|
"grad_norm": 0.3871947383123805,
|
||
|
|
"learning_rate": 1.2355087232954754e-06,
|
||
|
|
"loss": 0.47840994596481323,
|
||
|
|
"step": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7029702970297027,
|
||
|
|
"grad_norm": 0.21432181977841594,
|
||
|
|
"learning_rate": 1.209068306422112e-06,
|
||
|
|
"loss": 0.41459953784942627,
|
||
|
|
"step": 819
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.706270627062706,
|
||
|
|
"grad_norm": 0.24313471794627498,
|
||
|
|
"learning_rate": 1.1829050561638766e-06,
|
||
|
|
"loss": 0.4278629422187805,
|
||
|
|
"step": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7095709570957096,
|
||
|
|
"grad_norm": 0.24379358416226346,
|
||
|
|
"learning_rate": 1.1570193584292323e-06,
|
||
|
|
"loss": 0.44538602232933044,
|
||
|
|
"step": 821
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7128712871287126,
|
||
|
|
"grad_norm": 0.23094639733408046,
|
||
|
|
"learning_rate": 1.1314115950327365e-06,
|
||
|
|
"loss": 0.4757949709892273,
|
||
|
|
"step": 822
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.716171617161716,
|
||
|
|
"grad_norm": 0.22182336808333136,
|
||
|
|
"learning_rate": 1.106082143689402e-06,
|
||
|
|
"loss": 0.49131542444229126,
|
||
|
|
"step": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7194719471947195,
|
||
|
|
"grad_norm": 0.2534124798335607,
|
||
|
|
"learning_rate": 1.0810313780091408e-06,
|
||
|
|
"loss": 0.4917967915534973,
|
||
|
|
"step": 824
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7227722772277225,
|
||
|
|
"grad_norm": 0.23670068032674005,
|
||
|
|
"learning_rate": 1.056259667491244e-06,
|
||
|
|
"loss": 0.4949303865432739,
|
||
|
|
"step": 825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.726072607260726,
|
||
|
|
"grad_norm": 0.23770304320813665,
|
||
|
|
"learning_rate": 1.0317673775189374e-06,
|
||
|
|
"loss": 0.4287925958633423,
|
||
|
|
"step": 826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7293729372937294,
|
||
|
|
"grad_norm": 0.2425418928573913,
|
||
|
|
"learning_rate": 1.007554869353975e-06,
|
||
|
|
"loss": 0.5059949159622192,
|
||
|
|
"step": 827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7326732673267324,
|
||
|
|
"grad_norm": 0.25049371554006,
|
||
|
|
"learning_rate": 9.83622500131336e-07,
|
||
|
|
"loss": 0.47914958000183105,
|
||
|
|
"step": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.735973597359736,
|
||
|
|
"grad_norm": 0.24168515794090734,
|
||
|
|
"learning_rate": 9.599706228539452e-07,
|
||
|
|
"loss": 0.5237720608711243,
|
||
|
|
"step": 829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7392739273927393,
|
||
|
|
"grad_norm": 0.23836969767457952,
|
||
|
|
"learning_rate": 9.365995863874566e-07,
|
||
|
|
"loss": 0.4628916382789612,
|
||
|
|
"step": 830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7425742574257423,
|
||
|
|
"grad_norm": 0.22835633263617844,
|
||
|
|
"learning_rate": 9.135097354551203e-07,
|
||
|
|
"loss": 0.49988898634910583,
|
||
|
|
"step": 831
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.745874587458746,
|
||
|
|
"grad_norm": 0.2229937423966958,
|
||
|
|
"learning_rate": 8.907014106327039e-07,
|
||
|
|
"loss": 0.4631851315498352,
|
||
|
|
"step": 832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7491749174917492,
|
||
|
|
"grad_norm": 0.24485133529173167,
|
||
|
|
"learning_rate": 8.681749483434387e-07,
|
||
|
|
"loss": 0.47001713514328003,
|
||
|
|
"step": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7524752475247523,
|
||
|
|
"grad_norm": 0.23400965677751775,
|
||
|
|
"learning_rate": 8.459306808530999e-07,
|
||
|
|
"loss": 0.4437292218208313,
|
||
|
|
"step": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7557755775577557,
|
||
|
|
"grad_norm": 0.26632452732629835,
|
||
|
|
"learning_rate": 8.239689362650694e-07,
|
||
|
|
"loss": 0.5006406903266907,
|
||
|
|
"step": 835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.759075907590759,
|
||
|
|
"grad_norm": 0.23471614589516374,
|
||
|
|
"learning_rate": 8.022900385155185e-07,
|
||
|
|
"loss": 0.45732003450393677,
|
||
|
|
"step": 836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.762376237623762,
|
||
|
|
"grad_norm": 0.47225644675751677,
|
||
|
|
"learning_rate": 7.808943073686159e-07,
|
||
|
|
"loss": 0.5012909173965454,
|
||
|
|
"step": 837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7656765676567656,
|
||
|
|
"grad_norm": 0.25510766784506034,
|
||
|
|
"learning_rate": 7.597820584118221e-07,
|
||
|
|
"loss": 0.5104090571403503,
|
||
|
|
"step": 838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.768976897689769,
|
||
|
|
"grad_norm": 0.22536004830501363,
|
||
|
|
"learning_rate": 7.38953603051229e-07,
|
||
|
|
"loss": 0.44415900111198425,
|
||
|
|
"step": 839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.772277227722772,
|
||
|
|
"grad_norm": 0.23868123290562657,
|
||
|
|
"learning_rate": 7.184092485069638e-07,
|
||
|
|
"loss": 0.46958473324775696,
|
||
|
|
"step": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7755775577557755,
|
||
|
|
"grad_norm": 0.22685199851447227,
|
||
|
|
"learning_rate": 6.981492978086634e-07,
|
||
|
|
"loss": 0.4305083155632019,
|
||
|
|
"step": 841
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.778877887788779,
|
||
|
|
"grad_norm": 0.2363937135429503,
|
||
|
|
"learning_rate": 6.78174049791005e-07,
|
||
|
|
"loss": 0.4812752604484558,
|
||
|
|
"step": 842
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.782178217821782,
|
||
|
|
"grad_norm": 0.23536493344498524,
|
||
|
|
"learning_rate": 6.584837990892889e-07,
|
||
|
|
"loss": 0.522142231464386,
|
||
|
|
"step": 843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7854785478547854,
|
||
|
|
"grad_norm": 0.2629089101886439,
|
||
|
|
"learning_rate": 6.390788361351053e-07,
|
||
|
|
"loss": 0.4789726138114929,
|
||
|
|
"step": 844
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.788778877887789,
|
||
|
|
"grad_norm": 0.221963892758326,
|
||
|
|
"learning_rate": 6.199594471520453e-07,
|
||
|
|
"loss": 0.44507476687431335,
|
||
|
|
"step": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.792079207920792,
|
||
|
|
"grad_norm": 0.23452674626378717,
|
||
|
|
"learning_rate": 6.011259141514747e-07,
|
||
|
|
"loss": 0.47613948583602905,
|
||
|
|
"step": 846
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7953795379537953,
|
||
|
|
"grad_norm": 0.22167932095355114,
|
||
|
|
"learning_rate": 5.825785149283758e-07,
|
||
|
|
"loss": 0.44828763604164124,
|
||
|
|
"step": 847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.798679867986799,
|
||
|
|
"grad_norm": 0.3027768768548174,
|
||
|
|
"learning_rate": 5.64317523057254e-07,
|
||
|
|
"loss": 0.4695909321308136,
|
||
|
|
"step": 848
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.801980198019802,
|
||
|
|
"grad_norm": 0.2349539472452322,
|
||
|
|
"learning_rate": 5.463432078881093e-07,
|
||
|
|
"loss": 0.48341453075408936,
|
||
|
|
"step": 849
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8052805280528053,
|
||
|
|
"grad_norm": 0.21333400051209225,
|
||
|
|
"learning_rate": 5.286558345424397e-07,
|
||
|
|
"loss": 0.47008436918258667,
|
||
|
|
"step": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8085808580858087,
|
||
|
|
"grad_norm": 0.2369125413431687,
|
||
|
|
"learning_rate": 5.112556639093536e-07,
|
||
|
|
"loss": 0.5081039071083069,
|
||
|
|
"step": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8118811881188117,
|
||
|
|
"grad_norm": 0.23230496066562498,
|
||
|
|
"learning_rate": 4.941429526417163e-07,
|
||
|
|
"loss": 0.49790090322494507,
|
||
|
|
"step": 852
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.815181518151815,
|
||
|
|
"grad_norm": 0.2314377157636827,
|
||
|
|
"learning_rate": 4.773179531523542e-07,
|
||
|
|
"loss": 0.476767897605896,
|
||
|
|
"step": 853
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8184818481848186,
|
||
|
|
"grad_norm": 0.234974793768271,
|
||
|
|
"learning_rate": 4.6078091361034585e-07,
|
||
|
|
"loss": 0.5067446231842041,
|
||
|
|
"step": 854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8217821782178216,
|
||
|
|
"grad_norm": 0.2229121342330284,
|
||
|
|
"learning_rate": 4.4453207793735185e-07,
|
||
|
|
"loss": 0.45703452825546265,
|
||
|
|
"step": 855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.825082508250825,
|
||
|
|
"grad_norm": 0.25006675020075053,
|
||
|
|
"learning_rate": 4.285716858040223e-07,
|
||
|
|
"loss": 0.4193270206451416,
|
||
|
|
"step": 856
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8283828382838285,
|
||
|
|
"grad_norm": 0.2214334357956483,
|
||
|
|
"learning_rate": 4.128999726264549e-07,
|
||
|
|
"loss": 0.4367069602012634,
|
||
|
|
"step": 857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8316831683168315,
|
||
|
|
"grad_norm": 0.23745672544685706,
|
||
|
|
"learning_rate": 3.9751716956273113e-07,
|
||
|
|
"loss": 0.46601590514183044,
|
||
|
|
"step": 858
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.834983498349835,
|
||
|
|
"grad_norm": 0.23728948504727357,
|
||
|
|
"learning_rate": 3.824235035095036e-07,
|
||
|
|
"loss": 0.4801405072212219,
|
||
|
|
"step": 859
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8382838283828384,
|
||
|
|
"grad_norm": 0.2305722834125333,
|
||
|
|
"learning_rate": 3.676191970986409e-07,
|
||
|
|
"loss": 0.4729960262775421,
|
||
|
|
"step": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8415841584158414,
|
||
|
|
"grad_norm": 0.2565962552578653,
|
||
|
|
"learning_rate": 3.531044686939611e-07,
|
||
|
|
"loss": 0.453819215297699,
|
||
|
|
"step": 861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.844884488448845,
|
||
|
|
"grad_norm": 0.2345568934684747,
|
||
|
|
"learning_rate": 3.388795323879923e-07,
|
||
|
|
"loss": 0.4655516743659973,
|
||
|
|
"step": 862
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8481848184818483,
|
||
|
|
"grad_norm": 0.2602122051468819,
|
||
|
|
"learning_rate": 3.249445979988286e-07,
|
||
|
|
"loss": 0.4915505647659302,
|
||
|
|
"step": 863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8514851485148514,
|
||
|
|
"grad_norm": 0.227534967530927,
|
||
|
|
"learning_rate": 3.112998710670279e-07,
|
||
|
|
"loss": 0.46072205901145935,
|
||
|
|
"step": 864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.854785478547855,
|
||
|
|
"grad_norm": 0.2372527927247435,
|
||
|
|
"learning_rate": 2.979455528525854e-07,
|
||
|
|
"loss": 0.47496911883354187,
|
||
|
|
"step": 865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8580858085808583,
|
||
|
|
"grad_norm": 0.2396587074165527,
|
||
|
|
"learning_rate": 2.8488184033195867e-07,
|
||
|
|
"loss": 0.4863288402557373,
|
||
|
|
"step": 866
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8613861386138613,
|
||
|
|
"grad_norm": 0.23166629272471134,
|
||
|
|
"learning_rate": 2.721089261951626e-07,
|
||
|
|
"loss": 0.4543803930282593,
|
||
|
|
"step": 867
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8646864686468647,
|
||
|
|
"grad_norm": 0.2431611152190322,
|
||
|
|
"learning_rate": 2.5962699884293894e-07,
|
||
|
|
"loss": 0.4589266777038574,
|
||
|
|
"step": 868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.867986798679868,
|
||
|
|
"grad_norm": 0.2225895431580723,
|
||
|
|
"learning_rate": 2.474362423839627e-07,
|
||
|
|
"loss": 0.45603302121162415,
|
||
|
|
"step": 869
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.871287128712871,
|
||
|
|
"grad_norm": 0.2221408751585563,
|
||
|
|
"learning_rate": 2.3553683663213088e-07,
|
||
|
|
"loss": 0.4547184109687805,
|
||
|
|
"step": 870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8745874587458746,
|
||
|
|
"grad_norm": 0.24123343867414457,
|
||
|
|
"learning_rate": 2.2392895710391604e-07,
|
||
|
|
"loss": 0.4900602102279663,
|
||
|
|
"step": 871
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.877887788778878,
|
||
|
|
"grad_norm": 0.2412441535157341,
|
||
|
|
"learning_rate": 2.126127750157725e-07,
|
||
|
|
"loss": 0.48706525564193726,
|
||
|
|
"step": 872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.881188118811881,
|
||
|
|
"grad_norm": 0.24173675884162568,
|
||
|
|
"learning_rate": 2.0158845728160958e-07,
|
||
|
|
"loss": 0.4726618230342865,
|
||
|
|
"step": 873
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8844884488448845,
|
||
|
|
"grad_norm": 0.25907893004745514,
|
||
|
|
"learning_rate": 1.9085616651033147e-07,
|
||
|
|
"loss": 0.45884019136428833,
|
||
|
|
"step": 874
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.887788778877888,
|
||
|
|
"grad_norm": 0.2641670850826395,
|
||
|
|
"learning_rate": 1.804160610034411e-07,
|
||
|
|
"loss": 0.4787840247154236,
|
||
|
|
"step": 875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.891089108910891,
|
||
|
|
"grad_norm": 0.24253910042279672,
|
||
|
|
"learning_rate": 1.702682947527001e-07,
|
||
|
|
"loss": 0.4758448004722595,
|
||
|
|
"step": 876
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8943894389438944,
|
||
|
|
"grad_norm": 0.2279011748861112,
|
||
|
|
"learning_rate": 1.6041301743786596e-07,
|
||
|
|
"loss": 0.47089093923568726,
|
||
|
|
"step": 877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.897689768976898,
|
||
|
|
"grad_norm": 0.29849498701163135,
|
||
|
|
"learning_rate": 1.5085037442446937e-07,
|
||
|
|
"loss": 0.46921056509017944,
|
||
|
|
"step": 878
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.900990099009901,
|
||
|
|
"grad_norm": 0.2344970489799305,
|
||
|
|
"learning_rate": 1.415805067616871e-07,
|
||
|
|
"loss": 0.5218731164932251,
|
||
|
|
"step": 879
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9042904290429044,
|
||
|
|
"grad_norm": 0.2254215991599414,
|
||
|
|
"learning_rate": 1.3260355118025036e-07,
|
||
|
|
"loss": 0.43099671602249146,
|
||
|
|
"step": 880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.907590759075908,
|
||
|
|
"grad_norm": 0.23874830724823604,
|
||
|
|
"learning_rate": 1.2391964009043078e-07,
|
||
|
|
"loss": 0.48290592432022095,
|
||
|
|
"step": 881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.910891089108911,
|
||
|
|
"grad_norm": 0.23943766068140404,
|
||
|
|
"learning_rate": 1.1552890158009311e-07,
|
||
|
|
"loss": 0.4634360074996948,
|
||
|
|
"step": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9141914191419143,
|
||
|
|
"grad_norm": 0.2453653346062948,
|
||
|
|
"learning_rate": 1.0743145941279453e-07,
|
||
|
|
"loss": 0.5041622519493103,
|
||
|
|
"step": 883
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9174917491749177,
|
||
|
|
"grad_norm": 0.21518547033713775,
|
||
|
|
"learning_rate": 9.962743302596612e-08,
|
||
|
|
"loss": 0.480410099029541,
|
||
|
|
"step": 884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9207920792079207,
|
||
|
|
"grad_norm": 0.24487326504708118,
|
||
|
|
"learning_rate": 9.211693752915419e-08,
|
||
|
|
"loss": 0.49919891357421875,
|
||
|
|
"step": 885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.924092409240924,
|
||
|
|
"grad_norm": 0.23373083594094138,
|
||
|
|
"learning_rate": 8.490008370231506e-08,
|
||
|
|
"loss": 0.508806586265564,
|
||
|
|
"step": 886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9273927392739276,
|
||
|
|
"grad_norm": 0.23076843849897602,
|
||
|
|
"learning_rate": 7.797697799418525e-08,
|
||
|
|
"loss": 0.4233350157737732,
|
||
|
|
"step": 887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9306930693069306,
|
||
|
|
"grad_norm": 0.2406032429252954,
|
||
|
|
"learning_rate": 7.134772252071154e-08,
|
||
|
|
"loss": 0.4577901363372803,
|
||
|
|
"step": 888
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.933993399339934,
|
||
|
|
"grad_norm": 0.22213331512067527,
|
||
|
|
"learning_rate": 6.501241506354561e-08,
|
||
|
|
"loss": 0.4028077721595764,
|
||
|
|
"step": 889
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9372937293729375,
|
||
|
|
"grad_norm": 0.23681508976522572,
|
||
|
|
"learning_rate": 5.897114906859402e-08,
|
||
|
|
"loss": 0.48321446776390076,
|
||
|
|
"step": 890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9405940594059405,
|
||
|
|
"grad_norm": 0.27558742916404966,
|
||
|
|
"learning_rate": 5.322401364465491e-08,
|
||
|
|
"loss": 0.48732608556747437,
|
||
|
|
"step": 891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.943894389438944,
|
||
|
|
"grad_norm": 0.22725537704850798,
|
||
|
|
"learning_rate": 4.777109356208565e-08,
|
||
|
|
"loss": 0.46879494190216064,
|
||
|
|
"step": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9471947194719474,
|
||
|
|
"grad_norm": 0.23495776431163154,
|
||
|
|
"learning_rate": 4.261246925156837e-08,
|
||
|
|
"loss": 0.4858628511428833,
|
||
|
|
"step": 893
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9504950495049505,
|
||
|
|
"grad_norm": 0.22802725333151694,
|
||
|
|
"learning_rate": 3.7748216802913077e-08,
|
||
|
|
"loss": 0.48119616508483887,
|
||
|
|
"step": 894
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.953795379537954,
|
||
|
|
"grad_norm": 0.22512889420077337,
|
||
|
|
"learning_rate": 3.3178407963938564e-08,
|
||
|
|
"loss": 0.4994167983531952,
|
||
|
|
"step": 895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9570957095709574,
|
||
|
|
"grad_norm": 0.23739211802797258,
|
||
|
|
"learning_rate": 2.8903110139417712e-08,
|
||
|
|
"loss": 0.46394845843315125,
|
||
|
|
"step": 896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9603960396039604,
|
||
|
|
"grad_norm": 0.2476698533912655,
|
||
|
|
"learning_rate": 2.4922386390076047e-08,
|
||
|
|
"loss": 0.42504560947418213,
|
||
|
|
"step": 897
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.963696369636964,
|
||
|
|
"grad_norm": 0.24523827629331452,
|
||
|
|
"learning_rate": 2.1236295431670275e-08,
|
||
|
|
"loss": 0.4186960756778717,
|
||
|
|
"step": 898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9669966996699673,
|
||
|
|
"grad_norm": 0.22738870735932892,
|
||
|
|
"learning_rate": 1.7844891634113402e-08,
|
||
|
|
"loss": 0.4529160261154175,
|
||
|
|
"step": 899
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9702970297029703,
|
||
|
|
"grad_norm": 0.23734524364327658,
|
||
|
|
"learning_rate": 1.4748225020679851e-08,
|
||
|
|
"loss": 0.44012153148651123,
|
||
|
|
"step": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9735973597359737,
|
||
|
|
"grad_norm": 0.23103066951863727,
|
||
|
|
"learning_rate": 1.1946341267263794e-08,
|
||
|
|
"loss": 0.4775368571281433,
|
||
|
|
"step": 901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.976897689768977,
|
||
|
|
"grad_norm": 0.22618868632744704,
|
||
|
|
"learning_rate": 9.439281701704162e-09,
|
||
|
|
"loss": 0.4465276002883911,
|
||
|
|
"step": 902
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.98019801980198,
|
||
|
|
"grad_norm": 0.24271367480309458,
|
||
|
|
"learning_rate": 7.227083303180671e-09,
|
||
|
|
"loss": 0.4674132168292999,
|
||
|
|
"step": 903
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9834983498349836,
|
||
|
|
"grad_norm": 0.23142674174926925,
|
||
|
|
"learning_rate": 5.30977870166316e-09,
|
||
|
|
"loss": 0.4751841127872467,
|
||
|
|
"step": 904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9867986798679866,
|
||
|
|
"grad_norm": 0.24061959007170008,
|
||
|
|
"learning_rate": 3.687396177434188e-09,
|
||
|
|
"loss": 0.4587743580341339,
|
||
|
|
"step": 905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.99009900990099,
|
||
|
|
"grad_norm": 0.22301171950064888,
|
||
|
|
"learning_rate": 2.359959660667155e-09,
|
||
|
|
"loss": 0.4815826416015625,
|
||
|
|
"step": 906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9933993399339935,
|
||
|
|
"grad_norm": 0.22240717058445192,
|
||
|
|
"learning_rate": 1.3274887310732454e-09,
|
||
|
|
"loss": 0.45863479375839233,
|
||
|
|
"step": 907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9966996699669965,
|
||
|
|
"grad_norm": 0.23321307876392341,
|
||
|
|
"learning_rate": 5.899986176260974e-10,
|
||
|
|
"loss": 0.4888804256916046,
|
||
|
|
"step": 908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.2343821134475686,
|
||
|
|
"learning_rate": 1.475001983131108e-10,
|
||
|
|
"loss": 0.46804267168045044,
|
||
|
|
"step": 909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"step": 909,
|
||
|
|
"total_flos": 1274755977576448.0,
|
||
|
|
"train_loss": 0.5495063810720958,
|
||
|
|
"train_runtime": 34913.8374,
|
||
|
|
"train_samples_per_second": 3.33,
|
||
|
|
"train_steps_per_second": 0.026
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 1,
|
||
|
|
"max_steps": 909,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 3,
|
||
|
|
"save_steps": 500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 1274755977576448.0,
|
||
|
|
"train_batch_size": 8,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|