1456 lines
35 KiB
JSON
1456 lines
35 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 0,
|
|
"global_step": 203,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0049261083743842365,
|
|
"grad_norm": 28.522218704223633,
|
|
"learning_rate": 1e-05,
|
|
"loss": 1.8038,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.009852216748768473,
|
|
"grad_norm": 10.439436912536621,
|
|
"learning_rate": 9.999401258983426e-06,
|
|
"loss": 1.7024,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.014778325123152709,
|
|
"grad_norm": 10.16576099395752,
|
|
"learning_rate": 9.997605179330018e-06,
|
|
"loss": 1.6585,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.019704433497536946,
|
|
"grad_norm": 8.24977970123291,
|
|
"learning_rate": 9.994612191194407e-06,
|
|
"loss": 1.5447,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.024630541871921183,
|
|
"grad_norm": 4.327300071716309,
|
|
"learning_rate": 9.990423011386489e-06,
|
|
"loss": 1.4008,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.029556650246305417,
|
|
"grad_norm": 3.9305260181427,
|
|
"learning_rate": 9.98503864319978e-06,
|
|
"loss": 1.4287,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.034482758620689655,
|
|
"grad_norm": 2.2586233615875244,
|
|
"learning_rate": 9.978460376171113e-06,
|
|
"loss": 1.4288,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.03940886699507389,
|
|
"grad_norm": 2.152981996536255,
|
|
"learning_rate": 9.970689785771798e-06,
|
|
"loss": 1.2331,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.04433497536945813,
|
|
"grad_norm": 2.3123421669006348,
|
|
"learning_rate": 9.961728733030318e-06,
|
|
"loss": 1.5576,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.04926108374384237,
|
|
"grad_norm": 2.1223628520965576,
|
|
"learning_rate": 9.951579364086603e-06,
|
|
"loss": 1.3364,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.054187192118226604,
|
|
"grad_norm": 2.337981939315796,
|
|
"learning_rate": 9.940244109678043e-06,
|
|
"loss": 1.3432,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.059113300492610835,
|
|
"grad_norm": 2.3937160968780518,
|
|
"learning_rate": 9.927725684557339e-06,
|
|
"loss": 1.3346,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.06403940886699508,
|
|
"grad_norm": 1.533258080482483,
|
|
"learning_rate": 9.914027086842323e-06,
|
|
"loss": 1.2448,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.06896551724137931,
|
|
"grad_norm": 1.9601627588272095,
|
|
"learning_rate": 9.899151597297923e-06,
|
|
"loss": 1.2093,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.07389162561576355,
|
|
"grad_norm": 1.4371508359909058,
|
|
"learning_rate": 9.883102778550434e-06,
|
|
"loss": 1.0955,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.07881773399014778,
|
|
"grad_norm": 1.5759714841842651,
|
|
"learning_rate": 9.865884474234275e-06,
|
|
"loss": 1.2615,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.08374384236453201,
|
|
"grad_norm": 1.4584101438522339,
|
|
"learning_rate": 9.847500808071458e-06,
|
|
"loss": 1.2764,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.08866995073891626,
|
|
"grad_norm": 1.432774305343628,
|
|
"learning_rate": 9.82795618288397e-06,
|
|
"loss": 1.11,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.09359605911330049,
|
|
"grad_norm": 1.7812994718551636,
|
|
"learning_rate": 9.807255279539313e-06,
|
|
"loss": 1.2687,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.09852216748768473,
|
|
"grad_norm": 1.3773082494735718,
|
|
"learning_rate": 9.78540305582945e-06,
|
|
"loss": 1.1375,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.10344827586206896,
|
|
"grad_norm": 1.4377540349960327,
|
|
"learning_rate": 9.762404745283439e-06,
|
|
"loss": 1.1887,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.10837438423645321,
|
|
"grad_norm": 1.2567236423492432,
|
|
"learning_rate": 9.738265855914014e-06,
|
|
"loss": 1.1226,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.11330049261083744,
|
|
"grad_norm": 1.288097620010376,
|
|
"learning_rate": 9.712992168898436e-06,
|
|
"loss": 1.1442,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.11822660098522167,
|
|
"grad_norm": 1.3083038330078125,
|
|
"learning_rate": 9.686589737193929e-06,
|
|
"loss": 1.1809,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.12315270935960591,
|
|
"grad_norm": 1.0722836256027222,
|
|
"learning_rate": 9.659064884088017e-06,
|
|
"loss": 1.1327,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.12807881773399016,
|
|
"grad_norm": 1.1409716606140137,
|
|
"learning_rate": 9.630424201684105e-06,
|
|
"loss": 1.0866,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.1330049261083744,
|
|
"grad_norm": 1.1258468627929688,
|
|
"learning_rate": 9.600674549322716e-06,
|
|
"loss": 1.0847,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.13793103448275862,
|
|
"grad_norm": 1.0608943700790405,
|
|
"learning_rate": 9.569823051938689e-06,
|
|
"loss": 0.9715,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 1.1090885400772095,
|
|
"learning_rate": 9.537877098354787e-06,
|
|
"loss": 1.0492,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.1477832512315271,
|
|
"grad_norm": 1.2303950786590576,
|
|
"learning_rate": 9.504844339512096e-06,
|
|
"loss": 0.995,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.15270935960591134,
|
|
"grad_norm": 1.2325893640518188,
|
|
"learning_rate": 9.470732686637665e-06,
|
|
"loss": 1.1353,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.15763546798029557,
|
|
"grad_norm": 1.0923973321914673,
|
|
"learning_rate": 9.435550309349776e-06,
|
|
"loss": 1.0256,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.1625615763546798,
|
|
"grad_norm": 1.8741207122802734,
|
|
"learning_rate": 9.399305633701372e-06,
|
|
"loss": 1.117,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.16748768472906403,
|
|
"grad_norm": 1.292672038078308,
|
|
"learning_rate": 9.36200734016203e-06,
|
|
"loss": 1.0424,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.1724137931034483,
|
|
"grad_norm": 1.2905791997909546,
|
|
"learning_rate": 9.32366436153902e-06,
|
|
"loss": 1.1793,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.17733990147783252,
|
|
"grad_norm": 1.1905455589294434,
|
|
"learning_rate": 9.284285880837947e-06,
|
|
"loss": 1.0032,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.18226600985221675,
|
|
"grad_norm": 1.1533136367797852,
|
|
"learning_rate": 9.243881329063436e-06,
|
|
"loss": 1.0406,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.18719211822660098,
|
|
"grad_norm": 1.2299302816390991,
|
|
"learning_rate": 9.202460382960449e-06,
|
|
"loss": 1.1085,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.1921182266009852,
|
|
"grad_norm": 1.0995800495147705,
|
|
"learning_rate": 9.160032962696734e-06,
|
|
"loss": 1.0465,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.19704433497536947,
|
|
"grad_norm": 1.2899202108383179,
|
|
"learning_rate": 9.116609229486992e-06,
|
|
"loss": 1.0072,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.2019704433497537,
|
|
"grad_norm": 1.068886399269104,
|
|
"learning_rate": 9.072199583159285e-06,
|
|
"loss": 1.0853,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.20689655172413793,
|
|
"grad_norm": 1.0160249471664429,
|
|
"learning_rate": 9.026814659664331e-06,
|
|
"loss": 0.9201,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.21182266009852216,
|
|
"grad_norm": 0.980324387550354,
|
|
"learning_rate": 8.98046532852822e-06,
|
|
"loss": 0.9792,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.21674876847290642,
|
|
"grad_norm": 1.0656648874282837,
|
|
"learning_rate": 8.93316269024921e-06,
|
|
"loss": 0.9549,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.22167487684729065,
|
|
"grad_norm": 1.124436855316162,
|
|
"learning_rate": 8.88491807363919e-06,
|
|
"loss": 1.0474,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.22660098522167488,
|
|
"grad_norm": 1.1231716871261597,
|
|
"learning_rate": 8.835743033110482e-06,
|
|
"loss": 0.9981,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.2315270935960591,
|
|
"grad_norm": 1.0960031747817993,
|
|
"learning_rate": 8.78564934590859e-06,
|
|
"loss": 1.0547,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.23645320197044334,
|
|
"grad_norm": 1.056442141532898,
|
|
"learning_rate": 8.734649009291586e-06,
|
|
"loss": 1.0868,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.2413793103448276,
|
|
"grad_norm": 1.0149261951446533,
|
|
"learning_rate": 8.68275423765683e-06,
|
|
"loss": 0.9538,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.24630541871921183,
|
|
"grad_norm": 0.9313431978225708,
|
|
"learning_rate": 8.629977459615655e-06,
|
|
"loss": 0.9597,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.2512315270935961,
|
|
"grad_norm": 1.086411714553833,
|
|
"learning_rate": 8.576331315016753e-06,
|
|
"loss": 1.0181,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.2561576354679803,
|
|
"grad_norm": 1.1177152395248413,
|
|
"learning_rate": 8.521828651918983e-06,
|
|
"loss": 1.0278,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.26108374384236455,
|
|
"grad_norm": 0.9545988440513611,
|
|
"learning_rate": 8.46648252351431e-06,
|
|
"loss": 0.9892,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.2660098522167488,
|
|
"grad_norm": 1.05325186252594,
|
|
"learning_rate": 8.41030618500161e-06,
|
|
"loss": 1.0133,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.270935960591133,
|
|
"grad_norm": 1.0253876447677612,
|
|
"learning_rate": 8.353313090412093e-06,
|
|
"loss": 0.9538,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.27586206896551724,
|
|
"grad_norm": 1.0110273361206055,
|
|
"learning_rate": 8.295516889387115e-06,
|
|
"loss": 0.8805,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.28078817733990147,
|
|
"grad_norm": 1.0400066375732422,
|
|
"learning_rate": 8.23693142390914e-06,
|
|
"loss": 0.9632,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 1.128901481628418,
|
|
"learning_rate": 8.177570724986627e-06,
|
|
"loss": 1.015,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.29064039408866993,
|
|
"grad_norm": 1.1031105518341064,
|
|
"learning_rate": 8.117449009293668e-06,
|
|
"loss": 0.9957,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.2955665024630542,
|
|
"grad_norm": 4.238386154174805,
|
|
"learning_rate": 8.05658067576513e-06,
|
|
"loss": 0.9085,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.30049261083743845,
|
|
"grad_norm": 1.160597324371338,
|
|
"learning_rate": 7.99498030214817e-06,
|
|
"loss": 0.9809,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.3054187192118227,
|
|
"grad_norm": 1.0774437189102173,
|
|
"learning_rate": 7.932662641510915e-06,
|
|
"loss": 0.99,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.3103448275862069,
|
|
"grad_norm": 1.0282933712005615,
|
|
"learning_rate": 7.869642618709162e-06,
|
|
"loss": 0.9275,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.31527093596059114,
|
|
"grad_norm": 1.0454133749008179,
|
|
"learning_rate": 7.805935326811913e-06,
|
|
"loss": 0.9071,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.32019704433497537,
|
|
"grad_norm": 1.1418848037719727,
|
|
"learning_rate": 7.741556023486655e-06,
|
|
"loss": 0.9734,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.3251231527093596,
|
|
"grad_norm": 1.0286744832992554,
|
|
"learning_rate": 7.676520127345198e-06,
|
|
"loss": 0.9934,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.33004926108374383,
|
|
"grad_norm": 1.2144535779953003,
|
|
"learning_rate": 7.610843214250964e-06,
|
|
"loss": 0.9829,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.33497536945812806,
|
|
"grad_norm": 1.4030691385269165,
|
|
"learning_rate": 7.5445410135886455e-06,
|
|
"loss": 0.9717,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.3399014778325123,
|
|
"grad_norm": 1.0528010129928589,
|
|
"learning_rate": 7.477629404497048e-06,
|
|
"loss": 0.9649,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.3448275862068966,
|
|
"grad_norm": 1.0271952152252197,
|
|
"learning_rate": 7.4101244120661105e-06,
|
|
"loss": 0.9185,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.3497536945812808,
|
|
"grad_norm": 0.9849188327789307,
|
|
"learning_rate": 7.342042203498952e-06,
|
|
"loss": 0.9192,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.35467980295566504,
|
|
"grad_norm": 1.0050177574157715,
|
|
"learning_rate": 7.273399084239878e-06,
|
|
"loss": 0.9326,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.35960591133004927,
|
|
"grad_norm": 0.9628230929374695,
|
|
"learning_rate": 7.204211494069292e-06,
|
|
"loss": 0.884,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.3645320197044335,
|
|
"grad_norm": 1.1869782209396362,
|
|
"learning_rate": 7.134496003166423e-06,
|
|
"loss": 0.966,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.3694581280788177,
|
|
"grad_norm": 1.0189898014068604,
|
|
"learning_rate": 7.06426930814083e-06,
|
|
"loss": 0.8847,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.37438423645320196,
|
|
"grad_norm": 1.5452977418899536,
|
|
"learning_rate": 6.993548228033618e-06,
|
|
"loss": 0.9902,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.3793103448275862,
|
|
"grad_norm": 1.061618685722351,
|
|
"learning_rate": 6.922349700289348e-06,
|
|
"loss": 0.9273,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.3842364532019704,
|
|
"grad_norm": 0.9350699186325073,
|
|
"learning_rate": 6.850690776699574e-06,
|
|
"loss": 0.8633,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.3891625615763547,
|
|
"grad_norm": 0.944102942943573,
|
|
"learning_rate": 6.7785886193189936e-06,
|
|
"loss": 0.9348,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.39408866995073893,
|
|
"grad_norm": 0.9876391291618347,
|
|
"learning_rate": 6.7060604963552125e-06,
|
|
"loss": 0.9354,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.39901477832512317,
|
|
"grad_norm": 1.1221191883087158,
|
|
"learning_rate": 6.633123778033061e-06,
|
|
"loss": 0.9122,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.4039408866995074,
|
|
"grad_norm": 1.171373724937439,
|
|
"learning_rate": 6.559795932434489e-06,
|
|
"loss": 0.9184,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.4088669950738916,
|
|
"grad_norm": 1.0021214485168457,
|
|
"learning_rate": 6.486094521315022e-06,
|
|
"loss": 0.904,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.41379310344827586,
|
|
"grad_norm": 1.0860635042190552,
|
|
"learning_rate": 6.412037195897786e-06,
|
|
"loss": 0.9216,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.4187192118226601,
|
|
"grad_norm": 1.1491731405258179,
|
|
"learning_rate": 6.337641692646106e-06,
|
|
"loss": 0.9381,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.4236453201970443,
|
|
"grad_norm": 1.0246098041534424,
|
|
"learning_rate": 6.262925829015675e-06,
|
|
"loss": 0.8873,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.42857142857142855,
|
|
"grad_norm": 0.9979232549667358,
|
|
"learning_rate": 6.187907499187357e-06,
|
|
"loss": 0.955,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.43349753694581283,
|
|
"grad_norm": 1.022977590560913,
|
|
"learning_rate": 6.112604669781572e-06,
|
|
"loss": 0.8763,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.43842364532019706,
|
|
"grad_norm": 1.0037063360214233,
|
|
"learning_rate": 6.037035375555376e-06,
|
|
"loss": 0.9651,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.4433497536945813,
|
|
"grad_norm": 1.0663460493087769,
|
|
"learning_rate": 5.961217715083185e-06,
|
|
"loss": 0.969,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.4482758620689655,
|
|
"grad_norm": 0.9997808933258057,
|
|
"learning_rate": 5.885169846422242e-06,
|
|
"loss": 1.0117,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.45320197044334976,
|
|
"grad_norm": 0.9787831902503967,
|
|
"learning_rate": 5.808909982763825e-06,
|
|
"loss": 0.7974,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.458128078817734,
|
|
"grad_norm": 1.020983338356018,
|
|
"learning_rate": 5.732456388071247e-06,
|
|
"loss": 0.9561,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.4630541871921182,
|
|
"grad_norm": 0.9058898091316223,
|
|
"learning_rate": 5.655827372705712e-06,
|
|
"loss": 0.9409,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.46798029556650245,
|
|
"grad_norm": 1.0344359874725342,
|
|
"learning_rate": 5.579041289041045e-06,
|
|
"loss": 0.9722,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.4729064039408867,
|
|
"grad_norm": 0.9576674103736877,
|
|
"learning_rate": 5.502116527068363e-06,
|
|
"loss": 0.8873,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.47783251231527096,
|
|
"grad_norm": 1.2546874284744263,
|
|
"learning_rate": 5.425071509991737e-06,
|
|
"loss": 0.912,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.4827586206896552,
|
|
"grad_norm": 0.9510334730148315,
|
|
"learning_rate": 5.347924689815906e-06,
|
|
"loss": 0.8627,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.4876847290640394,
|
|
"grad_norm": 0.9422460198402405,
|
|
"learning_rate": 5.270694542927089e-06,
|
|
"loss": 0.9726,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.49261083743842365,
|
|
"grad_norm": 0.9733043909072876,
|
|
"learning_rate": 5.193399565667945e-06,
|
|
"loss": 0.8824,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.4975369458128079,
|
|
"grad_norm": 0.9182532429695129,
|
|
"learning_rate": 5.116058269907779e-06,
|
|
"loss": 0.8612,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.5024630541871922,
|
|
"grad_norm": 0.8935402035713196,
|
|
"learning_rate": 5.038689178609011e-06,
|
|
"loss": 0.9394,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.5073891625615764,
|
|
"grad_norm": 1.4738961458206177,
|
|
"learning_rate": 4.96131082139099e-06,
|
|
"loss": 0.9609,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.5123152709359606,
|
|
"grad_norm": 1.0967806577682495,
|
|
"learning_rate": 4.883941730092222e-06,
|
|
"loss": 0.9497,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.5172413793103449,
|
|
"grad_norm": 0.9221424460411072,
|
|
"learning_rate": 4.806600434332056e-06,
|
|
"loss": 0.891,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.5221674876847291,
|
|
"grad_norm": 1.0894826650619507,
|
|
"learning_rate": 4.729305457072913e-06,
|
|
"loss": 0.8682,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.5270935960591133,
|
|
"grad_norm": 0.9089194536209106,
|
|
"learning_rate": 4.6520753101840945e-06,
|
|
"loss": 0.8213,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.5320197044334976,
|
|
"grad_norm": 0.8551648855209351,
|
|
"learning_rate": 4.574928490008264e-06,
|
|
"loss": 0.8802,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.5369458128078818,
|
|
"grad_norm": 0.9886828064918518,
|
|
"learning_rate": 4.497883472931639e-06,
|
|
"loss": 0.9347,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.541871921182266,
|
|
"grad_norm": 0.9351578950881958,
|
|
"learning_rate": 4.4209587109589565e-06,
|
|
"loss": 0.8173,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.5467980295566502,
|
|
"grad_norm": 0.9019532203674316,
|
|
"learning_rate": 4.3441726272942895e-06,
|
|
"loss": 0.8473,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.5517241379310345,
|
|
"grad_norm": 0.9672942757606506,
|
|
"learning_rate": 4.267543611928755e-06,
|
|
"loss": 0.918,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.5566502463054187,
|
|
"grad_norm": 1.3270796537399292,
|
|
"learning_rate": 4.191090017236177e-06,
|
|
"loss": 0.9567,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.5615763546798029,
|
|
"grad_norm": 0.980305552482605,
|
|
"learning_rate": 4.114830153577759e-06,
|
|
"loss": 0.863,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.5665024630541872,
|
|
"grad_norm": 0.8682278394699097,
|
|
"learning_rate": 4.0387822849168165e-06,
|
|
"loss": 0.8437,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.9460963010787964,
|
|
"learning_rate": 3.962964624444625e-06,
|
|
"loss": 0.899,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.5763546798029556,
|
|
"grad_norm": 0.8857601881027222,
|
|
"learning_rate": 3.887395330218429e-06,
|
|
"loss": 0.849,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.5812807881773399,
|
|
"grad_norm": 1.0073286294937134,
|
|
"learning_rate": 3.8120925008126457e-06,
|
|
"loss": 0.9561,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.5862068965517241,
|
|
"grad_norm": 0.976075291633606,
|
|
"learning_rate": 3.7370741709843263e-06,
|
|
"loss": 0.8938,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.5911330049261084,
|
|
"grad_norm": 0.9713646173477173,
|
|
"learning_rate": 3.662358307353897e-06,
|
|
"loss": 0.9119,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.5960591133004927,
|
|
"grad_norm": 0.8663797974586487,
|
|
"learning_rate": 3.587962804102214e-06,
|
|
"loss": 0.8631,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.6009852216748769,
|
|
"grad_norm": 0.8859656453132629,
|
|
"learning_rate": 3.5139054786849787e-06,
|
|
"loss": 0.8044,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.6059113300492611,
|
|
"grad_norm": 1.091760277748108,
|
|
"learning_rate": 3.440204067565511e-06,
|
|
"loss": 0.9143,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.6108374384236454,
|
|
"grad_norm": 0.982275128364563,
|
|
"learning_rate": 3.3668762219669393e-06,
|
|
"loss": 0.918,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.6157635467980296,
|
|
"grad_norm": 0.8803215622901917,
|
|
"learning_rate": 3.293939503644788e-06,
|
|
"loss": 0.8426,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.6206896551724138,
|
|
"grad_norm": 0.922527015209198,
|
|
"learning_rate": 3.2214113806810077e-06,
|
|
"loss": 0.8571,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.625615763546798,
|
|
"grad_norm": 0.9506503343582153,
|
|
"learning_rate": 3.149309223300428e-06,
|
|
"loss": 0.8659,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.6305418719211823,
|
|
"grad_norm": 1.0316869020462036,
|
|
"learning_rate": 3.0776502997106526e-06,
|
|
"loss": 0.9088,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.6354679802955665,
|
|
"grad_norm": 1.9990965127944946,
|
|
"learning_rate": 3.0064517719663833e-06,
|
|
"loss": 0.8672,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.6403940886699507,
|
|
"grad_norm": 0.928225040435791,
|
|
"learning_rate": 2.935730691859172e-06,
|
|
"loss": 0.8305,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.645320197044335,
|
|
"grad_norm": 0.9361408948898315,
|
|
"learning_rate": 2.8655039968335774e-06,
|
|
"loss": 0.8462,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.6502463054187192,
|
|
"grad_norm": 0.9435849189758301,
|
|
"learning_rate": 2.7957885059307097e-06,
|
|
"loss": 0.8756,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.6551724137931034,
|
|
"grad_norm": 0.9171866178512573,
|
|
"learning_rate": 2.7266009157601226e-06,
|
|
"loss": 0.917,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.6600985221674877,
|
|
"grad_norm": 0.911807656288147,
|
|
"learning_rate": 2.65795779650105e-06,
|
|
"loss": 0.8588,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.6650246305418719,
|
|
"grad_norm": 1.8045060634613037,
|
|
"learning_rate": 2.589875587933892e-06,
|
|
"loss": 0.9057,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.6699507389162561,
|
|
"grad_norm": 0.8906491994857788,
|
|
"learning_rate": 2.522370595502954e-06,
|
|
"loss": 0.8708,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.6748768472906403,
|
|
"grad_norm": 0.9065340757369995,
|
|
"learning_rate": 2.4554589864113566e-06,
|
|
"loss": 0.8558,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.6798029556650246,
|
|
"grad_norm": 1.116025686264038,
|
|
"learning_rate": 2.3891567857490373e-06,
|
|
"loss": 0.9355,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.6847290640394089,
|
|
"grad_norm": 0.8539987802505493,
|
|
"learning_rate": 2.323479872654805e-06,
|
|
"loss": 0.7964,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.6896551724137931,
|
|
"grad_norm": 0.8484991192817688,
|
|
"learning_rate": 2.2584439765133453e-06,
|
|
"loss": 0.808,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.6945812807881774,
|
|
"grad_norm": 0.9017306566238403,
|
|
"learning_rate": 2.1940646731880887e-06,
|
|
"loss": 0.9113,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.6995073891625616,
|
|
"grad_norm": 1.0858136415481567,
|
|
"learning_rate": 2.1303573812908383e-06,
|
|
"loss": 0.8572,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.7044334975369458,
|
|
"grad_norm": 0.8687289953231812,
|
|
"learning_rate": 2.0673373584890847e-06,
|
|
"loss": 0.8145,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.7093596059113301,
|
|
"grad_norm": 0.9045321345329285,
|
|
"learning_rate": 2.0050196978518323e-06,
|
|
"loss": 0.8543,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 1.0116416215896606,
|
|
"learning_rate": 1.943419324234871e-06,
|
|
"loss": 0.8539,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.7192118226600985,
|
|
"grad_norm": 0.9924123883247375,
|
|
"learning_rate": 1.8825509907063328e-06,
|
|
"loss": 0.95,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.7241379310344828,
|
|
"grad_norm": 0.8728001713752747,
|
|
"learning_rate": 1.8224292750133743e-06,
|
|
"loss": 0.9293,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.729064039408867,
|
|
"grad_norm": 0.8748157620429993,
|
|
"learning_rate": 1.7630685760908623e-06,
|
|
"loss": 0.844,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.7339901477832512,
|
|
"grad_norm": 0.857449471950531,
|
|
"learning_rate": 1.7044831106128867e-06,
|
|
"loss": 0.8433,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.7389162561576355,
|
|
"grad_norm": 0.9839447140693665,
|
|
"learning_rate": 1.6466869095879079e-06,
|
|
"loss": 0.8528,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.7438423645320197,
|
|
"grad_norm": 1.1705787181854248,
|
|
"learning_rate": 1.589693814998391e-06,
|
|
"loss": 0.9376,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.7487684729064039,
|
|
"grad_norm": 1.311543345451355,
|
|
"learning_rate": 1.533517476485691e-06,
|
|
"loss": 0.7922,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.7536945812807881,
|
|
"grad_norm": 0.8858788013458252,
|
|
"learning_rate": 1.4781713480810184e-06,
|
|
"loss": 0.8161,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.7586206896551724,
|
|
"grad_norm": 0.8841625452041626,
|
|
"learning_rate": 1.4236686849832497e-06,
|
|
"loss": 0.8746,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.7635467980295566,
|
|
"grad_norm": 1.0294075012207031,
|
|
"learning_rate": 1.370022540384347e-06,
|
|
"loss": 0.9812,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.7684729064039408,
|
|
"grad_norm": 1.0972410440444946,
|
|
"learning_rate": 1.3172457623431706e-06,
|
|
"loss": 0.966,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.7733990147783252,
|
|
"grad_norm": 0.884397566318512,
|
|
"learning_rate": 1.2653509907084171e-06,
|
|
"loss": 0.8526,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.7783251231527094,
|
|
"grad_norm": 0.9068350791931152,
|
|
"learning_rate": 1.214350654091413e-06,
|
|
"loss": 0.9192,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.7832512315270936,
|
|
"grad_norm": 1.0193994045257568,
|
|
"learning_rate": 1.1642569668895171e-06,
|
|
"loss": 0.8804,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.7881773399014779,
|
|
"grad_norm": 0.8925516605377197,
|
|
"learning_rate": 1.1150819263608098e-06,
|
|
"loss": 0.8384,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.7931034482758621,
|
|
"grad_norm": 1.0054816007614136,
|
|
"learning_rate": 1.0668373097507922e-06,
|
|
"loss": 0.8544,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.7980295566502463,
|
|
"grad_norm": 0.8788971304893494,
|
|
"learning_rate": 1.0195346714717813e-06,
|
|
"loss": 0.8462,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.8029556650246306,
|
|
"grad_norm": 0.8969894051551819,
|
|
"learning_rate": 9.731853403356705e-07,
|
|
"loss": 0.8614,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.8078817733990148,
|
|
"grad_norm": 0.9033743739128113,
|
|
"learning_rate": 9.278004168407151e-07,
|
|
"loss": 0.7701,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.812807881773399,
|
|
"grad_norm": 0.8596148490905762,
|
|
"learning_rate": 8.833907705130091e-07,
|
|
"loss": 0.8451,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.8177339901477833,
|
|
"grad_norm": 0.9375607967376709,
|
|
"learning_rate": 8.399670373032665e-07,
|
|
"loss": 0.8357,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.8226600985221675,
|
|
"grad_norm": 1.2474132776260376,
|
|
"learning_rate": 7.975396170395522e-07,
|
|
"loss": 0.8348,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.8275862068965517,
|
|
"grad_norm": 1.0321135520935059,
|
|
"learning_rate": 7.561186709365653e-07,
|
|
"loss": 1.0182,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.8325123152709359,
|
|
"grad_norm": 0.8872163891792297,
|
|
"learning_rate": 7.157141191620548e-07,
|
|
"loss": 0.8193,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.8374384236453202,
|
|
"grad_norm": 1.0191763639450073,
|
|
"learning_rate": 6.763356384609809e-07,
|
|
"loss": 0.867,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.8423645320197044,
|
|
"grad_norm": 1.7469654083251953,
|
|
"learning_rate": 6.379926598379727e-07,
|
|
"loss": 0.7807,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.8472906403940886,
|
|
"grad_norm": 0.8473700284957886,
|
|
"learning_rate": 6.006943662986275e-07,
|
|
"loss": 0.7896,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.8522167487684729,
|
|
"grad_norm": 0.8807177543640137,
|
|
"learning_rate": 5.644496906502233e-07,
|
|
"loss": 0.8352,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 1.1127580404281616,
|
|
"learning_rate": 5.292673133623372e-07,
|
|
"loss": 0.931,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.8620689655172413,
|
|
"grad_norm": 1.0402883291244507,
|
|
"learning_rate": 4.951556604879049e-07,
|
|
"loss": 0.954,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.8669950738916257,
|
|
"grad_norm": 0.8743595480918884,
|
|
"learning_rate": 4.6212290164521554e-07,
|
|
"loss": 0.8078,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.8719211822660099,
|
|
"grad_norm": 0.8114182353019714,
|
|
"learning_rate": 4.3017694806131163e-07,
|
|
"loss": 0.8326,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.8768472906403941,
|
|
"grad_norm": 0.907577395439148,
|
|
"learning_rate": 3.9932545067728366e-07,
|
|
"loss": 0.9081,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.8817733990147784,
|
|
"grad_norm": 0.8789636492729187,
|
|
"learning_rate": 3.695757983158954e-07,
|
|
"loss": 0.8478,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.8866995073891626,
|
|
"grad_norm": 0.8163318634033203,
|
|
"learning_rate": 3.409351159119845e-07,
|
|
"loss": 0.7992,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.8916256157635468,
|
|
"grad_norm": 0.9108045697212219,
|
|
"learning_rate": 3.134102628060698e-07,
|
|
"loss": 0.9684,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.896551724137931,
|
|
"grad_norm": 0.9332152009010315,
|
|
"learning_rate": 2.8700783110156507e-07,
|
|
"loss": 0.8402,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.9014778325123153,
|
|
"grad_norm": 0.9538592100143433,
|
|
"learning_rate": 2.617341440859883e-07,
|
|
"loss": 0.885,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.9064039408866995,
|
|
"grad_norm": 0.9269714951515198,
|
|
"learning_rate": 2.3759525471656163e-07,
|
|
"loss": 0.8598,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.9113300492610837,
|
|
"grad_norm": 1.0273958444595337,
|
|
"learning_rate": 2.1459694417055033e-07,
|
|
"loss": 0.9211,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.916256157635468,
|
|
"grad_norm": 0.8281537890434265,
|
|
"learning_rate": 1.9274472046068805e-07,
|
|
"loss": 0.7494,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.9211822660098522,
|
|
"grad_norm": 0.91705721616745,
|
|
"learning_rate": 1.7204381711603046e-07,
|
|
"loss": 0.8442,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.9261083743842364,
|
|
"grad_norm": 0.9961190223693848,
|
|
"learning_rate": 1.524991919285429e-07,
|
|
"loss": 1.0154,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.9310344827586207,
|
|
"grad_norm": 0.8181946277618408,
|
|
"learning_rate": 1.3411552576572562e-07,
|
|
"loss": 0.8561,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.9359605911330049,
|
|
"grad_norm": 0.822562575340271,
|
|
"learning_rate": 1.1689722144956672e-07,
|
|
"loss": 0.7987,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.9408866995073891,
|
|
"grad_norm": 0.9585839509963989,
|
|
"learning_rate": 1.008484027020773e-07,
|
|
"loss": 0.8168,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.9458128078817734,
|
|
"grad_norm": 0.8027588129043579,
|
|
"learning_rate": 8.597291315767808e-08,
|
|
"loss": 0.779,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.9507389162561576,
|
|
"grad_norm": 0.965819239616394,
|
|
"learning_rate": 7.227431544266194e-08,
|
|
"loss": 0.9602,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.9556650246305419,
|
|
"grad_norm": 0.8224076628684998,
|
|
"learning_rate": 5.97558903219575e-08,
|
|
"loss": 0.8123,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.9605911330049262,
|
|
"grad_norm": 0.9190309643745422,
|
|
"learning_rate": 4.842063591339763e-08,
|
|
"loss": 0.9018,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.9655172413793104,
|
|
"grad_norm": 0.8280571699142456,
|
|
"learning_rate": 3.82712669696822e-08,
|
|
"loss": 0.8204,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.9704433497536946,
|
|
"grad_norm": 0.8307924270629883,
|
|
"learning_rate": 2.9310214228202016e-08,
|
|
"loss": 0.8532,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.9753694581280788,
|
|
"grad_norm": 0.9194669127464294,
|
|
"learning_rate": 2.153962382888841e-08,
|
|
"loss": 0.8382,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.9802955665024631,
|
|
"grad_norm": 0.804408848285675,
|
|
"learning_rate": 1.496135680021993e-08,
|
|
"loss": 0.8196,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.9852216748768473,
|
|
"grad_norm": 0.8451563715934753,
|
|
"learning_rate": 9.576988613511084e-09,
|
|
"loss": 0.8492,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.9901477832512315,
|
|
"grad_norm": 0.8822647929191589,
|
|
"learning_rate": 5.387808805594752e-09,
|
|
"loss": 0.8916,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.9950738916256158,
|
|
"grad_norm": 0.8675324320793152,
|
|
"learning_rate": 2.3948206699819787e-09,
|
|
"loss": 0.8024,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.8639441132545471,
|
|
"learning_rate": 5.987410165758656e-10,
|
|
"loss": 0.8228,
|
|
"step": 203
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 203,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 0,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.4104222270160896e+16,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|