Files
UIGEN-FX-4B-08-27-full/trainer_state.json
ModelHub XC c1749a33e3 初始化项目,由ModelHub XC社区提供模型
Model: smirki/UIGEN-FX-4B-08-27-full
Source: Original Platform
2026-04-13 16:21:00 +08:00

1106 lines
24 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1535,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03262642740619902,
"grad_norm": 0.0,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.7799,
"step": 10
},
{
"epoch": 0.06525285481239804,
"grad_norm": 0.0,
"learning_rate": 7.600000000000001e-06,
"loss": 0.7788,
"step": 20
},
{
"epoch": 0.09787928221859707,
"grad_norm": 0.0,
"learning_rate": 1.16e-05,
"loss": 0.7558,
"step": 30
},
{
"epoch": 0.13050570962479607,
"grad_norm": 0.0,
"learning_rate": 1.5600000000000003e-05,
"loss": 0.7929,
"step": 40
},
{
"epoch": 0.1631321370309951,
"grad_norm": 0.0,
"learning_rate": 1.9600000000000002e-05,
"loss": 0.8025,
"step": 50
},
{
"epoch": 0.19575856443719414,
"grad_norm": 0.0,
"learning_rate": 1.999818745523526e-05,
"loss": 0.8121,
"step": 60
},
{
"epoch": 0.22838499184339314,
"grad_norm": 0.0,
"learning_rate": 1.9991922711960104e-05,
"loss": 0.768,
"step": 70
},
{
"epoch": 0.26101141924959215,
"grad_norm": 0.0,
"learning_rate": 1.998118619612634e-05,
"loss": 0.7567,
"step": 80
},
{
"epoch": 0.2936378466557912,
"grad_norm": 0.0,
"learning_rate": 1.996598271274081e-05,
"loss": 0.7997,
"step": 90
},
{
"epoch": 0.3262642740619902,
"grad_norm": 0.0,
"learning_rate": 1.9946319065951382e-05,
"loss": 0.7646,
"step": 100
},
{
"epoch": 0.35889070146818924,
"grad_norm": 0.0,
"learning_rate": 1.9922204056001896e-05,
"loss": 0.793,
"step": 110
},
{
"epoch": 0.3915171288743883,
"grad_norm": 0.0,
"learning_rate": 1.9893648475293646e-05,
"loss": 0.7884,
"step": 120
},
{
"epoch": 0.42414355628058725,
"grad_norm": 0.0,
"learning_rate": 1.9860665103555418e-05,
"loss": 0.7747,
"step": 130
},
{
"epoch": 0.4567699836867863,
"grad_norm": 0.0,
"learning_rate": 1.982326870212402e-05,
"loss": 0.7603,
"step": 140
},
{
"epoch": 0.4893964110929853,
"grad_norm": 0.0,
"learning_rate": 1.9781476007338058e-05,
"loss": 0.7703,
"step": 150
},
{
"epoch": 0.5220228384991843,
"grad_norm": 0.0,
"learning_rate": 1.973530572304773e-05,
"loss": 0.7675,
"step": 160
},
{
"epoch": 0.5546492659053833,
"grad_norm": 0.0,
"learning_rate": 1.9684778512244172e-05,
"loss": 0.8043,
"step": 170
},
{
"epoch": 0.5872756933115824,
"grad_norm": 0.0,
"learning_rate": 1.9629916987811924e-05,
"loss": 0.8024,
"step": 180
},
{
"epoch": 0.6199021207177814,
"grad_norm": 0.0,
"learning_rate": 1.957074570240883e-05,
"loss": 0.7716,
"step": 190
},
{
"epoch": 0.6525285481239804,
"grad_norm": 0.0,
"learning_rate": 1.9507291137477744e-05,
"loss": 0.7651,
"step": 200
},
{
"epoch": 0.6851549755301795,
"grad_norm": 0.0,
"learning_rate": 1.943958169139507e-05,
"loss": 0.7819,
"step": 210
},
{
"epoch": 0.7177814029363785,
"grad_norm": 0.0,
"learning_rate": 1.9367647666761384e-05,
"loss": 0.7792,
"step": 220
},
{
"epoch": 0.7504078303425775,
"grad_norm": 0.0,
"learning_rate": 1.929152125683986e-05,
"loss": 0.7937,
"step": 230
},
{
"epoch": 0.7830342577487766,
"grad_norm": 0.0,
"learning_rate": 1.92112365311485e-05,
"loss": 0.7721,
"step": 240
},
{
"epoch": 0.8156606851549756,
"grad_norm": 0.0,
"learning_rate": 1.9126829420212764e-05,
"loss": 0.772,
"step": 250
},
{
"epoch": 0.8482871125611745,
"grad_norm": 0.0,
"learning_rate": 1.9038337699485207e-05,
"loss": 0.7611,
"step": 260
},
{
"epoch": 0.8809135399673735,
"grad_norm": 0.0,
"learning_rate": 1.894580097243954e-05,
"loss": 0.7829,
"step": 270
},
{
"epoch": 0.9135399673735726,
"grad_norm": 0.0,
"learning_rate": 1.884926065284652e-05,
"loss": 0.7815,
"step": 280
},
{
"epoch": 0.9461663947797716,
"grad_norm": 0.0,
"learning_rate": 1.87487599462397e-05,
"loss": 0.7742,
"step": 290
},
{
"epoch": 0.9787928221859706,
"grad_norm": 0.0,
"learning_rate": 1.864434383057927e-05,
"loss": 0.7561,
"step": 300
},
{
"epoch": 1.0097879282218598,
"grad_norm": 0.0,
"learning_rate": 1.853605903612267e-05,
"loss": 0.7452,
"step": 310
},
{
"epoch": 1.0424143556280587,
"grad_norm": 0.0,
"learning_rate": 1.8423954024510995e-05,
"loss": 0.7773,
"step": 320
},
{
"epoch": 1.0750407830342577,
"grad_norm": 0.0,
"learning_rate": 1.8308078967080547e-05,
"loss": 0.8153,
"step": 330
},
{
"epoch": 1.1076672104404568,
"grad_norm": 0.0,
"learning_rate": 1.8188485722409196e-05,
"loss": 0.793,
"step": 340
},
{
"epoch": 1.1402936378466557,
"grad_norm": 0.0,
"learning_rate": 1.8065227813107667e-05,
"loss": 0.7822,
"step": 350
},
{
"epoch": 1.1729200652528549,
"grad_norm": 0.0,
"learning_rate": 1.7938360401866096e-05,
"loss": 0.7703,
"step": 360
},
{
"epoch": 1.2055464926590538,
"grad_norm": 0.0,
"learning_rate": 1.7807940266766595e-05,
"loss": 0.78,
"step": 370
},
{
"epoch": 1.238172920065253,
"grad_norm": 0.0,
"learning_rate": 1.767402577587285e-05,
"loss": 0.7718,
"step": 380
},
{
"epoch": 1.2707993474714518,
"grad_norm": 0.0,
"learning_rate": 1.7536676861108167e-05,
"loss": 0.7895,
"step": 390
},
{
"epoch": 1.3034257748776508,
"grad_norm": 0.0,
"learning_rate": 1.7395954991433588e-05,
"loss": 0.7638,
"step": 400
},
{
"epoch": 1.33605220228385,
"grad_norm": 0.0,
"learning_rate": 1.7251923145338175e-05,
"loss": 0.7874,
"step": 410
},
{
"epoch": 1.368678629690049,
"grad_norm": 0.0,
"learning_rate": 1.710464578265369e-05,
"loss": 0.7947,
"step": 420
},
{
"epoch": 1.401305057096248,
"grad_norm": 0.0,
"learning_rate": 1.6954188815706306e-05,
"loss": 0.7811,
"step": 430
},
{
"epoch": 1.433931484502447,
"grad_norm": 0.0,
"learning_rate": 1.680061957981831e-05,
"loss": 0.736,
"step": 440
},
{
"epoch": 1.466557911908646,
"grad_norm": 0.0,
"learning_rate": 1.6644006803172926e-05,
"loss": 0.765,
"step": 450
},
{
"epoch": 1.499184339314845,
"grad_norm": 0.0,
"learning_rate": 1.6484420576055787e-05,
"loss": 0.7608,
"step": 460
},
{
"epoch": 1.531810766721044,
"grad_norm": 0.0,
"learning_rate": 1.6321932319486822e-05,
"loss": 0.779,
"step": 470
},
{
"epoch": 1.564437194127243,
"grad_norm": 0.0,
"learning_rate": 1.6156614753256583e-05,
"loss": 0.7824,
"step": 480
},
{
"epoch": 1.597063621533442,
"grad_norm": 0.0,
"learning_rate": 1.5988541863381323e-05,
"loss": 0.7859,
"step": 490
},
{
"epoch": 1.629690048939641,
"grad_norm": 0.0,
"learning_rate": 1.581778886899138e-05,
"loss": 0.784,
"step": 500
},
{
"epoch": 1.6623164763458402,
"grad_norm": 0.0,
"learning_rate": 1.5644432188667695e-05,
"loss": 0.7578,
"step": 510
},
{
"epoch": 1.6949429037520392,
"grad_norm": 0.0,
"learning_rate": 1.546854940624156e-05,
"loss": 0.7779,
"step": 520
},
{
"epoch": 1.727569331158238,
"grad_norm": 0.0,
"learning_rate": 1.5290219236072833e-05,
"loss": 0.7667,
"step": 530
},
{
"epoch": 1.7601957585644372,
"grad_norm": 0.0,
"learning_rate": 1.5109521487822208e-05,
"loss": 0.7765,
"step": 540
},
{
"epoch": 1.7928221859706364,
"grad_norm": 0.0,
"learning_rate": 1.4926537030733301e-05,
"loss": 0.8005,
"step": 550
},
{
"epoch": 1.8254486133768353,
"grad_norm": 0.0,
"learning_rate": 1.474134775744054e-05,
"loss": 0.7501,
"step": 560
},
{
"epoch": 1.8580750407830342,
"grad_norm": 0.0,
"learning_rate": 1.4554036547319033e-05,
"loss": 0.7968,
"step": 570
},
{
"epoch": 1.8907014681892331,
"grad_norm": 0.0,
"learning_rate": 1.4364687229392823e-05,
"loss": 0.7676,
"step": 580
},
{
"epoch": 1.9233278955954323,
"grad_norm": 0.0,
"learning_rate": 1.417338454481818e-05,
"loss": 0.8098,
"step": 590
},
{
"epoch": 1.9559543230016314,
"grad_norm": 0.0,
"learning_rate": 1.3980214108958626e-05,
"loss": 0.7602,
"step": 600
},
{
"epoch": 1.9885807504078303,
"grad_norm": 0.0,
"learning_rate": 1.3785262373068742e-05,
"loss": 0.78,
"step": 610
},
{
"epoch": 2.0195758564437196,
"grad_norm": 0.0,
"learning_rate": 1.3588616585603908e-05,
"loss": 0.79,
"step": 620
},
{
"epoch": 2.0522022838499185,
"grad_norm": 0.0,
"learning_rate": 1.3390364753173206e-05,
"loss": 0.7759,
"step": 630
},
{
"epoch": 2.0848287112561175,
"grad_norm": 0.0,
"learning_rate": 1.319059560115308e-05,
"loss": 0.7811,
"step": 640
},
{
"epoch": 2.1174551386623164,
"grad_norm": 0.0,
"learning_rate": 1.2989398533979271e-05,
"loss": 0.793,
"step": 650
},
{
"epoch": 2.1500815660685153,
"grad_norm": 0.0,
"learning_rate": 1.278686359513488e-05,
"loss": 0.7435,
"step": 660
},
{
"epoch": 2.1827079934747147,
"grad_norm": 0.0,
"learning_rate": 1.2583081426852412e-05,
"loss": 0.7775,
"step": 670
},
{
"epoch": 2.2153344208809136,
"grad_norm": 0.0,
"learning_rate": 1.237814322954788e-05,
"loss": 0.7885,
"step": 680
},
{
"epoch": 2.2479608482871125,
"grad_norm": 0.0,
"learning_rate": 1.217214072100508e-05,
"loss": 0.7745,
"step": 690
},
{
"epoch": 2.2805872756933114,
"grad_norm": 0.0,
"learning_rate": 1.1965166095328302e-05,
"loss": 0.7463,
"step": 700
},
{
"epoch": 2.3132137030995104,
"grad_norm": 0.0,
"learning_rate": 1.1757311981681943e-05,
"loss": 0.7962,
"step": 710
},
{
"epoch": 2.3458401305057097,
"grad_norm": 0.0,
"learning_rate": 1.1548671402835325e-05,
"loss": 0.7699,
"step": 720
},
{
"epoch": 2.3784665579119086,
"grad_norm": 0.0,
"learning_rate": 1.1339337733531435e-05,
"loss": 0.8087,
"step": 730
},
{
"epoch": 2.4110929853181076,
"grad_norm": 0.0,
"learning_rate": 1.1129404658698082e-05,
"loss": 0.7399,
"step": 740
},
{
"epoch": 2.443719412724307,
"grad_norm": 0.0,
"learning_rate": 1.0918966131520276e-05,
"loss": 0.7841,
"step": 750
},
{
"epoch": 2.476345840130506,
"grad_norm": 0.0,
"learning_rate": 1.0708116331392542e-05,
"loss": 0.7998,
"step": 760
},
{
"epoch": 2.5089722675367048,
"grad_norm": 0.0,
"learning_rate": 1.0496949621769976e-05,
"loss": 0.7869,
"step": 770
},
{
"epoch": 2.5415986949429037,
"grad_norm": 0.0,
"learning_rate": 1.0285560507936962e-05,
"loss": 0.789,
"step": 780
},
{
"epoch": 2.5742251223491026,
"grad_norm": 0.0,
"learning_rate": 1.007404359471238e-05,
"loss": 0.7694,
"step": 790
},
{
"epoch": 2.6068515497553015,
"grad_norm": 0.0,
"learning_rate": 9.862493544110282e-06,
"loss": 0.7746,
"step": 800
},
{
"epoch": 2.639477977161501,
"grad_norm": 0.0,
"learning_rate": 9.651005032974994e-06,
"loss": 0.7776,
"step": 810
},
{
"epoch": 2.6721044045677,
"grad_norm": 0.0,
"learning_rate": 9.439672710609532e-06,
"loss": 0.8017,
"step": 820
},
{
"epoch": 2.7047308319738987,
"grad_norm": 0.0,
"learning_rate": 9.228591156416405e-06,
"loss": 0.7494,
"step": 830
},
{
"epoch": 2.737357259380098,
"grad_norm": 0.0,
"learning_rate": 9.017854837569629e-06,
"loss": 0.7635,
"step": 840
},
{
"epoch": 2.769983686786297,
"grad_norm": 0.0,
"learning_rate": 8.807558066737042e-06,
"loss": 0.7947,
"step": 850
},
{
"epoch": 2.802610114192496,
"grad_norm": 0.0,
"learning_rate": 8.597794959871694e-06,
"loss": 0.7897,
"step": 860
},
{
"epoch": 2.835236541598695,
"grad_norm": 0.0,
"learning_rate": 8.388659394091362e-06,
"loss": 0.7715,
"step": 870
},
{
"epoch": 2.867862969004894,
"grad_norm": 0.0,
"learning_rate": 8.180244965664845e-06,
"loss": 0.7685,
"step": 880
},
{
"epoch": 2.9004893964110927,
"grad_norm": 0.0,
"learning_rate": 7.97264494812405e-06,
"loss": 0.7456,
"step": 890
},
{
"epoch": 2.933115823817292,
"grad_norm": 0.0,
"learning_rate": 7.765952250520459e-06,
"loss": 0.8071,
"step": 900
},
{
"epoch": 2.965742251223491,
"grad_norm": 0.0,
"learning_rate": 7.560259375844719e-06,
"loss": 0.7667,
"step": 910
},
{
"epoch": 2.99836867862969,
"grad_norm": 0.0,
"learning_rate": 7.355658379627981e-06,
"loss": 0.764,
"step": 920
},
{
"epoch": 3.029363784665579,
"grad_norm": 0.0,
"learning_rate": 7.1522408287434774e-06,
"loss": 0.8021,
"step": 930
},
{
"epoch": 3.061990212071778,
"grad_norm": 0.0,
"learning_rate": 6.950097760426814e-06,
"loss": 0.7764,
"step": 940
},
{
"epoch": 3.094616639477977,
"grad_norm": 0.0,
"learning_rate": 6.74931964153325e-06,
"loss": 0.8317,
"step": 950
},
{
"epoch": 3.1272430668841764,
"grad_norm": 0.0,
"learning_rate": 6.549996328050296e-06,
"loss": 0.789,
"step": 960
},
{
"epoch": 3.1598694942903753,
"grad_norm": 0.0,
"learning_rate": 6.352217024883678e-06,
"loss": 0.7928,
"step": 970
},
{
"epoch": 3.1924959216965743,
"grad_norm": 0.0,
"learning_rate": 6.1560702459346845e-06,
"loss": 0.7768,
"step": 980
},
{
"epoch": 3.225122349102773,
"grad_norm": 0.0,
"learning_rate": 5.961643774486754e-06,
"loss": 0.7542,
"step": 990
},
{
"epoch": 3.257748776508972,
"grad_norm": 0.0,
"learning_rate": 5.769024623919064e-06,
"loss": 0.7807,
"step": 1000
},
{
"epoch": 3.2903752039151715,
"grad_norm": 0.0,
"learning_rate": 5.57829899876469e-06,
"loss": 0.7849,
"step": 1010
},
{
"epoch": 3.3230016313213704,
"grad_norm": 0.0,
"learning_rate": 5.38955225613069e-06,
"loss": 0.78,
"step": 1020
},
{
"epoch": 3.3556280587275693,
"grad_norm": 0.0,
"learning_rate": 5.202868867497542e-06,
"loss": 0.777,
"step": 1030
},
{
"epoch": 3.3882544861337682,
"grad_norm": 0.0,
"learning_rate": 5.01833238091485e-06,
"loss": 0.7735,
"step": 1040
},
{
"epoch": 3.4208809135399676,
"grad_norm": 0.0,
"learning_rate": 4.836025383610382e-06,
"loss": 0.7732,
"step": 1050
},
{
"epoch": 3.4535073409461665,
"grad_norm": 0.0,
"learning_rate": 4.656029465029057e-06,
"loss": 0.7516,
"step": 1060
},
{
"epoch": 3.4861337683523654,
"grad_norm": 0.0,
"learning_rate": 4.478425180318523e-06,
"loss": 0.7534,
"step": 1070
},
{
"epoch": 3.5187601957585644,
"grad_norm": 0.0,
"learning_rate": 4.3032920142776125e-06,
"loss": 0.7672,
"step": 1080
},
{
"epoch": 3.5513866231647633,
"grad_norm": 0.0,
"learning_rate": 4.1307083457838004e-06,
"loss": 0.7406,
"step": 1090
},
{
"epoch": 3.5840130505709626,
"grad_norm": 0.0,
"learning_rate": 3.960751412715629e-06,
"loss": 0.82,
"step": 1100
},
{
"epoch": 3.6166394779771616,
"grad_norm": 0.0,
"learning_rate": 3.7934972773857637e-06,
"loss": 0.7934,
"step": 1110
},
{
"epoch": 3.6492659053833605,
"grad_norm": 0.0,
"learning_rate": 3.6290207925001585e-06,
"loss": 0.7772,
"step": 1120
},
{
"epoch": 3.6818923327895594,
"grad_norm": 0.0,
"learning_rate": 3.4673955676585734e-06,
"loss": 0.7678,
"step": 1130
},
{
"epoch": 3.7145187601957588,
"grad_norm": 0.0,
"learning_rate": 3.308693936411421e-06,
"loss": 0.7717,
"step": 1140
},
{
"epoch": 3.7471451876019577,
"grad_norm": 0.0,
"learning_rate": 3.152986923887703e-06,
"loss": 0.7977,
"step": 1150
},
{
"epoch": 3.7797716150081566,
"grad_norm": 0.0,
"learning_rate": 3.000344215008524e-06,
"loss": 0.76,
"step": 1160
},
{
"epoch": 3.8123980424143555,
"grad_norm": 0.0,
"learning_rate": 2.8508341233003656e-06,
"loss": 0.7893,
"step": 1170
},
{
"epoch": 3.8450244698205545,
"grad_norm": 0.0,
"learning_rate": 2.7045235603221533e-06,
"loss": 0.7612,
"step": 1180
},
{
"epoch": 3.877650897226754,
"grad_norm": 0.0,
"learning_rate": 2.561478005719743e-06,
"loss": 0.7541,
"step": 1190
},
{
"epoch": 3.9102773246329527,
"grad_norm": 0.0,
"learning_rate": 2.421761477921232e-06,
"loss": 0.7643,
"step": 1200
},
{
"epoch": 3.9429037520391517,
"grad_norm": 0.0,
"learning_rate": 2.2854365054862383e-06,
"loss": 0.7838,
"step": 1210
},
{
"epoch": 3.9755301794453506,
"grad_norm": 0.0,
"learning_rate": 2.152564099121944e-06,
"loss": 0.788,
"step": 1220
},
{
"epoch": 4.006525285481239,
"grad_norm": 0.0,
"learning_rate": 2.0232037243784475e-06,
"loss": 0.7716,
"step": 1230
},
{
"epoch": 4.039151712887439,
"grad_norm": 0.0,
"learning_rate": 1.8974132750356156e-06,
"loss": 0.792,
"step": 1240
},
{
"epoch": 4.071778140293638,
"grad_norm": 0.0,
"learning_rate": 1.7752490471933769e-06,
"loss": 0.768,
"step": 1250
},
{
"epoch": 4.104404567699837,
"grad_norm": 0.0,
"learning_rate": 1.6567657140770477e-06,
"loss": 0.7654,
"step": 1260
},
{
"epoch": 4.137030995106036,
"grad_norm": 0.0,
"learning_rate": 1.542016301568926e-06,
"loss": 0.7698,
"step": 1270
},
{
"epoch": 4.169657422512235,
"grad_norm": 0.0,
"learning_rate": 1.4310521644771657e-06,
"loss": 0.745,
"step": 1280
},
{
"epoch": 4.202283849918434,
"grad_norm": 0.0,
"learning_rate": 1.3239229635525074e-06,
"loss": 0.7774,
"step": 1290
},
{
"epoch": 4.234910277324633,
"grad_norm": 0.0,
"learning_rate": 1.2206766432631766e-06,
"loss": 0.7848,
"step": 1300
},
{
"epoch": 4.267536704730832,
"grad_norm": 0.0,
"learning_rate": 1.121359410337859e-06,
"loss": 0.7814,
"step": 1310
},
{
"epoch": 4.300163132137031,
"grad_norm": 0.0,
"learning_rate": 1.0260157130864178e-06,
"loss": 0.809,
"step": 1320
},
{
"epoch": 4.33278955954323,
"grad_norm": 0.0,
"learning_rate": 9.346882215075348e-07,
"loss": 0.7976,
"step": 1330
},
{
"epoch": 4.365415986949429,
"grad_norm": 0.0,
"learning_rate": 8.474178081922524e-07,
"loss": 0.7825,
"step": 1340
},
{
"epoch": 4.398042414355628,
"grad_norm": 0.0,
"learning_rate": 7.642435300318906e-07,
"loss": 0.7712,
"step": 1350
},
{
"epoch": 4.430668841761827,
"grad_norm": 0.0,
"learning_rate": 6.852026107385756e-07,
"loss": 0.7711,
"step": 1360
},
{
"epoch": 4.463295269168026,
"grad_norm": 0.0,
"learning_rate": 6.103304241862006e-07,
"loss": 0.7903,
"step": 1370
},
{
"epoch": 4.495921696574225,
"grad_norm": 0.0,
"learning_rate": 5.396604785792281e-07,
"loss": 0.7527,
"step": 1380
},
{
"epoch": 4.528548123980424,
"grad_norm": 0.0,
"learning_rate": 4.7322440145647905e-07,
"loss": 0.7781,
"step": 1390
},
{
"epoch": 4.561174551386623,
"grad_norm": 0.0,
"learning_rate": 4.110519255365852e-07,
"loss": 0.8016,
"step": 1400
},
{
"epoch": 4.593800978792823,
"grad_norm": 0.0,
"learning_rate": 3.531708754114438e-07,
"loss": 0.7768,
"step": 1410
},
{
"epoch": 4.626427406199021,
"grad_norm": 0.0,
"learning_rate": 2.996071550936319e-07,
"loss": 0.7688,
"step": 1420
},
{
"epoch": 4.6590538336052205,
"grad_norm": 0.0,
"learning_rate": 2.503847364233614e-07,
"loss": 0.8049,
"step": 1430
},
{
"epoch": 4.691680261011419,
"grad_norm": 0.0,
"learning_rate": 2.0552564834014797e-07,
"loss": 0.7818,
"step": 1440
},
{
"epoch": 4.724306688417618,
"grad_norm": 0.0,
"learning_rate": 1.6504996702401243e-07,
"loss": 0.7737,
"step": 1450
},
{
"epoch": 4.756933115823817,
"grad_norm": 0.0,
"learning_rate": 1.2897580691060506e-07,
"loss": 0.8014,
"step": 1460
},
{
"epoch": 4.789559543230016,
"grad_norm": 0.0,
"learning_rate": 9.731931258429638e-08,
"loss": 0.7563,
"step": 1470
},
{
"epoch": 4.822185970636215,
"grad_norm": 0.0,
"learning_rate": 7.009465155285777e-08,
"loss": 0.7504,
"step": 1480
},
{
"epoch": 4.854812398042414,
"grad_norm": 0.0,
"learning_rate": 4.731400790693785e-08,
"loss": 0.7879,
"step": 1490
},
{
"epoch": 4.887438825448614,
"grad_norm": 0.0,
"learning_rate": 2.898757686722542e-08,
"loss": 0.7755,
"step": 1500
},
{
"epoch": 4.920065252854813,
"grad_norm": 0.0,
"learning_rate": 1.5123560221681488e-08,
"loss": 0.7803,
"step": 1510
},
{
"epoch": 4.952691680261012,
"grad_norm": 0.0,
"learning_rate": 5.728162654927705e-09,
"loss": 0.7493,
"step": 1520
},
{
"epoch": 4.985318107667211,
"grad_norm": 0.0,
"learning_rate": 8.05588971406479e-10,
"loss": 0.7814,
"step": 1530
}
],
"logging_steps": 10,
"max_steps": 1535,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.877356359182975e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}