Files
openrubric-judgment-sft/trainer_state.json

821 lines
21 KiB
JSON
Raw Permalink Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1114,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017969451931716084,
"grad_norm": 1.0288746356964111,
"learning_rate": 4.959605026929982e-06,
"loss": 0.5949527740478515,
"step": 10
},
{
"epoch": 0.03593890386343217,
"grad_norm": 0.6101402044296265,
"learning_rate": 4.9147217235188516e-06,
"loss": 0.41325950622558594,
"step": 20
},
{
"epoch": 0.05390835579514825,
"grad_norm": 0.5305581092834473,
"learning_rate": 4.86983842010772e-06,
"loss": 0.3709533929824829,
"step": 30
},
{
"epoch": 0.07187780772686433,
"grad_norm": 0.5169686675071716,
"learning_rate": 4.8249551166965895e-06,
"loss": 0.3509422540664673,
"step": 40
},
{
"epoch": 0.08984725965858041,
"grad_norm": 0.5222465991973877,
"learning_rate": 4.780071813285458e-06,
"loss": 0.3454415321350098,
"step": 50
},
{
"epoch": 0.1078167115902965,
"grad_norm": 0.4856426417827606,
"learning_rate": 4.7351885098743274e-06,
"loss": 0.33249969482421876,
"step": 60
},
{
"epoch": 0.12578616352201258,
"grad_norm": 0.5357626676559448,
"learning_rate": 4.690305206463196e-06,
"loss": 0.3292850971221924,
"step": 70
},
{
"epoch": 0.14375561545372867,
"grad_norm": 0.46837398409843445,
"learning_rate": 4.6454219030520645e-06,
"loss": 0.3244313716888428,
"step": 80
},
{
"epoch": 0.16172506738544473,
"grad_norm": 0.48174625635147095,
"learning_rate": 4.600538599640934e-06,
"loss": 0.3236015558242798,
"step": 90
},
{
"epoch": 0.17969451931716082,
"grad_norm": 0.5259532332420349,
"learning_rate": 4.5556552962298025e-06,
"loss": 0.3194127559661865,
"step": 100
},
{
"epoch": 0.1976639712488769,
"grad_norm": 0.5332797765731812,
"learning_rate": 4.510771992818672e-06,
"loss": 0.31786675453186036,
"step": 110
},
{
"epoch": 0.215633423180593,
"grad_norm": 0.4909115433692932,
"learning_rate": 4.465888689407541e-06,
"loss": 0.3128951072692871,
"step": 120
},
{
"epoch": 0.23360287511230907,
"grad_norm": 0.4780581295490265,
"learning_rate": 4.42100538599641e-06,
"loss": 0.31437077522277834,
"step": 130
},
{
"epoch": 0.25157232704402516,
"grad_norm": 0.5149464011192322,
"learning_rate": 4.376122082585278e-06,
"loss": 0.30969116687774656,
"step": 140
},
{
"epoch": 0.2695417789757412,
"grad_norm": 0.4815337657928467,
"learning_rate": 4.331238779174148e-06,
"loss": 0.31036303043365476,
"step": 150
},
{
"epoch": 0.28751123090745734,
"grad_norm": 0.4882141053676605,
"learning_rate": 4.286355475763016e-06,
"loss": 0.30779433250427246,
"step": 160
},
{
"epoch": 0.3054806828391734,
"grad_norm": 0.47035127878189087,
"learning_rate": 4.241472172351886e-06,
"loss": 0.3062736511230469,
"step": 170
},
{
"epoch": 0.32345013477088946,
"grad_norm": 0.47444701194763184,
"learning_rate": 4.196588868940754e-06,
"loss": 0.30041847229003904,
"step": 180
},
{
"epoch": 0.3414195867026056,
"grad_norm": 0.4834694564342499,
"learning_rate": 4.151705565529624e-06,
"loss": 0.29730544090270994,
"step": 190
},
{
"epoch": 0.35938903863432164,
"grad_norm": 0.508245587348938,
"learning_rate": 4.106822262118492e-06,
"loss": 0.30029687881469724,
"step": 200
},
{
"epoch": 0.37735849056603776,
"grad_norm": 0.48643767833709717,
"learning_rate": 4.061938958707361e-06,
"loss": 0.29685449600219727,
"step": 210
},
{
"epoch": 0.3953279424977538,
"grad_norm": 0.4577917456626892,
"learning_rate": 4.01705565529623e-06,
"loss": 0.2990954160690308,
"step": 220
},
{
"epoch": 0.4132973944294699,
"grad_norm": 0.5375077724456787,
"learning_rate": 3.9721723518850995e-06,
"loss": 0.30233011245727537,
"step": 230
},
{
"epoch": 0.431266846361186,
"grad_norm": 0.4925467371940613,
"learning_rate": 3.927289048473968e-06,
"loss": 0.2941945314407349,
"step": 240
},
{
"epoch": 0.44923629829290207,
"grad_norm": 0.5110061168670654,
"learning_rate": 3.882405745062837e-06,
"loss": 0.3003401279449463,
"step": 250
},
{
"epoch": 0.46720575022461813,
"grad_norm": 0.44966429471969604,
"learning_rate": 3.837522441651706e-06,
"loss": 0.2935019016265869,
"step": 260
},
{
"epoch": 0.48517520215633425,
"grad_norm": 0.49473223090171814,
"learning_rate": 3.792639138240575e-06,
"loss": 0.2941242218017578,
"step": 270
},
{
"epoch": 0.5031446540880503,
"grad_norm": 0.4826172888278961,
"learning_rate": 3.7477558348294435e-06,
"loss": 0.2936396598815918,
"step": 280
},
{
"epoch": 0.5211141060197664,
"grad_norm": 0.5087786316871643,
"learning_rate": 3.702872531418313e-06,
"loss": 0.28728442192077636,
"step": 290
},
{
"epoch": 0.5390835579514824,
"grad_norm": 0.45754265785217285,
"learning_rate": 3.6579892280071814e-06,
"loss": 0.29381372928619387,
"step": 300
},
{
"epoch": 0.5570530098831986,
"grad_norm": 0.47864410281181335,
"learning_rate": 3.6131059245960504e-06,
"loss": 0.28871979713439944,
"step": 310
},
{
"epoch": 0.5750224618149147,
"grad_norm": 0.446613073348999,
"learning_rate": 3.5682226211849198e-06,
"loss": 0.2919660806655884,
"step": 320
},
{
"epoch": 0.5929919137466307,
"grad_norm": 0.5203211903572083,
"learning_rate": 3.5233393177737883e-06,
"loss": 0.2949108600616455,
"step": 330
},
{
"epoch": 0.6109613656783468,
"grad_norm": 0.5251737236976624,
"learning_rate": 3.4784560143626573e-06,
"loss": 0.28940815925598146,
"step": 340
},
{
"epoch": 0.6289308176100629,
"grad_norm": 0.4626797139644623,
"learning_rate": 3.4335727109515267e-06,
"loss": 0.2877013683319092,
"step": 350
},
{
"epoch": 0.6469002695417789,
"grad_norm": 0.5425576567649841,
"learning_rate": 3.3886894075403952e-06,
"loss": 0.28816981315612794,
"step": 360
},
{
"epoch": 0.6648697214734951,
"grad_norm": 0.5507893562316895,
"learning_rate": 3.343806104129264e-06,
"loss": 0.28344998359680174,
"step": 370
},
{
"epoch": 0.6828391734052112,
"grad_norm": 0.45895373821258545,
"learning_rate": 3.2989228007181327e-06,
"loss": 0.28453927040100097,
"step": 380
},
{
"epoch": 0.7008086253369272,
"grad_norm": 0.47491055727005005,
"learning_rate": 3.254039497307002e-06,
"loss": 0.27885701656341555,
"step": 390
},
{
"epoch": 0.7187780772686433,
"grad_norm": 0.4567403793334961,
"learning_rate": 3.209156193895871e-06,
"loss": 0.2846828937530518,
"step": 400
},
{
"epoch": 0.7367475292003594,
"grad_norm": 0.506420910358429,
"learning_rate": 3.1642728904847396e-06,
"loss": 0.2884047269821167,
"step": 410
},
{
"epoch": 0.7547169811320755,
"grad_norm": 0.4960302710533142,
"learning_rate": 3.119389587073609e-06,
"loss": 0.28609886169433596,
"step": 420
},
{
"epoch": 0.7726864330637916,
"grad_norm": 0.44618239998817444,
"learning_rate": 3.074506283662478e-06,
"loss": 0.2808084487915039,
"step": 430
},
{
"epoch": 0.7906558849955077,
"grad_norm": 0.45904698967933655,
"learning_rate": 3.0296229802513465e-06,
"loss": 0.28656601905822754,
"step": 440
},
{
"epoch": 0.8086253369272237,
"grad_norm": 0.5420985817909241,
"learning_rate": 2.984739676840216e-06,
"loss": 0.2885767936706543,
"step": 450
},
{
"epoch": 0.8265947888589398,
"grad_norm": 0.49061647057533264,
"learning_rate": 2.939856373429085e-06,
"loss": 0.28384861946105955,
"step": 460
},
{
"epoch": 0.8445642407906558,
"grad_norm": 0.5167312026023865,
"learning_rate": 2.8949730700179535e-06,
"loss": 0.28023710250854494,
"step": 470
},
{
"epoch": 0.862533692722372,
"grad_norm": 0.46029844880104065,
"learning_rate": 2.8500897666068224e-06,
"loss": 0.280789852142334,
"step": 480
},
{
"epoch": 0.8805031446540881,
"grad_norm": 0.44982901215553284,
"learning_rate": 2.8052064631956914e-06,
"loss": 0.27998642921447753,
"step": 490
},
{
"epoch": 0.8984725965858041,
"grad_norm": 0.4832385182380676,
"learning_rate": 2.7603231597845604e-06,
"loss": 0.2860716819763184,
"step": 500
},
{
"epoch": 0.9164420485175202,
"grad_norm": 0.5139860510826111,
"learning_rate": 2.715439856373429e-06,
"loss": 0.2779590845108032,
"step": 510
},
{
"epoch": 0.9344115004492363,
"grad_norm": 0.4550414979457855,
"learning_rate": 2.6705565529622983e-06,
"loss": 0.2789080381393433,
"step": 520
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.4613369107246399,
"learning_rate": 2.6256732495511673e-06,
"loss": 0.28540740013122556,
"step": 530
},
{
"epoch": 0.9703504043126685,
"grad_norm": 0.45095086097717285,
"learning_rate": 2.580789946140036e-06,
"loss": 0.276381254196167,
"step": 540
},
{
"epoch": 0.9883198562443846,
"grad_norm": 0.48203322291374207,
"learning_rate": 2.535906642728905e-06,
"loss": 0.2832359790802002,
"step": 550
},
{
"epoch": 1.005390835579515,
"grad_norm": 0.4708728492259979,
"learning_rate": 2.491023339317774e-06,
"loss": 0.2769860029220581,
"step": 560
},
{
"epoch": 1.0233602875112309,
"grad_norm": 0.4912715554237366,
"learning_rate": 2.4461400359066427e-06,
"loss": 0.2540097713470459,
"step": 570
},
{
"epoch": 1.041329739442947,
"grad_norm": 0.48824694752693176,
"learning_rate": 2.4012567324955117e-06,
"loss": 0.2609401226043701,
"step": 580
},
{
"epoch": 1.059299191374663,
"grad_norm": 0.4870210289955139,
"learning_rate": 2.356373429084381e-06,
"loss": 0.25025138854980467,
"step": 590
},
{
"epoch": 1.0772686433063792,
"grad_norm": 0.5163658261299133,
"learning_rate": 2.3114901256732496e-06,
"loss": 0.25263664722442625,
"step": 600
},
{
"epoch": 1.0952380952380953,
"grad_norm": 0.5006254315376282,
"learning_rate": 2.2666068222621186e-06,
"loss": 0.25940570831298826,
"step": 610
},
{
"epoch": 1.1132075471698113,
"grad_norm": 0.511043131351471,
"learning_rate": 2.2217235188509876e-06,
"loss": 0.2521126508712769,
"step": 620
},
{
"epoch": 1.1311769991015275,
"grad_norm": 0.49282217025756836,
"learning_rate": 2.1768402154398565e-06,
"loss": 0.25096635818481444,
"step": 630
},
{
"epoch": 1.1491464510332434,
"grad_norm": 0.5031591653823853,
"learning_rate": 2.1319569120287255e-06,
"loss": 0.2539719581604004,
"step": 640
},
{
"epoch": 1.1671159029649596,
"grad_norm": 0.5004000067710876,
"learning_rate": 2.0870736086175945e-06,
"loss": 0.2542546510696411,
"step": 650
},
{
"epoch": 1.1850853548966755,
"grad_norm": 0.47906896471977234,
"learning_rate": 2.0421903052064634e-06,
"loss": 0.2506051778793335,
"step": 660
},
{
"epoch": 1.2030548068283917,
"grad_norm": 0.5111077427864075,
"learning_rate": 1.9973070017953324e-06,
"loss": 0.2501336336135864,
"step": 670
},
{
"epoch": 1.221024258760108,
"grad_norm": 0.46470290422439575,
"learning_rate": 1.9524236983842014e-06,
"loss": 0.2527280330657959,
"step": 680
},
{
"epoch": 1.2389937106918238,
"grad_norm": 0.49279844760894775,
"learning_rate": 1.9075403949730703e-06,
"loss": 0.25149285793304443,
"step": 690
},
{
"epoch": 1.25696316262354,
"grad_norm": 0.48192131519317627,
"learning_rate": 1.862657091561939e-06,
"loss": 0.2485593795776367,
"step": 700
},
{
"epoch": 1.2749326145552562,
"grad_norm": 0.49026069045066833,
"learning_rate": 1.817773788150808e-06,
"loss": 0.24832606315612793,
"step": 710
},
{
"epoch": 1.2929020664869721,
"grad_norm": 0.46640709042549133,
"learning_rate": 1.7728904847396768e-06,
"loss": 0.2521926164627075,
"step": 720
},
{
"epoch": 1.310871518418688,
"grad_norm": 0.5054717063903809,
"learning_rate": 1.728007181328546e-06,
"loss": 0.25048768520355225,
"step": 730
},
{
"epoch": 1.3288409703504043,
"grad_norm": 0.4634091258049011,
"learning_rate": 1.683123877917415e-06,
"loss": 0.24852404594421387,
"step": 740
},
{
"epoch": 1.3468104222821204,
"grad_norm": 0.4614594578742981,
"learning_rate": 1.6382405745062837e-06,
"loss": 0.2514226198196411,
"step": 750
},
{
"epoch": 1.3647798742138364,
"grad_norm": 0.5008041262626648,
"learning_rate": 1.593357271095153e-06,
"loss": 0.2507458686828613,
"step": 760
},
{
"epoch": 1.3827493261455526,
"grad_norm": 0.47305938601493835,
"learning_rate": 1.5484739676840217e-06,
"loss": 0.2497103691101074,
"step": 770
},
{
"epoch": 1.4007187780772687,
"grad_norm": 0.5139908194541931,
"learning_rate": 1.5035906642728906e-06,
"loss": 0.24853968620300293,
"step": 780
},
{
"epoch": 1.4186882300089847,
"grad_norm": 0.4631156027317047,
"learning_rate": 1.4587073608617596e-06,
"loss": 0.248740816116333,
"step": 790
},
{
"epoch": 1.4366576819407009,
"grad_norm": 0.47681012749671936,
"learning_rate": 1.4138240574506283e-06,
"loss": 0.2534752368927002,
"step": 800
},
{
"epoch": 1.4546271338724168,
"grad_norm": 0.4538913667201996,
"learning_rate": 1.3689407540394975e-06,
"loss": 0.24337444305419922,
"step": 810
},
{
"epoch": 1.472596585804133,
"grad_norm": 0.48104986548423767,
"learning_rate": 1.3240574506283663e-06,
"loss": 0.2502609729766846,
"step": 820
},
{
"epoch": 1.490566037735849,
"grad_norm": 0.4610423147678375,
"learning_rate": 1.2791741472172353e-06,
"loss": 0.24652738571166993,
"step": 830
},
{
"epoch": 1.5085354896675651,
"grad_norm": 0.4587244689464569,
"learning_rate": 1.2342908438061042e-06,
"loss": 0.25362207889556887,
"step": 840
},
{
"epoch": 1.5265049415992813,
"grad_norm": 0.4708814322948456,
"learning_rate": 1.1894075403949732e-06,
"loss": 0.24814538955688475,
"step": 850
},
{
"epoch": 1.5444743935309972,
"grad_norm": 0.4898167550563812,
"learning_rate": 1.144524236983842e-06,
"loss": 0.2517171621322632,
"step": 860
},
{
"epoch": 1.5624438454627134,
"grad_norm": 0.5054773688316345,
"learning_rate": 1.0996409335727111e-06,
"loss": 0.25095720291137696,
"step": 870
},
{
"epoch": 1.5804132973944296,
"grad_norm": 0.5150067806243896,
"learning_rate": 1.05475763016158e-06,
"loss": 0.25122294425964353,
"step": 880
},
{
"epoch": 1.5983827493261455,
"grad_norm": 0.44859108328819275,
"learning_rate": 1.0098743267504488e-06,
"loss": 0.24518187046051027,
"step": 890
},
{
"epoch": 1.6163522012578615,
"grad_norm": 0.4460717737674713,
"learning_rate": 9.649910233393178e-07,
"loss": 0.25164237022399905,
"step": 900
},
{
"epoch": 1.6343216531895777,
"grad_norm": 0.4884060323238373,
"learning_rate": 9.201077199281867e-07,
"loss": 0.2497255325317383,
"step": 910
},
{
"epoch": 1.6522911051212938,
"grad_norm": 0.4527634084224701,
"learning_rate": 8.752244165170558e-07,
"loss": 0.2494762897491455,
"step": 920
},
{
"epoch": 1.6702605570530098,
"grad_norm": 0.47182497382164,
"learning_rate": 8.303411131059247e-07,
"loss": 0.24840357303619384,
"step": 930
},
{
"epoch": 1.688230008984726,
"grad_norm": 0.4759376347064972,
"learning_rate": 7.854578096947936e-07,
"loss": 0.25184221267700196,
"step": 940
},
{
"epoch": 1.7061994609164421,
"grad_norm": 0.495343416929245,
"learning_rate": 7.405745062836626e-07,
"loss": 0.25055861473083496,
"step": 950
},
{
"epoch": 1.724168912848158,
"grad_norm": 0.5005154609680176,
"learning_rate": 6.956912028725314e-07,
"loss": 0.25119876861572266,
"step": 960
},
{
"epoch": 1.742138364779874,
"grad_norm": 0.47676777839660645,
"learning_rate": 6.508078994614005e-07,
"loss": 0.2516517162322998,
"step": 970
},
{
"epoch": 1.7601078167115904,
"grad_norm": 0.4394581913948059,
"learning_rate": 6.059245960502694e-07,
"loss": 0.250733470916748,
"step": 980
},
{
"epoch": 1.7780772686433064,
"grad_norm": 0.4702657163143158,
"learning_rate": 5.610412926391383e-07,
"loss": 0.2478208065032959,
"step": 990
},
{
"epoch": 1.7960467205750223,
"grad_norm": 0.4843612611293793,
"learning_rate": 5.161579892280072e-07,
"loss": 0.24935145378112794,
"step": 1000
},
{
"epoch": 1.8140161725067385,
"grad_norm": 0.4673105776309967,
"learning_rate": 4.7127468581687615e-07,
"loss": 0.24984090328216552,
"step": 1010
},
{
"epoch": 1.8319856244384547,
"grad_norm": 0.4820215404033661,
"learning_rate": 4.2639138240574507e-07,
"loss": 0.24917204380035402,
"step": 1020
},
{
"epoch": 1.8499550763701706,
"grad_norm": 0.45277148485183716,
"learning_rate": 3.815080789946141e-07,
"loss": 0.24739840030670165,
"step": 1030
},
{
"epoch": 1.8679245283018868,
"grad_norm": 0.48467275500297546,
"learning_rate": 3.3662477558348295e-07,
"loss": 0.24694859981536865,
"step": 1040
},
{
"epoch": 1.885893980233603,
"grad_norm": 0.46758314967155457,
"learning_rate": 2.917414721723519e-07,
"loss": 0.24703009128570558,
"step": 1050
},
{
"epoch": 1.903863432165319,
"grad_norm": 0.4634384512901306,
"learning_rate": 2.4685816876122083e-07,
"loss": 0.2495879650115967,
"step": 1060
},
{
"epoch": 1.921832884097035,
"grad_norm": 0.4621906578540802,
"learning_rate": 2.0197486535008978e-07,
"loss": 0.25526316165924073,
"step": 1070
},
{
"epoch": 1.939802336028751,
"grad_norm": 0.46646031737327576,
"learning_rate": 1.5709156193895872e-07,
"loss": 0.24767594337463378,
"step": 1080
},
{
"epoch": 1.9577717879604672,
"grad_norm": 0.4569203555583954,
"learning_rate": 1.1220825852782766e-07,
"loss": 0.24955098628997802,
"step": 1090
},
{
"epoch": 1.9757412398921832,
"grad_norm": 0.47747698426246643,
"learning_rate": 6.732495511669659e-08,
"loss": 0.2489546775817871,
"step": 1100
},
{
"epoch": 1.9937106918238994,
"grad_norm": 0.47046294808387756,
"learning_rate": 2.2441651705565532e-08,
"loss": 0.24430301189422607,
"step": 1110
},
{
"epoch": 2.0,
"step": 1114,
"total_flos": 1.4534558685629252e+19,
"train_loss": 0.27833450065266935,
"train_runtime": 6971.6374,
"train_samples_per_second": 20.435,
"train_steps_per_second": 0.16
}
],
"logging_steps": 10,
"max_steps": 1114,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.4534558685629252e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}