Files
qwen2.5vl-3b-aha-7b-distilled/trainer_state.json
ModelHub XC f0aadb366e 初始化项目,由ModelHub XC社区提供模型
Model: waltonfuture/qwen2.5vl-3b-aha-7b-distilled
Source: Original Platform
2026-06-03 22:19:13 +08:00

721 lines
20 KiB
JSON

{
"best_global_step": 180,
"best_metric": 0.27287108,
"best_model_checkpoint": "/data/home/scyb089/CODE/scripts/ms-swift/3b/v32-20250504-043500/checkpoint-180",
"epoch": 2.9732620320855614,
"eval_steps": 20,
"global_step": 279,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0106951871657754,
"grad_norm": 3.2918701171875,
"learning_rate": 9.999683023724021e-06,
"loss": 0.2989569902420044,
"memory(GiB)": 28.98,
"step": 1,
"token_acc": 0.9019468186134852,
"train_speed(iter/s)": 0.075727
},
{
"epoch": 0.053475935828877004,
"grad_norm": 1.4581928253173828,
"learning_rate": 9.992077602401358e-06,
"loss": 0.26982036232948303,
"memory(GiB)": 28.98,
"step": 5,
"token_acc": 0.9124155874528606,
"train_speed(iter/s)": 0.162223
},
{
"epoch": 0.10695187165775401,
"grad_norm": 0.8813036680221558,
"learning_rate": 9.968335515358916e-06,
"loss": 0.26494245529174804,
"memory(GiB)": 28.98,
"step": 10,
"token_acc": 0.9064403726266386,
"train_speed(iter/s)": 0.196471
},
{
"epoch": 0.16042780748663102,
"grad_norm": 0.8264817595481873,
"learning_rate": 9.92884897657402e-06,
"loss": 0.27532644271850587,
"memory(GiB)": 28.98,
"step": 15,
"token_acc": 0.9036378177940428,
"train_speed(iter/s)": 0.206122
},
{
"epoch": 0.21390374331550802,
"grad_norm": 0.7590827345848083,
"learning_rate": 9.873743117270691e-06,
"loss": 0.24748692512512208,
"memory(GiB)": 28.98,
"step": 20,
"token_acc": 0.9165032561067131,
"train_speed(iter/s)": 0.213242
},
{
"epoch": 0.21390374331550802,
"eval_loss": 0.29857584834098816,
"eval_runtime": 1.6521,
"eval_samples_per_second": 36.318,
"eval_steps_per_second": 9.079,
"eval_token_acc": 0.9033078880407125,
"step": 20
},
{
"epoch": 0.26737967914438504,
"grad_norm": 0.7544880509376526,
"learning_rate": 9.803192565659898e-06,
"loss": 0.2472740650177002,
"memory(GiB)": 28.98,
"step": 25,
"token_acc": 0.9127769919849128,
"train_speed(iter/s)": 0.193549
},
{
"epoch": 0.32085561497326204,
"grad_norm": 0.7806485891342163,
"learning_rate": 9.717420893549902e-06,
"loss": 0.2667980670928955,
"memory(GiB)": 28.98,
"step": 30,
"token_acc": 0.908313332992902,
"train_speed(iter/s)": 0.199526
},
{
"epoch": 0.37433155080213903,
"grad_norm": 0.7090319395065308,
"learning_rate": 9.616699907856368e-06,
"loss": 0.23824496269226075,
"memory(GiB)": 28.98,
"step": 35,
"token_acc": 0.9139585630821934,
"train_speed(iter/s)": 0.202496
},
{
"epoch": 0.42780748663101603,
"grad_norm": 0.6566728949546814,
"learning_rate": 9.501348789257373e-06,
"loss": 0.24109985828399658,
"memory(GiB)": 28.98,
"step": 40,
"token_acc": 0.9166758030917662,
"train_speed(iter/s)": 0.20511
},
{
"epoch": 0.42780748663101603,
"eval_loss": 0.2843839228153229,
"eval_runtime": 1.6484,
"eval_samples_per_second": 36.398,
"eval_steps_per_second": 9.1,
"eval_token_acc": 0.9075595065545785,
"step": 40
},
{
"epoch": 0.48128342245989303,
"grad_norm": 0.6919281482696533,
"learning_rate": 9.371733080722911e-06,
"loss": 0.24000308513641358,
"memory(GiB)": 28.98,
"step": 45,
"token_acc": 0.9120257943391221,
"train_speed(iter/s)": 0.195603
},
{
"epoch": 0.5347593582887701,
"grad_norm": 0.6628720164299011,
"learning_rate": 9.228263529124199e-06,
"loss": 0.225927734375,
"memory(GiB)": 28.98,
"step": 50,
"token_acc": 0.922932112394543,
"train_speed(iter/s)": 0.199738
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.7601417899131775,
"learning_rate": 9.071394783593664e-06,
"loss": 0.24698638916015625,
"memory(GiB)": 28.98,
"step": 55,
"token_acc": 0.916304375460809,
"train_speed(iter/s)": 0.202796
},
{
"epoch": 0.6417112299465241,
"grad_norm": 0.7333827018737793,
"learning_rate": 8.90162395476046e-06,
"loss": 0.24123883247375488,
"memory(GiB)": 28.98,
"step": 60,
"token_acc": 0.9169203180670583,
"train_speed(iter/s)": 0.205478
},
{
"epoch": 0.6417112299465241,
"eval_loss": 0.2792617380619049,
"eval_runtime": 1.6412,
"eval_samples_per_second": 36.559,
"eval_steps_per_second": 9.14,
"eval_token_acc": 0.908719038876542,
"step": 60
},
{
"epoch": 0.6951871657754011,
"grad_norm": 0.732627809047699,
"learning_rate": 8.719489039427256e-06,
"loss": 0.2210240602493286,
"memory(GiB)": 28.98,
"step": 65,
"token_acc": 0.9183851177518306,
"train_speed(iter/s)": 0.19878
},
{
"epoch": 0.7486631016042781,
"grad_norm": 0.7144444584846497,
"learning_rate": 8.525567215680397e-06,
"loss": 0.24620118141174316,
"memory(GiB)": 28.98,
"step": 70,
"token_acc": 0.9128896697452457,
"train_speed(iter/s)": 0.20038
},
{
"epoch": 0.8021390374331551,
"grad_norm": 0.736126184463501,
"learning_rate": 8.320473013836197e-06,
"loss": 0.23789706230163574,
"memory(GiB)": 28.98,
"step": 75,
"token_acc": 0.9134095303360337,
"train_speed(iter/s)": 0.202414
},
{
"epoch": 0.8556149732620321,
"grad_norm": 0.7036953568458557,
"learning_rate": 8.104856369019525e-06,
"loss": 0.23406553268432617,
"memory(GiB)": 28.98,
"step": 80,
"token_acc": 0.9200627693460746,
"train_speed(iter/s)": 0.204729
},
{
"epoch": 0.8556149732620321,
"eval_loss": 0.2767316699028015,
"eval_runtime": 1.6471,
"eval_samples_per_second": 36.427,
"eval_steps_per_second": 9.107,
"eval_token_acc": 0.9091699681128611,
"step": 80
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.6711894273757935,
"learning_rate": 7.879400561546033e-06,
"loss": 0.23753652572631836,
"memory(GiB)": 28.98,
"step": 85,
"token_acc": 0.9143226902311286,
"train_speed(iter/s)": 0.200371
},
{
"epoch": 0.9625668449197861,
"grad_norm": 0.6814318895339966,
"learning_rate": 7.644820051634813e-06,
"loss": 0.23459360599517823,
"memory(GiB)": 28.98,
"step": 90,
"token_acc": 0.9142770409116383,
"train_speed(iter/s)": 0.201764
},
{
"epoch": 1.0106951871657754,
"grad_norm": 1.021850824356079,
"learning_rate": 7.401858215313228e-06,
"loss": 0.21953530311584474,
"memory(GiB)": 28.98,
"step": 95,
"token_acc": 0.9268375978563548,
"train_speed(iter/s)": 0.203547
},
{
"epoch": 1.0641711229946524,
"grad_norm": 0.7571138739585876,
"learning_rate": 7.151284988688731e-06,
"loss": 0.19227520227432252,
"memory(GiB)": 28.98,
"step": 100,
"token_acc": 0.9299330505442838,
"train_speed(iter/s)": 0.205218
},
{
"epoch": 1.0641711229946524,
"eval_loss": 0.27827945351600647,
"eval_runtime": 1.6406,
"eval_samples_per_second": 36.572,
"eval_steps_per_second": 9.143,
"eval_token_acc": 0.9097497342738429,
"step": 100
},
{
"epoch": 1.1176470588235294,
"grad_norm": 0.6977977156639099,
"learning_rate": 6.893894428052881e-06,
"loss": 0.18528327941894532,
"memory(GiB)": 28.98,
"step": 105,
"token_acc": 0.9313939048472141,
"train_speed(iter/s)": 0.200929
},
{
"epoch": 1.1711229946524064,
"grad_norm": 0.7503034472465515,
"learning_rate": 6.6305021935494755e-06,
"loss": 0.191499924659729,
"memory(GiB)": 28.98,
"step": 110,
"token_acc": 0.934816576879125,
"train_speed(iter/s)": 0.202263
},
{
"epoch": 1.2245989304812834,
"grad_norm": 0.6970927715301514,
"learning_rate": 6.361942964380967e-06,
"loss": 0.18341017961502076,
"memory(GiB)": 28.98,
"step": 115,
"token_acc": 0.9350552403702598,
"train_speed(iter/s)": 0.203567
},
{
"epoch": 1.2780748663101604,
"grad_norm": 0.7112876176834106,
"learning_rate": 6.089067793744258e-06,
"loss": 0.19445158243179322,
"memory(GiB)": 28.98,
"step": 120,
"token_acc": 0.9335015519281871,
"train_speed(iter/s)": 0.204703
},
{
"epoch": 1.2780748663101604,
"eval_loss": 0.27691978216171265,
"eval_runtime": 1.6606,
"eval_samples_per_second": 36.132,
"eval_steps_per_second": 9.033,
"eval_token_acc": 0.9094920604245177,
"step": 120
},
{
"epoch": 1.3315508021390374,
"grad_norm": 0.6548081636428833,
"learning_rate": 5.8127414118779825e-06,
"loss": 0.18807239532470704,
"memory(GiB)": 31.29,
"step": 125,
"token_acc": 0.9327750242123853,
"train_speed(iter/s)": 0.200918
},
{
"epoch": 1.3850267379679144,
"grad_norm": 0.709028422832489,
"learning_rate": 5.533839485767795e-06,
"loss": 0.19655026197433473,
"memory(GiB)": 31.29,
"step": 130,
"token_acc": 0.9308182054862607,
"train_speed(iter/s)": 0.201887
},
{
"epoch": 1.4385026737967914,
"grad_norm": 0.6588287949562073,
"learning_rate": 5.253245844193564e-06,
"loss": 0.19113950729370116,
"memory(GiB)": 31.29,
"step": 135,
"token_acc": 0.9270080346573307,
"train_speed(iter/s)": 0.202901
},
{
"epoch": 1.4919786096256684,
"grad_norm": 0.6656479239463806,
"learning_rate": 4.971849676912172e-06,
"loss": 0.18891613483428954,
"memory(GiB)": 31.29,
"step": 140,
"token_acc": 0.9305257651059378,
"train_speed(iter/s)": 0.203847
},
{
"epoch": 1.4919786096256684,
"eval_loss": 0.2746458649635315,
"eval_runtime": 1.6467,
"eval_samples_per_second": 36.436,
"eval_steps_per_second": 9.109,
"eval_token_acc": 0.9111991496762972,
"step": 140
},
{
"epoch": 1.5454545454545454,
"grad_norm": 0.7020911574363708,
"learning_rate": 4.6905427168515914e-06,
"loss": 0.19171638488769532,
"memory(GiB)": 31.29,
"step": 145,
"token_acc": 0.9305895351590245,
"train_speed(iter/s)": 0.201182
},
{
"epoch": 1.5989304812834224,
"grad_norm": 0.6727572083473206,
"learning_rate": 4.410216414245771e-06,
"loss": 0.1821829557418823,
"memory(GiB)": 31.29,
"step": 150,
"token_acc": 0.9352090736503919,
"train_speed(iter/s)": 0.202227
},
{
"epoch": 1.6524064171122994,
"grad_norm": 0.6589164733886719,
"learning_rate": 4.131759111665349e-06,
"loss": 0.18441460132598878,
"memory(GiB)": 31.29,
"step": 155,
"token_acc": 0.9374578346368156,
"train_speed(iter/s)": 0.203318
},
{
"epoch": 1.7058823529411766,
"grad_norm": 0.6176323890686035,
"learning_rate": 3.856053228896442e-06,
"loss": 0.18946645259857178,
"memory(GiB)": 31.29,
"step": 160,
"token_acc": 0.9367611881372071,
"train_speed(iter/s)": 0.20408
},
{
"epoch": 1.7058823529411766,
"eval_loss": 0.2751389443874359,
"eval_runtime": 1.6421,
"eval_samples_per_second": 36.539,
"eval_steps_per_second": 9.135,
"eval_token_acc": 0.9112957773697942,
"step": 160
},
{
"epoch": 1.7593582887700534,
"grad_norm": 0.6360734701156616,
"learning_rate": 3.58397246658848e-06,
"loss": 0.1823675274848938,
"memory(GiB)": 31.29,
"step": 165,
"token_acc": 0.9278697615463836,
"train_speed(iter/s)": 0.201592
},
{
"epoch": 1.8128342245989306,
"grad_norm": 0.5981405973434448,
"learning_rate": 3.316379037532644e-06,
"loss": 0.18013572692871094,
"memory(GiB)": 31.29,
"step": 170,
"token_acc": 0.9407218114408998,
"train_speed(iter/s)": 0.202459
},
{
"epoch": 1.8663101604278074,
"grad_norm": 0.5807086825370789,
"learning_rate": 3.0541209343448373e-06,
"loss": 0.1835346221923828,
"memory(GiB)": 31.29,
"step": 175,
"token_acc": 0.9373540226163772,
"train_speed(iter/s)": 0.203227
},
{
"epoch": 1.9197860962566846,
"grad_norm": 0.610285758972168,
"learning_rate": 2.7980292422118282e-06,
"loss": 0.18963263034820557,
"memory(GiB)": 31.29,
"step": 180,
"token_acc": 0.9329708446611044,
"train_speed(iter/s)": 0.204063
},
{
"epoch": 1.9197860962566846,
"eval_loss": 0.27287107706069946,
"eval_runtime": 1.6399,
"eval_samples_per_second": 36.589,
"eval_steps_per_second": 9.147,
"eval_token_acc": 0.911682288143782,
"step": 180
},
{
"epoch": 1.9732620320855614,
"grad_norm": 0.6412176489830017,
"learning_rate": 2.548915505216333e-06,
"loss": 0.18783329725265502,
"memory(GiB)": 31.29,
"step": 185,
"token_acc": 0.9265865937289413,
"train_speed(iter/s)": 0.201898
},
{
"epoch": 2.021390374331551,
"grad_norm": 0.607214629650116,
"learning_rate": 2.307569154587056e-06,
"loss": 0.1662315845489502,
"memory(GiB)": 31.29,
"step": 190,
"token_acc": 0.9465564026359995,
"train_speed(iter/s)": 0.203071
},
{
"epoch": 2.0748663101604277,
"grad_norm": 0.6007011532783508,
"learning_rate": 2.074755007023461e-06,
"loss": 0.16532043218612671,
"memory(GiB)": 31.29,
"step": 195,
"token_acc": 0.9450870631362545,
"train_speed(iter/s)": 0.203942
},
{
"epoch": 2.128342245989305,
"grad_norm": 0.6896679997444153,
"learning_rate": 1.8512108410229878e-06,
"loss": 0.15121257305145264,
"memory(GiB)": 31.29,
"step": 200,
"token_acc": 0.9484533555566449,
"train_speed(iter/s)": 0.204504
},
{
"epoch": 2.128342245989305,
"eval_loss": 0.28094714879989624,
"eval_runtime": 1.6463,
"eval_samples_per_second": 36.446,
"eval_steps_per_second": 9.111,
"eval_token_acc": 0.9109736850581377,
"step": 200
},
{
"epoch": 2.1818181818181817,
"grad_norm": 0.6233195662498474,
"learning_rate": 1.6376450588911985e-06,
"loss": 0.15310670137405397,
"memory(GiB)": 31.29,
"step": 205,
"token_acc": 0.9403647217565523,
"train_speed(iter/s)": 0.202351
},
{
"epoch": 2.235294117647059,
"grad_norm": 0.6323373913764954,
"learning_rate": 1.434734441843899e-06,
"loss": 0.15562598705291747,
"memory(GiB)": 31.29,
"step": 210,
"token_acc": 0.9448852085089503,
"train_speed(iter/s)": 0.202913
},
{
"epoch": 2.2887700534759357,
"grad_norm": 0.6409267783164978,
"learning_rate": 1.2431220053151832e-06,
"loss": 0.15542089939117432,
"memory(GiB)": 31.29,
"step": 215,
"token_acc": 0.9450054780164817,
"train_speed(iter/s)": 0.203493
},
{
"epoch": 2.342245989304813,
"grad_norm": 0.6448594331741333,
"learning_rate": 1.063414961267859e-06,
"loss": 0.1522960662841797,
"memory(GiB)": 31.29,
"step": 220,
"token_acc": 0.9481132075471698,
"train_speed(iter/s)": 0.204302
},
{
"epoch": 2.342245989304813,
"eval_loss": 0.2818092703819275,
"eval_runtime": 1.643,
"eval_samples_per_second": 36.519,
"eval_steps_per_second": 9.13,
"eval_token_acc": 0.9113924050632911,
"step": 220
},
{
"epoch": 2.3957219251336896,
"grad_norm": 0.6113874316215515,
"learning_rate": 8.961827939636198e-07,
"loss": 0.16363799571990967,
"memory(GiB)": 31.29,
"step": 225,
"token_acc": 0.9390907965842993,
"train_speed(iter/s)": 0.202503
},
{
"epoch": 2.449197860962567,
"grad_norm": 0.597212016582489,
"learning_rate": 7.41955455290726e-07,
"loss": 0.15171511173248292,
"memory(GiB)": 31.29,
"step": 230,
"token_acc": 0.9467608786903596,
"train_speed(iter/s)": 0.20326
},
{
"epoch": 2.502673796791444,
"grad_norm": 0.6323869228363037,
"learning_rate": 6.012216853682001e-07,
"loss": 0.16323232650756836,
"memory(GiB)": 31.29,
"step": 235,
"token_acc": 0.9391786687427014,
"train_speed(iter/s)": 0.20378
},
{
"epoch": 2.556149732620321,
"grad_norm": 0.6109181642532349,
"learning_rate": 4.7442746374839363e-07,
"loss": 0.1464900016784668,
"memory(GiB)": 31.3,
"step": 240,
"token_acc": 0.9483738659414637,
"train_speed(iter/s)": 0.20435
},
{
"epoch": 2.556149732620321,
"eval_loss": 0.28033456206321716,
"eval_runtime": 1.6424,
"eval_samples_per_second": 36.532,
"eval_steps_per_second": 9.133,
"eval_token_acc": 0.9115856604502851,
"step": 240
},
{
"epoch": 2.6096256684491976,
"grad_norm": 0.6514647006988525,
"learning_rate": 3.619745961260623e-07,
"loss": 0.1541598081588745,
"memory(GiB)": 31.3,
"step": 245,
"token_acc": 0.9415382075569038,
"train_speed(iter/s)": 0.202522
},
{
"epoch": 2.663101604278075,
"grad_norm": 0.5899693965911865,
"learning_rate": 2.6421944103256657e-07,
"loss": 0.15795296430587769,
"memory(GiB)": 31.3,
"step": 250,
"token_acc": 0.947255862532017,
"train_speed(iter/s)": 0.203017
},
{
"epoch": 2.716577540106952,
"grad_norm": 0.612455427646637,
"learning_rate": 1.814717805502958e-07,
"loss": 0.15344234704971313,
"memory(GiB)": 31.3,
"step": 255,
"token_acc": 0.9460515010284584,
"train_speed(iter/s)": 0.203605
},
{
"epoch": 2.770053475935829,
"grad_norm": 0.6128495931625366,
"learning_rate": 1.1399383862592928e-07,
"loss": 0.1595083236694336,
"memory(GiB)": 31.3,
"step": 260,
"token_acc": 0.9440190249702735,
"train_speed(iter/s)": 0.20408
},
{
"epoch": 2.770053475935829,
"eval_loss": 0.2802920639514923,
"eval_runtime": 1.6389,
"eval_samples_per_second": 36.609,
"eval_steps_per_second": 9.152,
"eval_token_acc": 0.9115856604502851,
"step": 260
},
{
"epoch": 2.8235294117647056,
"grad_norm": 0.5782672166824341,
"learning_rate": 6.199945009349173e-08,
"loss": 0.15760741233825684,
"memory(GiB)": 31.3,
"step": 265,
"token_acc": 0.9367169337749707,
"train_speed(iter/s)": 0.202464
},
{
"epoch": 2.877005347593583,
"grad_norm": 0.6260784864425659,
"learning_rate": 2.5653383040524228e-08,
"loss": 0.14205594062805177,
"memory(GiB)": 31.3,
"step": 270,
"token_acc": 0.9525445321564256,
"train_speed(iter/s)": 0.202893
},
{
"epoch": 2.93048128342246,
"grad_norm": 0.6263572573661804,
"learning_rate": 5.0708166647628345e-09,
"loss": 0.1594037890434265,
"memory(GiB)": 31.3,
"step": 275,
"token_acc": 0.9494285781334335,
"train_speed(iter/s)": 0.203622
},
{
"epoch": 2.9732620320855614,
"eval_loss": 0.27989062666893005,
"eval_runtime": 1.6446,
"eval_samples_per_second": 36.483,
"eval_steps_per_second": 9.121,
"eval_token_acc": 0.9118433342996103,
"step": 279
}
],
"logging_steps": 5,
"max_steps": 279,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.8413396385162854e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}