{ "best_global_step": 1540, "best_metric": 0.20705882, "best_model_checkpoint": "/data/home/scyb089/CODE/scripts/ms-swift/3b/v12-20250430-202042/checkpoint-1540", "epoch": 2.9972559780478245, "eval_steps": 20, "global_step": 2391, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012544100352802822, "grad_norm": 2.5221166610717773, "learning_rate": 9.999995684008912e-06, "loss": 0.34258008003234863, "memory(GiB)": 28.82, "step": 1, "token_acc": 0.8867170626349892, "train_speed(iter/s)": 0.063413 }, { "epoch": 0.006272050176401411, "grad_norm": 1.4420850276947021, "learning_rate": 9.999892100595329e-06, "loss": 0.32799670100212097, "memory(GiB)": 28.86, "step": 5, "token_acc": 0.8843052006239421, "train_speed(iter/s)": 0.122976 }, { "epoch": 0.012544100352802822, "grad_norm": 1.0056296586990356, "learning_rate": 9.999568407038233e-06, "loss": 0.3203620672225952, "memory(GiB)": 28.86, "step": 10, "token_acc": 0.891963781939332, "train_speed(iter/s)": 0.138195 }, { "epoch": 0.018816150529204233, "grad_norm": 0.8973207473754883, "learning_rate": 9.999028933299243e-06, "loss": 0.2882222652435303, "memory(GiB)": 28.86, "step": 15, "token_acc": 0.9004290263995999, "train_speed(iter/s)": 0.14514 }, { "epoch": 0.025088200705605645, "grad_norm": 0.9302909970283508, "learning_rate": 9.99827370266192e-06, "loss": 0.2774477481842041, "memory(GiB)": 28.86, "step": 20, "token_acc": 0.9076236168501622, "train_speed(iter/s)": 0.146392 }, { "epoch": 0.025088200705605645, "eval_loss": 0.3028348684310913, "eval_runtime": 29.847, "eval_samples_per_second": 17.255, "eval_steps_per_second": 4.322, "eval_token_acc": 0.903765129060793, "step": 20 }, { "epoch": 0.03136025088200706, "grad_norm": 0.86534184217453, "learning_rate": 9.99730274772184e-06, "loss": 0.2807079553604126, "memory(GiB)": 28.86, "step": 25, "token_acc": 0.9082198329790155, "train_speed(iter/s)": 0.119433 }, { "epoch": 0.037632301058408466, "grad_norm": 1.1536375284194946, "learning_rate": 9.996116110385186e-06, "loss": 0.283935022354126, "memory(GiB)": 28.86, "step": 30, "token_acc": 0.9082473717980356, "train_speed(iter/s)": 0.125112 }, { "epoch": 0.04390435123480988, "grad_norm": 0.81349778175354, "learning_rate": 9.99471384186694e-06, "loss": 0.27507519721984863, "memory(GiB)": 28.87, "step": 35, "token_acc": 0.9149028749028749, "train_speed(iter/s)": 0.128991 }, { "epoch": 0.05017640141121129, "grad_norm": 0.8251215815544128, "learning_rate": 9.99309600268868e-06, "loss": 0.25176520347595216, "memory(GiB)": 28.87, "step": 40, "token_acc": 0.9197068447954133, "train_speed(iter/s)": 0.131242 }, { "epoch": 0.05017640141121129, "eval_loss": 0.2838481664657593, "eval_runtime": 29.7701, "eval_samples_per_second": 17.299, "eval_steps_per_second": 4.333, "eval_token_acc": 0.9088420373033275, "step": 40 }, { "epoch": 0.0564484515876127, "grad_norm": 0.8325265645980835, "learning_rate": 9.991262662675962e-06, "loss": 0.26733884811401365, "memory(GiB)": 28.87, "step": 45, "token_acc": 0.9162756526424058, "train_speed(iter/s)": 0.119295 }, { "epoch": 0.06272050176401411, "grad_norm": 0.8100744485855103, "learning_rate": 9.9892139009553e-06, "loss": 0.2542246103286743, "memory(GiB)": 28.87, "step": 50, "token_acc": 0.9066385049327594, "train_speed(iter/s)": 0.122408 }, { "epoch": 0.06899255194041552, "grad_norm": 0.877231240272522, "learning_rate": 9.986949805950763e-06, "loss": 0.2703877925872803, "memory(GiB)": 28.87, "step": 55, "token_acc": 0.9160455515225402, "train_speed(iter/s)": 0.124536 }, { "epoch": 0.07526460211681693, "grad_norm": 0.808023989200592, "learning_rate": 9.984470475380154e-06, "loss": 0.2656998157501221, "memory(GiB)": 28.87, "step": 60, "token_acc": 0.9185868853481338, "train_speed(iter/s)": 0.126941 }, { "epoch": 0.07526460211681693, "eval_loss": 0.27411043643951416, "eval_runtime": 29.8492, "eval_samples_per_second": 17.253, "eval_steps_per_second": 4.322, "eval_token_acc": 0.9110513937391785, "step": 60 }, { "epoch": 0.08153665229321834, "grad_norm": 0.8025234937667847, "learning_rate": 9.981776016250789e-06, "loss": 0.25631260871887207, "memory(GiB)": 28.87, "step": 65, "token_acc": 0.9179844371690116, "train_speed(iter/s)": 0.119148 }, { "epoch": 0.08780870246961976, "grad_norm": 0.8097490072250366, "learning_rate": 9.97886654485488e-06, "loss": 0.24328134059906006, "memory(GiB)": 28.87, "step": 70, "token_acc": 0.9217776363588153, "train_speed(iter/s)": 0.121225 }, { "epoch": 0.09408075264602117, "grad_norm": 0.7601203918457031, "learning_rate": 9.975742186764526e-06, "loss": 0.24020836353302003, "memory(GiB)": 28.87, "step": 75, "token_acc": 0.9274766452418747, "train_speed(iter/s)": 0.12313 }, { "epoch": 0.10035280282242258, "grad_norm": 0.8542447090148926, "learning_rate": 9.972403076826272e-06, "loss": 0.24563825130462646, "memory(GiB)": 28.87, "step": 80, "token_acc": 0.9028298984927715, "train_speed(iter/s)": 0.124748 }, { "epoch": 0.10035280282242258, "eval_loss": 0.26768097281455994, "eval_runtime": 29.7152, "eval_samples_per_second": 17.331, "eval_steps_per_second": 4.341, "eval_token_acc": 0.9123132842272074, "step": 80 }, { "epoch": 0.10662485299882399, "grad_norm": 0.7627353072166443, "learning_rate": 9.96884935915531e-06, "loss": 0.2343665599822998, "memory(GiB)": 28.87, "step": 85, "token_acc": 0.9181495379897511, "train_speed(iter/s)": 0.119041 }, { "epoch": 0.1128969031752254, "grad_norm": 0.7160388827323914, "learning_rate": 9.965081187129248e-06, "loss": 0.23817930221557618, "memory(GiB)": 28.87, "step": 90, "token_acc": 0.9190057254573384, "train_speed(iter/s)": 0.120099 }, { "epoch": 0.11916895335162682, "grad_norm": 0.8704434633255005, "learning_rate": 9.961098723381495e-06, "loss": 0.24323635101318358, "memory(GiB)": 28.87, "step": 95, "token_acc": 0.9187486766885454, "train_speed(iter/s)": 0.121879 }, { "epoch": 0.12544100352802823, "grad_norm": 0.7855771780014038, "learning_rate": 9.956902139794236e-06, "loss": 0.2694889545440674, "memory(GiB)": 28.87, "step": 100, "token_acc": 0.9003986382285332, "train_speed(iter/s)": 0.123256 }, { "epoch": 0.12544100352802823, "eval_loss": 0.26312825083732605, "eval_runtime": 29.8426, "eval_samples_per_second": 17.257, "eval_steps_per_second": 4.323, "eval_token_acc": 0.9138979839098482, "step": 100 }, { "epoch": 0.13171305370442962, "grad_norm": 0.7281831502914429, "learning_rate": 9.95249161749102e-06, "loss": 0.2436688184738159, "memory(GiB)": 28.87, "step": 105, "token_acc": 0.9221378504672897, "train_speed(iter/s)": 0.118943 }, { "epoch": 0.13798510388083104, "grad_norm": 0.770193338394165, "learning_rate": 9.94786734682894e-06, "loss": 0.2424685001373291, "memory(GiB)": 28.87, "step": 110, "token_acc": 0.9147483821400076, "train_speed(iter/s)": 0.1202 }, { "epoch": 0.14425715405723247, "grad_norm": 0.7887458801269531, "learning_rate": 9.943029527390415e-06, "loss": 0.2566553592681885, "memory(GiB)": 28.87, "step": 115, "token_acc": 0.917302101047781, "train_speed(iter/s)": 0.121648 }, { "epoch": 0.15052920423363386, "grad_norm": 0.7880879640579224, "learning_rate": 9.93797836797458e-06, "loss": 0.24933695793151855, "memory(GiB)": 28.87, "step": 120, "token_acc": 0.916873418038744, "train_speed(iter/s)": 0.122856 }, { "epoch": 0.15052920423363386, "eval_loss": 0.2586906850337982, "eval_runtime": 29.8223, "eval_samples_per_second": 17.269, "eval_steps_per_second": 4.326, "eval_token_acc": 0.914292062666907, "step": 120 }, { "epoch": 0.15680125441003528, "grad_norm": 0.8280410170555115, "learning_rate": 9.932714086588276e-06, "loss": 0.24312918186187743, "memory(GiB)": 28.87, "step": 125, "token_acc": 0.9201260500102441, "train_speed(iter/s)": 0.11913 }, { "epoch": 0.16307330458643668, "grad_norm": 0.7925168871879578, "learning_rate": 9.92723691043663e-06, "loss": 0.23867030143737794, "memory(GiB)": 28.87, "step": 130, "token_acc": 0.9171843234696182, "train_speed(iter/s)": 0.12024 }, { "epoch": 0.1693453547628381, "grad_norm": 0.7491594552993774, "learning_rate": 9.921547075913261e-06, "loss": 0.235352087020874, "memory(GiB)": 28.87, "step": 135, "token_acc": 0.9219220779220779, "train_speed(iter/s)": 0.121219 }, { "epoch": 0.17561740493923952, "grad_norm": 0.7498698234558105, "learning_rate": 9.915644828590074e-06, "loss": 0.2597238063812256, "memory(GiB)": 28.87, "step": 140, "token_acc": 0.9098761257799772, "train_speed(iter/s)": 0.122367 }, { "epoch": 0.17561740493923952, "eval_loss": 0.25434646010398865, "eval_runtime": 29.7793, "eval_samples_per_second": 17.294, "eval_steps_per_second": 4.332, "eval_token_acc": 0.9158977239855616, "step": 140 }, { "epoch": 0.18188945511564092, "grad_norm": 0.7065432667732239, "learning_rate": 9.909530423206657e-06, "loss": 0.24801239967346192, "memory(GiB)": 28.87, "step": 145, "token_acc": 0.9213938076007041, "train_speed(iter/s)": 0.119043 }, { "epoch": 0.18816150529204234, "grad_norm": 0.7023429274559021, "learning_rate": 9.903204123659288e-06, "loss": 0.22359247207641603, "memory(GiB)": 28.87, "step": 150, "token_acc": 0.922397476340694, "train_speed(iter/s)": 0.120053 }, { "epoch": 0.19443355546844374, "grad_norm": 0.779660701751709, "learning_rate": 9.896666202989553e-06, "loss": 0.2474226951599121, "memory(GiB)": 28.87, "step": 155, "token_acc": 0.9126028777783999, "train_speed(iter/s)": 0.121177 }, { "epoch": 0.20070560564484516, "grad_norm": 0.7874925136566162, "learning_rate": 9.889916943372549e-06, "loss": 0.2478172779083252, "memory(GiB)": 28.87, "step": 160, "token_acc": 0.9174663613676709, "train_speed(iter/s)": 0.122185 }, { "epoch": 0.20070560564484516, "eval_loss": 0.2529737055301666, "eval_runtime": 29.8233, "eval_samples_per_second": 17.268, "eval_steps_per_second": 4.325, "eval_token_acc": 0.9160318784560497, "step": 160 }, { "epoch": 0.20697765582124658, "grad_norm": 0.6781697869300842, "learning_rate": 9.882956636104714e-06, "loss": 0.2371211528778076, "memory(GiB)": 28.87, "step": 165, "token_acc": 0.9221280174789006, "train_speed(iter/s)": 0.119331 }, { "epoch": 0.21324970599764798, "grad_norm": 0.7917264103889465, "learning_rate": 9.875785581591253e-06, "loss": 0.24579255580902098, "memory(GiB)": 28.87, "step": 170, "token_acc": 0.9130986455782112, "train_speed(iter/s)": 0.120095 }, { "epoch": 0.2195217561740494, "grad_norm": 0.7744810581207275, "learning_rate": 9.868404089333171e-06, "loss": 0.22069144248962402, "memory(GiB)": 28.87, "step": 175, "token_acc": 0.9295828549559892, "train_speed(iter/s)": 0.120995 }, { "epoch": 0.2257938063504508, "grad_norm": 0.8563181757926941, "learning_rate": 9.860812477913915e-06, "loss": 0.23019468784332275, "memory(GiB)": 28.87, "step": 180, "token_acc": 0.9216493698005064, "train_speed(iter/s)": 0.121807 }, { "epoch": 0.2257938063504508, "eval_loss": 0.25059598684310913, "eval_runtime": 29.8473, "eval_samples_per_second": 17.254, "eval_steps_per_second": 4.322, "eval_token_acc": 0.9165852656468132, "step": 180 }, { "epoch": 0.23206585652685222, "grad_norm": 0.7056512236595154, "learning_rate": 9.853011074985628e-06, "loss": 0.23237879276275636, "memory(GiB)": 28.87, "step": 185, "token_acc": 0.9226392416106082, "train_speed(iter/s)": 0.119324 }, { "epoch": 0.23833790670325364, "grad_norm": 0.8427721858024597, "learning_rate": 9.845000217255e-06, "loss": 0.23154301643371583, "memory(GiB)": 28.87, "step": 190, "token_acc": 0.9154171704957679, "train_speed(iter/s)": 0.120232 }, { "epoch": 0.24460995687965503, "grad_norm": 0.7186033725738525, "learning_rate": 9.836780250468744e-06, "loss": 0.23792126178741455, "memory(GiB)": 28.87, "step": 195, "token_acc": 0.9164362118445082, "train_speed(iter/s)": 0.120955 }, { "epoch": 0.25088200705605646, "grad_norm": 0.7176758050918579, "learning_rate": 9.82835152939867e-06, "loss": 0.2266530990600586, "memory(GiB)": 28.87, "step": 200, "token_acc": 0.9176911913693492, "train_speed(iter/s)": 0.121635 }, { "epoch": 0.25088200705605646, "eval_loss": 0.2479380965232849, "eval_runtime": 29.8661, "eval_samples_per_second": 17.244, "eval_steps_per_second": 4.319, "eval_token_acc": 0.9172476533448483, "step": 200 }, { "epoch": 0.2571540572324579, "grad_norm": 0.7367855906486511, "learning_rate": 9.81971441782637e-06, "loss": 0.2414193868637085, "memory(GiB)": 28.87, "step": 205, "token_acc": 0.9207727327666089, "train_speed(iter/s)": 0.119447 }, { "epoch": 0.26342610740885924, "grad_norm": 0.7605990767478943, "learning_rate": 9.810869288527528e-06, "loss": 0.22583391666412353, "memory(GiB)": 28.87, "step": 210, "token_acc": 0.916309738503156, "train_speed(iter/s)": 0.120034 }, { "epoch": 0.26969815758526067, "grad_norm": 0.8129069209098816, "learning_rate": 9.801816523255811e-06, "loss": 0.22113454341888428, "memory(GiB)": 28.87, "step": 215, "token_acc": 0.9300042680324371, "train_speed(iter/s)": 0.120689 }, { "epoch": 0.2759702077616621, "grad_norm": 0.7405229210853577, "learning_rate": 9.792556512726419e-06, "loss": 0.23366448879241944, "memory(GiB)": 28.87, "step": 220, "token_acc": 0.9338177623990773, "train_speed(iter/s)": 0.121495 }, { "epoch": 0.2759702077616621, "eval_loss": 0.24686363339424133, "eval_runtime": 29.7985, "eval_samples_per_second": 17.283, "eval_steps_per_second": 4.329, "eval_token_acc": 0.9180903111126018, "step": 220 }, { "epoch": 0.2822422579380635, "grad_norm": 0.8519447445869446, "learning_rate": 9.783089656599196e-06, "loss": 0.22918324470520018, "memory(GiB)": 28.87, "step": 225, "token_acc": 0.9249420787728689, "train_speed(iter/s)": 0.119596 }, { "epoch": 0.28851430811446493, "grad_norm": 0.7695819139480591, "learning_rate": 9.773416363461401e-06, "loss": 0.2181222677230835, "memory(GiB)": 28.87, "step": 230, "token_acc": 0.9278282801744522, "train_speed(iter/s)": 0.12021 }, { "epoch": 0.2947863582908663, "grad_norm": 0.7810352444648743, "learning_rate": 9.763537050810064e-06, "loss": 0.2256471872329712, "memory(GiB)": 28.87, "step": 235, "token_acc": 0.9293833370318317, "train_speed(iter/s)": 0.120752 }, { "epoch": 0.3010584084672677, "grad_norm": 0.6736721992492676, "learning_rate": 9.753452145033961e-06, "loss": 0.2320047378540039, "memory(GiB)": 28.87, "step": 240, "token_acc": 0.9163366778813231, "train_speed(iter/s)": 0.121219 }, { "epoch": 0.3010584084672677, "eval_loss": 0.2439332753419876, "eval_runtime": 29.7876, "eval_samples_per_second": 17.289, "eval_steps_per_second": 4.331, "eval_token_acc": 0.9183376584175642, "step": 240 }, { "epoch": 0.30733045864366915, "grad_norm": 0.7435624599456787, "learning_rate": 9.743162081395227e-06, "loss": 0.22847986221313477, "memory(GiB)": 28.87, "step": 245, "token_acc": 0.9244482253414623, "train_speed(iter/s)": 0.119441 }, { "epoch": 0.31360250882007057, "grad_norm": 0.7495070099830627, "learning_rate": 9.73266730401056e-06, "loss": 0.23787951469421387, "memory(GiB)": 28.87, "step": 250, "token_acc": 0.9102755453501722, "train_speed(iter/s)": 0.120195 }, { "epoch": 0.319874558996472, "grad_norm": 0.762973427772522, "learning_rate": 9.72196826583205e-06, "loss": 0.22795605659484863, "memory(GiB)": 28.87, "step": 255, "token_acc": 0.9216171142676405, "train_speed(iter/s)": 0.120779 }, { "epoch": 0.32614660917287336, "grad_norm": 0.7174361944198608, "learning_rate": 9.711065428627638e-06, "loss": 0.22773213386535646, "memory(GiB)": 28.87, "step": 260, "token_acc": 0.9152317529383672, "train_speed(iter/s)": 0.121245 }, { "epoch": 0.32614660917287336, "eval_loss": 0.24210233986377716, "eval_runtime": 29.8271, "eval_samples_per_second": 17.266, "eval_steps_per_second": 4.325, "eval_token_acc": 0.9190168154244103, "step": 260 }, { "epoch": 0.3324186593492748, "grad_norm": 0.6918441653251648, "learning_rate": 9.699959262961182e-06, "loss": 0.23561110496520996, "memory(GiB)": 28.87, "step": 265, "token_acc": 0.9212337133550489, "train_speed(iter/s)": 0.119577 }, { "epoch": 0.3386907095256762, "grad_norm": 0.7615500688552856, "learning_rate": 9.688650248172145e-06, "loss": 0.2438591480255127, "memory(GiB)": 28.87, "step": 270, "token_acc": 0.9079355783308931, "train_speed(iter/s)": 0.120126 }, { "epoch": 0.3449627597020776, "grad_norm": 0.7297282218933105, "learning_rate": 9.677138872354916e-06, "loss": 0.22617642879486083, "memory(GiB)": 28.87, "step": 275, "token_acc": 0.9269484485707306, "train_speed(iter/s)": 0.120692 }, { "epoch": 0.35123480987847905, "grad_norm": 0.8113409876823425, "learning_rate": 9.665425632337731e-06, "loss": 0.24270424842834473, "memory(GiB)": 28.87, "step": 280, "token_acc": 0.9174018389662028, "train_speed(iter/s)": 0.121257 }, { "epoch": 0.35123480987847905, "eval_loss": 0.2409682720899582, "eval_runtime": 29.9049, "eval_samples_per_second": 17.221, "eval_steps_per_second": 4.314, "eval_token_acc": 0.9193647785822388, "step": 280 }, { "epoch": 0.3575068600548804, "grad_norm": 0.7579072117805481, "learning_rate": 9.653511033661242e-06, "loss": 0.23863134384155274, "memory(GiB)": 28.87, "step": 285, "token_acc": 0.9221180005684817, "train_speed(iter/s)": 0.119665 }, { "epoch": 0.36377891023128184, "grad_norm": 0.7347179055213928, "learning_rate": 9.641395590556689e-06, "loss": 0.21491737365722657, "memory(GiB)": 28.87, "step": 290, "token_acc": 0.9329900246145874, "train_speed(iter/s)": 0.120176 }, { "epoch": 0.37005096040768326, "grad_norm": 0.7506256699562073, "learning_rate": 9.629079825923712e-06, "loss": 0.22804722785949708, "memory(GiB)": 28.87, "step": 295, "token_acc": 0.9276003625313216, "train_speed(iter/s)": 0.120703 }, { "epoch": 0.3763230105840847, "grad_norm": 0.7653704285621643, "learning_rate": 9.616564271307779e-06, "loss": 0.2438521385192871, "memory(GiB)": 28.87, "step": 300, "token_acc": 0.9164976705946835, "train_speed(iter/s)": 0.121274 }, { "epoch": 0.3763230105840847, "eval_loss": 0.23974527418613434, "eval_runtime": 29.9433, "eval_samples_per_second": 17.199, "eval_steps_per_second": 4.308, "eval_token_acc": 0.9196456645048233, "step": 300 }, { "epoch": 0.3825950607604861, "grad_norm": 0.7230799198150635, "learning_rate": 9.603849466877249e-06, "loss": 0.21197593212127686, "memory(GiB)": 28.87, "step": 305, "token_acc": 0.9271648369422373, "train_speed(iter/s)": 0.11978 }, { "epoch": 0.3888671109368875, "grad_norm": 0.7244482636451721, "learning_rate": 9.59093596140005e-06, "loss": 0.22695040702819824, "memory(GiB)": 28.87, "step": 310, "token_acc": 0.9223127159030737, "train_speed(iter/s)": 0.120294 }, { "epoch": 0.3951391611132889, "grad_norm": 0.7103644609451294, "learning_rate": 9.577824312220006e-06, "loss": 0.2294787883758545, "memory(GiB)": 28.87, "step": 315, "token_acc": 0.9219038140678512, "train_speed(iter/s)": 0.120646 }, { "epoch": 0.4014112112896903, "grad_norm": 0.6850073337554932, "learning_rate": 9.564515085232772e-06, "loss": 0.22310760021209716, "memory(GiB)": 28.87, "step": 320, "token_acc": 0.916691517336731, "train_speed(iter/s)": 0.121107 }, { "epoch": 0.4014112112896903, "eval_loss": 0.23897218704223633, "eval_runtime": 29.8253, "eval_samples_per_second": 17.267, "eval_steps_per_second": 4.325, "eval_token_acc": 0.9197756266481086, "step": 320 }, { "epoch": 0.40768326146609174, "grad_norm": 0.7237871289253235, "learning_rate": 9.55100885486142e-06, "loss": 0.2445456027984619, "memory(GiB)": 28.87, "step": 325, "token_acc": 0.9257608045461343, "train_speed(iter/s)": 0.119765 }, { "epoch": 0.41395531164249316, "grad_norm": 0.8139222860336304, "learning_rate": 9.537306204031628e-06, "loss": 0.2413849115371704, "memory(GiB)": 28.87, "step": 330, "token_acc": 0.9230007008724653, "train_speed(iter/s)": 0.120187 }, { "epoch": 0.42022736181889453, "grad_norm": 0.7721692323684692, "learning_rate": 9.523407724146548e-06, "loss": 0.22532095909118652, "memory(GiB)": 28.87, "step": 335, "token_acc": 0.9216260855666258, "train_speed(iter/s)": 0.120724 }, { "epoch": 0.42649941199529595, "grad_norm": 0.7195901274681091, "learning_rate": 9.509314015061263e-06, "loss": 0.21710624694824218, "memory(GiB)": 28.87, "step": 340, "token_acc": 0.926438437202603, "train_speed(iter/s)": 0.121078 }, { "epoch": 0.42649941199529595, "eval_loss": 0.2361544817686081, "eval_runtime": 29.729, "eval_samples_per_second": 17.323, "eval_steps_per_second": 4.339, "eval_token_acc": 0.9209872092097044, "step": 340 }, { "epoch": 0.4327714621716974, "grad_norm": 0.7148153185844421, "learning_rate": 9.495025685056898e-06, "loss": 0.21872997283935547, "memory(GiB)": 28.87, "step": 345, "token_acc": 0.9274190386575708, "train_speed(iter/s)": 0.119764 }, { "epoch": 0.4390435123480988, "grad_norm": 0.7195214033126831, "learning_rate": 9.480543350814376e-06, "loss": 0.22351717948913574, "memory(GiB)": 28.87, "step": 350, "token_acc": 0.9214563773757728, "train_speed(iter/s)": 0.120266 }, { "epoch": 0.4453155625245002, "grad_norm": 0.7162268161773682, "learning_rate": 9.465867637387793e-06, "loss": 0.24704561233520508, "memory(GiB)": 28.87, "step": 355, "token_acc": 0.9105946522795992, "train_speed(iter/s)": 0.120763 }, { "epoch": 0.4515876127009016, "grad_norm": 0.6543457508087158, "learning_rate": 9.450999178177445e-06, "loss": 0.22001304626464843, "memory(GiB)": 28.87, "step": 360, "token_acc": 0.9289104914584577, "train_speed(iter/s)": 0.121125 }, { "epoch": 0.4515876127009016, "eval_loss": 0.2345624417066574, "eval_runtime": 29.6851, "eval_samples_per_second": 17.349, "eval_steps_per_second": 4.346, "eval_token_acc": 0.9206140920886593, "step": 360 }, { "epoch": 0.457859662877303, "grad_norm": 0.7254881262779236, "learning_rate": 9.435938614902494e-06, "loss": 0.20390102863311768, "memory(GiB)": 28.87, "step": 365, "token_acc": 0.9266752382392981, "train_speed(iter/s)": 0.119898 }, { "epoch": 0.46413171305370443, "grad_norm": 0.7083726525306702, "learning_rate": 9.42068659757326e-06, "loss": 0.231141996383667, "memory(GiB)": 28.87, "step": 370, "token_acc": 0.9151756790431099, "train_speed(iter/s)": 0.12026 }, { "epoch": 0.47040376323010585, "grad_norm": 0.7314102053642273, "learning_rate": 9.405243784463181e-06, "loss": 0.22621698379516603, "memory(GiB)": 28.87, "step": 375, "token_acc": 0.9306795666829633, "train_speed(iter/s)": 0.120718 }, { "epoch": 0.4766758134065073, "grad_norm": 0.7966891527175903, "learning_rate": 9.389610842080394e-06, "loss": 0.23102831840515137, "memory(GiB)": 28.87, "step": 380, "token_acc": 0.9184476940382452, "train_speed(iter/s)": 0.121124 }, { "epoch": 0.4766758134065073, "eval_loss": 0.23356294631958008, "eval_runtime": 29.8585, "eval_samples_per_second": 17.248, "eval_steps_per_second": 4.32, "eval_token_acc": 0.9207440542319447, "step": 380 }, { "epoch": 0.48294786358290864, "grad_norm": 0.7005392909049988, "learning_rate": 9.373788445138972e-06, "loss": 0.21558718681335448, "memory(GiB)": 28.87, "step": 385, "token_acc": 0.9273667904013094, "train_speed(iter/s)": 0.119963 }, { "epoch": 0.48921991375931007, "grad_norm": 0.7006497383117676, "learning_rate": 9.357777276529793e-06, "loss": 0.21882824897766112, "memory(GiB)": 28.87, "step": 390, "token_acc": 0.9215467262379661, "train_speed(iter/s)": 0.120425 }, { "epoch": 0.4954919639357115, "grad_norm": 0.7023485898971558, "learning_rate": 9.341578027291085e-06, "loss": 0.2044372081756592, "memory(GiB)": 28.87, "step": 395, "token_acc": 0.93374081873748, "train_speed(iter/s)": 0.120782 }, { "epoch": 0.5017640141121129, "grad_norm": 0.7891673445701599, "learning_rate": 9.325191396578589e-06, "loss": 0.2204671859741211, "memory(GiB)": 28.87, "step": 400, "token_acc": 0.9241170268845545, "train_speed(iter/s)": 0.121159 }, { "epoch": 0.5017640141121129, "eval_loss": 0.23371295630931854, "eval_runtime": 29.8363, "eval_samples_per_second": 17.261, "eval_steps_per_second": 4.324, "eval_token_acc": 0.9213351723675329, "step": 400 }, { "epoch": 0.5080360642885143, "grad_norm": 0.696616530418396, "learning_rate": 9.308618091635382e-06, "loss": 0.22419328689575196, "memory(GiB)": 28.87, "step": 405, "token_acc": 0.9280618250678131, "train_speed(iter/s)": 0.120013 }, { "epoch": 0.5143081144649158, "grad_norm": 0.7773451209068298, "learning_rate": 9.291858827761359e-06, "loss": 0.2144181489944458, "memory(GiB)": 28.87, "step": 410, "token_acc": 0.930534196094362, "train_speed(iter/s)": 0.12032 }, { "epoch": 0.5205801646413172, "grad_norm": 0.786472737789154, "learning_rate": 9.274914328282359e-06, "loss": 0.23719301223754882, "memory(GiB)": 28.87, "step": 415, "token_acc": 0.9227249618708694, "train_speed(iter/s)": 0.120643 }, { "epoch": 0.5268522148177185, "grad_norm": 0.8163383603096008, "learning_rate": 9.257785324518943e-06, "loss": 0.22335872650146485, "memory(GiB)": 28.87, "step": 420, "token_acc": 0.9292451461241548, "train_speed(iter/s)": 0.120974 }, { "epoch": 0.5268522148177185, "eval_loss": 0.2324906587600708, "eval_runtime": 29.851, "eval_samples_per_second": 17.252, "eval_steps_per_second": 4.321, "eval_token_acc": 0.9215112501100486, "step": 420 }, { "epoch": 0.5331242649941199, "grad_norm": 0.690645158290863, "learning_rate": 9.240472555754835e-06, "loss": 0.21186673641204834, "memory(GiB)": 28.87, "step": 425, "token_acc": 0.9271501893311638, "train_speed(iter/s)": 0.120028 }, { "epoch": 0.5393963151705213, "grad_norm": 0.7793926000595093, "learning_rate": 9.222976769205013e-06, "loss": 0.21735620498657227, "memory(GiB)": 28.87, "step": 430, "token_acc": 0.929071782480291, "train_speed(iter/s)": 0.120349 }, { "epoch": 0.5456683653469228, "grad_norm": 0.6697238683700562, "learning_rate": 9.205298719983458e-06, "loss": 0.2206124782562256, "memory(GiB)": 31.15, "step": 435, "token_acc": 0.9281159722041485, "train_speed(iter/s)": 0.120617 }, { "epoch": 0.5519404155233242, "grad_norm": 1.481563687324524, "learning_rate": 9.187439171070563e-06, "loss": 0.22309460639953613, "memory(GiB)": 31.15, "step": 440, "token_acc": 0.9234951798730308, "train_speed(iter/s)": 0.120931 }, { "epoch": 0.5519404155233242, "eval_loss": 0.2316710203886032, "eval_runtime": 29.9096, "eval_samples_per_second": 17.219, "eval_steps_per_second": 4.313, "eval_token_acc": 0.9212806721138972, "step": 440 }, { "epoch": 0.5582124656997256, "grad_norm": 0.8089612722396851, "learning_rate": 9.169398893280208e-06, "loss": 0.20908033847808838, "memory(GiB)": 31.15, "step": 445, "token_acc": 0.9294068842705951, "train_speed(iter/s)": 0.119914 }, { "epoch": 0.564484515876127, "grad_norm": 0.6420107483863831, "learning_rate": 9.151178665226486e-06, "loss": 0.22311244010925294, "memory(GiB)": 31.15, "step": 450, "token_acc": 0.9273165858531712, "train_speed(iter/s)": 0.120233 }, { "epoch": 0.5707565660525284, "grad_norm": 0.6611379981040955, "learning_rate": 9.132779273290103e-06, "loss": 0.21406009197235107, "memory(GiB)": 31.15, "step": 455, "token_acc": 0.9297352623862347, "train_speed(iter/s)": 0.120453 }, { "epoch": 0.5770286162289299, "grad_norm": 0.665287435054779, "learning_rate": 9.114201511584428e-06, "loss": 0.22191643714904785, "memory(GiB)": 31.15, "step": 460, "token_acc": 0.9259487069772417, "train_speed(iter/s)": 0.120712 }, { "epoch": 0.5770286162289299, "eval_loss": 0.23122188448905945, "eval_runtime": 29.7667, "eval_samples_per_second": 17.301, "eval_steps_per_second": 4.334, "eval_token_acc": 0.9220562526464066, "step": 460 }, { "epoch": 0.5833006664053313, "grad_norm": 0.8055879473686218, "learning_rate": 9.095446181921237e-06, "loss": 0.22888469696044922, "memory(GiB)": 31.15, "step": 465, "token_acc": 0.9251080172827653, "train_speed(iter/s)": 0.119856 }, { "epoch": 0.5895727165817326, "grad_norm": 0.6367520689964294, "learning_rate": 9.07651409377609e-06, "loss": 0.22404332160949708, "memory(GiB)": 31.15, "step": 470, "token_acc": 0.918756183745583, "train_speed(iter/s)": 0.120217 }, { "epoch": 0.595844766758134, "grad_norm": 0.7680770754814148, "learning_rate": 9.057406064253404e-06, "loss": 0.2318411111831665, "memory(GiB)": 31.15, "step": 475, "token_acc": 0.9213946117274168, "train_speed(iter/s)": 0.120561 }, { "epoch": 0.6021168169345354, "grad_norm": 0.685991644859314, "learning_rate": 9.038122918051184e-06, "loss": 0.21981484889984132, "memory(GiB)": 31.15, "step": 480, "token_acc": 0.9215212527964206, "train_speed(iter/s)": 0.120844 }, { "epoch": 0.6021168169345354, "eval_loss": 0.22933758795261383, "eval_runtime": 29.8714, "eval_samples_per_second": 17.241, "eval_steps_per_second": 4.319, "eval_token_acc": 0.9229072950685655, "step": 480 }, { "epoch": 0.6083888671109369, "grad_norm": 0.7308952808380127, "learning_rate": 9.018665487425426e-06, "loss": 0.21712393760681153, "memory(GiB)": 31.15, "step": 485, "token_acc": 0.9292804826355687, "train_speed(iter/s)": 0.119914 }, { "epoch": 0.6146609172873383, "grad_norm": 0.7039359211921692, "learning_rate": 8.999034612154204e-06, "loss": 0.22841830253601075, "memory(GiB)": 31.15, "step": 490, "token_acc": 0.9264894745658607, "train_speed(iter/s)": 0.12024 }, { "epoch": 0.6209329674637397, "grad_norm": 0.7186778783798218, "learning_rate": 8.979231139501417e-06, "loss": 0.21591267585754395, "memory(GiB)": 31.15, "step": 495, "token_acc": 0.9271491015055853, "train_speed(iter/s)": 0.120545 }, { "epoch": 0.6272050176401411, "grad_norm": 0.8200941681861877, "learning_rate": 8.95925592418023e-06, "loss": 0.2159876823425293, "memory(GiB)": 31.15, "step": 500, "token_acc": 0.9208719988216232, "train_speed(iter/s)": 0.120825 }, { "epoch": 0.6272050176401411, "eval_loss": 0.22962290048599243, "eval_runtime": 29.8717, "eval_samples_per_second": 17.24, "eval_steps_per_second": 4.318, "eval_token_acc": 0.9228486024877269, "step": 500 }, { "epoch": 0.6334770678165426, "grad_norm": 0.7458428740501404, "learning_rate": 8.939109828316184e-06, "loss": 0.21893787384033203, "memory(GiB)": 31.15, "step": 505, "token_acc": 0.92739813330044, "train_speed(iter/s)": 0.119977 }, { "epoch": 0.639749117992944, "grad_norm": 0.6861876249313354, "learning_rate": 8.918793721409973e-06, "loss": 0.2168494701385498, "memory(GiB)": 31.15, "step": 510, "token_acc": 0.9236680466488081, "train_speed(iter/s)": 0.120232 }, { "epoch": 0.6460211681693454, "grad_norm": 0.6782411932945251, "learning_rate": 8.898308480299937e-06, "loss": 0.21742620468139648, "memory(GiB)": 31.15, "step": 515, "token_acc": 0.9275666477595008, "train_speed(iter/s)": 0.120504 }, { "epoch": 0.6522932183457467, "grad_norm": 0.6103708744049072, "learning_rate": 8.877654989124202e-06, "loss": 0.20578155517578126, "memory(GiB)": 31.15, "step": 520, "token_acc": 0.9377705924261522, "train_speed(iter/s)": 0.120812 }, { "epoch": 0.6522932183457467, "eval_loss": 0.22769030928611755, "eval_runtime": 29.7319, "eval_samples_per_second": 17.321, "eval_steps_per_second": 4.339, "eval_token_acc": 0.9225677165651425, "step": 520 }, { "epoch": 0.6585652685221481, "grad_norm": 0.6786876916885376, "learning_rate": 8.856834139282531e-06, "loss": 0.2087319850921631, "memory(GiB)": 31.15, "step": 525, "token_acc": 0.9303061874753442, "train_speed(iter/s)": 0.119999 }, { "epoch": 0.6648373186985496, "grad_norm": 0.6989500522613525, "learning_rate": 8.835846829397843e-06, "loss": 0.2093345880508423, "memory(GiB)": 31.15, "step": 530, "token_acc": 0.9277319617252378, "train_speed(iter/s)": 0.120304 }, { "epoch": 0.671109368874951, "grad_norm": 0.7903891801834106, "learning_rate": 8.814693965277435e-06, "loss": 0.2172760248184204, "memory(GiB)": 31.15, "step": 535, "token_acc": 0.9253766898933953, "train_speed(iter/s)": 0.120551 }, { "epoch": 0.6773814190513524, "grad_norm": 0.7009393572807312, "learning_rate": 8.793376459873888e-06, "loss": 0.2161731481552124, "memory(GiB)": 31.15, "step": 540, "token_acc": 0.9260658696142567, "train_speed(iter/s)": 0.120818 }, { "epoch": 0.6773814190513524, "eval_loss": 0.22710371017456055, "eval_runtime": 29.8324, "eval_samples_per_second": 17.263, "eval_steps_per_second": 4.324, "eval_token_acc": 0.9232384889175831, "step": 540 }, { "epoch": 0.6836534692277538, "grad_norm": 0.6882784962654114, "learning_rate": 8.771895233245655e-06, "loss": 0.20790476799011232, "memory(GiB)": 31.15, "step": 545, "token_acc": 0.9269955800687545, "train_speed(iter/s)": 0.120024 }, { "epoch": 0.6899255194041553, "grad_norm": 0.7261970639228821, "learning_rate": 8.750251212517364e-06, "loss": 0.21239514350891114, "memory(GiB)": 31.15, "step": 550, "token_acc": 0.934424197347217, "train_speed(iter/s)": 0.120259 }, { "epoch": 0.6961975695805567, "grad_norm": 0.7103798985481262, "learning_rate": 8.728445331839796e-06, "loss": 0.22427408695220946, "memory(GiB)": 31.15, "step": 555, "token_acc": 0.926710200976352, "train_speed(iter/s)": 0.120542 }, { "epoch": 0.7024696197569581, "grad_norm": 0.6691487431526184, "learning_rate": 8.706478532349567e-06, "loss": 0.2043588399887085, "memory(GiB)": 31.15, "step": 560, "token_acc": 0.9273906956709025, "train_speed(iter/s)": 0.120786 }, { "epoch": 0.7024696197569581, "eval_loss": 0.22625486552715302, "eval_runtime": 29.9222, "eval_samples_per_second": 17.211, "eval_steps_per_second": 4.311, "eval_token_acc": 0.9232091426271638, "step": 560 }, { "epoch": 0.7087416699333595, "grad_norm": 0.7665515542030334, "learning_rate": 8.684351762128511e-06, "loss": 0.21694588661193848, "memory(GiB)": 31.15, "step": 565, "token_acc": 0.9275524304991059, "train_speed(iter/s)": 0.120052 }, { "epoch": 0.7150137201097608, "grad_norm": 0.6899131536483765, "learning_rate": 8.662065976162765e-06, "loss": 0.20793275833129882, "memory(GiB)": 31.15, "step": 570, "token_acc": 0.9312310457149275, "train_speed(iter/s)": 0.12035 }, { "epoch": 0.7212857702861623, "grad_norm": 0.7864097356796265, "learning_rate": 8.639622136301541e-06, "loss": 0.21702027320861816, "memory(GiB)": 33.6, "step": 575, "token_acc": 0.9272943626357281, "train_speed(iter/s)": 0.120589 }, { "epoch": 0.7275578204625637, "grad_norm": 0.729067325592041, "learning_rate": 8.617021211215629e-06, "loss": 0.20268304347991944, "memory(GiB)": 33.6, "step": 580, "token_acc": 0.9264947749297842, "train_speed(iter/s)": 0.120859 }, { "epoch": 0.7275578204625637, "eval_loss": 0.22482995688915253, "eval_runtime": 29.7802, "eval_samples_per_second": 17.293, "eval_steps_per_second": 4.332, "eval_token_acc": 0.92376672214513, "step": 580 }, { "epoch": 0.7338298706389651, "grad_norm": 0.6976431608200073, "learning_rate": 8.594264176355565e-06, "loss": 0.2071969985961914, "memory(GiB)": 33.6, "step": 585, "token_acc": 0.9269029680138362, "train_speed(iter/s)": 0.120124 }, { "epoch": 0.7401019208153665, "grad_norm": 0.760837733745575, "learning_rate": 8.571352013909558e-06, "loss": 0.2175739288330078, "memory(GiB)": 33.6, "step": 590, "token_acc": 0.9229339361969354, "train_speed(iter/s)": 0.1204 }, { "epoch": 0.7463739709917679, "grad_norm": 0.646027147769928, "learning_rate": 8.548285712761084e-06, "loss": 0.21033940315246583, "memory(GiB)": 33.6, "step": 595, "token_acc": 0.9275317355671036, "train_speed(iter/s)": 0.120614 }, { "epoch": 0.7526460211681694, "grad_norm": 0.732366681098938, "learning_rate": 8.525066268446208e-06, "loss": 0.2169095754623413, "memory(GiB)": 33.6, "step": 600, "token_acc": 0.9253966064284729, "train_speed(iter/s)": 0.120858 }, { "epoch": 0.7526460211681694, "eval_loss": 0.22306384146213531, "eval_runtime": 29.8297, "eval_samples_per_second": 17.265, "eval_steps_per_second": 4.325, "eval_token_acc": 0.9240308387589035, "step": 600 }, { "epoch": 0.7589180713445708, "grad_norm": 0.7121861577033997, "learning_rate": 8.501694683110615e-06, "loss": 0.22008817195892333, "memory(GiB)": 33.6, "step": 605, "token_acc": 0.9294228080296467, "train_speed(iter/s)": 0.120141 }, { "epoch": 0.7651901215209722, "grad_norm": 0.8034666776657104, "learning_rate": 8.478171965466366e-06, "loss": 0.21159706115722657, "memory(GiB)": 33.6, "step": 610, "token_acc": 0.925722043939768, "train_speed(iter/s)": 0.120368 }, { "epoch": 0.7714621716973736, "grad_norm": 0.791170597076416, "learning_rate": 8.454499130748352e-06, "loss": 0.20048816204071046, "memory(GiB)": 33.6, "step": 615, "token_acc": 0.9308691482869845, "train_speed(iter/s)": 0.120665 }, { "epoch": 0.777734221873775, "grad_norm": 0.7372978329658508, "learning_rate": 8.43067720067048e-06, "loss": 0.2082076072692871, "memory(GiB)": 33.6, "step": 620, "token_acc": 0.93298405204675, "train_speed(iter/s)": 0.120914 }, { "epoch": 0.777734221873775, "eval_loss": 0.22348882257938385, "eval_runtime": 29.701, "eval_samples_per_second": 17.34, "eval_steps_per_second": 4.343, "eval_token_acc": 0.9239637615236594, "step": 620 }, { "epoch": 0.7840062720501764, "grad_norm": 0.7372789978981018, "learning_rate": 8.40670720338158e-06, "loss": 0.22890782356262207, "memory(GiB)": 33.6, "step": 625, "token_acc": 0.9263055911491702, "train_speed(iter/s)": 0.120201 }, { "epoch": 0.7902783222265778, "grad_norm": 0.7267508506774902, "learning_rate": 8.382590173421029e-06, "loss": 0.21681501865386962, "memory(GiB)": 33.6, "step": 630, "token_acc": 0.9380081814049795, "train_speed(iter/s)": 0.120459 }, { "epoch": 0.7965503724029792, "grad_norm": 0.7548701167106628, "learning_rate": 8.358327151674095e-06, "loss": 0.21880314350128174, "memory(GiB)": 33.6, "step": 635, "token_acc": 0.9205941088367449, "train_speed(iter/s)": 0.120708 }, { "epoch": 0.8028224225793806, "grad_norm": 0.7470581531524658, "learning_rate": 8.33391918532702e-06, "loss": 0.210282564163208, "memory(GiB)": 33.6, "step": 640, "token_acc": 0.9302132163919552, "train_speed(iter/s)": 0.120949 }, { "epoch": 0.8028224225793806, "eval_loss": 0.22056862711906433, "eval_runtime": 29.8322, "eval_samples_per_second": 17.263, "eval_steps_per_second": 4.324, "eval_token_acc": 0.9247183804201551, "step": 640 }, { "epoch": 0.8090944727557821, "grad_norm": 0.6555790901184082, "learning_rate": 8.309367327821819e-06, "loss": 0.19786405563354492, "memory(GiB)": 33.6, "step": 645, "token_acc": 0.9318245266126937, "train_speed(iter/s)": 0.12025 }, { "epoch": 0.8153665229321835, "grad_norm": 0.7353399395942688, "learning_rate": 8.284672638810813e-06, "loss": 0.2103184938430786, "memory(GiB)": 33.6, "step": 650, "token_acc": 0.93095703125, "train_speed(iter/s)": 0.120454 }, { "epoch": 0.8216385731085849, "grad_norm": 0.6436148881912231, "learning_rate": 8.259836184110904e-06, "loss": 0.20670008659362793, "memory(GiB)": 33.6, "step": 655, "token_acc": 0.9274860182785432, "train_speed(iter/s)": 0.120662 }, { "epoch": 0.8279106232849863, "grad_norm": 0.6722457408905029, "learning_rate": 8.234859035657557e-06, "loss": 0.21930215358734131, "memory(GiB)": 33.6, "step": 660, "token_acc": 0.926199601410394, "train_speed(iter/s)": 0.120911 }, { "epoch": 0.8279106232849863, "eval_loss": 0.22012893855571747, "eval_runtime": 29.7417, "eval_samples_per_second": 17.316, "eval_steps_per_second": 4.337, "eval_token_acc": 0.9244207251887595, "step": 660 }, { "epoch": 0.8341826734613876, "grad_norm": 0.661716103553772, "learning_rate": 8.209742271458556e-06, "loss": 0.21486959457397461, "memory(GiB)": 33.6, "step": 665, "token_acc": 0.9290491629452907, "train_speed(iter/s)": 0.120203 }, { "epoch": 0.8404547236377891, "grad_norm": 0.6564416885375977, "learning_rate": 8.18448697554746e-06, "loss": 0.19918079376220704, "memory(GiB)": 33.6, "step": 670, "token_acc": 0.9267048282727726, "train_speed(iter/s)": 0.120429 }, { "epoch": 0.8467267738141905, "grad_norm": 0.699549674987793, "learning_rate": 8.159094237936828e-06, "loss": 0.2177518367767334, "memory(GiB)": 33.6, "step": 675, "token_acc": 0.9223363604783882, "train_speed(iter/s)": 0.120656 }, { "epoch": 0.8529988239905919, "grad_norm": 0.7374889850616455, "learning_rate": 8.133565154571169e-06, "loss": 0.22228724956512452, "memory(GiB)": 33.6, "step": 680, "token_acc": 0.9125668288580525, "train_speed(iter/s)": 0.120849 }, { "epoch": 0.8529988239905919, "eval_loss": 0.2190328687429428, "eval_runtime": 29.8942, "eval_samples_per_second": 17.227, "eval_steps_per_second": 4.315, "eval_token_acc": 0.9247435343833715, "step": 680 }, { "epoch": 0.8592708741669933, "grad_norm": 0.6313470602035522, "learning_rate": 8.107900827279638e-06, "loss": 0.20030460357666016, "memory(GiB)": 33.6, "step": 685, "token_acc": 0.9315766341697299, "train_speed(iter/s)": 0.120164 }, { "epoch": 0.8655429243433947, "grad_norm": 0.7052416205406189, "learning_rate": 8.082102363728494e-06, "loss": 0.2170419692993164, "memory(GiB)": 33.6, "step": 690, "token_acc": 0.9215518226488402, "train_speed(iter/s)": 0.120386 }, { "epoch": 0.8718149745197962, "grad_norm": 0.7035377025604248, "learning_rate": 8.056170877373277e-06, "loss": 0.22514162063598633, "memory(GiB)": 33.6, "step": 695, "token_acc": 0.9211979208747407, "train_speed(iter/s)": 0.120592 }, { "epoch": 0.8780870246961976, "grad_norm": 0.6568828821182251, "learning_rate": 8.030107487410766e-06, "loss": 0.20586962699890138, "memory(GiB)": 33.6, "step": 700, "token_acc": 0.9286558694881192, "train_speed(iter/s)": 0.120818 }, { "epoch": 0.8780870246961976, "eval_loss": 0.21893204748630524, "eval_runtime": 29.9213, "eval_samples_per_second": 17.212, "eval_steps_per_second": 4.311, "eval_token_acc": 0.9248315732546294, "step": 700 }, { "epoch": 0.884359074872599, "grad_norm": 0.7014189958572388, "learning_rate": 8.003913318730662e-06, "loss": 0.2156972885131836, "memory(GiB)": 33.6, "step": 705, "token_acc": 0.9312223183831391, "train_speed(iter/s)": 0.120206 }, { "epoch": 0.8906311250490004, "grad_norm": 0.7113268971443176, "learning_rate": 7.97758950186705e-06, "loss": 0.20703303813934326, "memory(GiB)": 33.6, "step": 710, "token_acc": 0.9297330786308018, "train_speed(iter/s)": 0.120418 }, { "epoch": 0.8969031752254017, "grad_norm": 0.6587302684783936, "learning_rate": 7.951137172949595e-06, "loss": 0.20361075401306153, "memory(GiB)": 33.6, "step": 715, "token_acc": 0.9325225225225225, "train_speed(iter/s)": 0.120636 }, { "epoch": 0.9031752254018032, "grad_norm": 0.6677445769309998, "learning_rate": 7.924557473654516e-06, "loss": 0.19508445262908936, "memory(GiB)": 33.6, "step": 720, "token_acc": 0.9272533459670733, "train_speed(iter/s)": 0.120834 }, { "epoch": 0.9031752254018032, "eval_loss": 0.2177843153476715, "eval_runtime": 29.9167, "eval_samples_per_second": 17.214, "eval_steps_per_second": 4.312, "eval_token_acc": 0.9248483425634404, "step": 720 }, { "epoch": 0.9094472755782046, "grad_norm": 0.6732158064842224, "learning_rate": 7.897851551155306e-06, "loss": 0.20930843353271483, "memory(GiB)": 33.6, "step": 725, "token_acc": 0.9286447233404372, "train_speed(iter/s)": 0.120233 }, { "epoch": 0.915719325754606, "grad_norm": 0.7134806513786316, "learning_rate": 7.871020558073217e-06, "loss": 0.22407774925231932, "memory(GiB)": 33.6, "step": 730, "token_acc": 0.9180210751919986, "train_speed(iter/s)": 0.120451 }, { "epoch": 0.9219913759310074, "grad_norm": 0.7282131910324097, "learning_rate": 7.844065652427523e-06, "loss": 0.20888471603393555, "memory(GiB)": 33.6, "step": 735, "token_acc": 0.9275435780462392, "train_speed(iter/s)": 0.120674 }, { "epoch": 0.9282634261074089, "grad_norm": 0.6692695021629333, "learning_rate": 7.816987997585535e-06, "loss": 0.2041374683380127, "memory(GiB)": 33.6, "step": 740, "token_acc": 0.9322735248670875, "train_speed(iter/s)": 0.12084 }, { "epoch": 0.9282634261074089, "eval_loss": 0.21771369874477386, "eval_runtime": 29.8739, "eval_samples_per_second": 17.239, "eval_steps_per_second": 4.318, "eval_token_acc": 0.9255442688790975, "step": 740 }, { "epoch": 0.9345354762838103, "grad_norm": 0.7032608985900879, "learning_rate": 7.789788762212384e-06, "loss": 0.19432848691940308, "memory(GiB)": 33.6, "step": 745, "token_acc": 0.9331851716544383, "train_speed(iter/s)": 0.120233 }, { "epoch": 0.9408075264602117, "grad_norm": 0.6778285503387451, "learning_rate": 7.762469120220595e-06, "loss": 0.20669918060302733, "memory(GiB)": 33.6, "step": 750, "token_acc": 0.9296290992410139, "train_speed(iter/s)": 0.120451 }, { "epoch": 0.9470795766366131, "grad_norm": 0.7147387266159058, "learning_rate": 7.73503025071941e-06, "loss": 0.2145129680633545, "memory(GiB)": 33.6, "step": 755, "token_acc": 0.9212092639519123, "train_speed(iter/s)": 0.120647 }, { "epoch": 0.9533516268130146, "grad_norm": 0.6544482111930847, "learning_rate": 7.7074733379639e-06, "loss": 0.2081056594848633, "memory(GiB)": 33.6, "step": 760, "token_acc": 0.9254992319508448, "train_speed(iter/s)": 0.120855 }, { "epoch": 0.9533516268130146, "eval_loss": 0.21670959889888763, "eval_runtime": 29.8367, "eval_samples_per_second": 17.261, "eval_steps_per_second": 4.324, "eval_token_acc": 0.9255191149158809, "step": 760 }, { "epoch": 0.9596236769894159, "grad_norm": 0.6839804649353027, "learning_rate": 7.679799571303861e-06, "loss": 0.21366724967956544, "memory(GiB)": 33.6, "step": 765, "token_acc": 0.9328849994693322, "train_speed(iter/s)": 0.120321 }, { "epoch": 0.9658957271658173, "grad_norm": 0.7972912788391113, "learning_rate": 7.65201014513247e-06, "loss": 0.21506853103637696, "memory(GiB)": 33.6, "step": 770, "token_acc": 0.9269746646795827, "train_speed(iter/s)": 0.120492 }, { "epoch": 0.9721677773422187, "grad_norm": 0.6575592756271362, "learning_rate": 7.62410625883474e-06, "loss": 0.21630258560180665, "memory(GiB)": 33.6, "step": 775, "token_acc": 0.92975748611615, "train_speed(iter/s)": 0.120664 }, { "epoch": 0.9784398275186201, "grad_norm": 0.6901047825813293, "learning_rate": 7.596089116735765e-06, "loss": 0.2089380741119385, "memory(GiB)": 33.6, "step": 780, "token_acc": 0.9275595528864113, "train_speed(iter/s)": 0.12086 }, { "epoch": 0.9784398275186201, "eval_loss": 0.2157372534275055, "eval_runtime": 29.8458, "eval_samples_per_second": 17.255, "eval_steps_per_second": 4.322, "eval_token_acc": 0.9259257706545481, "step": 780 }, { "epoch": 0.9847118776950216, "grad_norm": 0.6576786041259766, "learning_rate": 7.567959928048723e-06, "loss": 0.2120821475982666, "memory(GiB)": 33.6, "step": 785, "token_acc": 0.9295358776486603, "train_speed(iter/s)": 0.120278 }, { "epoch": 0.990983927871423, "grad_norm": 0.8202281594276428, "learning_rate": 7.5397199068227e-06, "loss": 0.21234326362609862, "memory(GiB)": 33.6, "step": 790, "token_acc": 0.9287775025499053, "train_speed(iter/s)": 0.120438 }, { "epoch": 0.9972559780478244, "grad_norm": 0.6771529912948608, "learning_rate": 7.511370271890286e-06, "loss": 0.20872533321380615, "memory(GiB)": 33.6, "step": 795, "token_acc": 0.9276382199405878, "train_speed(iter/s)": 0.12063 }, { "epoch": 1.0025088200705605, "grad_norm": 0.6259863376617432, "learning_rate": 7.482912246814975e-06, "loss": 0.1691659927368164, "memory(GiB)": 33.6, "step": 800, "token_acc": 0.9478380434146627, "train_speed(iter/s)": 0.120917 }, { "epoch": 1.0025088200705605, "eval_loss": 0.21573711931705475, "eval_runtime": 29.9639, "eval_samples_per_second": 17.187, "eval_steps_per_second": 4.305, "eval_token_acc": 0.9261102330514692, "step": 800 }, { "epoch": 1.008780870246962, "grad_norm": 0.688946545124054, "learning_rate": 7.454347059838351e-06, "loss": 0.1593709945678711, "memory(GiB)": 33.6, "step": 805, "token_acc": 0.9363939073284858, "train_speed(iter/s)": 0.120397 }, { "epoch": 1.0150529204233634, "grad_norm": 0.740982174873352, "learning_rate": 7.425675943827084e-06, "loss": 0.17170259952545167, "memory(GiB)": 33.6, "step": 810, "token_acc": 0.9332383983916904, "train_speed(iter/s)": 0.120599 }, { "epoch": 1.021324970599765, "grad_norm": 0.7703794240951538, "learning_rate": 7.3969001362197135e-06, "loss": 0.15921430587768554, "memory(GiB)": 33.6, "step": 815, "token_acc": 0.9395872420262664, "train_speed(iter/s)": 0.120788 }, { "epoch": 1.0275970207761662, "grad_norm": 0.6545404195785522, "learning_rate": 7.3680208789732385e-06, "loss": 0.15435378551483153, "memory(GiB)": 33.6, "step": 820, "token_acc": 0.946012336917954, "train_speed(iter/s)": 0.12096 }, { "epoch": 1.0275970207761662, "eval_loss": 0.2201254665851593, "eval_runtime": 29.934, "eval_samples_per_second": 17.205, "eval_steps_per_second": 4.309, "eval_token_acc": 0.925619730768747, "step": 820 }, { "epoch": 1.0338690709525675, "grad_norm": 0.6074718236923218, "learning_rate": 7.339039418509532e-06, "loss": 0.15760223865509032, "memory(GiB)": 33.6, "step": 825, "token_acc": 0.9387994171373061, "train_speed(iter/s)": 0.120423 }, { "epoch": 1.040141121128969, "grad_norm": 0.7706517577171326, "learning_rate": 7.309957005661521e-06, "loss": 0.15146889686584472, "memory(GiB)": 33.6, "step": 830, "token_acc": 0.9525346241764152, "train_speed(iter/s)": 0.120632 }, { "epoch": 1.0464131713053704, "grad_norm": 0.7167385220527649, "learning_rate": 7.280774895619219e-06, "loss": 0.15735208988189697, "memory(GiB)": 33.6, "step": 835, "token_acc": 0.9430896598332957, "train_speed(iter/s)": 0.120825 }, { "epoch": 1.052685221481772, "grad_norm": 0.6908670663833618, "learning_rate": 7.25149434787555e-06, "loss": 0.15631234645843506, "memory(GiB)": 33.6, "step": 840, "token_acc": 0.9427331753700342, "train_speed(iter/s)": 0.121019 }, { "epoch": 1.052685221481772, "eval_loss": 0.2204887419939041, "eval_runtime": 29.7677, "eval_samples_per_second": 17.301, "eval_steps_per_second": 4.334, "eval_token_acc": 0.9257958085112626, "step": 840 }, { "epoch": 1.0589572716581732, "grad_norm": 0.6649175882339478, "learning_rate": 7.2221166261719755e-06, "loss": 0.14833444356918335, "memory(GiB)": 33.6, "step": 845, "token_acc": 0.9379661510111051, "train_speed(iter/s)": 0.120482 }, { "epoch": 1.0652293218345747, "grad_norm": 0.6566579937934875, "learning_rate": 7.192642998443975e-06, "loss": 0.15106643438339235, "memory(GiB)": 33.6, "step": 850, "token_acc": 0.9515770402701145, "train_speed(iter/s)": 0.120644 }, { "epoch": 1.071501372010976, "grad_norm": 0.7228676676750183, "learning_rate": 7.163074736766299e-06, "loss": 0.151542592048645, "memory(GiB)": 33.6, "step": 855, "token_acc": 0.9434348954775242, "train_speed(iter/s)": 0.120779 }, { "epoch": 1.0777734221873776, "grad_norm": 0.6888749003410339, "learning_rate": 7.133413117298081e-06, "loss": 0.14555807113647462, "memory(GiB)": 33.6, "step": 860, "token_acc": 0.9450332471906849, "train_speed(iter/s)": 0.120951 }, { "epoch": 1.0777734221873776, "eval_loss": 0.22007805109024048, "eval_runtime": 29.794, "eval_samples_per_second": 17.285, "eval_steps_per_second": 4.33, "eval_token_acc": 0.9259173860001425, "step": 860 }, { "epoch": 1.084045472363779, "grad_norm": 0.6592821478843689, "learning_rate": 7.103659420227755e-06, "loss": 0.1563601851463318, "memory(GiB)": 33.6, "step": 865, "token_acc": 0.9376562343765623, "train_speed(iter/s)": 0.120435 }, { "epoch": 1.0903175225401802, "grad_norm": 0.7310729622840881, "learning_rate": 7.0738149297178005e-06, "loss": 0.1602903962135315, "memory(GiB)": 33.6, "step": 870, "token_acc": 0.9540862093385581, "train_speed(iter/s)": 0.120585 }, { "epoch": 1.0965895727165818, "grad_norm": 0.7009090185165405, "learning_rate": 7.04388093384932e-06, "loss": 0.14554691314697266, "memory(GiB)": 33.6, "step": 875, "token_acc": 0.9516893894487255, "train_speed(iter/s)": 0.120725 }, { "epoch": 1.102861622892983, "grad_norm": 0.7225506901741028, "learning_rate": 7.013858724566449e-06, "loss": 0.16036466360092164, "memory(GiB)": 33.6, "step": 880, "token_acc": 0.9487800335257962, "train_speed(iter/s)": 0.120883 }, { "epoch": 1.102861622892983, "eval_loss": 0.22006595134735107, "eval_runtime": 29.8492, "eval_samples_per_second": 17.253, "eval_steps_per_second": 4.322, "eval_token_acc": 0.9258377317832902, "step": 880 }, { "epoch": 1.1091336730693846, "grad_norm": 0.7332343459129333, "learning_rate": 6.983749597620588e-06, "loss": 0.15600578784942626, "memory(GiB)": 33.6, "step": 885, "token_acc": 0.9356690055649649, "train_speed(iter/s)": 0.120401 }, { "epoch": 1.115405723245786, "grad_norm": 0.7584828734397888, "learning_rate": 6.9535548525144894e-06, "loss": 0.15730617046356202, "memory(GiB)": 33.6, "step": 890, "token_acc": 0.9422360762461726, "train_speed(iter/s)": 0.120564 }, { "epoch": 1.1216777734221874, "grad_norm": 0.6870989203453064, "learning_rate": 6.923275792446159e-06, "loss": 0.15372934341430664, "memory(GiB)": 33.6, "step": 895, "token_acc": 0.9412487331470164, "train_speed(iter/s)": 0.120704 }, { "epoch": 1.1279498235985888, "grad_norm": 0.6541606187820435, "learning_rate": 6.8929137242526216e-06, "loss": 0.1524061918258667, "memory(GiB)": 33.6, "step": 900, "token_acc": 0.9508530617643169, "train_speed(iter/s)": 0.120832 }, { "epoch": 1.1279498235985888, "eval_loss": 0.2202031910419464, "eval_runtime": 29.8745, "eval_samples_per_second": 17.239, "eval_steps_per_second": 4.318, "eval_token_acc": 0.9259173860001425, "step": 900 }, { "epoch": 1.1342218737749903, "grad_norm": 0.63804692029953, "learning_rate": 6.862469958353506e-06, "loss": 0.15143206119537353, "memory(GiB)": 33.6, "step": 905, "token_acc": 0.9388512882977574, "train_speed(iter/s)": 0.120334 }, { "epoch": 1.1404939239513916, "grad_norm": 0.8169479966163635, "learning_rate": 6.8319458086945026e-06, "loss": 0.1651373863220215, "memory(GiB)": 33.6, "step": 910, "token_acc": 0.9463410976706987, "train_speed(iter/s)": 0.120499 }, { "epoch": 1.146765974127793, "grad_norm": 0.7615450024604797, "learning_rate": 6.801342592690641e-06, "loss": 0.15947287082672118, "memory(GiB)": 33.6, "step": 915, "token_acc": 0.943523544080974, "train_speed(iter/s)": 0.120662 }, { "epoch": 1.1530380243041944, "grad_norm": 0.6645803451538086, "learning_rate": 6.770661631169434e-06, "loss": 0.14712635278701783, "memory(GiB)": 33.6, "step": 920, "token_acc": 0.9416338351553735, "train_speed(iter/s)": 0.120812 }, { "epoch": 1.1530380243041944, "eval_loss": 0.21968427300453186, "eval_runtime": 30.0062, "eval_samples_per_second": 17.163, "eval_steps_per_second": 4.299, "eval_token_acc": 0.9260389634890224, "step": 920 }, { "epoch": 1.1593100744805958, "grad_norm": 0.6639111042022705, "learning_rate": 6.739904248313879e-06, "loss": 0.1582737922668457, "memory(GiB)": 33.6, "step": 925, "token_acc": 0.9358828491280381, "train_speed(iter/s)": 0.120318 }, { "epoch": 1.1655821246569973, "grad_norm": 0.7869791388511658, "learning_rate": 6.709071771605292e-06, "loss": 0.15897371768951415, "memory(GiB)": 33.6, "step": 930, "token_acc": 0.9428512114831401, "train_speed(iter/s)": 0.12045 }, { "epoch": 1.1718541748333986, "grad_norm": 0.7262241840362549, "learning_rate": 6.678165531766029e-06, "loss": 0.15734575986862182, "memory(GiB)": 33.6, "step": 935, "token_acc": 0.9491624723709089, "train_speed(iter/s)": 0.120601 }, { "epoch": 1.1781262250098001, "grad_norm": 0.7573165893554688, "learning_rate": 6.647186862702038e-06, "loss": 0.1512979507446289, "memory(GiB)": 33.6, "step": 940, "token_acc": 0.9467049494120864, "train_speed(iter/s)": 0.120739 }, { "epoch": 1.1781262250098001, "eval_loss": 0.22000892460346222, "eval_runtime": 30.0307, "eval_samples_per_second": 17.149, "eval_steps_per_second": 4.296, "eval_token_acc": 0.9255736151695168, "step": 940 }, { "epoch": 1.1843982751862014, "grad_norm": 0.6843467354774475, "learning_rate": 6.616137101445301e-06, "loss": 0.1581122875213623, "memory(GiB)": 33.6, "step": 945, "token_acc": 0.9385528792778878, "train_speed(iter/s)": 0.12026 }, { "epoch": 1.190670325362603, "grad_norm": 0.7229541540145874, "learning_rate": 6.58501758809612e-06, "loss": 0.17478140592575073, "memory(GiB)": 33.6, "step": 950, "token_acc": 0.9408516112836927, "train_speed(iter/s)": 0.120424 }, { "epoch": 1.1969423755390043, "grad_norm": 0.7466816306114197, "learning_rate": 6.55382966576528e-06, "loss": 0.15570859909057616, "memory(GiB)": 33.6, "step": 955, "token_acc": 0.9458100145459925, "train_speed(iter/s)": 0.120574 }, { "epoch": 1.2032144257154056, "grad_norm": 0.7393471598625183, "learning_rate": 6.522574680516081e-06, "loss": 0.1629380464553833, "memory(GiB)": 33.6, "step": 960, "token_acc": 0.94634954320764, "train_speed(iter/s)": 0.120745 }, { "epoch": 1.2032144257154056, "eval_loss": 0.22024324536323547, "eval_runtime": 29.8937, "eval_samples_per_second": 17.228, "eval_steps_per_second": 4.315, "eval_token_acc": 0.9261898872683215, "step": 960 }, { "epoch": 1.2094864758918071, "grad_norm": 0.7199200987815857, "learning_rate": 6.491253981306245e-06, "loss": 0.15614912509918213, "memory(GiB)": 33.6, "step": 965, "token_acc": 0.9363130072672509, "train_speed(iter/s)": 0.120314 }, { "epoch": 1.2157585260682087, "grad_norm": 0.7639812231063843, "learning_rate": 6.459868919929691e-06, "loss": 0.15401583909988403, "memory(GiB)": 33.6, "step": 970, "token_acc": 0.9419335026939505, "train_speed(iter/s)": 0.120443 }, { "epoch": 1.22203057624461, "grad_norm": 0.7610638737678528, "learning_rate": 6.428420850958194e-06, "loss": 0.15354688167572023, "memory(GiB)": 33.6, "step": 975, "token_acc": 0.9475969889982628, "train_speed(iter/s)": 0.120569 }, { "epoch": 1.2283026264210113, "grad_norm": 0.7256251573562622, "learning_rate": 6.3969111316829215e-06, "loss": 0.15662674903869628, "memory(GiB)": 33.6, "step": 980, "token_acc": 0.9459802620188146, "train_speed(iter/s)": 0.120732 }, { "epoch": 1.2283026264210113, "eval_loss": 0.21970878541469574, "eval_runtime": 29.8153, "eval_samples_per_second": 17.273, "eval_steps_per_second": 4.327, "eval_token_acc": 0.9261437716690912, "step": 980 }, { "epoch": 1.2345746765974128, "grad_norm": 0.7129140496253967, "learning_rate": 6.365341122055857e-06, "loss": 0.15520663261413575, "memory(GiB)": 33.6, "step": 985, "token_acc": 0.9358353146537455, "train_speed(iter/s)": 0.120322 }, { "epoch": 1.2408467267738141, "grad_norm": 0.6518073081970215, "learning_rate": 6.333712184631093e-06, "loss": 0.14519546031951905, "memory(GiB)": 33.6, "step": 990, "token_acc": 0.9487179487179487, "train_speed(iter/s)": 0.120454 }, { "epoch": 1.2471187769502157, "grad_norm": 0.6729604005813599, "learning_rate": 6.302025684506042e-06, "loss": 0.1582566022872925, "memory(GiB)": 33.6, "step": 995, "token_acc": 0.9451847717388776, "train_speed(iter/s)": 0.120603 }, { "epoch": 1.253390827126617, "grad_norm": 0.7031135559082031, "learning_rate": 6.2702829892625e-06, "loss": 0.1544743537902832, "memory(GiB)": 33.6, "step": 1000, "token_acc": 0.9461549355615352, "train_speed(iter/s)": 0.120764 }, { "epoch": 1.253390827126617, "eval_loss": 0.21808215975761414, "eval_runtime": 29.9101, "eval_samples_per_second": 17.218, "eval_steps_per_second": 4.313, "eval_token_acc": 0.9264749655181087, "step": 1000 }, { "epoch": 1.2596628773030183, "grad_norm": 0.7392243146896362, "learning_rate": 6.238485468907637e-06, "loss": 0.15514018535614013, "memory(GiB)": 33.6, "step": 1005, "token_acc": 0.9405148412279971, "train_speed(iter/s)": 0.120335 }, { "epoch": 1.2659349274794198, "grad_norm": 0.6544946432113647, "learning_rate": 6.2066344958148596e-06, "loss": 0.15222200155258178, "memory(GiB)": 33.6, "step": 1010, "token_acc": 0.9450700357044768, "train_speed(iter/s)": 0.120493 }, { "epoch": 1.2722069776558214, "grad_norm": 0.6442455649375916, "learning_rate": 6.174731444664579e-06, "loss": 0.1523426055908203, "memory(GiB)": 33.6, "step": 1015, "token_acc": 0.9424824791940429, "train_speed(iter/s)": 0.120659 }, { "epoch": 1.2784790278322227, "grad_norm": 0.6623610854148865, "learning_rate": 6.14277769238489e-06, "loss": 0.15341660976409913, "memory(GiB)": 33.6, "step": 1020, "token_acc": 0.94507058287796, "train_speed(iter/s)": 0.120796 }, { "epoch": 1.2784790278322227, "eval_loss": 0.21728560328483582, "eval_runtime": 29.8731, "eval_samples_per_second": 17.24, "eval_steps_per_second": 4.318, "eval_token_acc": 0.9268396979847483, "step": 1020 }, { "epoch": 1.284751078008624, "grad_norm": 0.6439575552940369, "learning_rate": 6.110774618092128e-06, "loss": 0.14585806131362916, "memory(GiB)": 33.6, "step": 1025, "token_acc": 0.9402694008845999, "train_speed(iter/s)": 0.120366 }, { "epoch": 1.2910231281850255, "grad_norm": 0.6308783888816833, "learning_rate": 6.07872360303136e-06, "loss": 0.1529778242111206, "memory(GiB)": 33.6, "step": 1030, "token_acc": 0.9463726446578015, "train_speed(iter/s)": 0.120542 }, { "epoch": 1.2972951783614268, "grad_norm": 0.6911998391151428, "learning_rate": 6.046626030516766e-06, "loss": 0.15263807773590088, "memory(GiB)": 33.6, "step": 1035, "token_acc": 0.9480356726509676, "train_speed(iter/s)": 0.120685 }, { "epoch": 1.3035672285378284, "grad_norm": 0.6479185223579407, "learning_rate": 6.0144832858719256e-06, "loss": 0.1511695623397827, "memory(GiB)": 33.6, "step": 1040, "token_acc": 0.9483408164318522, "train_speed(iter/s)": 0.120811 }, { "epoch": 1.3035672285378284, "eval_loss": 0.21816755831241608, "eval_runtime": 29.8673, "eval_samples_per_second": 17.243, "eval_steps_per_second": 4.319, "eval_token_acc": 0.9266594279150299, "step": 1040 }, { "epoch": 1.3098392787142297, "grad_norm": 0.7152245044708252, "learning_rate": 5.982296756370052e-06, "loss": 0.15091612339019775, "memory(GiB)": 33.6, "step": 1045, "token_acc": 0.9368677988540223, "train_speed(iter/s)": 0.120391 }, { "epoch": 1.3161113288906312, "grad_norm": 0.7569878101348877, "learning_rate": 5.950067831174086e-06, "loss": 0.1640252947807312, "memory(GiB)": 33.6, "step": 1050, "token_acc": 0.9438852605967474, "train_speed(iter/s)": 0.120521 }, { "epoch": 1.3223833790670325, "grad_norm": 0.7000331282615662, "learning_rate": 5.917797901276771e-06, "loss": 0.1507915735244751, "memory(GiB)": 33.6, "step": 1055, "token_acc": 0.9414864333464412, "train_speed(iter/s)": 0.120671 }, { "epoch": 1.328655429243434, "grad_norm": 0.6913698315620422, "learning_rate": 5.885488359440592e-06, "loss": 0.14514442682266235, "memory(GiB)": 33.6, "step": 1060, "token_acc": 0.9446129425437229, "train_speed(iter/s)": 0.120797 }, { "epoch": 1.328655429243434, "eval_loss": 0.2164752334356308, "eval_runtime": 30.0417, "eval_samples_per_second": 17.143, "eval_steps_per_second": 4.294, "eval_token_acc": 0.9267306974774767, "step": 1060 }, { "epoch": 1.3349274794198354, "grad_norm": 0.6658451557159424, "learning_rate": 5.853140600137684e-06, "loss": 0.15348198413848876, "memory(GiB)": 33.6, "step": 1065, "token_acc": 0.9394616144184715, "train_speed(iter/s)": 0.120389 }, { "epoch": 1.3411995295962367, "grad_norm": 0.6750782132148743, "learning_rate": 5.8207560194896325e-06, "loss": 0.16195533275604249, "memory(GiB)": 33.6, "step": 1070, "token_acc": 0.9389181190397895, "train_speed(iter/s)": 0.120519 }, { "epoch": 1.3474715797726382, "grad_norm": 0.6864067912101746, "learning_rate": 5.78833601520723e-06, "loss": 0.15502965450286865, "memory(GiB)": 33.6, "step": 1075, "token_acc": 0.9447475298539129, "train_speed(iter/s)": 0.120654 }, { "epoch": 1.3537436299490395, "grad_norm": 0.7045819759368896, "learning_rate": 5.755881986530137e-06, "loss": 0.16037662029266359, "memory(GiB)": 33.6, "step": 1080, "token_acc": 0.9435206662381578, "train_speed(iter/s)": 0.120778 }, { "epoch": 1.3537436299490395, "eval_loss": 0.21634995937347412, "eval_runtime": 30.004, "eval_samples_per_second": 17.164, "eval_steps_per_second": 4.299, "eval_token_acc": 0.9266720048966381, "step": 1080 }, { "epoch": 1.360015680125441, "grad_norm": 0.7639293074607849, "learning_rate": 5.723395334166506e-06, "loss": 0.15927184820175172, "memory(GiB)": 33.6, "step": 1085, "token_acc": 0.9369159769632709, "train_speed(iter/s)": 0.120386 }, { "epoch": 1.3662877303018424, "grad_norm": 0.6852443814277649, "learning_rate": 5.6908774602325165e-06, "loss": 0.14834917783737184, "memory(GiB)": 33.6, "step": 1090, "token_acc": 0.9440402603796291, "train_speed(iter/s)": 0.120507 }, { "epoch": 1.372559780478244, "grad_norm": 0.6806090474128723, "learning_rate": 5.6583297681918615e-06, "loss": 0.14343435764312745, "memory(GiB)": 33.6, "step": 1095, "token_acc": 0.9514565363959733, "train_speed(iter/s)": 0.120637 }, { "epoch": 1.3788318306546452, "grad_norm": 0.7074826955795288, "learning_rate": 5.625753662795183e-06, "loss": 0.15417686700820923, "memory(GiB)": 33.6, "step": 1100, "token_acc": 0.9386321901831356, "train_speed(iter/s)": 0.120771 }, { "epoch": 1.3788318306546452, "eval_loss": 0.21541613340377808, "eval_runtime": 29.9501, "eval_samples_per_second": 17.195, "eval_steps_per_second": 4.307, "eval_token_acc": 0.9270451220176832, "step": 1100 }, { "epoch": 1.3851038808310467, "grad_norm": 0.776336669921875, "learning_rate": 5.59315055001943e-06, "loss": 0.16252031326293945, "memory(GiB)": 33.6, "step": 1105, "token_acc": 0.9352993130520117, "train_speed(iter/s)": 0.120392 }, { "epoch": 1.391375931007448, "grad_norm": 0.6779446005821228, "learning_rate": 5.5605218370071836e-06, "loss": 0.14336334466934203, "memory(GiB)": 33.6, "step": 1110, "token_acc": 0.9528518089352388, "train_speed(iter/s)": 0.120505 }, { "epoch": 1.3976479811838494, "grad_norm": 0.690963089466095, "learning_rate": 5.5278689320059305e-06, "loss": 0.15652428865432738, "memory(GiB)": 33.6, "step": 1115, "token_acc": 0.9426378227494766, "train_speed(iter/s)": 0.120669 }, { "epoch": 1.403920031360251, "grad_norm": 0.7639049887657166, "learning_rate": 5.4951932443072764e-06, "loss": 0.16521704196929932, "memory(GiB)": 33.6, "step": 1120, "token_acc": 0.9421646929220601, "train_speed(iter/s)": 0.120808 }, { "epoch": 1.403920031360251, "eval_loss": 0.21606019139289856, "eval_runtime": 29.9411, "eval_samples_per_second": 17.2, "eval_steps_per_second": 4.308, "eval_token_acc": 0.9273176232858622, "step": 1120 }, { "epoch": 1.4101920815366524, "grad_norm": 0.7224271297454834, "learning_rate": 5.462496184186118e-06, "loss": 0.15909309387207032, "memory(GiB)": 33.6, "step": 1125, "token_acc": 0.9397663407498653, "train_speed(iter/s)": 0.120435 }, { "epoch": 1.4164641317130537, "grad_norm": 0.7142929434776306, "learning_rate": 5.429779162839787e-06, "loss": 0.16222875118255614, "memory(GiB)": 33.6, "step": 1130, "token_acc": 0.9455863719555118, "train_speed(iter/s)": 0.120578 }, { "epoch": 1.422736181889455, "grad_norm": 0.6916890144348145, "learning_rate": 5.397043592327129e-06, "loss": 0.15585269927978515, "memory(GiB)": 33.6, "step": 1135, "token_acc": 0.9430803571428571, "train_speed(iter/s)": 0.120706 }, { "epoch": 1.4290082320658566, "grad_norm": 0.7470511198043823, "learning_rate": 5.364290885507577e-06, "loss": 0.1534827470779419, "memory(GiB)": 33.6, "step": 1140, "token_acc": 0.9486189913884969, "train_speed(iter/s)": 0.120798 }, { "epoch": 1.4290082320658566, "eval_loss": 0.21471655368804932, "eval_runtime": 29.873, "eval_samples_per_second": 17.24, "eval_steps_per_second": 4.318, "eval_token_acc": 0.9275440089548109, "step": 1140 }, { "epoch": 1.435280282242258, "grad_norm": 0.7307199835777283, "learning_rate": 5.3315224559801555e-06, "loss": 0.14947969913482667, "memory(GiB)": 33.6, "step": 1145, "token_acc": 0.939248102132458, "train_speed(iter/s)": 0.120404 }, { "epoch": 1.4415523324186594, "grad_norm": 0.7301707863807678, "learning_rate": 5.2987397180224795e-06, "loss": 0.15617960691452026, "memory(GiB)": 33.6, "step": 1150, "token_acc": 0.9474308925933741, "train_speed(iter/s)": 0.120508 }, { "epoch": 1.4478243825950607, "grad_norm": 0.7371909022331238, "learning_rate": 5.265944086529714e-06, "loss": 0.15231599807739257, "memory(GiB)": 33.6, "step": 1155, "token_acc": 0.9433844406587166, "train_speed(iter/s)": 0.120639 }, { "epoch": 1.454096432771462, "grad_norm": 0.7134169340133667, "learning_rate": 5.233136976953504e-06, "loss": 0.158011531829834, "memory(GiB)": 33.6, "step": 1160, "token_acc": 0.9418867924528301, "train_speed(iter/s)": 0.120795 }, { "epoch": 1.454096432771462, "eval_loss": 0.2147841602563858, "eval_runtime": 29.9333, "eval_samples_per_second": 17.205, "eval_steps_per_second": 4.31, "eval_token_acc": 0.927066083653697, "step": 1160 }, { "epoch": 1.4603684829478636, "grad_norm": 0.7018805146217346, "learning_rate": 5.200319805240884e-06, "loss": 0.15690932273864747, "memory(GiB)": 33.6, "step": 1165, "token_acc": 0.9392478977732894, "train_speed(iter/s)": 0.120417 }, { "epoch": 1.4666405331242651, "grad_norm": 0.7469993233680725, "learning_rate": 5.167493987773175e-06, "loss": 0.15955485105514527, "memory(GiB)": 33.6, "step": 1170, "token_acc": 0.9344873812438071, "train_speed(iter/s)": 0.120557 }, { "epoch": 1.4729125833006664, "grad_norm": 0.7378620505332947, "learning_rate": 5.134660941304838e-06, "loss": 0.1497912287712097, "memory(GiB)": 33.6, "step": 1175, "token_acc": 0.9491378587597462, "train_speed(iter/s)": 0.120684 }, { "epoch": 1.4791846334770677, "grad_norm": 0.7320712208747864, "learning_rate": 5.10182208290234e-06, "loss": 0.15272881984710693, "memory(GiB)": 33.6, "step": 1180, "token_acc": 0.9514241554427025, "train_speed(iter/s)": 0.120797 }, { "epoch": 1.4791846334770677, "eval_loss": 0.21511444449424744, "eval_runtime": 29.7909, "eval_samples_per_second": 17.287, "eval_steps_per_second": 4.33, "eval_token_acc": 0.9274056621571201, "step": 1180 }, { "epoch": 1.4854566836534693, "grad_norm": 0.647217333316803, "learning_rate": 5.068978829882992e-06, "loss": 0.15485861301422119, "memory(GiB)": 33.6, "step": 1185, "token_acc": 0.9385337002183685, "train_speed(iter/s)": 0.120365 }, { "epoch": 1.4917287338298706, "grad_norm": 0.650230884552002, "learning_rate": 5.036132599753771e-06, "loss": 0.15730609893798828, "memory(GiB)": 33.6, "step": 1190, "token_acc": 0.9470712591523152, "train_speed(iter/s)": 0.120508 }, { "epoch": 1.4980007840062721, "grad_norm": 0.704609751701355, "learning_rate": 5.003284810150152e-06, "loss": 0.14192657470703124, "memory(GiB)": 33.6, "step": 1195, "token_acc": 0.9487411800236114, "train_speed(iter/s)": 0.120634 }, { "epoch": 1.5042728341826734, "grad_norm": 0.6966667771339417, "learning_rate": 4.970436878774907e-06, "loss": 0.14936549663543702, "memory(GiB)": 33.6, "step": 1200, "token_acc": 0.940895846426327, "train_speed(iter/s)": 0.120761 }, { "epoch": 1.5042728341826734, "eval_loss": 0.2132187932729721, "eval_runtime": 29.8173, "eval_samples_per_second": 17.272, "eval_steps_per_second": 4.326, "eval_token_acc": 0.927678163425299, "step": 1200 }, { "epoch": 1.5105448843590747, "grad_norm": 0.7017958760261536, "learning_rate": 4.937590223336936e-06, "loss": 0.15734946727752686, "memory(GiB)": 33.6, "step": 1205, "token_acc": 0.9369624490741228, "train_speed(iter/s)": 0.120418 }, { "epoch": 1.5168169345354763, "grad_norm": 0.7165507674217224, "learning_rate": 4.904746261490062e-06, "loss": 0.15068832635879517, "memory(GiB)": 33.6, "step": 1210, "token_acc": 0.9456936989216113, "train_speed(iter/s)": 0.120528 }, { "epoch": 1.5230889847118778, "grad_norm": 0.6853801012039185, "learning_rate": 4.87190641077186e-06, "loss": 0.15125684738159179, "memory(GiB)": 33.6, "step": 1215, "token_acc": 0.9438098534671744, "train_speed(iter/s)": 0.120629 }, { "epoch": 1.5293610348882791, "grad_norm": 0.6704487204551697, "learning_rate": 4.8390720885424665e-06, "loss": 0.14999151229858398, "memory(GiB)": 33.6, "step": 1220, "token_acc": 0.9518440275904198, "train_speed(iter/s)": 0.120748 }, { "epoch": 1.5293610348882791, "eval_loss": 0.21275770664215088, "eval_runtime": 29.8668, "eval_samples_per_second": 17.243, "eval_steps_per_second": 4.319, "eval_token_acc": 0.9274769317195668, "step": 1220 }, { "epoch": 1.5356330850646804, "grad_norm": 0.6753378510475159, "learning_rate": 4.806244711923408e-06, "loss": 0.15547568798065187, "memory(GiB)": 33.6, "step": 1225, "token_acc": 0.9402999940240234, "train_speed(iter/s)": 0.120393 }, { "epoch": 1.541905135241082, "grad_norm": 0.7050623893737793, "learning_rate": 4.773425697736445e-06, "loss": 0.14445589780807494, "memory(GiB)": 33.6, "step": 1230, "token_acc": 0.949685360241732, "train_speed(iter/s)": 0.120511 }, { "epoch": 1.5481771854174835, "grad_norm": 0.7278842329978943, "learning_rate": 4.7406164624424135e-06, "loss": 0.14890639781951903, "memory(GiB)": 33.6, "step": 1235, "token_acc": 0.9443320079049501, "train_speed(iter/s)": 0.120618 }, { "epoch": 1.5544492355938848, "grad_norm": 0.7538560032844543, "learning_rate": 4.707818422080094e-06, "loss": 0.1574314832687378, "memory(GiB)": 33.6, "step": 1240, "token_acc": 0.9465081309868567, "train_speed(iter/s)": 0.120754 }, { "epoch": 1.5544492355938848, "eval_loss": 0.21373403072357178, "eval_runtime": 29.905, "eval_samples_per_second": 17.221, "eval_steps_per_second": 4.314, "eval_token_acc": 0.9275230473187971, "step": 1240 }, { "epoch": 1.5607212857702861, "grad_norm": 0.7221870422363281, "learning_rate": 4.675032992205099e-06, "loss": 0.14533066749572754, "memory(GiB)": 33.6, "step": 1245, "token_acc": 0.940144099378882, "train_speed(iter/s)": 0.120412 }, { "epoch": 1.5669933359466874, "grad_norm": 0.6934393048286438, "learning_rate": 4.642261587828778e-06, "loss": 0.1509866714477539, "memory(GiB)": 33.6, "step": 1250, "token_acc": 0.9452032867356739, "train_speed(iter/s)": 0.120529 }, { "epoch": 1.573265386123089, "grad_norm": 0.7100276350975037, "learning_rate": 4.609505623357135e-06, "loss": 0.1503272294998169, "memory(GiB)": 33.6, "step": 1255, "token_acc": 0.945176036085127, "train_speed(iter/s)": 0.12066 }, { "epoch": 1.5795374362994905, "grad_norm": 0.7433052659034729, "learning_rate": 4.576766512529799e-06, "loss": 0.1667776346206665, "memory(GiB)": 33.6, "step": 1260, "token_acc": 0.9403739570010354, "train_speed(iter/s)": 0.120774 }, { "epoch": 1.5795374362994905, "eval_loss": 0.21276888251304626, "eval_runtime": 29.8799, "eval_samples_per_second": 17.236, "eval_steps_per_second": 4.317, "eval_token_acc": 0.9279380877118697, "step": 1260 }, { "epoch": 1.5858094864758918, "grad_norm": 0.6793298125267029, "learning_rate": 4.544045668358999e-06, "loss": 0.1555434823036194, "memory(GiB)": 33.6, "step": 1265, "token_acc": 0.9374781493998369, "train_speed(iter/s)": 0.120424 }, { "epoch": 1.5920815366522931, "grad_norm": 0.7161461710929871, "learning_rate": 4.511344503068574e-06, "loss": 0.15700291395187377, "memory(GiB)": 33.6, "step": 1270, "token_acc": 0.9427421933283598, "train_speed(iter/s)": 0.120517 }, { "epoch": 1.5983535868286947, "grad_norm": 0.6421639919281006, "learning_rate": 4.478664428033031e-06, "loss": 0.1498015284538269, "memory(GiB)": 33.6, "step": 1275, "token_acc": 0.9436655491212029, "train_speed(iter/s)": 0.120625 }, { "epoch": 1.6046256370050962, "grad_norm": 0.7012119293212891, "learning_rate": 4.446006853716628e-06, "loss": 0.15100154876708985, "memory(GiB)": 33.6, "step": 1280, "token_acc": 0.9480080409356725, "train_speed(iter/s)": 0.120734 }, { "epoch": 1.6046256370050962, "eval_loss": 0.21206073462963104, "eval_runtime": 29.6181, "eval_samples_per_second": 17.388, "eval_steps_per_second": 4.355, "eval_token_acc": 0.9279967802927083, "step": 1280 }, { "epoch": 1.6108976871814975, "grad_norm": 0.6183798909187317, "learning_rate": 4.413373189612497e-06, "loss": 0.14532687664031982, "memory(GiB)": 33.6, "step": 1285, "token_acc": 0.9389383520807664, "train_speed(iter/s)": 0.120397 }, { "epoch": 1.6171697373578988, "grad_norm": 0.6965980529785156, "learning_rate": 4.380764844181806e-06, "loss": 0.15175777673721313, "memory(GiB)": 33.6, "step": 1290, "token_acc": 0.942019661331, "train_speed(iter/s)": 0.120532 }, { "epoch": 1.6234417875343001, "grad_norm": 0.7467201352119446, "learning_rate": 4.34818322479298e-06, "loss": 0.1542289137840271, "memory(GiB)": 33.6, "step": 1295, "token_acc": 0.9416962545716651, "train_speed(iter/s)": 0.120641 }, { "epoch": 1.6297138377107017, "grad_norm": 0.7295259833335876, "learning_rate": 4.315629737660956e-06, "loss": 0.14708173274993896, "memory(GiB)": 33.6, "step": 1300, "token_acc": 0.9478447445877919, "train_speed(iter/s)": 0.120737 }, { "epoch": 1.6297138377107017, "eval_loss": 0.21190744638442993, "eval_runtime": 29.8419, "eval_samples_per_second": 17.258, "eval_steps_per_second": 4.323, "eval_token_acc": 0.928495667229836, "step": 1300 }, { "epoch": 1.6359858878871032, "grad_norm": 0.7090888023376465, "learning_rate": 4.283105787786482e-06, "loss": 0.15199344158172606, "memory(GiB)": 33.6, "step": 1305, "token_acc": 0.9388616179391395, "train_speed(iter/s)": 0.12039 }, { "epoch": 1.6422579380635045, "grad_norm": 0.6687735915184021, "learning_rate": 4.250612778895492e-06, "loss": 0.1566769599914551, "memory(GiB)": 33.6, "step": 1310, "token_acc": 0.9447847002229262, "train_speed(iter/s)": 0.120504 }, { "epoch": 1.6485299882399058, "grad_norm": 0.7526585459709167, "learning_rate": 4.218152113378513e-06, "loss": 0.15292699337005616, "memory(GiB)": 33.6, "step": 1315, "token_acc": 0.9508290451686678, "train_speed(iter/s)": 0.120624 }, { "epoch": 1.6548020384163074, "grad_norm": 0.6574867367744446, "learning_rate": 4.185725192230136e-06, "loss": 0.1453101873397827, "memory(GiB)": 33.6, "step": 1320, "token_acc": 0.943889951905673, "train_speed(iter/s)": 0.120725 }, { "epoch": 1.6548020384163074, "eval_loss": 0.21127335727214813, "eval_runtime": 29.9057, "eval_samples_per_second": 17.221, "eval_steps_per_second": 4.314, "eval_token_acc": 0.9287597838436095, "step": 1320 }, { "epoch": 1.6610740885927089, "grad_norm": 0.7367099523544312, "learning_rate": 4.1533334149885594e-06, "loss": 0.157798171043396, "memory(GiB)": 33.6, "step": 1325, "token_acc": 0.9389246418932946, "train_speed(iter/s)": 0.1204 }, { "epoch": 1.6673461387691102, "grad_norm": 0.7515724897384644, "learning_rate": 4.120978179675172e-06, "loss": 0.149272882938385, "memory(GiB)": 33.6, "step": 1330, "token_acc": 0.9415829318651067, "train_speed(iter/s)": 0.120496 }, { "epoch": 1.6736181889455115, "grad_norm": 0.7276756763458252, "learning_rate": 4.088660882734228e-06, "loss": 0.15989675521850585, "memory(GiB)": 33.6, "step": 1335, "token_acc": 0.9443810526931742, "train_speed(iter/s)": 0.120605 }, { "epoch": 1.6798902391219128, "grad_norm": 0.6809377670288086, "learning_rate": 4.056382918972565e-06, "loss": 0.150339674949646, "memory(GiB)": 33.6, "step": 1340, "token_acc": 0.9478320715760495, "train_speed(iter/s)": 0.120703 }, { "epoch": 1.6798902391219128, "eval_loss": 0.21092940866947174, "eval_runtime": 29.83, "eval_samples_per_second": 17.264, "eval_steps_per_second": 4.324, "eval_token_acc": 0.9285711291194855, "step": 1340 }, { "epoch": 1.6861622892983144, "grad_norm": 0.7006319165229797, "learning_rate": 4.024145681499416e-06, "loss": 0.14731377363204956, "memory(GiB)": 33.6, "step": 1345, "token_acc": 0.9404405572409874, "train_speed(iter/s)": 0.120406 }, { "epoch": 1.6924343394747159, "grad_norm": 0.6811453700065613, "learning_rate": 3.991950561666269e-06, "loss": 0.14400005340576172, "memory(GiB)": 33.6, "step": 1350, "token_acc": 0.9514704326668783, "train_speed(iter/s)": 0.120501 }, { "epoch": 1.6987063896511172, "grad_norm": 0.6884180307388306, "learning_rate": 3.959798949006831e-06, "loss": 0.1443554401397705, "memory(GiB)": 33.6, "step": 1355, "token_acc": 0.952190047945391, "train_speed(iter/s)": 0.12061 }, { "epoch": 1.7049784398275185, "grad_norm": 0.642373263835907, "learning_rate": 3.927692231177053e-06, "loss": 0.14928441047668456, "memory(GiB)": 33.6, "step": 1360, "token_acc": 0.9533662833875387, "train_speed(iter/s)": 0.12072 }, { "epoch": 1.7049784398275185, "eval_loss": 0.21123968064785004, "eval_runtime": 29.8426, "eval_samples_per_second": 17.257, "eval_steps_per_second": 4.323, "eval_token_acc": 0.9288436303876645, "step": 1360 }, { "epoch": 1.71125049000392, "grad_norm": 0.673079252243042, "learning_rate": 3.895631793895223e-06, "loss": 0.14722020626068116, "memory(GiB)": 33.6, "step": 1365, "token_acc": 0.9392204906405309, "train_speed(iter/s)": 0.120411 }, { "epoch": 1.7175225401803216, "grad_norm": 0.7413303256034851, "learning_rate": 3.863619020882184e-06, "loss": 0.1495545506477356, "memory(GiB)": 33.6, "step": 1370, "token_acc": 0.9497109224438773, "train_speed(iter/s)": 0.120525 }, { "epoch": 1.7237945903567229, "grad_norm": 0.6686860918998718, "learning_rate": 3.831655293801596e-06, "loss": 0.15514848232269288, "memory(GiB)": 33.6, "step": 1375, "token_acc": 0.9489922206506365, "train_speed(iter/s)": 0.120638 }, { "epoch": 1.7300666405331242, "grad_norm": 0.6420913338661194, "learning_rate": 3.7997419922003077e-06, "loss": 0.15427151918411255, "memory(GiB)": 33.6, "step": 1380, "token_acc": 0.9402859545836838, "train_speed(iter/s)": 0.120745 }, { "epoch": 1.7300666405331242, "eval_loss": 0.2099025994539261, "eval_runtime": 29.7426, "eval_samples_per_second": 17.315, "eval_steps_per_second": 4.337, "eval_token_acc": 0.9287681684980149, "step": 1380 }, { "epoch": 1.7363386907095255, "grad_norm": 0.6821540594100952, "learning_rate": 3.7678804934488146e-06, "loss": 0.158866024017334, "memory(GiB)": 33.6, "step": 1385, "token_acc": 0.9397809287559081, "train_speed(iter/s)": 0.120441 }, { "epoch": 1.742610740885927, "grad_norm": 0.6591536998748779, "learning_rate": 3.736072172681818e-06, "loss": 0.1457535743713379, "memory(GiB)": 33.6, "step": 1390, "token_acc": 0.9491301798279906, "train_speed(iter/s)": 0.120572 }, { "epoch": 1.7488827910623286, "grad_norm": 0.6923695802688599, "learning_rate": 3.704318402738867e-06, "loss": 0.14236855506896973, "memory(GiB)": 33.6, "step": 1395, "token_acc": 0.9516265603234001, "train_speed(iter/s)": 0.120687 }, { "epoch": 1.75515484123873, "grad_norm": 0.6892858147621155, "learning_rate": 3.672620554105111e-06, "loss": 0.14654231071472168, "memory(GiB)": 33.6, "step": 1400, "token_acc": 0.9462526829555143, "train_speed(iter/s)": 0.120781 }, { "epoch": 1.75515484123873, "eval_loss": 0.2089109718799591, "eval_runtime": 29.8795, "eval_samples_per_second": 17.236, "eval_steps_per_second": 4.317, "eval_token_acc": 0.9291622472550738, "step": 1400 }, { "epoch": 1.7614268914151312, "grad_norm": 0.7279197573661804, "learning_rate": 3.6409799948521473e-06, "loss": 0.14290038347244263, "memory(GiB)": 33.6, "step": 1405, "token_acc": 0.9411003428074647, "train_speed(iter/s)": 0.120448 }, { "epoch": 1.7676989415915327, "grad_norm": 0.7627122402191162, "learning_rate": 3.6093980905789824e-06, "loss": 0.16706535816192628, "memory(GiB)": 33.6, "step": 1410, "token_acc": 0.9413535575754067, "train_speed(iter/s)": 0.120561 }, { "epoch": 1.7739709917679343, "grad_norm": 0.6972676515579224, "learning_rate": 3.577876204353079e-06, "loss": 0.1592485189437866, "memory(GiB)": 33.6, "step": 1415, "token_acc": 0.9439551849921834, "train_speed(iter/s)": 0.120676 }, { "epoch": 1.7802430419443356, "grad_norm": 0.6900568604469299, "learning_rate": 3.5464156966515426e-06, "loss": 0.14554288387298583, "memory(GiB)": 33.6, "step": 1420, "token_acc": 0.951461222546277, "train_speed(iter/s)": 0.120765 }, { "epoch": 1.7802430419443356, "eval_loss": 0.20876409113407135, "eval_runtime": 29.5896, "eval_samples_per_second": 17.405, "eval_steps_per_second": 4.36, "eval_token_acc": 0.9296234032473767, "step": 1420 }, { "epoch": 1.786515092120737, "grad_norm": 0.684529721736908, "learning_rate": 3.515017925302396e-06, "loss": 0.14716337919235228, "memory(GiB)": 33.6, "step": 1425, "token_acc": 0.9401438678547113, "train_speed(iter/s)": 0.120461 }, { "epoch": 1.7927871422971384, "grad_norm": 0.6805464029312134, "learning_rate": 3.48368424542597e-06, "loss": 0.16177623271942138, "memory(GiB)": 33.6, "step": 1430, "token_acc": 0.9364077811055218, "train_speed(iter/s)": 0.12056 }, { "epoch": 1.7990591924735397, "grad_norm": 0.7333641648292542, "learning_rate": 3.4524160093764288e-06, "loss": 0.13987714052200317, "memory(GiB)": 33.6, "step": 1435, "token_acc": 0.94569744345486, "train_speed(iter/s)": 0.120636 }, { "epoch": 1.8053312426499413, "grad_norm": 0.6505182385444641, "learning_rate": 3.421214566683395e-06, "loss": 0.14928100109100342, "memory(GiB)": 33.6, "step": 1440, "token_acc": 0.9490582512161656, "train_speed(iter/s)": 0.12074 }, { "epoch": 1.8053312426499413, "eval_loss": 0.20936539769172668, "eval_runtime": 29.7766, "eval_samples_per_second": 17.295, "eval_steps_per_second": 4.332, "eval_token_acc": 0.9291370932918572, "step": 1440 }, { "epoch": 1.8116032928263426, "grad_norm": 0.6788591742515564, "learning_rate": 3.390081263993702e-06, "loss": 0.1493847608566284, "memory(GiB)": 33.6, "step": 1445, "token_acc": 0.941620617599257, "train_speed(iter/s)": 0.120458 }, { "epoch": 1.817875343002744, "grad_norm": 0.6276586055755615, "learning_rate": 3.3590174450132828e-06, "loss": 0.15320565700531005, "memory(GiB)": 33.6, "step": 1450, "token_acc": 0.9462449451184286, "train_speed(iter/s)": 0.120567 }, { "epoch": 1.8241473931791454, "grad_norm": 0.7135562300682068, "learning_rate": 3.3280244504491664e-06, "loss": 0.15439343452453613, "memory(GiB)": 33.6, "step": 1455, "token_acc": 0.9476100611215954, "train_speed(iter/s)": 0.120673 }, { "epoch": 1.830419443355547, "grad_norm": 0.7349167466163635, "learning_rate": 3.297103617951618e-06, "loss": 0.149544358253479, "memory(GiB)": 33.6, "step": 1460, "token_acc": 0.9508973838977895, "train_speed(iter/s)": 0.120772 }, { "epoch": 1.830419443355547, "eval_loss": 0.20811545848846436, "eval_runtime": 29.7337, "eval_samples_per_second": 17.32, "eval_steps_per_second": 4.339, "eval_token_acc": 0.9293760559424142, "step": 1460 }, { "epoch": 1.8366914935319483, "grad_norm": 0.6688914895057678, "learning_rate": 3.2662562820564043e-06, "loss": 0.147084379196167, "memory(GiB)": 33.6, "step": 1465, "token_acc": 0.9402785349655548, "train_speed(iter/s)": 0.120466 }, { "epoch": 1.8429635437083496, "grad_norm": 0.7431237697601318, "learning_rate": 3.2354837741271994e-06, "loss": 0.15128002166748047, "memory(GiB)": 33.6, "step": 1470, "token_acc": 0.9480812641083521, "train_speed(iter/s)": 0.120567 }, { "epoch": 1.8492355938847511, "grad_norm": 0.6432802677154541, "learning_rate": 3.2047874222981134e-06, "loss": 0.14261975288391113, "memory(GiB)": 33.6, "step": 1475, "token_acc": 0.9464294764583651, "train_speed(iter/s)": 0.120663 }, { "epoch": 1.8555076440611527, "grad_norm": 0.7087119221687317, "learning_rate": 3.174168551416384e-06, "loss": 0.1470237135887146, "memory(GiB)": 33.6, "step": 1480, "token_acc": 0.9525287905322931, "train_speed(iter/s)": 0.120742 }, { "epoch": 1.8555076440611527, "eval_loss": 0.20842251181602478, "eval_runtime": 29.9521, "eval_samples_per_second": 17.194, "eval_steps_per_second": 4.307, "eval_token_acc": 0.9292880170711564, "step": 1480 }, { "epoch": 1.861779694237554, "grad_norm": 0.7013330459594727, "learning_rate": 3.1436284829851883e-06, "loss": 0.1440601348876953, "memory(GiB)": 33.6, "step": 1485, "token_acc": 0.94125851177291, "train_speed(iter/s)": 0.120446 }, { "epoch": 1.8680517444139553, "grad_norm": 0.7456852793693542, "learning_rate": 3.113168535106604e-06, "loss": 0.15421888828277588, "memory(GiB)": 33.6, "step": 1490, "token_acc": 0.944170604009705, "train_speed(iter/s)": 0.120552 }, { "epoch": 1.8743237945903566, "grad_norm": 0.7362022995948792, "learning_rate": 3.08279002242473e-06, "loss": 0.15221171379089354, "memory(GiB)": 33.6, "step": 1495, "token_acc": 0.9467418723959071, "train_speed(iter/s)": 0.120646 }, { "epoch": 1.8805958447667581, "grad_norm": 0.6779168844223022, "learning_rate": 3.0524942560689387e-06, "loss": 0.14756014347076415, "memory(GiB)": 33.6, "step": 1500, "token_acc": 0.9489157165213503, "train_speed(iter/s)": 0.120733 }, { "epoch": 1.8805958447667581, "eval_loss": 0.20791077613830566, "eval_runtime": 29.8876, "eval_samples_per_second": 17.231, "eval_steps_per_second": 4.316, "eval_token_acc": 0.9297491730634593, "step": 1500 }, { "epoch": 1.8868678949431597, "grad_norm": 0.6737608909606934, "learning_rate": 3.0222825435972948e-06, "loss": 0.14706544876098632, "memory(GiB)": 33.6, "step": 1505, "token_acc": 0.9416515751653609, "train_speed(iter/s)": 0.120438 }, { "epoch": 1.893139945119561, "grad_norm": 0.6713505983352661, "learning_rate": 2.99215618894011e-06, "loss": 0.14257076978683472, "memory(GiB)": 33.6, "step": 1510, "token_acc": 0.947631754503002, "train_speed(iter/s)": 0.120524 }, { "epoch": 1.8994119952959623, "grad_norm": 0.7137247920036316, "learning_rate": 2.9621164923436774e-06, "loss": 0.14342806339263917, "memory(GiB)": 33.6, "step": 1515, "token_acc": 0.9520854223691699, "train_speed(iter/s)": 0.120618 }, { "epoch": 1.9056840454723638, "grad_norm": 0.6587111949920654, "learning_rate": 2.9321647503141525e-06, "loss": 0.14919731616973878, "memory(GiB)": 33.6, "step": 1520, "token_acc": 0.9465758429898736, "train_speed(iter/s)": 0.120695 }, { "epoch": 1.9056840454723638, "eval_loss": 0.20832034945487976, "eval_runtime": 29.8838, "eval_samples_per_second": 17.233, "eval_steps_per_second": 4.317, "eval_token_acc": 0.9296108262657684, "step": 1520 }, { "epoch": 1.9119560956487653, "grad_norm": 0.6283432841300964, "learning_rate": 2.902302255561585e-06, "loss": 0.14435771703720093, "memory(GiB)": 33.6, "step": 1525, "token_acc": 0.9410489589892338, "train_speed(iter/s)": 0.120411 }, { "epoch": 1.9182281458251667, "grad_norm": 0.6523663997650146, "learning_rate": 2.87253029694414e-06, "loss": 0.14620786905288696, "memory(GiB)": 33.6, "step": 1530, "token_acc": 0.9439097941523534, "train_speed(iter/s)": 0.120487 }, { "epoch": 1.924500196001568, "grad_norm": 0.814400851726532, "learning_rate": 2.8428501594124602e-06, "loss": 0.14187668561935424, "memory(GiB)": 33.6, "step": 1535, "token_acc": 0.9552411118676178, "train_speed(iter/s)": 0.120588 }, { "epoch": 1.9307722461779693, "grad_norm": 0.680105447769165, "learning_rate": 2.813263123954214e-06, "loss": 0.14542250633239745, "memory(GiB)": 33.6, "step": 1540, "token_acc": 0.946710125341177, "train_speed(iter/s)": 0.12067 }, { "epoch": 1.9307722461779693, "eval_loss": 0.20705881714820862, "eval_runtime": 29.9919, "eval_samples_per_second": 17.171, "eval_steps_per_second": 4.301, "eval_token_acc": 0.9296569418649987, "step": 1540 }, { "epoch": 1.9370442963543708, "grad_norm": 0.6705245971679688, "learning_rate": 2.7837704675388045e-06, "loss": 0.14242172241210938, "memory(GiB)": 33.6, "step": 1545, "token_acc": 0.9419143033907438, "train_speed(iter/s)": 0.120382 }, { "epoch": 1.9433163465307723, "grad_norm": 0.6794357299804688, "learning_rate": 2.7543734630622622e-06, "loss": 0.14580047130584717, "memory(GiB)": 33.6, "step": 1550, "token_acc": 0.9519676920433064, "train_speed(iter/s)": 0.120474 }, { "epoch": 1.9495883967071737, "grad_norm": 0.6641804575920105, "learning_rate": 2.7250733792922997e-06, "loss": 0.14899333715438842, "memory(GiB)": 33.6, "step": 1555, "token_acc": 0.9489447236180905, "train_speed(iter/s)": 0.120542 }, { "epoch": 1.955860446883575, "grad_norm": 0.6625697016716003, "learning_rate": 2.6958714808135546e-06, "loss": 0.1446676015853882, "memory(GiB)": 33.6, "step": 1560, "token_acc": 0.9556168359941944, "train_speed(iter/s)": 0.120652 }, { "epoch": 1.955860446883575, "eval_loss": 0.2073841542005539, "eval_runtime": 29.9565, "eval_samples_per_second": 17.192, "eval_steps_per_second": 4.306, "eval_token_acc": 0.929736596081851, "step": 1560 }, { "epoch": 1.9621324970599765, "grad_norm": 0.6013507843017578, "learning_rate": 2.6667690279730096e-06, "loss": 0.1421922564506531, "memory(GiB)": 33.6, "step": 1565, "token_acc": 0.9436457058967458, "train_speed(iter/s)": 0.120368 }, { "epoch": 1.968404547236378, "grad_norm": 0.7043313980102539, "learning_rate": 2.6377672768256003e-06, "loss": 0.1387406349182129, "memory(GiB)": 33.6, "step": 1570, "token_acc": 0.9493978394583139, "train_speed(iter/s)": 0.12043 }, { "epoch": 1.9746765974127793, "grad_norm": 0.7223751544952393, "learning_rate": 2.608867479080001e-06, "loss": 0.14758012294769288, "memory(GiB)": 33.6, "step": 1575, "token_acc": 0.9447510837080315, "train_speed(iter/s)": 0.120522 }, { "epoch": 1.9809486475891807, "grad_norm": 0.7050609588623047, "learning_rate": 2.5800708820446002e-06, "loss": 0.14392924308776855, "memory(GiB)": 33.6, "step": 1580, "token_acc": 0.9467297587598339, "train_speed(iter/s)": 0.120624 }, { "epoch": 1.9809486475891807, "eval_loss": 0.20730111002922058, "eval_runtime": 29.9348, "eval_samples_per_second": 17.204, "eval_steps_per_second": 4.309, "eval_token_acc": 0.9300384436404493, "step": 1580 }, { "epoch": 1.987220697765582, "grad_norm": 0.7090184092521667, "learning_rate": 2.551378728573668e-06, "loss": 0.140655517578125, "memory(GiB)": 33.6, "step": 1585, "token_acc": 0.9442207091256332, "train_speed(iter/s)": 0.120333 }, { "epoch": 1.9934927479419835, "grad_norm": 0.6970275640487671, "learning_rate": 2.5227922570137143e-06, "loss": 0.15067524909973146, "memory(GiB)": 33.6, "step": 1590, "token_acc": 0.9442119257472057, "train_speed(iter/s)": 0.120437 }, { "epoch": 1.999764798118385, "grad_norm": 0.6549662351608276, "learning_rate": 2.4943127011500483e-06, "loss": 0.1493726849555969, "memory(GiB)": 33.6, "step": 1595, "token_acc": 0.9526065156592507, "train_speed(iter/s)": 0.120543 }, { "epoch": 2.005017640141121, "grad_norm": 0.6562784314155579, "learning_rate": 2.465941290153514e-06, "loss": 0.1286258101463318, "memory(GiB)": 33.6, "step": 1600, "token_acc": 0.9614165081272321, "train_speed(iter/s)": 0.120686 }, { "epoch": 2.005017640141121, "eval_loss": 0.20796315371990204, "eval_runtime": 29.931, "eval_samples_per_second": 17.206, "eval_steps_per_second": 4.31, "eval_token_acc": 0.9298959045155556, "step": 1600 }, { "epoch": 2.0112896903175224, "grad_norm": 0.6960498690605164, "learning_rate": 2.4376792485274577e-06, "loss": 0.1140947699546814, "memory(GiB)": 33.6, "step": 1605, "token_acc": 0.9474975268013971, "train_speed(iter/s)": 0.120408 }, { "epoch": 2.017561740493924, "grad_norm": 0.6196739673614502, "learning_rate": 2.409527796054863e-06, "loss": 0.1102461576461792, "memory(GiB)": 33.6, "step": 1610, "token_acc": 0.9649468933272074, "train_speed(iter/s)": 0.120505 }, { "epoch": 2.0238337906703254, "grad_norm": 0.6972460150718689, "learning_rate": 2.38148814774572e-06, "loss": 0.10172897577285767, "memory(GiB)": 33.6, "step": 1615, "token_acc": 0.9602301717784455, "train_speed(iter/s)": 0.120575 }, { "epoch": 2.0301058408467267, "grad_norm": 0.7768440842628479, "learning_rate": 2.353561513784566e-06, "loss": 0.10658919811248779, "memory(GiB)": 33.6, "step": 1620, "token_acc": 0.9643368583388412, "train_speed(iter/s)": 0.120672 }, { "epoch": 2.0301058408467267, "eval_loss": 0.2239648997783661, "eval_runtime": 29.8719, "eval_samples_per_second": 17.24, "eval_steps_per_second": 4.318, "eval_token_acc": 0.9290071311485719, "step": 1620 }, { "epoch": 2.036377891023128, "grad_norm": 0.7098206281661987, "learning_rate": 2.325749099478277e-06, "loss": 0.10938189029693604, "memory(GiB)": 33.6, "step": 1625, "token_acc": 0.948818377439692, "train_speed(iter/s)": 0.120401 }, { "epoch": 2.04264994119953, "grad_norm": 0.6591574549674988, "learning_rate": 2.29805210520403e-06, "loss": 0.10488543510437012, "memory(GiB)": 33.6, "step": 1630, "token_acc": 0.9647417816298272, "train_speed(iter/s)": 0.120485 }, { "epoch": 2.048921991375931, "grad_norm": 0.6965081691741943, "learning_rate": 2.270471726357501e-06, "loss": 0.10199937820434571, "memory(GiB)": 33.6, "step": 1635, "token_acc": 0.9662853371466286, "train_speed(iter/s)": 0.120557 }, { "epoch": 2.0551940415523324, "grad_norm": 0.7288631200790405, "learning_rate": 2.243009153301276e-06, "loss": 0.10732921361923217, "memory(GiB)": 33.6, "step": 1640, "token_acc": 0.9629427346459488, "train_speed(iter/s)": 0.120642 }, { "epoch": 2.0551940415523324, "eval_loss": 0.2211890071630478, "eval_runtime": 29.9921, "eval_samples_per_second": 17.171, "eval_steps_per_second": 4.301, "eval_token_acc": 0.9286843219539599, "step": 1640 }, { "epoch": 2.0614660917287337, "grad_norm": 0.6177812218666077, "learning_rate": 2.215665571313468e-06, "loss": 0.10446252822875976, "memory(GiB)": 33.6, "step": 1645, "token_acc": 0.946579760130165, "train_speed(iter/s)": 0.120396 }, { "epoch": 2.067738141905135, "grad_norm": 0.6719108819961548, "learning_rate": 2.188442160536562e-06, "loss": 0.10937647819519043, "memory(GiB)": 33.6, "step": 1650, "token_acc": 0.9570199762322449, "train_speed(iter/s)": 0.120466 }, { "epoch": 2.074010192081537, "grad_norm": 0.6592283844947815, "learning_rate": 2.1613400959264845e-06, "loss": 0.09818293452262879, "memory(GiB)": 33.6, "step": 1655, "token_acc": 0.9663418954827281, "train_speed(iter/s)": 0.120538 }, { "epoch": 2.080282242257938, "grad_norm": 0.7156064510345459, "learning_rate": 2.1343605472018954e-06, "loss": 0.10150223970413208, "memory(GiB)": 33.6, "step": 1660, "token_acc": 0.9639677935587118, "train_speed(iter/s)": 0.120626 }, { "epoch": 2.080282242257938, "eval_loss": 0.22235038876533508, "eval_runtime": 29.8842, "eval_samples_per_second": 17.233, "eval_steps_per_second": 4.317, "eval_token_acc": 0.9289903618397609, "step": 1660 }, { "epoch": 2.0865542924343394, "grad_norm": 0.7405542731285095, "learning_rate": 2.1075046787936842e-06, "loss": 0.11414774656295776, "memory(GiB)": 33.6, "step": 1665, "token_acc": 0.9445764825060391, "train_speed(iter/s)": 0.120361 }, { "epoch": 2.0928263426107407, "grad_norm": 0.6007137894630432, "learning_rate": 2.0807736497947436e-06, "loss": 0.1068692922592163, "memory(GiB)": 33.6, "step": 1670, "token_acc": 0.9613585407036289, "train_speed(iter/s)": 0.120437 }, { "epoch": 2.0990983927871425, "grad_norm": 0.6333921551704407, "learning_rate": 2.0541686139099164e-06, "loss": 0.10767915248870849, "memory(GiB)": 33.6, "step": 1675, "token_acc": 0.9514025948814184, "train_speed(iter/s)": 0.120521 }, { "epoch": 2.105370442963544, "grad_norm": 0.6882405877113342, "learning_rate": 2.0276907194062167e-06, "loss": 0.11104511022567749, "memory(GiB)": 33.6, "step": 1680, "token_acc": 0.9605332552007032, "train_speed(iter/s)": 0.120608 }, { "epoch": 2.105370442963544, "eval_loss": 0.22228793799877167, "eval_runtime": 29.9337, "eval_samples_per_second": 17.205, "eval_steps_per_second": 4.31, "eval_token_acc": 0.9290364774389912, "step": 1680 }, { "epoch": 2.111642493139945, "grad_norm": 0.7313436269760132, "learning_rate": 2.0013411090632638e-06, "loss": 0.1036494255065918, "memory(GiB)": 33.6, "step": 1685, "token_acc": 0.9467640326150703, "train_speed(iter/s)": 0.120351 }, { "epoch": 2.1179145433163464, "grad_norm": 0.6357504725456238, "learning_rate": 1.9751209201239696e-06, "loss": 0.1004453420639038, "memory(GiB)": 33.6, "step": 1690, "token_acc": 0.9673980703392469, "train_speed(iter/s)": 0.12045 }, { "epoch": 2.1241865934927477, "grad_norm": 0.6574280858039856, "learning_rate": 1.9490312842454425e-06, "loss": 0.09599907994270325, "memory(GiB)": 33.6, "step": 1695, "token_acc": 0.9642799567029778, "train_speed(iter/s)": 0.120536 }, { "epoch": 2.1304586436691495, "grad_norm": 0.6352968811988831, "learning_rate": 1.9230733274501525e-06, "loss": 0.10356111526489258, "memory(GiB)": 33.6, "step": 1700, "token_acc": 0.965252210367902, "train_speed(iter/s)": 0.120605 }, { "epoch": 2.1304586436691495, "eval_loss": 0.2226356714963913, "eval_runtime": 29.9685, "eval_samples_per_second": 17.185, "eval_steps_per_second": 4.305, "eval_token_acc": 0.9290155158029774, "step": 1700 }, { "epoch": 2.136730693845551, "grad_norm": 0.6789947748184204, "learning_rate": 1.8972481700773388e-06, "loss": 0.10871880054473877, "memory(GiB)": 33.6, "step": 1705, "token_acc": 0.9450670361465422, "train_speed(iter/s)": 0.120351 }, { "epoch": 2.143002744021952, "grad_norm": 0.6862888932228088, "learning_rate": 1.8715569267346368e-06, "loss": 0.10977823734283447, "memory(GiB)": 33.6, "step": 1710, "token_acc": 0.9672735959231341, "train_speed(iter/s)": 0.120428 }, { "epoch": 2.1492747941983534, "grad_norm": 0.6545423865318298, "learning_rate": 1.846000706249997e-06, "loss": 0.10351777076721191, "memory(GiB)": 33.6, "step": 1715, "token_acc": 0.962293618920125, "train_speed(iter/s)": 0.120499 }, { "epoch": 2.155546844374755, "grad_norm": 0.7650525569915771, "learning_rate": 1.8205806116238055e-06, "loss": 0.1088717222213745, "memory(GiB)": 33.6, "step": 1720, "token_acc": 0.9641661465130795, "train_speed(iter/s)": 0.120563 }, { "epoch": 2.155546844374755, "eval_loss": 0.22163553535938263, "eval_runtime": 29.9778, "eval_samples_per_second": 17.179, "eval_steps_per_second": 4.303, "eval_token_acc": 0.9293844405968197, "step": 1720 }, { "epoch": 2.1618188945511565, "grad_norm": 0.6473621129989624, "learning_rate": 1.7952977399812988e-06, "loss": 0.10216574668884278, "memory(GiB)": 33.6, "step": 1725, "token_acc": 0.9470272328316934, "train_speed(iter/s)": 0.120306 }, { "epoch": 2.168090944727558, "grad_norm": 0.6527573466300964, "learning_rate": 1.7701531825251888e-06, "loss": 0.10740480422973633, "memory(GiB)": 33.6, "step": 1730, "token_acc": 0.9613802435723952, "train_speed(iter/s)": 0.120405 }, { "epoch": 2.174362994903959, "grad_norm": 0.7024506330490112, "learning_rate": 1.7451480244885938e-06, "loss": 0.10878567695617676, "memory(GiB)": 33.6, "step": 1735, "token_acc": 0.9636387817528926, "train_speed(iter/s)": 0.120492 }, { "epoch": 2.1806350450803604, "grad_norm": 0.686829686164856, "learning_rate": 1.720283345088178e-06, "loss": 0.11087257862091064, "memory(GiB)": 33.6, "step": 1740, "token_acc": 0.9646302250803859, "train_speed(iter/s)": 0.120584 }, { "epoch": 2.1806350450803604, "eval_loss": 0.2214018851518631, "eval_runtime": 29.9476, "eval_samples_per_second": 17.197, "eval_steps_per_second": 4.308, "eval_token_acc": 0.9294934411040913, "step": 1740 }, { "epoch": 2.186907095256762, "grad_norm": 0.6728172898292542, "learning_rate": 1.695560217477582e-06, "loss": 0.10692278146743775, "memory(GiB)": 33.6, "step": 1745, "token_acc": 0.9460528590459338, "train_speed(iter/s)": 0.120324 }, { "epoch": 2.1931791454331635, "grad_norm": 0.6230509281158447, "learning_rate": 1.6709797087011066e-06, "loss": 0.10696847438812256, "memory(GiB)": 33.6, "step": 1750, "token_acc": 0.9615373673579699, "train_speed(iter/s)": 0.120404 }, { "epoch": 2.199451195609565, "grad_norm": 0.6261888742446899, "learning_rate": 1.6465428796476584e-06, "loss": 0.10366283655166626, "memory(GiB)": 33.6, "step": 1755, "token_acc": 0.9615300546448088, "train_speed(iter/s)": 0.120475 }, { "epoch": 2.205723245785966, "grad_norm": 0.7547042369842529, "learning_rate": 1.6222507850049602e-06, "loss": 0.1082529902458191, "memory(GiB)": 33.6, "step": 1760, "token_acc": 0.9618447339542474, "train_speed(iter/s)": 0.120566 }, { "epoch": 2.205723245785966, "eval_loss": 0.22259920835494995, "eval_runtime": 29.8466, "eval_samples_per_second": 17.255, "eval_steps_per_second": 4.322, "eval_token_acc": 0.9292796324167508, "step": 1760 }, { "epoch": 2.211995295962368, "grad_norm": 0.636020302772522, "learning_rate": 1.598104473214031e-06, "loss": 0.10505471229553223, "memory(GiB)": 33.6, "step": 1765, "token_acc": 0.9457782654231709, "train_speed(iter/s)": 0.120309 }, { "epoch": 2.218267346138769, "grad_norm": 0.617948591709137, "learning_rate": 1.5741049864239383e-06, "loss": 0.10182752609252929, "memory(GiB)": 33.6, "step": 1770, "token_acc": 0.963300613814716, "train_speed(iter/s)": 0.120392 }, { "epoch": 2.2245393963151705, "grad_norm": 0.7750356197357178, "learning_rate": 1.550253360446815e-06, "loss": 0.10825409889221191, "memory(GiB)": 33.6, "step": 1775, "token_acc": 0.9659604215960421, "train_speed(iter/s)": 0.120481 }, { "epoch": 2.230811446491572, "grad_norm": 0.6845636367797852, "learning_rate": 1.5265506247131617e-06, "loss": 0.10921690464019776, "memory(GiB)": 33.6, "step": 1780, "token_acc": 0.9601528384279476, "train_speed(iter/s)": 0.120568 }, { "epoch": 2.230811446491572, "eval_loss": 0.2214292734861374, "eval_runtime": 29.9074, "eval_samples_per_second": 17.22, "eval_steps_per_second": 4.313, "eval_token_acc": 0.9292964017255618, "step": 1780 }, { "epoch": 2.2370834966679736, "grad_norm": 0.735072135925293, "learning_rate": 1.5029978022274067e-06, "loss": 0.11698575019836426, "memory(GiB)": 33.6, "step": 1785, "token_acc": 0.9456881099382594, "train_speed(iter/s)": 0.120308 }, { "epoch": 2.243355546844375, "grad_norm": 0.6945551037788391, "learning_rate": 1.47959590952376e-06, "loss": 0.10342628955841064, "memory(GiB)": 33.6, "step": 1790, "token_acc": 0.9655268490374873, "train_speed(iter/s)": 0.120393 }, { "epoch": 2.249627597020776, "grad_norm": 0.6537898778915405, "learning_rate": 1.4563459566223358e-06, "loss": 0.10419995784759521, "memory(GiB)": 33.6, "step": 1795, "token_acc": 0.9687678159779888, "train_speed(iter/s)": 0.120479 }, { "epoch": 2.2558996471971775, "grad_norm": 0.665179431438446, "learning_rate": 1.4332489469855698e-06, "loss": 0.10044981241226196, "memory(GiB)": 33.6, "step": 1800, "token_acc": 0.9632239107769107, "train_speed(iter/s)": 0.120548 }, { "epoch": 2.2558996471971775, "eval_loss": 0.22338801622390747, "eval_runtime": 29.7434, "eval_samples_per_second": 17.315, "eval_steps_per_second": 4.337, "eval_token_acc": 0.9293676712880087, "step": 1800 }, { "epoch": 2.262171697373579, "grad_norm": 0.7259742617607117, "learning_rate": 1.4103058774748923e-06, "loss": 0.10409928560256958, "memory(GiB)": 33.6, "step": 1805, "token_acc": 0.9449998480658787, "train_speed(iter/s)": 0.120311 }, { "epoch": 2.2684437475499806, "grad_norm": 0.6720697283744812, "learning_rate": 1.3875177383077233e-06, "loss": 0.11027616262435913, "memory(GiB)": 33.6, "step": 1810, "token_acc": 0.9617116015070705, "train_speed(iter/s)": 0.120381 }, { "epoch": 2.274715797726382, "grad_norm": 0.6582772731781006, "learning_rate": 1.3648855130147216e-06, "loss": 0.10254979133605957, "memory(GiB)": 33.6, "step": 1815, "token_acc": 0.9641775983854692, "train_speed(iter/s)": 0.120474 }, { "epoch": 2.280987847902783, "grad_norm": 0.6948631405830383, "learning_rate": 1.3424101783973403e-06, "loss": 0.10602834224700927, "memory(GiB)": 33.6, "step": 1820, "token_acc": 0.9628847951276872, "train_speed(iter/s)": 0.120552 }, { "epoch": 2.280987847902783, "eval_loss": 0.22278502583503723, "eval_runtime": 29.9808, "eval_samples_per_second": 17.178, "eval_steps_per_second": 4.303, "eval_token_acc": 0.9293760559424142, "step": 1820 }, { "epoch": 2.2872598980791845, "grad_norm": 0.6872897744178772, "learning_rate": 1.3200927044856714e-06, "loss": 0.11157424449920654, "memory(GiB)": 33.6, "step": 1825, "token_acc": 0.9470804190586105, "train_speed(iter/s)": 0.120308 }, { "epoch": 2.293531948255586, "grad_norm": 0.7110899686813354, "learning_rate": 1.2979340544965745e-06, "loss": 0.10765695571899414, "memory(GiB)": 33.6, "step": 1830, "token_acc": 0.9619730551933942, "train_speed(iter/s)": 0.120391 }, { "epoch": 2.2998039984319876, "grad_norm": 0.6805204749107361, "learning_rate": 1.2759351847921053e-06, "loss": 0.10896315574645996, "memory(GiB)": 33.6, "step": 1835, "token_acc": 0.9559441922637779, "train_speed(iter/s)": 0.120473 }, { "epoch": 2.306076048608389, "grad_norm": 0.6619516015052795, "learning_rate": 1.25409704483824e-06, "loss": 0.1140247106552124, "memory(GiB)": 33.6, "step": 1840, "token_acc": 0.9612636720272272, "train_speed(iter/s)": 0.12055 }, { "epoch": 2.306076048608389, "eval_loss": 0.2214568555355072, "eval_runtime": 29.967, "eval_samples_per_second": 17.186, "eval_steps_per_second": 4.305, "eval_token_acc": 0.9293173633615757, "step": 1840 }, { "epoch": 2.31234809878479, "grad_norm": 0.6659355759620667, "learning_rate": 1.232420577163902e-06, "loss": 0.10365439653396606, "memory(GiB)": 33.6, "step": 1845, "token_acc": 0.9470177324019344, "train_speed(iter/s)": 0.120308 }, { "epoch": 2.3186201489611915, "grad_norm": 0.6475389003753662, "learning_rate": 1.2109067173202731e-06, "loss": 0.10801565647125244, "memory(GiB)": 33.6, "step": 1850, "token_acc": 0.9615019262230197, "train_speed(iter/s)": 0.12039 }, { "epoch": 2.3248921991375933, "grad_norm": 0.8018389344215393, "learning_rate": 1.1895563938404203e-06, "loss": 0.11211535930633545, "memory(GiB)": 33.6, "step": 1855, "token_acc": 0.9572030113563864, "train_speed(iter/s)": 0.120471 }, { "epoch": 2.3311642493139946, "grad_norm": 0.7461184859275818, "learning_rate": 1.1683705281992202e-06, "loss": 0.10777713060379028, "memory(GiB)": 33.6, "step": 1860, "token_acc": 0.959629618707794, "train_speed(iter/s)": 0.120548 }, { "epoch": 2.3311642493139946, "eval_loss": 0.2218623012304306, "eval_runtime": 29.9356, "eval_samples_per_second": 17.204, "eval_steps_per_second": 4.309, "eval_token_acc": 0.9295772876481464, "step": 1860 }, { "epoch": 2.337436299490396, "grad_norm": 0.6567296385765076, "learning_rate": 1.1473500347735927e-06, "loss": 0.11489678621292114, "memory(GiB)": 33.6, "step": 1865, "token_acc": 0.9453161257195897, "train_speed(iter/s)": 0.120343 }, { "epoch": 2.343708349666797, "grad_norm": 0.7275116443634033, "learning_rate": 1.1264958208030224e-06, "loss": 0.11094659566879272, "memory(GiB)": 33.6, "step": 1870, "token_acc": 0.9629886036851635, "train_speed(iter/s)": 0.120428 }, { "epoch": 2.349980399843199, "grad_norm": 0.6866867542266846, "learning_rate": 1.105808786350423e-06, "loss": 0.11223549842834472, "memory(GiB)": 33.6, "step": 1875, "token_acc": 0.9640074018087519, "train_speed(iter/s)": 0.120507 }, { "epoch": 2.3562524500196003, "grad_norm": 0.7598003149032593, "learning_rate": 1.085289824263273e-06, "loss": 0.10983138084411621, "memory(GiB)": 33.6, "step": 1880, "token_acc": 0.9628211185993882, "train_speed(iter/s)": 0.120591 }, { "epoch": 2.3562524500196003, "eval_loss": 0.22093415260314941, "eval_runtime": 29.9911, "eval_samples_per_second": 17.172, "eval_steps_per_second": 4.301, "eval_token_acc": 0.9296737111738097, "step": 1880 }, { "epoch": 2.3625245001960016, "grad_norm": 0.7150808572769165, "learning_rate": 1.0649398201350907e-06, "loss": 0.10479578971862794, "memory(GiB)": 33.6, "step": 1885, "token_acc": 0.9480835490841136, "train_speed(iter/s)": 0.120353 }, { "epoch": 2.368796550372403, "grad_norm": 0.6622815728187561, "learning_rate": 1.044759652267207e-06, "loss": 0.10107295513153076, "memory(GiB)": 33.6, "step": 1890, "token_acc": 0.9652460603127552, "train_speed(iter/s)": 0.120416 }, { "epoch": 2.375068600548804, "grad_norm": 0.6194722056388855, "learning_rate": 1.024750191630864e-06, "loss": 0.10245490074157715, "memory(GiB)": 33.6, "step": 1895, "token_acc": 0.9643746110765401, "train_speed(iter/s)": 0.120494 }, { "epoch": 2.381340650725206, "grad_norm": 0.6570760011672974, "learning_rate": 1.0049123018296158e-06, "loss": 0.10547176599502564, "memory(GiB)": 33.6, "step": 1900, "token_acc": 0.9632573448738266, "train_speed(iter/s)": 0.120572 }, { "epoch": 2.381340650725206, "eval_loss": 0.22135132551193237, "eval_runtime": 29.9894, "eval_samples_per_second": 17.173, "eval_steps_per_second": 4.302, "eval_token_acc": 0.9297952886626896, "step": 1900 }, { "epoch": 2.3876127009016073, "grad_norm": 0.6823681592941284, "learning_rate": 9.852468390620624e-07, "loss": 0.11187875270843506, "memory(GiB)": 33.6, "step": 1905, "token_acc": 0.9470449919974708, "train_speed(iter/s)": 0.120341 }, { "epoch": 2.3938847510780086, "grad_norm": 0.6770759224891663, "learning_rate": 9.65754652084896e-07, "loss": 0.1058814525604248, "memory(GiB)": 33.6, "step": 1910, "token_acc": 0.9609285414627324, "train_speed(iter/s)": 0.120413 }, { "epoch": 2.40015680125441, "grad_norm": 0.6467224955558777, "learning_rate": 9.464365821762611e-07, "loss": 0.10833286046981812, "memory(GiB)": 33.6, "step": 1915, "token_acc": 0.9618701158717327, "train_speed(iter/s)": 0.120485 }, { "epoch": 2.406428851430811, "grad_norm": 0.7155383825302124, "learning_rate": 9.272934630994579e-07, "loss": 0.1067124843597412, "memory(GiB)": 33.6, "step": 1920, "token_acc": 0.9663948320886975, "train_speed(iter/s)": 0.120568 }, { "epoch": 2.406428851430811, "eval_loss": 0.22103355824947357, "eval_runtime": 29.9865, "eval_samples_per_second": 17.174, "eval_steps_per_second": 4.302, "eval_token_acc": 0.929770134699473, "step": 1920 }, { "epoch": 2.412700901607213, "grad_norm": 0.6960355639457703, "learning_rate": 9.083261210669458e-07, "loss": 0.10286239385604859, "memory(GiB)": 33.6, "step": 1925, "token_acc": 0.9472781065088758, "train_speed(iter/s)": 0.12034 }, { "epoch": 2.4189729517836143, "grad_norm": 0.8168506622314453, "learning_rate": 8.895353747046903e-07, "loss": 0.10974031686782837, "memory(GiB)": 33.6, "step": 1930, "token_acc": 0.9560156270763016, "train_speed(iter/s)": 0.120426 }, { "epoch": 2.4252450019600156, "grad_norm": 0.7116117477416992, "learning_rate": 8.70922035016829e-07, "loss": 0.11615951061248779, "memory(GiB)": 33.6, "step": 1935, "token_acc": 0.9588699861295893, "train_speed(iter/s)": 0.120504 }, { "epoch": 2.4315170521364173, "grad_norm": 0.793286919593811, "learning_rate": 8.524869053506718e-07, "loss": 0.11020160913467407, "memory(GiB)": 33.6, "step": 1940, "token_acc": 0.9633706189410888, "train_speed(iter/s)": 0.120586 }, { "epoch": 2.4315170521364173, "eval_loss": 0.22128398716449738, "eval_runtime": 29.8622, "eval_samples_per_second": 17.246, "eval_steps_per_second": 4.32, "eval_token_acc": 0.9298959045155556, "step": 1940 }, { "epoch": 2.4377891023128186, "grad_norm": 0.6766318678855896, "learning_rate": 8.342307813620254e-07, "loss": 0.10068587064743043, "memory(GiB)": 33.6, "step": 1945, "token_acc": 0.9491064989973427, "train_speed(iter/s)": 0.120381 }, { "epoch": 2.44406115248922, "grad_norm": 0.6612719893455505, "learning_rate": 8.161544509808522e-07, "loss": 0.10740329027175903, "memory(GiB)": 33.6, "step": 1950, "token_acc": 0.9598074812125306, "train_speed(iter/s)": 0.120452 }, { "epoch": 2.4503332026656213, "grad_norm": 0.7400087714195251, "learning_rate": 7.982586943772663e-07, "loss": 0.1041949987411499, "memory(GiB)": 33.6, "step": 1955, "token_acc": 0.9606271261647685, "train_speed(iter/s)": 0.120526 }, { "epoch": 2.4566052528420226, "grad_norm": 0.6745719909667969, "learning_rate": 7.805442839278643e-07, "loss": 0.10791645050048829, "memory(GiB)": 33.6, "step": 1960, "token_acc": 0.9610862521215862, "train_speed(iter/s)": 0.120595 }, { "epoch": 2.4566052528420226, "eval_loss": 0.22153809666633606, "eval_runtime": 29.8544, "eval_samples_per_second": 17.25, "eval_steps_per_second": 4.321, "eval_token_acc": 0.9296988651370263, "step": 1960 }, { "epoch": 2.4628773030184243, "grad_norm": 0.6609322428703308, "learning_rate": 7.630119841823808e-07, "loss": 0.10820503234863281, "memory(GiB)": 33.6, "step": 1965, "token_acc": 0.9463575963963611, "train_speed(iter/s)": 0.120382 }, { "epoch": 2.4691493531948256, "grad_norm": 0.6629140973091125, "learning_rate": 7.456625518306976e-07, "loss": 0.10982118844985962, "memory(GiB)": 33.6, "step": 1970, "token_acc": 0.9646770143802785, "train_speed(iter/s)": 0.120462 }, { "epoch": 2.475421403371227, "grad_norm": 0.6865978837013245, "learning_rate": 7.284967356701839e-07, "loss": 0.10275110006332397, "memory(GiB)": 33.6, "step": 1975, "token_acc": 0.9637741118063815, "train_speed(iter/s)": 0.120509 }, { "epoch": 2.4816934535476283, "grad_norm": 0.6618251800537109, "learning_rate": 7.115152765733768e-07, "loss": 0.10197668075561524, "memory(GiB)": 33.6, "step": 1980, "token_acc": 0.9611416209019804, "train_speed(iter/s)": 0.120592 }, { "epoch": 2.4816934535476283, "eval_loss": 0.22130271792411804, "eval_runtime": 29.9692, "eval_samples_per_second": 17.184, "eval_steps_per_second": 4.304, "eval_token_acc": 0.9297156344458373, "step": 1980 }, { "epoch": 2.4879655037240296, "grad_norm": 0.6805600523948669, "learning_rate": 6.94718907456009e-07, "loss": 0.11028853654861451, "memory(GiB)": 33.6, "step": 1985, "token_acc": 0.9475056321262383, "train_speed(iter/s)": 0.120373 }, { "epoch": 2.4942375539004313, "grad_norm": 0.7021499276161194, "learning_rate": 6.781083532453702e-07, "loss": 0.10008060932159424, "memory(GiB)": 33.6, "step": 1990, "token_acc": 0.958559067450638, "train_speed(iter/s)": 0.120444 }, { "epoch": 2.5005096040768326, "grad_norm": 0.68918377161026, "learning_rate": 6.61684330849025e-07, "loss": 0.10927926301956177, "memory(GiB)": 33.6, "step": 1995, "token_acc": 0.9565252438401806, "train_speed(iter/s)": 0.120521 }, { "epoch": 2.506781654253234, "grad_norm": 0.6521994471549988, "learning_rate": 6.454475491238682e-07, "loss": 0.11399447917938232, "memory(GiB)": 33.6, "step": 2000, "token_acc": 0.9624060150375939, "train_speed(iter/s)": 0.1206 }, { "epoch": 2.506781654253234, "eval_loss": 0.2205990105867386, "eval_runtime": 30.0032, "eval_samples_per_second": 17.165, "eval_steps_per_second": 4.3, "eval_token_acc": 0.9299881357140162, "step": 2000 }, { "epoch": 2.5130537044296353, "grad_norm": 0.6786354184150696, "learning_rate": 6.293987088455355e-07, "loss": 0.10214885473251342, "memory(GiB)": 33.6, "step": 2005, "token_acc": 0.9475616708376412, "train_speed(iter/s)": 0.120376 }, { "epoch": 2.5193257546060366, "grad_norm": 0.6837747097015381, "learning_rate": 6.135385026781476e-07, "loss": 0.10503888130187988, "memory(GiB)": 33.6, "step": 2010, "token_acc": 0.9636853327348222, "train_speed(iter/s)": 0.120453 }, { "epoch": 2.5255978047824383, "grad_norm": 0.6747323274612427, "learning_rate": 5.978676151444285e-07, "loss": 0.10235412120819092, "memory(GiB)": 33.6, "step": 2015, "token_acc": 0.9618530311543985, "train_speed(iter/s)": 0.120526 }, { "epoch": 2.5318698549588396, "grad_norm": 0.6173009872436523, "learning_rate": 5.823867225961516e-07, "loss": 0.10736865997314453, "memory(GiB)": 33.6, "step": 2020, "token_acc": 0.9654360340644179, "train_speed(iter/s)": 0.120592 }, { "epoch": 2.5318698549588396, "eval_loss": 0.2209625095129013, "eval_runtime": 29.8886, "eval_samples_per_second": 17.231, "eval_steps_per_second": 4.316, "eval_token_acc": 0.9297785193538786, "step": 2020 }, { "epoch": 2.538141905135241, "grad_norm": 0.7332006096839905, "learning_rate": 5.670964931849521e-07, "loss": 0.10466567277908326, "memory(GiB)": 33.6, "step": 2025, "token_acc": 0.9465066273634904, "train_speed(iter/s)": 0.120388 }, { "epoch": 2.5444139553116427, "grad_norm": 0.7321441769599915, "learning_rate": 5.519975868334914e-07, "loss": 0.09656277894973755, "memory(GiB)": 33.6, "step": 2030, "token_acc": 0.9659533350385086, "train_speed(iter/s)": 0.12046 }, { "epoch": 2.550686005488044, "grad_norm": 0.6885952949523926, "learning_rate": 5.370906552069721e-07, "loss": 0.11789785623550415, "memory(GiB)": 33.6, "step": 2035, "token_acc": 0.9600053756215562, "train_speed(iter/s)": 0.120546 }, { "epoch": 2.5569580556644453, "grad_norm": 0.6895261406898499, "learning_rate": 5.22376341685013e-07, "loss": 0.10133184194564819, "memory(GiB)": 33.6, "step": 2040, "token_acc": 0.9643827639751553, "train_speed(iter/s)": 0.120608 }, { "epoch": 2.5569580556644453, "eval_loss": 0.2212093323469162, "eval_runtime": 29.9992, "eval_samples_per_second": 17.167, "eval_steps_per_second": 4.3, "eval_token_acc": 0.9297617500450676, "step": 2040 }, { "epoch": 2.5632301058408467, "grad_norm": 0.6516171097755432, "learning_rate": 5.07855281333881e-07, "loss": 0.11106686592102051, "memory(GiB)": 33.6, "step": 2045, "token_acc": 0.9468649356358528, "train_speed(iter/s)": 0.120396 }, { "epoch": 2.569502156017248, "grad_norm": 0.682096004486084, "learning_rate": 4.935281008790843e-07, "loss": 0.10403594970703126, "memory(GiB)": 33.6, "step": 2050, "token_acc": 0.9631915123957995, "train_speed(iter/s)": 0.120471 }, { "epoch": 2.5757742061936497, "grad_norm": 0.641323983669281, "learning_rate": 4.793954186783195e-07, "loss": 0.10982873439788818, "memory(GiB)": 33.6, "step": 2055, "token_acc": 0.9590548445010714, "train_speed(iter/s)": 0.120551 }, { "epoch": 2.582046256370051, "grad_norm": 0.6976042985916138, "learning_rate": 4.6545784469478386e-07, "loss": 0.09905983209609985, "memory(GiB)": 33.6, "step": 2060, "token_acc": 0.9606581714709885, "train_speed(iter/s)": 0.120623 }, { "epoch": 2.582046256370051, "eval_loss": 0.2210971564054489, "eval_runtime": 29.7693, "eval_samples_per_second": 17.3, "eval_steps_per_second": 4.333, "eval_token_acc": 0.9296611341922014, "step": 2060 }, { "epoch": 2.5883183065464523, "grad_norm": 0.6458984613418579, "learning_rate": 4.5171598047085153e-07, "loss": 0.10688018798828125, "memory(GiB)": 33.6, "step": 2065, "token_acc": 0.9488463005339436, "train_speed(iter/s)": 0.120424 }, { "epoch": 2.5945903567228537, "grad_norm": 0.7071846723556519, "learning_rate": 4.381704191021119e-07, "loss": 0.10872792005538941, "memory(GiB)": 33.6, "step": 2070, "token_acc": 0.9589340920905037, "train_speed(iter/s)": 0.120501 }, { "epoch": 2.600862406899255, "grad_norm": 0.7648762464523315, "learning_rate": 4.248217452117653e-07, "loss": 0.10998923778533935, "memory(GiB)": 33.6, "step": 2075, "token_acc": 0.9643058531634149, "train_speed(iter/s)": 0.120567 }, { "epoch": 2.6071344570756567, "grad_norm": 0.7118704319000244, "learning_rate": 4.1167053492540023e-07, "loss": 0.11142784357070923, "memory(GiB)": 33.6, "step": 2080, "token_acc": 0.9593960300853928, "train_speed(iter/s)": 0.120634 }, { "epoch": 2.6071344570756567, "eval_loss": 0.22096213698387146, "eval_runtime": 29.7839, "eval_samples_per_second": 17.291, "eval_steps_per_second": 4.331, "eval_token_acc": 0.9297827116810813, "step": 2080 }, { "epoch": 2.613406507252058, "grad_norm": 0.7053963541984558, "learning_rate": 3.987173558461199e-07, "loss": 0.10944682359695435, "memory(GiB)": 33.6, "step": 2085, "token_acc": 0.94411167563964, "train_speed(iter/s)": 0.120434 }, { "epoch": 2.6196785574284593, "grad_norm": 0.6822431087493896, "learning_rate": 3.8596276703004974e-07, "loss": 0.10377117395401, "memory(GiB)": 33.6, "step": 2090, "token_acc": 0.9615926525074362, "train_speed(iter/s)": 0.1205 }, { "epoch": 2.625950607604861, "grad_norm": 0.6763447523117065, "learning_rate": 3.7340731896220393e-07, "loss": 0.10526052713394166, "memory(GiB)": 33.6, "step": 2095, "token_acc": 0.9638053139407766, "train_speed(iter/s)": 0.120571 }, { "epoch": 2.6322226577812624, "grad_norm": 0.8038213849067688, "learning_rate": 3.6105155353273305e-07, "loss": 0.10737766027450561, "memory(GiB)": 33.6, "step": 2100, "token_acc": 0.960337552742616, "train_speed(iter/s)": 0.120622 }, { "epoch": 2.6322226577812624, "eval_loss": 0.22071143984794617, "eval_runtime": 29.9597, "eval_samples_per_second": 17.19, "eval_steps_per_second": 4.306, "eval_token_acc": 0.9298539812435281, "step": 2100 }, { "epoch": 2.6384947079576637, "grad_norm": 0.7455542087554932, "learning_rate": 3.488960040135303e-07, "loss": 0.10756160020828247, "memory(GiB)": 33.6, "step": 2105, "token_acc": 0.9456971683355857, "train_speed(iter/s)": 0.120413 }, { "epoch": 2.644766758134065, "grad_norm": 0.6782827377319336, "learning_rate": 3.369411950352175e-07, "loss": 0.10511963367462158, "memory(GiB)": 33.6, "step": 2110, "token_acc": 0.9645807367902379, "train_speed(iter/s)": 0.120483 }, { "epoch": 2.6510388083104663, "grad_norm": 0.6251741051673889, "learning_rate": 3.251876425645051e-07, "loss": 0.10916777849197387, "memory(GiB)": 33.6, "step": 2115, "token_acc": 0.9620457248579358, "train_speed(iter/s)": 0.120548 }, { "epoch": 2.657310858486868, "grad_norm": 0.7826245427131653, "learning_rate": 3.136358538819162e-07, "loss": 0.11334476470947266, "memory(GiB)": 33.6, "step": 2120, "token_acc": 0.9589237920833921, "train_speed(iter/s)": 0.120628 }, { "epoch": 2.657310858486868, "eval_loss": 0.22062310576438904, "eval_runtime": 29.8394, "eval_samples_per_second": 17.259, "eval_steps_per_second": 4.323, "eval_token_acc": 0.930025866658841, "step": 2120 }, { "epoch": 2.6635829086632694, "grad_norm": 0.7201940417289734, "learning_rate": 3.0228632755990197e-07, "loss": 0.1089336633682251, "memory(GiB)": 33.6, "step": 2125, "token_acc": 0.9453159041394336, "train_speed(iter/s)": 0.120428 }, { "epoch": 2.6698549588396707, "grad_norm": 0.7320123910903931, "learning_rate": 2.911395534413147e-07, "loss": 0.10883692502975464, "memory(GiB)": 33.6, "step": 2130, "token_acc": 0.9583949549348438, "train_speed(iter/s)": 0.120498 }, { "epoch": 2.676127009016072, "grad_norm": 0.6891148686408997, "learning_rate": 2.8019601261827123e-07, "loss": 0.1058511734008789, "memory(GiB)": 33.6, "step": 2135, "token_acc": 0.964820651358247, "train_speed(iter/s)": 0.120566 }, { "epoch": 2.6823990591924733, "grad_norm": 0.665065586566925, "learning_rate": 2.694561774113863e-07, "loss": 0.10193836688995361, "memory(GiB)": 33.6, "step": 2140, "token_acc": 0.9663121444471927, "train_speed(iter/s)": 0.120643 }, { "epoch": 2.6823990591924733, "eval_loss": 0.22056354582309723, "eval_runtime": 29.9765, "eval_samples_per_second": 17.18, "eval_steps_per_second": 4.303, "eval_token_acc": 0.9299629817507997, "step": 2140 }, { "epoch": 2.688671109368875, "grad_norm": 0.746462345123291, "learning_rate": 2.5892051134939256e-07, "loss": 0.10777335166931153, "memory(GiB)": 33.6, "step": 2145, "token_acc": 0.9450254900930257, "train_speed(iter/s)": 0.120438 }, { "epoch": 2.6949431595452764, "grad_norm": 0.7365299463272095, "learning_rate": 2.485894691491253e-07, "loss": 0.10137251615524293, "memory(GiB)": 33.6, "step": 2150, "token_acc": 0.9649379303011776, "train_speed(iter/s)": 0.120509 }, { "epoch": 2.7012152097216777, "grad_norm": 0.6272339224815369, "learning_rate": 2.384634966959076e-07, "loss": 0.10637471675872803, "memory(GiB)": 33.6, "step": 2155, "token_acc": 0.9610995993921813, "train_speed(iter/s)": 0.120579 }, { "epoch": 2.707487259898079, "grad_norm": 0.7295854091644287, "learning_rate": 2.2854303102429808e-07, "loss": 0.10675235986709594, "memory(GiB)": 33.6, "step": 2160, "token_acc": 0.961855927963982, "train_speed(iter/s)": 0.120641 }, { "epoch": 2.707487259898079, "eval_loss": 0.22078193724155426, "eval_runtime": 29.9825, "eval_samples_per_second": 17.177, "eval_steps_per_second": 4.303, "eval_token_acc": 0.9299420201147859, "step": 2160 }, { "epoch": 2.7137593100744803, "grad_norm": 0.7343592643737793, "learning_rate": 2.1882850029923463e-07, "loss": 0.1030248761177063, "memory(GiB)": 33.6, "step": 2165, "token_acc": 0.9477623684469362, "train_speed(iter/s)": 0.120452 }, { "epoch": 2.720031360250882, "grad_norm": 0.6812222599983215, "learning_rate": 2.093203237975483e-07, "loss": 0.10615785121917724, "memory(GiB)": 33.6, "step": 2170, "token_acc": 0.9640974343723809, "train_speed(iter/s)": 0.120515 }, { "epoch": 2.7263034104272834, "grad_norm": 0.6542791128158569, "learning_rate": 2.0001891188987265e-07, "loss": 0.10438240766525268, "memory(GiB)": 33.6, "step": 2175, "token_acc": 0.9666564149879543, "train_speed(iter/s)": 0.120568 }, { "epoch": 2.7325754606036847, "grad_norm": 0.6803576946258545, "learning_rate": 1.9092466602293247e-07, "loss": 0.11247079372406006, "memory(GiB)": 33.6, "step": 2180, "token_acc": 0.964954353454149, "train_speed(iter/s)": 0.120636 }, { "epoch": 2.7325754606036847, "eval_loss": 0.2204427272081375, "eval_runtime": 29.9646, "eval_samples_per_second": 17.187, "eval_steps_per_second": 4.305, "eval_token_acc": 0.930025866658841, "step": 2180 }, { "epoch": 2.7388475107800865, "grad_norm": 0.7342873215675354, "learning_rate": 1.8203797870221197e-07, "loss": 0.10811096429824829, "memory(GiB)": 33.6, "step": 2185, "token_acc": 0.9444978916772424, "train_speed(iter/s)": 0.120421 }, { "epoch": 2.745119560956488, "grad_norm": 0.6856096982955933, "learning_rate": 1.7335923347502003e-07, "loss": 0.10507526397705078, "memory(GiB)": 33.6, "step": 2190, "token_acc": 0.9626754255001493, "train_speed(iter/s)": 0.12049 }, { "epoch": 2.751391611132889, "grad_norm": 0.7308098077774048, "learning_rate": 1.6488880491393467e-07, "loss": 0.10211585760116577, "memory(GiB)": 33.6, "step": 2195, "token_acc": 0.959207675642216, "train_speed(iter/s)": 0.120553 }, { "epoch": 2.7576636613092904, "grad_norm": 0.7188462018966675, "learning_rate": 1.5662705860063465e-07, "loss": 0.10453490018844605, "memory(GiB)": 33.6, "step": 2200, "token_acc": 0.9691890125907868, "train_speed(iter/s)": 0.120613 }, { "epoch": 2.7576636613092904, "eval_loss": 0.22036312520503998, "eval_runtime": 29.8702, "eval_samples_per_second": 17.241, "eval_steps_per_second": 4.319, "eval_token_acc": 0.9299294431331777, "step": 2200 }, { "epoch": 2.7639357114856917, "grad_norm": 0.6238301396369934, "learning_rate": 1.485743511101234e-07, "loss": 0.10971046686172485, "memory(GiB)": 33.6, "step": 2205, "token_acc": 0.9466987384026312, "train_speed(iter/s)": 0.120416 }, { "epoch": 2.7702077616620935, "grad_norm": 0.6658957600593567, "learning_rate": 1.4073102999534017e-07, "loss": 0.11103521585464478, "memory(GiB)": 33.6, "step": 2210, "token_acc": 0.9630309852479412, "train_speed(iter/s)": 0.120481 }, { "epoch": 2.776479811838495, "grad_norm": 0.7214736938476562, "learning_rate": 1.3309743377215468e-07, "loss": 0.10242490768432617, "memory(GiB)": 33.6, "step": 2215, "token_acc": 0.9676976699508741, "train_speed(iter/s)": 0.120536 }, { "epoch": 2.782751862014896, "grad_norm": 0.6873740553855896, "learning_rate": 1.2567389190476287e-07, "loss": 0.11063306331634522, "memory(GiB)": 33.6, "step": 2220, "token_acc": 0.962315525785547, "train_speed(iter/s)": 0.120607 }, { "epoch": 2.782751862014896, "eval_loss": 0.22046540677547455, "eval_runtime": 29.9929, "eval_samples_per_second": 17.171, "eval_steps_per_second": 4.301, "eval_token_acc": 0.9297994809898923, "step": 2220 }, { "epoch": 2.7890239121912974, "grad_norm": 0.6654419898986816, "learning_rate": 1.1846072479146431e-07, "loss": 0.09902162551879883, "memory(GiB)": 33.6, "step": 2225, "token_acc": 0.948145285935085, "train_speed(iter/s)": 0.120399 }, { "epoch": 2.7952959623676987, "grad_norm": 0.6633173227310181, "learning_rate": 1.114582437508327e-07, "loss": 0.10771057605743409, "memory(GiB)": 33.6, "step": 2230, "token_acc": 0.9644760213143873, "train_speed(iter/s)": 0.120473 }, { "epoch": 2.8015680125441005, "grad_norm": 0.6813404560089111, "learning_rate": 1.0466675100828383e-07, "loss": 0.10407230854034424, "memory(GiB)": 33.6, "step": 2235, "token_acc": 0.9577818418523915, "train_speed(iter/s)": 0.120526 }, { "epoch": 2.807840062720502, "grad_norm": 0.6118773221969604, "learning_rate": 9.808653968302607e-08, "loss": 0.09836616516113281, "memory(GiB)": 33.6, "step": 2240, "token_acc": 0.963512739408732, "train_speed(iter/s)": 0.120591 }, { "epoch": 2.807840062720502, "eval_loss": 0.22051523625850677, "eval_runtime": 29.9565, "eval_samples_per_second": 17.192, "eval_steps_per_second": 4.306, "eval_token_acc": 0.9300132896772327, "step": 2240 }, { "epoch": 2.814112112896903, "grad_norm": 0.7042478322982788, "learning_rate": 9.17178937754143e-08, "loss": 0.1051060438156128, "memory(GiB)": 33.6, "step": 2245, "token_acc": 0.9483617807171872, "train_speed(iter/s)": 0.120393 }, { "epoch": 2.820384163073305, "grad_norm": 0.5747640132904053, "learning_rate": 8.556108815468756e-08, "loss": 0.09687448740005493, "memory(GiB)": 33.6, "step": 2250, "token_acc": 0.9668047793409135, "train_speed(iter/s)": 0.120457 }, { "epoch": 2.8266562132497057, "grad_norm": 0.7168111205101013, "learning_rate": 7.961638854711296e-08, "loss": 0.10943119525909424, "memory(GiB)": 33.6, "step": 2255, "token_acc": 0.9633674692232269, "train_speed(iter/s)": 0.120513 }, { "epoch": 2.8329282634261075, "grad_norm": 0.7360076308250427, "learning_rate": 7.388405152450706e-08, "loss": 0.10468497276306152, "memory(GiB)": 33.6, "step": 2260, "token_acc": 0.9621856728621001, "train_speed(iter/s)": 0.120585 }, { "epoch": 2.8329282634261075, "eval_loss": 0.2205626368522644, "eval_runtime": 29.8508, "eval_samples_per_second": 17.252, "eval_steps_per_second": 4.321, "eval_token_acc": 0.9299881357140162, "step": 2260 }, { "epoch": 2.839200313602509, "grad_norm": 0.6543593406677246, "learning_rate": 6.836432449317255e-08, "loss": 0.10062656402587891, "memory(GiB)": 33.6, "step": 2265, "token_acc": 0.9466067584011981, "train_speed(iter/s)": 0.120388 }, { "epoch": 2.84547236377891, "grad_norm": 0.6974958181381226, "learning_rate": 6.305744568321281e-08, "loss": 0.10191984176635742, "memory(GiB)": 33.6, "step": 2270, "token_acc": 0.9627590979146353, "train_speed(iter/s)": 0.120453 }, { "epoch": 2.851744413955312, "grad_norm": 0.7041538953781128, "learning_rate": 5.7963644138254175e-08, "loss": 0.10670938491821289, "memory(GiB)": 33.6, "step": 2275, "token_acc": 0.9608410787749505, "train_speed(iter/s)": 0.120516 }, { "epoch": 2.858016464131713, "grad_norm": 0.6692150831222534, "learning_rate": 5.308313970555812e-08, "loss": 0.1109403133392334, "memory(GiB)": 33.6, "step": 2280, "token_acc": 0.9577824715116431, "train_speed(iter/s)": 0.120589 }, { "epoch": 2.858016464131713, "eval_loss": 0.22065654397010803, "eval_runtime": 29.7512, "eval_samples_per_second": 17.31, "eval_steps_per_second": 4.336, "eval_token_acc": 0.9299797510596107, "step": 2280 }, { "epoch": 2.8642885143081145, "grad_norm": 0.7244044542312622, "learning_rate": 4.841614302653341e-08, "loss": 0.09498413801193237, "memory(GiB)": 33.6, "step": 2285, "token_acc": 0.9474822521052157, "train_speed(iter/s)": 0.120388 }, { "epoch": 2.870560564484516, "grad_norm": 0.6670571565628052, "learning_rate": 4.396285552764557e-08, "loss": 0.10060865879058838, "memory(GiB)": 33.6, "step": 2290, "token_acc": 0.9639465521355285, "train_speed(iter/s)": 0.120464 }, { "epoch": 2.876832614660917, "grad_norm": 0.7304293513298035, "learning_rate": 3.9723469411723226e-08, "loss": 0.10391623973846435, "memory(GiB)": 33.6, "step": 2295, "token_acc": 0.9663193870238685, "train_speed(iter/s)": 0.12053 }, { "epoch": 2.883104664837319, "grad_norm": 0.6565809845924377, "learning_rate": 3.5698167649660384e-08, "loss": 0.10505614280700684, "memory(GiB)": 33.6, "step": 2300, "token_acc": 0.9635771315655959, "train_speed(iter/s)": 0.12059 }, { "epoch": 2.883104664837319, "eval_loss": 0.22073638439178467, "eval_runtime": 29.9946, "eval_samples_per_second": 17.17, "eval_steps_per_second": 4.301, "eval_token_acc": 0.9300342513132465, "step": 2300 }, { "epoch": 2.88937671501372, "grad_norm": 0.7473600506782532, "learning_rate": 3.188712397252325e-08, "loss": 0.10624938011169434, "memory(GiB)": 33.6, "step": 2305, "token_acc": 0.9467632230970124, "train_speed(iter/s)": 0.120398 }, { "epoch": 2.8956487651901215, "grad_norm": 0.6958088874816895, "learning_rate": 2.8290502864049553e-08, "loss": 0.09693416357040405, "memory(GiB)": 33.6, "step": 2310, "token_acc": 0.9698841898459312, "train_speed(iter/s)": 0.120461 }, { "epoch": 2.901920815366523, "grad_norm": 0.6366994976997375, "learning_rate": 2.4908459553549257e-08, "loss": 0.10598138570785523, "memory(GiB)": 33.6, "step": 2315, "token_acc": 0.9589003310040456, "train_speed(iter/s)": 0.120529 }, { "epoch": 2.908192865542924, "grad_norm": 0.7216346263885498, "learning_rate": 2.174114000920713e-08, "loss": 0.10354976654052735, "memory(GiB)": 33.6, "step": 2320, "token_acc": 0.964891239164804, "train_speed(iter/s)": 0.120594 }, { "epoch": 2.908192865542924, "eval_loss": 0.22061631083488464, "eval_runtime": 29.9036, "eval_samples_per_second": 17.222, "eval_steps_per_second": 4.314, "eval_token_acc": 0.930076174585274, "step": 2320 }, { "epoch": 2.914464915719326, "grad_norm": 0.6899539828300476, "learning_rate": 1.878868093177999e-08, "loss": 0.10037648677825928, "memory(GiB)": 33.6, "step": 2325, "token_acc": 0.9473721149411458, "train_speed(iter/s)": 0.120416 }, { "epoch": 2.920736965895727, "grad_norm": 0.6711906790733337, "learning_rate": 1.6051209748698116e-08, "loss": 0.1048201560974121, "memory(GiB)": 33.6, "step": 2330, "token_acc": 0.9611446773011098, "train_speed(iter/s)": 0.120473 }, { "epoch": 2.9270090160721285, "grad_norm": 0.6756038665771484, "learning_rate": 1.3528844608566848e-08, "loss": 0.10034064054489136, "memory(GiB)": 33.6, "step": 2335, "token_acc": 0.959424851944726, "train_speed(iter/s)": 0.120528 }, { "epoch": 2.9332810662485302, "grad_norm": 0.7135525941848755, "learning_rate": 1.1221694376064018e-08, "loss": 0.10905985832214356, "memory(GiB)": 33.6, "step": 2340, "token_acc": 0.9628500906709652, "train_speed(iter/s)": 0.120593 }, { "epoch": 2.9332810662485302, "eval_loss": 0.22068579494953156, "eval_runtime": 29.8342, "eval_samples_per_second": 17.262, "eval_steps_per_second": 4.324, "eval_token_acc": 0.9299168661515694, "step": 2340 }, { "epoch": 2.9395531164249316, "grad_norm": 0.7024103999137878, "learning_rate": 9.129858627244802e-09, "loss": 0.10470427274703979, "memory(GiB)": 33.6, "step": 2345, "token_acc": 0.9460003800114003, "train_speed(iter/s)": 0.120414 }, { "epoch": 2.945825166601333, "grad_norm": 0.8265781402587891, "learning_rate": 7.25342764524184e-09, "loss": 0.10396888256072997, "memory(GiB)": 33.6, "step": 2350, "token_acc": 0.9658552348125807, "train_speed(iter/s)": 0.120474 }, { "epoch": 2.952097216777734, "grad_norm": 0.6834991574287415, "learning_rate": 5.592482416369449e-09, "loss": 0.10240061283111572, "memory(GiB)": 33.6, "step": 2355, "token_acc": 0.9637053223821365, "train_speed(iter/s)": 0.120538 }, { "epoch": 2.9583692669541355, "grad_norm": 0.6985086798667908, "learning_rate": 4.147094626628656e-09, "loss": 0.10518196821212769, "memory(GiB)": 33.6, "step": 2360, "token_acc": 0.9589985193161933, "train_speed(iter/s)": 0.120597 }, { "epoch": 2.9583692669541355, "eval_loss": 0.22063224017620087, "eval_runtime": 29.9892, "eval_samples_per_second": 17.173, "eval_steps_per_second": 4.302, "eval_token_acc": 0.9300216743316382, "step": 2360 }, { "epoch": 2.9646413171305372, "grad_norm": 0.6361654996871948, "learning_rate": 2.9173266586113303e-09, "loss": 0.11210713386535645, "memory(GiB)": 33.6, "step": 2365, "token_acc": 0.9465896933852769, "train_speed(iter/s)": 0.120393 }, { "epoch": 2.9709133673069386, "grad_norm": 0.6814746260643005, "learning_rate": 1.9032315888106724e-09, "loss": 0.10490133762359619, "memory(GiB)": 33.6, "step": 2370, "token_acc": 0.9647347687658518, "train_speed(iter/s)": 0.12045 }, { "epoch": 2.97718541748334, "grad_norm": 0.6994942426681519, "learning_rate": 1.1048531853286027e-09, "loss": 0.10961159467697143, "memory(GiB)": 33.6, "step": 2375, "token_acc": 0.9629837053630957, "train_speed(iter/s)": 0.120504 }, { "epoch": 2.983457467659741, "grad_norm": 0.717983067035675, "learning_rate": 5.222259059867174e-10, "loss": 0.11561372280120849, "memory(GiB)": 33.6, "step": 2380, "token_acc": 0.9572651972483505, "train_speed(iter/s)": 0.120566 }, { "epoch": 2.983457467659741, "eval_loss": 0.22063779830932617, "eval_runtime": 29.6218, "eval_samples_per_second": 17.386, "eval_steps_per_second": 4.355, "eval_token_acc": 0.9298959045155556, "step": 2380 }, { "epoch": 2.9897295178361425, "grad_norm": 0.7122631072998047, "learning_rate": 1.5537489683914442e-10, "loss": 0.10554230213165283, "memory(GiB)": 33.6, "step": 2385, "token_acc": 0.9472894319111068, "train_speed(iter/s)": 0.120389 }, { "epoch": 2.9960015680125442, "grad_norm": 0.6785567998886108, "learning_rate": 4.315991088965632e-12, "loss": 0.10587785243988038, "memory(GiB)": 33.6, "step": 2390, "token_acc": 0.9635652811337659, "train_speed(iter/s)": 0.120449 }, { "epoch": 2.9972559780478245, "eval_loss": 0.22064490616321564, "eval_runtime": 29.9722, "eval_samples_per_second": 17.183, "eval_steps_per_second": 4.304, "eval_token_acc": 0.93000909735003, "step": 2391 } ], "logging_steps": 5, "max_steps": 2391, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.893078571339743e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }