{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 100, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta_used": 0.10002562403678894, "beta_dpo/beta_used_raw": 0.10002562403678894, "beta_dpo/gap_mean": -0.002544061280786991, "beta_dpo/gap_std": 0.05413506180047989, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.0015117157974300832, "grad_norm": 19.21511459350586, "learning_rate": 0.0, "logits/chosen": 1.6779730319976807, "logits/rejected": 1.8961677551269531, "loss": 1.3862, "step": 1 }, { "beta_dpo/beta_used": 0.09614178538322449, "beta_dpo/beta_used_raw": 0.09614178538322449, "beta_dpo/gap_mean": 0.001475283526815474, "beta_dpo/gap_std": 0.1301599144935608, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.0030234315948601664, "grad_norm": 17.83782958984375, "learning_rate": 7.462686567164179e-09, "logits/chosen": 1.873326063156128, "logits/rejected": 1.763237714767456, "loss": 1.3922, "step": 2 }, { "beta_dpo/beta_used": 0.0969439223408699, "beta_dpo/beta_used_raw": 0.0969439223408699, "beta_dpo/gap_mean": -0.004292218014597893, "beta_dpo/gap_std": 0.18407246470451355, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.0045351473922902496, "grad_norm": 21.088054656982422, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 1.84206223487854, "logits/rejected": 1.5545785427093506, "loss": 1.3928, "step": 3 }, { "beta_dpo/beta_used": 0.09930766373872757, "beta_dpo/beta_used_raw": 0.09930766373872757, "beta_dpo/gap_mean": -0.017835495993494987, "beta_dpo/gap_std": 0.22892938554286957, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.006046863189720333, "grad_norm": 21.443336486816406, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 1.959693193435669, "logits/rejected": 1.9233078956604004, "loss": 1.3881, "step": 4 }, { "beta_dpo/beta_used": 0.09406433999538422, "beta_dpo/beta_used_raw": 0.09406433999538422, "beta_dpo/gap_mean": -0.018799975514411926, "beta_dpo/gap_std": 0.27064457535743713, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.007558578987150416, "grad_norm": 18.985071182250977, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 1.7132606506347656, "logits/rejected": 1.4830102920532227, "loss": 1.3973, "step": 5 }, { "beta_dpo/beta_used": 0.10003212094306946, "beta_dpo/beta_used_raw": 0.10003212094306946, "beta_dpo/gap_mean": -0.01150619424879551, "beta_dpo/gap_std": 0.3005719780921936, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.009070294784580499, "grad_norm": 19.63991928100586, "learning_rate": 3.731343283582089e-08, "logits/chosen": 1.6464662551879883, "logits/rejected": 1.3061785697937012, "loss": 1.3873, "step": 6 }, { "beta_dpo/beta_used": 0.09404729306697845, "beta_dpo/beta_used_raw": 0.09404729306697845, "beta_dpo/gap_mean": -0.025072161108255386, "beta_dpo/gap_std": 0.32929858565330505, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.010582010582010581, "grad_norm": 17.747419357299805, "learning_rate": 4.477611940298507e-08, "logits/chosen": 1.2335506677627563, "logits/rejected": 1.066222071647644, "loss": 1.3977, "step": 7 }, { "beta_dpo/beta_used": 0.10024942457675934, "beta_dpo/beta_used_raw": 0.10024942457675934, "beta_dpo/gap_mean": -0.03881003335118294, "beta_dpo/gap_std": 0.3406470715999603, "beta_dpo/mask_keep_frac": 0.5, "epoch": 0.012093726379440665, "grad_norm": 21.047828674316406, "learning_rate": 5.223880597014925e-08, "logits/chosen": 1.7191338539123535, "logits/rejected": 1.676999807357788, "loss": 1.3906, "step": 8 }, { "beta_dpo/beta_used": 0.10512945801019669, "beta_dpo/beta_used_raw": 0.10512945801019669, "beta_dpo/gap_mean": -0.029969248920679092, "beta_dpo/gap_std": 0.34616127610206604, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.013605442176870748, "grad_norm": 20.141834259033203, "learning_rate": 5.970149253731343e-08, "logits/chosen": 1.5813239812850952, "logits/rejected": 1.5274288654327393, "loss": 1.3799, "step": 9 }, { "beta_dpo/beta_used": 0.10164432227611542, "beta_dpo/beta_used_raw": 0.10164432227611542, "beta_dpo/gap_mean": -0.024617386981844902, "beta_dpo/gap_std": 0.36627668142318726, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.015117157974300832, "grad_norm": 21.468704223632812, "learning_rate": 6.71641791044776e-08, "logits/chosen": 1.8985390663146973, "logits/rejected": 1.7897529602050781, "loss": 1.386, "step": 10 }, { "beta_dpo/beta_used": 0.10073283314704895, "beta_dpo/beta_used_raw": 0.10073283314704895, "beta_dpo/gap_mean": -0.01293960027396679, "beta_dpo/gap_std": 0.3726397156715393, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.016628873771730914, "grad_norm": 19.517457962036133, "learning_rate": 7.462686567164178e-08, "logits/chosen": 1.5561755895614624, "logits/rejected": 1.4315879344940186, "loss": 1.3857, "step": 11 }, { "beta_dpo/beta_used": 0.09879305958747864, "beta_dpo/beta_used_raw": 0.09879305958747864, "beta_dpo/gap_mean": -0.01451108418405056, "beta_dpo/gap_std": 0.3724828362464905, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.018140589569160998, "grad_norm": 19.350879669189453, "learning_rate": 8.208955223880596e-08, "logits/chosen": 1.5376639366149902, "logits/rejected": 1.5898655652999878, "loss": 1.3903, "step": 12 }, { "beta_dpo/beta_used": 0.09995156526565552, "beta_dpo/beta_used_raw": 0.09995156526565552, "beta_dpo/gap_mean": -0.00666454154998064, "beta_dpo/gap_std": 0.37418586015701294, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.019652305366591082, "grad_norm": 20.34635353088379, "learning_rate": 8.955223880597014e-08, "logits/chosen": 1.6992497444152832, "logits/rejected": 1.421729564666748, "loss": 1.3869, "step": 13 }, { "beta_dpo/beta_used": 0.10110987722873688, "beta_dpo/beta_used_raw": 0.10110987722873688, "beta_dpo/gap_mean": -0.0020657971035689116, "beta_dpo/gap_std": 0.3755612373352051, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.021164021164021163, "grad_norm": 23.23613166809082, "learning_rate": 9.701492537313432e-08, "logits/chosen": 2.0708484649658203, "logits/rejected": 1.755119800567627, "loss": 1.3851, "step": 14 }, { "beta_dpo/beta_used": 0.09904544055461884, "beta_dpo/beta_used_raw": 0.09904544055461884, "beta_dpo/gap_mean": 0.00015588663518428802, "beta_dpo/gap_std": 0.36678993701934814, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.022675736961451247, "grad_norm": 22.2674617767334, "learning_rate": 1.044776119402985e-07, "logits/chosen": 1.3940855264663696, "logits/rejected": 1.119559407234192, "loss": 1.3882, "step": 15 }, { "beta_dpo/beta_used": 0.10093901306390762, "beta_dpo/beta_used_raw": 0.10093901306390762, "beta_dpo/gap_mean": -0.0052482327446341515, "beta_dpo/gap_std": 0.3735610246658325, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.02418745275888133, "grad_norm": 18.811349868774414, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 1.9675464630126953, "logits/rejected": 1.8949251174926758, "loss": 1.3853, "step": 16 }, { "beta_dpo/beta_used": 0.10186785459518433, "beta_dpo/beta_used_raw": 0.10186785459518433, "beta_dpo/gap_mean": -0.0005937099922448397, "beta_dpo/gap_std": 0.3771466016769409, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.025699168556311415, "grad_norm": 20.184123992919922, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 1.7564290761947632, "logits/rejected": 1.6898235082626343, "loss": 1.3831, "step": 17 }, { "beta_dpo/beta_used": 0.10032984614372253, "beta_dpo/beta_used_raw": 0.10032984614372253, "beta_dpo/gap_mean": 0.011224126443266869, "beta_dpo/gap_std": 0.3758787512779236, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.027210884353741496, "grad_norm": 18.658119201660156, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 1.5427706241607666, "logits/rejected": 1.4021761417388916, "loss": 1.3844, "step": 18 }, { "beta_dpo/beta_used": 0.0984867587685585, "beta_dpo/beta_used_raw": 0.0984867587685585, "beta_dpo/gap_mean": 0.0026761912740767, "beta_dpo/gap_std": 0.39050090312957764, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.02872260015117158, "grad_norm": 22.035978317260742, "learning_rate": 1.343283582089552e-07, "logits/chosen": 1.7621450424194336, "logits/rejected": 1.6765937805175781, "loss": 1.3894, "step": 19 }, { "beta_dpo/beta_used": 0.10234874486923218, "beta_dpo/beta_used_raw": 0.10234874486923218, "beta_dpo/gap_mean": 0.002019322942942381, "beta_dpo/gap_std": 0.391927033662796, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.030234315948601664, "grad_norm": 21.037214279174805, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 1.822493076324463, "logits/rejected": 1.6277220249176025, "loss": 1.3828, "step": 20 }, { "beta_dpo/beta_used": 0.10313962399959564, "beta_dpo/beta_used_raw": 0.10313962399959564, "beta_dpo/gap_mean": 0.007663208059966564, "beta_dpo/gap_std": 0.3802725672721863, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.031746031746031744, "grad_norm": 19.205894470214844, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 1.2832739353179932, "logits/rejected": 1.4847989082336426, "loss": 1.3804, "step": 21 }, { "beta_dpo/beta_used": 0.0988527238368988, "beta_dpo/beta_used_raw": 0.0988527238368988, "beta_dpo/gap_mean": 0.0034460527822375298, "beta_dpo/gap_std": 0.3804360628128052, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.03325774754346183, "grad_norm": 19.234243392944336, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 1.3626708984375, "logits/rejected": 1.13639235496521, "loss": 1.3873, "step": 22 }, { "beta_dpo/beta_used": 0.09999721497297287, "beta_dpo/beta_used_raw": 0.09999721497297287, "beta_dpo/gap_mean": 0.003892315551638603, "beta_dpo/gap_std": 0.3914201259613037, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.03476946334089191, "grad_norm": 19.51723861694336, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 2.0461347103118896, "logits/rejected": 1.7789829969406128, "loss": 1.3863, "step": 23 }, { "beta_dpo/beta_used": 0.09975261986255646, "beta_dpo/beta_used_raw": 0.09975261986255646, "beta_dpo/gap_mean": -0.00156848831102252, "beta_dpo/gap_std": 0.3875770568847656, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.036281179138321996, "grad_norm": 18.69975471496582, "learning_rate": 1.716417910447761e-07, "logits/chosen": 1.5550925731658936, "logits/rejected": 1.3700810670852661, "loss": 1.3871, "step": 24 }, { "beta_dpo/beta_used": 0.09970206022262573, "beta_dpo/beta_used_raw": 0.09970206022262573, "beta_dpo/gap_mean": -0.006716427858918905, "beta_dpo/gap_std": 0.4007958173751831, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.03779289493575208, "grad_norm": 20.19523811340332, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 1.5238394737243652, "logits/rejected": 1.441294550895691, "loss": 1.3877, "step": 25 }, { "beta_dpo/beta_used": 0.10117530822753906, "beta_dpo/beta_used_raw": 0.10117530822753906, "beta_dpo/gap_mean": 0.0002460250398144126, "beta_dpo/gap_std": 0.3976287841796875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.039304610733182165, "grad_norm": 20.19486427307129, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 2.229462146759033, "logits/rejected": 2.186990261077881, "loss": 1.3845, "step": 26 }, { "beta_dpo/beta_used": 0.099585622549057, "beta_dpo/beta_used_raw": 0.099585622549057, "beta_dpo/gap_mean": 0.013156171888113022, "beta_dpo/gap_std": 0.402152419090271, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.04081632653061224, "grad_norm": 21.181793212890625, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 1.9965670108795166, "logits/rejected": 1.999671220779419, "loss": 1.3855, "step": 27 }, { "beta_dpo/beta_used": 0.09757953137159348, "beta_dpo/beta_used_raw": 0.09757953137159348, "beta_dpo/gap_mean": 0.01814894564449787, "beta_dpo/gap_std": 0.39564138650894165, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.042328042328042326, "grad_norm": 19.1815128326416, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 1.282888412475586, "logits/rejected": 1.315780758857727, "loss": 1.3892, "step": 28 }, { "beta_dpo/beta_used": 0.10066419839859009, "beta_dpo/beta_used_raw": 0.10066419839859009, "beta_dpo/gap_mean": 0.02637687511742115, "beta_dpo/gap_std": 0.39498424530029297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.04383975812547241, "grad_norm": 23.41122055053711, "learning_rate": 2.08955223880597e-07, "logits/chosen": 2.0083210468292236, "logits/rejected": 2.0637381076812744, "loss": 1.3826, "step": 29 }, { "beta_dpo/beta_used": 0.10326778143644333, "beta_dpo/beta_used_raw": 0.10326778143644333, "beta_dpo/gap_mean": 0.028650924563407898, "beta_dpo/gap_std": 0.3952373266220093, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.045351473922902494, "grad_norm": 21.245594024658203, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 2.035595178604126, "logits/rejected": 1.9250398874282837, "loss": 1.3775, "step": 30 }, { "beta_dpo/beta_used": 0.09900397062301636, "beta_dpo/beta_used_raw": 0.09900397062301636, "beta_dpo/gap_mean": 0.022034619003534317, "beta_dpo/gap_std": 0.3975624442100525, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.04686318972033258, "grad_norm": 22.802661895751953, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 1.9883079528808594, "logits/rejected": 1.705573320388794, "loss": 1.3851, "step": 31 }, { "beta_dpo/beta_used": 0.10333971679210663, "beta_dpo/beta_used_raw": 0.10333971679210663, "beta_dpo/gap_mean": 0.03041520155966282, "beta_dpo/gap_std": 0.3910978436470032, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.04837490551776266, "grad_norm": 22.380115509033203, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 1.5761935710906982, "logits/rejected": 1.5760250091552734, "loss": 1.3766, "step": 32 }, { "beta_dpo/beta_used": 0.10148920118808746, "beta_dpo/beta_used_raw": 0.10148920118808746, "beta_dpo/gap_mean": 0.034046024084091187, "beta_dpo/gap_std": 0.38926005363464355, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.049886621315192746, "grad_norm": 21.281326293945312, "learning_rate": 2.388059701492537e-07, "logits/chosen": 1.4101418256759644, "logits/rejected": 1.3217897415161133, "loss": 1.3804, "step": 33 }, { "beta_dpo/beta_used": 0.09844163060188293, "beta_dpo/beta_used_raw": 0.09844163060188293, "beta_dpo/gap_mean": 0.042101725935935974, "beta_dpo/gap_std": 0.3880201280117035, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.05139833711262283, "grad_norm": 19.162466049194336, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 2.0277884006500244, "logits/rejected": 1.8365530967712402, "loss": 1.385, "step": 34 }, { "beta_dpo/beta_used": 0.10297001898288727, "beta_dpo/beta_used_raw": 0.10297001898288727, "beta_dpo/gap_mean": 0.04004104435443878, "beta_dpo/gap_std": 0.3860953450202942, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.05291005291005291, "grad_norm": 19.943506240844727, "learning_rate": 2.537313432835821e-07, "logits/chosen": 1.2702012062072754, "logits/rejected": 1.3567094802856445, "loss": 1.3769, "step": 35 }, { "beta_dpo/beta_used": 0.09879133850336075, "beta_dpo/beta_used_raw": 0.09879133850336075, "beta_dpo/gap_mean": 0.03860355541110039, "beta_dpo/gap_std": 0.3801459074020386, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.05442176870748299, "grad_norm": 19.540878295898438, "learning_rate": 2.611940298507462e-07, "logits/chosen": 2.013148546218872, "logits/rejected": 1.7665867805480957, "loss": 1.3854, "step": 36 }, { "beta_dpo/beta_used": 0.09817594289779663, "beta_dpo/beta_used_raw": 0.09817594289779663, "beta_dpo/gap_mean": 0.03356537967920303, "beta_dpo/gap_std": 0.37876373529434204, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.055933484504913075, "grad_norm": 26.17854118347168, "learning_rate": 2.686567164179104e-07, "logits/chosen": 1.748681902885437, "logits/rejected": 1.5148720741271973, "loss": 1.3859, "step": 37 }, { "beta_dpo/beta_used": 0.09723386913537979, "beta_dpo/beta_used_raw": 0.09723386913537979, "beta_dpo/gap_mean": 0.017998045310378075, "beta_dpo/gap_std": 0.376539945602417, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.05744520030234316, "grad_norm": 20.4818115234375, "learning_rate": 2.761194029850746e-07, "logits/chosen": 1.6136703491210938, "logits/rejected": 1.5167253017425537, "loss": 1.3896, "step": 38 }, { "beta_dpo/beta_used": 0.09825208783149719, "beta_dpo/beta_used_raw": 0.09825208783149719, "beta_dpo/gap_mean": 0.019816506654024124, "beta_dpo/gap_std": 0.37512654066085815, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.05895691609977324, "grad_norm": 21.145137786865234, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 2.1623120307922363, "logits/rejected": 2.083242654800415, "loss": 1.3872, "step": 39 }, { "beta_dpo/beta_used": 0.09770508855581284, "beta_dpo/beta_used_raw": 0.09770508855581284, "beta_dpo/gap_mean": 0.02871175855398178, "beta_dpo/gap_std": 0.38634994626045227, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.06046863189720333, "grad_norm": 18.661518096923828, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 1.9411481618881226, "logits/rejected": 1.8581569194793701, "loss": 1.387, "step": 40 }, { "beta_dpo/beta_used": 0.10622584819793701, "beta_dpo/beta_used_raw": 0.10622584819793701, "beta_dpo/gap_mean": 0.031016860157251358, "beta_dpo/gap_std": 0.39376121759414673, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.06198034769463341, "grad_norm": 22.026676177978516, "learning_rate": 2.985074626865671e-07, "logits/chosen": 1.3353779315948486, "logits/rejected": 1.3540756702423096, "loss": 1.3724, "step": 41 }, { "beta_dpo/beta_used": 0.0990532785654068, "beta_dpo/beta_used_raw": 0.0990532785654068, "beta_dpo/gap_mean": 0.03635905683040619, "beta_dpo/gap_std": 0.3946530222892761, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.06349206349206349, "grad_norm": 20.40145492553711, "learning_rate": 3.059701492537313e-07, "logits/chosen": 1.4092631340026855, "logits/rejected": 1.3597307205200195, "loss": 1.3837, "step": 42 }, { "beta_dpo/beta_used": 0.09917229413986206, "beta_dpo/beta_used_raw": 0.09917229413986206, "beta_dpo/gap_mean": 0.020734082907438278, "beta_dpo/gap_std": 0.3832094669342041, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.06500377928949358, "grad_norm": 22.262836456298828, "learning_rate": 3.134328358208955e-07, "logits/chosen": 1.4906641244888306, "logits/rejected": 1.297049641609192, "loss": 1.3857, "step": 43 }, { "beta_dpo/beta_used": 0.10303438454866409, "beta_dpo/beta_used_raw": 0.10303438454866409, "beta_dpo/gap_mean": 0.015459949150681496, "beta_dpo/gap_std": 0.3839811682701111, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.06651549508692366, "grad_norm": 22.17262840270996, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 1.881546139717102, "logits/rejected": 1.903512954711914, "loss": 1.3785, "step": 44 }, { "beta_dpo/beta_used": 0.0984266847372055, "beta_dpo/beta_used_raw": 0.0984266847372055, "beta_dpo/gap_mean": 0.01941034197807312, "beta_dpo/gap_std": 0.3831687569618225, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.06802721088435375, "grad_norm": 20.917078018188477, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 1.663498044013977, "logits/rejected": 1.6589391231536865, "loss": 1.3874, "step": 45 }, { "beta_dpo/beta_used": 0.10082878172397614, "beta_dpo/beta_used_raw": 0.10082878172397614, "beta_dpo/gap_mean": 0.020450761541724205, "beta_dpo/gap_std": 0.39133739471435547, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.06953892668178382, "grad_norm": 20.342512130737305, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 1.640000581741333, "logits/rejected": 1.665790319442749, "loss": 1.3827, "step": 46 }, { "beta_dpo/beta_used": 0.09943661093711853, "beta_dpo/beta_used_raw": 0.09943661093711853, "beta_dpo/gap_mean": 0.024434737861156464, "beta_dpo/gap_std": 0.38775908946990967, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.0710506424792139, "grad_norm": 18.455398559570312, "learning_rate": 3.432835820895522e-07, "logits/chosen": 1.6844422817230225, "logits/rejected": 1.6502798795700073, "loss": 1.385, "step": 47 }, { "beta_dpo/beta_used": 0.0995083749294281, "beta_dpo/beta_used_raw": 0.0995083749294281, "beta_dpo/gap_mean": 0.0223417766392231, "beta_dpo/gap_std": 0.3805840313434601, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.07256235827664399, "grad_norm": 18.35866355895996, "learning_rate": 3.507462686567164e-07, "logits/chosen": 1.8899521827697754, "logits/rejected": 1.8364651203155518, "loss": 1.386, "step": 48 }, { "beta_dpo/beta_used": 0.10304830223321915, "beta_dpo/beta_used_raw": 0.10304830223321915, "beta_dpo/gap_mean": 0.02127697691321373, "beta_dpo/gap_std": 0.37601011991500854, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.07407407407407407, "grad_norm": 20.155515670776367, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 1.607337236404419, "logits/rejected": 1.5268868207931519, "loss": 1.3787, "step": 49 }, { "beta_dpo/beta_used": 0.0964367538690567, "beta_dpo/beta_used_raw": 0.0964367538690567, "beta_dpo/gap_mean": 0.017594996839761734, "beta_dpo/gap_std": 0.36542147397994995, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.07558578987150416, "grad_norm": 19.53146743774414, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 1.6052238941192627, "logits/rejected": 1.4814239740371704, "loss": 1.3898, "step": 50 }, { "beta_dpo/beta_used": 0.10200951993465424, "beta_dpo/beta_used_raw": 0.10200951993465424, "beta_dpo/gap_mean": 0.02372138947248459, "beta_dpo/gap_std": 0.3648919463157654, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.07709750566893424, "grad_norm": 22.443063735961914, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 1.822296380996704, "logits/rejected": 1.6959524154663086, "loss": 1.3805, "step": 51 }, { "beta_dpo/beta_used": 0.09915009140968323, "beta_dpo/beta_used_raw": 0.09915009140968323, "beta_dpo/gap_mean": 0.02502043917775154, "beta_dpo/gap_std": 0.37956005334854126, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.07860922146636433, "grad_norm": 22.795974731445312, "learning_rate": 3.805970149253731e-07, "logits/chosen": 2.0509259700775146, "logits/rejected": 1.8106316328048706, "loss": 1.3872, "step": 52 }, { "beta_dpo/beta_used": 0.10304185748100281, "beta_dpo/beta_used_raw": 0.10304185748100281, "beta_dpo/gap_mean": 0.028562255203723907, "beta_dpo/gap_std": 0.3935072422027588, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.0801209372637944, "grad_norm": 20.028133392333984, "learning_rate": 3.880597014925373e-07, "logits/chosen": 1.4427279233932495, "logits/rejected": 1.4917798042297363, "loss": 1.378, "step": 53 }, { "beta_dpo/beta_used": 0.10378183424472809, "beta_dpo/beta_used_raw": 0.10378183424472809, "beta_dpo/gap_mean": 0.04398445785045624, "beta_dpo/gap_std": 0.41044336557388306, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.08163265306122448, "grad_norm": 18.53426170349121, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 1.62733793258667, "logits/rejected": 1.6121970415115356, "loss": 1.3751, "step": 54 }, { "beta_dpo/beta_used": 0.10331679880619049, "beta_dpo/beta_used_raw": 0.10331679880619049, "beta_dpo/gap_mean": 0.05415666103363037, "beta_dpo/gap_std": 0.41562163829803467, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.08314436885865457, "grad_norm": 22.8297176361084, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 1.5697447061538696, "logits/rejected": 1.358530044555664, "loss": 1.3757, "step": 55 }, { "beta_dpo/beta_used": 0.10186167806386948, "beta_dpo/beta_used_raw": 0.10186167806386948, "beta_dpo/gap_mean": 0.06482543796300888, "beta_dpo/gap_std": 0.4240786135196686, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.08465608465608465, "grad_norm": 18.29169273376465, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 1.5528168678283691, "logits/rejected": 1.682697057723999, "loss": 1.3764, "step": 56 }, { "beta_dpo/beta_used": 0.10814331471920013, "beta_dpo/beta_used_raw": 0.10814331471920013, "beta_dpo/gap_mean": 0.0803925096988678, "beta_dpo/gap_std": 0.4210129976272583, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.08616780045351474, "grad_norm": 23.717344284057617, "learning_rate": 4.17910447761194e-07, "logits/chosen": 2.119786024093628, "logits/rejected": 2.0530922412872314, "loss": 1.3638, "step": 57 }, { "beta_dpo/beta_used": 0.09881128370761871, "beta_dpo/beta_used_raw": 0.09881128370761871, "beta_dpo/gap_mean": 0.08094684034585953, "beta_dpo/gap_std": 0.4258253574371338, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.08767951625094482, "grad_norm": 21.60850715637207, "learning_rate": 4.253731343283582e-07, "logits/chosen": 1.5111796855926514, "logits/rejected": 1.3472614288330078, "loss": 1.3792, "step": 58 }, { "beta_dpo/beta_used": 0.10262042284011841, "beta_dpo/beta_used_raw": 0.10262042284011841, "beta_dpo/gap_mean": 0.08273988962173462, "beta_dpo/gap_std": 0.4285188913345337, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.08919123204837491, "grad_norm": 18.995695114135742, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 1.840743064880371, "logits/rejected": 1.5437428951263428, "loss": 1.3735, "step": 59 }, { "beta_dpo/beta_used": 0.09564212709665298, "beta_dpo/beta_used_raw": 0.09564212709665298, "beta_dpo/gap_mean": 0.08741338551044464, "beta_dpo/gap_std": 0.4274219870567322, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.09070294784580499, "grad_norm": 18.22614288330078, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 2.1714425086975098, "logits/rejected": 2.212477684020996, "loss": 1.3846, "step": 60 }, { "beta_dpo/beta_used": 0.0980357974767685, "beta_dpo/beta_used_raw": 0.0980357974767685, "beta_dpo/gap_mean": 0.08621242642402649, "beta_dpo/gap_std": 0.4376525282859802, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.09221466364323508, "grad_norm": 17.79900550842285, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 1.433061122894287, "logits/rejected": 1.248925805091858, "loss": 1.3788, "step": 61 }, { "beta_dpo/beta_used": 0.10317344218492508, "beta_dpo/beta_used_raw": 0.10317344218492508, "beta_dpo/gap_mean": 0.09457056224346161, "beta_dpo/gap_std": 0.44412726163864136, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.09372637944066516, "grad_norm": 22.70246696472168, "learning_rate": 4.552238805970149e-07, "logits/chosen": 1.8834363222122192, "logits/rejected": 1.6489927768707275, "loss": 1.3714, "step": 62 }, { "beta_dpo/beta_used": 0.09803298115730286, "beta_dpo/beta_used_raw": 0.09803298115730286, "beta_dpo/gap_mean": 0.10839153081178665, "beta_dpo/gap_std": 0.45854881405830383, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.09523809523809523, "grad_norm": 20.222421646118164, "learning_rate": 4.626865671641791e-07, "logits/chosen": 2.5329227447509766, "logits/rejected": 2.572336196899414, "loss": 1.38, "step": 63 }, { "beta_dpo/beta_used": 0.09539124369621277, "beta_dpo/beta_used_raw": 0.09539124369621277, "beta_dpo/gap_mean": 0.09624745696783066, "beta_dpo/gap_std": 0.4746573567390442, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.09674981103552532, "grad_norm": 17.34638214111328, "learning_rate": 4.701492537313433e-07, "logits/chosen": 1.5154216289520264, "logits/rejected": 1.2605938911437988, "loss": 1.3842, "step": 64 }, { "beta_dpo/beta_used": 0.10210136324167252, "beta_dpo/beta_used_raw": 0.10210136324167252, "beta_dpo/gap_mean": 0.10235883295536041, "beta_dpo/gap_std": 0.47725844383239746, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.0982615268329554, "grad_norm": 21.575925827026367, "learning_rate": 4.776119402985074e-07, "logits/chosen": 1.746931552886963, "logits/rejected": 1.831960678100586, "loss": 1.3734, "step": 65 }, { "beta_dpo/beta_used": 0.10511539876461029, "beta_dpo/beta_used_raw": 0.10511539876461029, "beta_dpo/gap_mean": 0.11833730340003967, "beta_dpo/gap_std": 0.4751163125038147, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.09977324263038549, "grad_norm": 22.723419189453125, "learning_rate": 4.850746268656717e-07, "logits/chosen": 1.9626502990722656, "logits/rejected": 1.846794605255127, "loss": 1.3663, "step": 66 }, { "beta_dpo/beta_used": 0.10089154541492462, "beta_dpo/beta_used_raw": 0.10089154541492462, "beta_dpo/gap_mean": 0.12341433763504028, "beta_dpo/gap_std": 0.48649847507476807, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.10128495842781557, "grad_norm": 20.514158248901367, "learning_rate": 4.925373134328357e-07, "logits/chosen": 1.7593741416931152, "logits/rejected": 1.6084721088409424, "loss": 1.3732, "step": 67 }, { "beta_dpo/beta_used": 0.10202755033969879, "beta_dpo/beta_used_raw": 0.10202755033969879, "beta_dpo/gap_mean": 0.13001835346221924, "beta_dpo/gap_std": 0.4906574487686157, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.10279667422524566, "grad_norm": 23.70347785949707, "learning_rate": 5e-07, "logits/chosen": 1.9041826725006104, "logits/rejected": 1.6085681915283203, "loss": 1.3687, "step": 68 }, { "beta_dpo/beta_used": 0.10150092095136642, "beta_dpo/beta_used_raw": 0.10150092095136642, "beta_dpo/gap_mean": 0.12389479577541351, "beta_dpo/gap_std": 0.4982506036758423, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.10430839002267574, "grad_norm": 25.167369842529297, "learning_rate": 4.999965034812934e-07, "logits/chosen": 1.972752332687378, "logits/rejected": 1.8324453830718994, "loss": 1.3729, "step": 69 }, { "beta_dpo/beta_used": 0.09665323793888092, "beta_dpo/beta_used_raw": 0.09665323793888092, "beta_dpo/gap_mean": 0.14055848121643066, "beta_dpo/gap_std": 0.5118545293807983, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.10582010582010581, "grad_norm": 19.85059356689453, "learning_rate": 4.999860140229787e-07, "logits/chosen": 1.6715452671051025, "logits/rejected": 1.7321879863739014, "loss": 1.3801, "step": 70 }, { "beta_dpo/beta_used": 0.09737221896648407, "beta_dpo/beta_used_raw": 0.09737221896648407, "beta_dpo/gap_mean": 0.1417197287082672, "beta_dpo/gap_std": 0.521407425403595, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.1073318216175359, "grad_norm": 19.420684814453125, "learning_rate": 4.999685319184688e-07, "logits/chosen": 1.5145726203918457, "logits/rejected": 1.5156700611114502, "loss": 1.3773, "step": 71 }, { "beta_dpo/beta_used": 0.10205356776714325, "beta_dpo/beta_used_raw": 0.10205356776714325, "beta_dpo/gap_mean": 0.14077220857143402, "beta_dpo/gap_std": 0.5270059704780579, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.10884353741496598, "grad_norm": 20.89740562438965, "learning_rate": 4.999440576567755e-07, "logits/chosen": 1.4117028713226318, "logits/rejected": 1.1985228061676025, "loss": 1.3695, "step": 72 }, { "beta_dpo/beta_used": 0.09661644697189331, "beta_dpo/beta_used_raw": 0.09661644697189331, "beta_dpo/gap_mean": 0.11880473792552948, "beta_dpo/gap_std": 0.5475245714187622, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.11035525321239607, "grad_norm": 19.121309280395508, "learning_rate": 4.999125919224965e-07, "logits/chosen": 1.3632001876831055, "logits/rejected": 1.357191801071167, "loss": 1.3799, "step": 73 }, { "beta_dpo/beta_used": 0.10562695562839508, "beta_dpo/beta_used_raw": 0.10562695562839508, "beta_dpo/gap_mean": 0.13782186806201935, "beta_dpo/gap_std": 0.5654876232147217, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.11186696900982615, "grad_norm": 21.445409774780273, "learning_rate": 4.998741355957963e-07, "logits/chosen": 1.8615484237670898, "logits/rejected": 1.6468513011932373, "loss": 1.3625, "step": 74 }, { "beta_dpo/beta_used": 0.09824702143669128, "beta_dpo/beta_used_raw": 0.09824702143669128, "beta_dpo/gap_mean": 0.15777266025543213, "beta_dpo/gap_std": 0.5702564716339111, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.11337868480725624, "grad_norm": 19.100439071655273, "learning_rate": 4.998286897523808e-07, "logits/chosen": 1.8663270473480225, "logits/rejected": 1.7803092002868652, "loss": 1.373, "step": 75 }, { "beta_dpo/beta_used": 0.09762119501829147, "beta_dpo/beta_used_raw": 0.09762119501829147, "beta_dpo/gap_mean": 0.1676311492919922, "beta_dpo/gap_std": 0.5960586071014404, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.11489040060468632, "grad_norm": 19.10355567932129, "learning_rate": 4.997762556634679e-07, "logits/chosen": 1.2259998321533203, "logits/rejected": 1.106650948524475, "loss": 1.3749, "step": 76 }, { "beta_dpo/beta_used": 0.10073893517255783, "beta_dpo/beta_used_raw": 0.10073893517255783, "beta_dpo/gap_mean": 0.2034570872783661, "beta_dpo/gap_std": 0.6084505319595337, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1164021164021164, "grad_norm": 21.278688430786133, "learning_rate": 4.99716834795752e-07, "logits/chosen": 1.0265986919403076, "logits/rejected": 1.192859411239624, "loss": 1.3651, "step": 77 }, { "beta_dpo/beta_used": 0.1015826165676117, "beta_dpo/beta_used_raw": 0.1015826165676117, "beta_dpo/gap_mean": 0.20583921670913696, "beta_dpo/gap_std": 0.6371290683746338, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.11791383219954649, "grad_norm": 20.532432556152344, "learning_rate": 4.996504288113623e-07, "logits/chosen": 1.601978063583374, "logits/rejected": 1.5848236083984375, "loss": 1.3664, "step": 78 }, { "beta_dpo/beta_used": 0.10259930044412613, "beta_dpo/beta_used_raw": 0.10259930044412613, "beta_dpo/gap_mean": 0.2349783480167389, "beta_dpo/gap_std": 0.6695432066917419, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.11942554799697656, "grad_norm": 20.652477264404297, "learning_rate": 4.995770395678171e-07, "logits/chosen": 1.9225590229034424, "logits/rejected": 1.9619791507720947, "loss": 1.3567, "step": 79 }, { "beta_dpo/beta_used": 0.0984174907207489, "beta_dpo/beta_used_raw": 0.0984174907207489, "beta_dpo/gap_mean": 0.2428167164325714, "beta_dpo/gap_std": 0.7031147480010986, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.12093726379440665, "grad_norm": 19.037858963012695, "learning_rate": 4.994966691179711e-07, "logits/chosen": 1.53074312210083, "logits/rejected": 1.2819523811340332, "loss": 1.3637, "step": 80 }, { "beta_dpo/beta_used": 0.10066931694746017, "beta_dpo/beta_used_raw": 0.10066931694746017, "beta_dpo/gap_mean": 0.2573161721229553, "beta_dpo/gap_std": 0.7247613668441772, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.12244897959183673, "grad_norm": 20.873491287231445, "learning_rate": 4.994093197099587e-07, "logits/chosen": 1.4366528987884521, "logits/rejected": 1.3545148372650146, "loss": 1.36, "step": 81 }, { "beta_dpo/beta_used": 0.10298150777816772, "beta_dpo/beta_used_raw": 0.10298150777816772, "beta_dpo/gap_mean": 0.28897643089294434, "beta_dpo/gap_std": 0.7567130327224731, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.12396069538926682, "grad_norm": 19.724485397338867, "learning_rate": 4.993149937871306e-07, "logits/chosen": 1.6228649616241455, "logits/rejected": 1.4233934879302979, "loss": 1.3517, "step": 82 }, { "beta_dpo/beta_used": 0.10570499300956726, "beta_dpo/beta_used_raw": 0.10570499300956726, "beta_dpo/gap_mean": 0.31459736824035645, "beta_dpo/gap_std": 0.752688467502594, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.1254724111866969, "grad_norm": 21.929424285888672, "learning_rate": 4.992136939879856e-07, "logits/chosen": 1.4179167747497559, "logits/rejected": 1.0729384422302246, "loss": 1.3439, "step": 83 }, { "beta_dpo/beta_used": 0.10633272677659988, "beta_dpo/beta_used_raw": 0.10633272677659988, "beta_dpo/gap_mean": 0.33907008171081543, "beta_dpo/gap_std": 0.7554141283035278, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.12698412698412698, "grad_norm": 22.371440887451172, "learning_rate": 4.991054231460969e-07, "logits/chosen": 1.7531371116638184, "logits/rejected": 1.5867257118225098, "loss": 1.3399, "step": 84 }, { "beta_dpo/beta_used": 0.09879690408706665, "beta_dpo/beta_used_raw": 0.09879690408706665, "beta_dpo/gap_mean": 0.3559741973876953, "beta_dpo/gap_std": 0.7542663812637329, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.12849584278155707, "grad_norm": 18.66929817199707, "learning_rate": 4.989901842900325e-07, "logits/chosen": 2.157787322998047, "logits/rejected": 2.0606753826141357, "loss": 1.3527, "step": 85 }, { "beta_dpo/beta_used": 0.09159150719642639, "beta_dpo/beta_used_raw": 0.09159150719642639, "beta_dpo/gap_mean": 0.3374265432357788, "beta_dpo/gap_std": 0.7478652000427246, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.13000755857898716, "grad_norm": 18.308496475219727, "learning_rate": 4.988679806432711e-07, "logits/chosen": 1.6739656925201416, "logits/rejected": 1.657767653465271, "loss": 1.3675, "step": 86 }, { "beta_dpo/beta_used": 0.10155273973941803, "beta_dpo/beta_used_raw": 0.10155273973941803, "beta_dpo/gap_mean": 0.35617873072624207, "beta_dpo/gap_std": 0.7686408162117004, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.13151927437641722, "grad_norm": 19.75534439086914, "learning_rate": 4.987388156241114e-07, "logits/chosen": 1.3862335681915283, "logits/rejected": 1.216930627822876, "loss": 1.3498, "step": 87 }, { "beta_dpo/beta_used": 0.09993347525596619, "beta_dpo/beta_used_raw": 0.09993347525596619, "beta_dpo/gap_mean": 0.36177581548690796, "beta_dpo/gap_std": 0.7989368438720703, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.1330309901738473, "grad_norm": 20.010814666748047, "learning_rate": 4.986026928455767e-07, "logits/chosen": 1.2944331169128418, "logits/rejected": 1.2594363689422607, "loss": 1.348, "step": 88 }, { "beta_dpo/beta_used": 0.09803235530853271, "beta_dpo/beta_used_raw": 0.09803235530853271, "beta_dpo/gap_mean": 0.3744267523288727, "beta_dpo/gap_std": 0.8301786184310913, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1345427059712774, "grad_norm": 17.966041564941406, "learning_rate": 4.984596161153135e-07, "logits/chosen": 1.8058236837387085, "logits/rejected": 1.5427722930908203, "loss": 1.3533, "step": 89 }, { "beta_dpo/beta_used": 0.1015101820230484, "beta_dpo/beta_used_raw": 0.1015101820230484, "beta_dpo/gap_mean": 0.4083542227745056, "beta_dpo/gap_std": 0.875269889831543, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1360544217687075, "grad_norm": 33.3956298828125, "learning_rate": 4.983095894354857e-07, "logits/chosen": 1.816709280014038, "logits/rejected": 1.4326956272125244, "loss": 1.3432, "step": 90 }, { "beta_dpo/beta_used": 0.10312025249004364, "beta_dpo/beta_used_raw": 0.10312025249004364, "beta_dpo/gap_mean": 0.40964722633361816, "beta_dpo/gap_std": 0.9051263332366943, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.13756613756613756, "grad_norm": 19.644180297851562, "learning_rate": 4.98152617002662e-07, "logits/chosen": 2.385554790496826, "logits/rejected": 2.0319085121154785, "loss": 1.3437, "step": 91 }, { "beta_dpo/beta_used": 0.09450967609882355, "beta_dpo/beta_used_raw": 0.09450967609882355, "beta_dpo/gap_mean": 0.4264791011810303, "beta_dpo/gap_std": 0.934371829032898, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.13907785336356765, "grad_norm": 18.947912216186523, "learning_rate": 4.979887032076988e-07, "logits/chosen": 1.7886258363723755, "logits/rejected": 1.6689845323562622, "loss": 1.3548, "step": 92 }, { "beta_dpo/beta_used": 0.09470728039741516, "beta_dpo/beta_used_raw": 0.09470728039741516, "beta_dpo/gap_mean": 0.4246646761894226, "beta_dpo/gap_std": 0.9929322004318237, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.14058956916099774, "grad_norm": 16.796300888061523, "learning_rate": 4.978178526356172e-07, "logits/chosen": 1.7977063655853271, "logits/rejected": 1.609261393547058, "loss": 1.3555, "step": 93 }, { "beta_dpo/beta_used": 0.11019230633974075, "beta_dpo/beta_used_raw": 0.11019230633974075, "beta_dpo/gap_mean": 0.47246092557907104, "beta_dpo/gap_std": 1.048844575881958, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.1421012849584278, "grad_norm": 37.086185455322266, "learning_rate": 4.976400700654751e-07, "logits/chosen": 1.6770544052124023, "logits/rejected": 1.748682975769043, "loss": 1.3159, "step": 94 }, { "beta_dpo/beta_used": 0.1036120057106018, "beta_dpo/beta_used_raw": 0.1036120057106018, "beta_dpo/gap_mean": 0.4928344488143921, "beta_dpo/gap_std": 1.090996503829956, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1436130007558579, "grad_norm": 21.989009857177734, "learning_rate": 4.974553604702332e-07, "logits/chosen": 1.2581148147583008, "logits/rejected": 1.1436889171600342, "loss": 1.337, "step": 95 }, { "beta_dpo/beta_used": 0.08425632119178772, "beta_dpo/beta_used_raw": 0.08425632119178772, "beta_dpo/gap_mean": 0.5038758516311646, "beta_dpo/gap_std": 1.110231637954712, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.14512471655328799, "grad_norm": 16.523128509521484, "learning_rate": 4.972637290166157e-07, "logits/chosen": 1.5648579597473145, "logits/rejected": 1.2802821397781372, "loss": 1.3627, "step": 96 }, { "beta_dpo/beta_used": 0.08535897731781006, "beta_dpo/beta_used_raw": 0.08535897731781006, "beta_dpo/gap_mean": 0.48465272784233093, "beta_dpo/gap_std": 1.1230860948562622, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.14663643235071808, "grad_norm": 18.421024322509766, "learning_rate": 4.970651810649666e-07, "logits/chosen": 1.367387294769287, "logits/rejected": 1.616769552230835, "loss": 1.3704, "step": 97 }, { "beta_dpo/beta_used": 0.10194718837738037, "beta_dpo/beta_used_raw": 0.10194718837738037, "beta_dpo/gap_mean": 0.44564807415008545, "beta_dpo/gap_std": 1.1249895095825195, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.14814814814814814, "grad_norm": 20.25482749938965, "learning_rate": 4.968597221690985e-07, "logits/chosen": 1.3039189577102661, "logits/rejected": 1.3426978588104248, "loss": 1.3431, "step": 98 }, { "beta_dpo/beta_used": 0.09411941468715668, "beta_dpo/beta_used_raw": 0.09411941468715668, "beta_dpo/gap_mean": 0.4496995806694031, "beta_dpo/gap_std": 1.2001111507415771, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.14965986394557823, "grad_norm": 20.5500545501709, "learning_rate": 4.966473580761389e-07, "logits/chosen": 1.912778615951538, "logits/rejected": 1.7203798294067383, "loss": 1.3597, "step": 99 }, { "beta_dpo/beta_used": 0.10309496521949768, "beta_dpo/beta_used_raw": 0.10309496521949768, "beta_dpo/gap_mean": 0.4821561872959137, "beta_dpo/gap_std": 1.2820096015930176, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.15117157974300832, "grad_norm": 22.72227668762207, "learning_rate": 4.964280947263676e-07, "logits/chosen": 1.8114492893218994, "logits/rejected": 1.7894963026046753, "loss": 1.3382, "step": 100 }, { "epoch": 0.15117157974300832, "eval_beta_dpo/beta_used": 0.10933709144592285, "eval_beta_dpo/beta_used_raw": 0.10933709144592285, "eval_beta_dpo/gap_mean": 0.5103484392166138, "eval_beta_dpo/gap_std": 1.3374193906784058, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.8052293062210083, "eval_logits/rejected": 1.6945847272872925, "eval_loss": 0.6595867276191711, "eval_runtime": 42.667, "eval_samples_per_second": 53.976, "eval_steps_per_second": 1.687, "step": 100 }, { "beta_dpo/beta_used": 0.10079428553581238, "beta_dpo/beta_used_raw": 0.10079428553581238, "beta_dpo/gap_mean": 0.5434271097183228, "beta_dpo/gap_std": 1.337038278579712, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.15268329554043839, "grad_norm": 18.15898323059082, "learning_rate": 4.96201938253052e-07, "logits/chosen": 0.826664924621582, "logits/rejected": 0.6784051060676575, "loss": 1.3319, "step": 101 }, { "beta_dpo/beta_used": 0.08604797720909119, "beta_dpo/beta_used_raw": 0.08604797720909119, "beta_dpo/gap_mean": 0.5208926200866699, "beta_dpo/gap_std": 1.3681602478027344, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.15419501133786848, "grad_norm": 16.30609703063965, "learning_rate": 4.959688949822748e-07, "logits/chosen": 1.3592952489852905, "logits/rejected": 1.462346076965332, "loss": 1.3624, "step": 102 }, { "beta_dpo/beta_used": 0.10142231732606888, "beta_dpo/beta_used_raw": 0.10142231732606888, "beta_dpo/gap_mean": 0.5772026181221008, "beta_dpo/gap_std": 1.416075348854065, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.15570672713529857, "grad_norm": 19.743104934692383, "learning_rate": 4.957289714327572e-07, "logits/chosen": 1.7426373958587646, "logits/rejected": 1.7846993207931519, "loss": 1.3296, "step": 103 }, { "beta_dpo/beta_used": 0.09698724746704102, "beta_dpo/beta_used_raw": 0.09698724746704102, "beta_dpo/gap_mean": 0.6326186656951904, "beta_dpo/gap_std": 1.4738898277282715, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.15721844293272866, "grad_norm": 20.580957412719727, "learning_rate": 4.954821743156767e-07, "logits/chosen": 1.9435052871704102, "logits/rejected": 1.6555917263031006, "loss": 1.33, "step": 104 }, { "beta_dpo/beta_used": 0.10263784229755402, "beta_dpo/beta_used_raw": 0.10263784229755402, "beta_dpo/gap_mean": 0.6641653776168823, "beta_dpo/gap_std": 1.6008001565933228, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.15873015873015872, "grad_norm": 21.733863830566406, "learning_rate": 4.952285105344791e-07, "logits/chosen": 1.7513964176177979, "logits/rejected": 1.516118049621582, "loss": 1.3237, "step": 105 }, { "beta_dpo/beta_used": 0.10779309272766113, "beta_dpo/beta_used_raw": 0.10779309272766113, "beta_dpo/gap_mean": 0.674252450466156, "beta_dpo/gap_std": 1.6650457382202148, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1602418745275888, "grad_norm": 21.42998504638672, "learning_rate": 4.949679871846857e-07, "logits/chosen": 1.7994898557662964, "logits/rejected": 1.6022930145263672, "loss": 1.3007, "step": 106 }, { "beta_dpo/beta_used": 0.08230964839458466, "beta_dpo/beta_used_raw": 0.08230964839458466, "beta_dpo/gap_mean": 0.6531383395195007, "beta_dpo/gap_std": 1.6853680610656738, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.1617535903250189, "grad_norm": 17.759193420410156, "learning_rate": 4.947006115536947e-07, "logits/chosen": 1.3034350872039795, "logits/rejected": 1.4832148551940918, "loss": 1.3548, "step": 107 }, { "beta_dpo/beta_used": 0.10429038107395172, "beta_dpo/beta_used_raw": 0.10429038107395172, "beta_dpo/gap_mean": 0.6379518508911133, "beta_dpo/gap_std": 1.6854841709136963, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.16326530612244897, "grad_norm": 20.740493774414062, "learning_rate": 4.944263911205772e-07, "logits/chosen": 1.1033403873443604, "logits/rejected": 0.858239471912384, "loss": 1.313, "step": 108 }, { "beta_dpo/beta_used": 0.08762006461620331, "beta_dpo/beta_used_raw": 0.08762006461620331, "beta_dpo/gap_mean": 0.6862611770629883, "beta_dpo/gap_std": 1.7554314136505127, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.16477702191987906, "grad_norm": 17.178913116455078, "learning_rate": 4.941453335558681e-07, "logits/chosen": 1.7482877969741821, "logits/rejected": 1.5088105201721191, "loss": 1.3494, "step": 109 }, { "beta_dpo/beta_used": 0.0838538110256195, "beta_dpo/beta_used_raw": 0.0838538110256195, "beta_dpo/gap_mean": 0.6247843503952026, "beta_dpo/gap_std": 1.8059306144714355, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.16628873771730915, "grad_norm": 17.97353744506836, "learning_rate": 4.938574467213517e-07, "logits/chosen": 1.412921667098999, "logits/rejected": 1.4799084663391113, "loss": 1.3564, "step": 110 }, { "beta_dpo/beta_used": 0.10175025463104248, "beta_dpo/beta_used_raw": 0.10175025463104248, "beta_dpo/gap_mean": 0.6145044565200806, "beta_dpo/gap_std": 1.817657470703125, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.16780045351473924, "grad_norm": 19.397005081176758, "learning_rate": 4.935627386698418e-07, "logits/chosen": 1.7223619222640991, "logits/rejected": 1.7298330068588257, "loss": 1.3224, "step": 111 }, { "beta_dpo/beta_used": 0.10769188404083252, "beta_dpo/beta_used_raw": 0.10769188404083252, "beta_dpo/gap_mean": 0.6791462898254395, "beta_dpo/gap_std": 1.8483753204345703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1693121693121693, "grad_norm": 22.855987548828125, "learning_rate": 4.932612176449559e-07, "logits/chosen": 1.450548529624939, "logits/rejected": 1.254005789756775, "loss": 1.3142, "step": 112 }, { "beta_dpo/beta_used": 0.10324016213417053, "beta_dpo/beta_used_raw": 0.10324016213417053, "beta_dpo/gap_mean": 0.6565523743629456, "beta_dpo/gap_std": 1.891095757484436, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1708238851095994, "grad_norm": 18.695514678955078, "learning_rate": 4.929528920808854e-07, "logits/chosen": 1.0113716125488281, "logits/rejected": 1.1878894567489624, "loss": 1.3211, "step": 113 }, { "beta_dpo/beta_used": 0.09588593244552612, "beta_dpo/beta_used_raw": 0.09588593244552612, "beta_dpo/gap_mean": 0.7151613235473633, "beta_dpo/gap_std": 1.9551301002502441, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.17233560090702948, "grad_norm": 19.19485855102539, "learning_rate": 4.92637770602159e-07, "logits/chosen": 2.16898250579834, "logits/rejected": 2.159493923187256, "loss": 1.328, "step": 114 }, { "beta_dpo/beta_used": 0.09995594620704651, "beta_dpo/beta_used_raw": 0.09995594620704651, "beta_dpo/gap_mean": 0.7490643858909607, "beta_dpo/gap_std": 1.9629037380218506, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.17384731670445955, "grad_norm": 19.53900718688965, "learning_rate": 4.923158620234019e-07, "logits/chosen": 1.8840827941894531, "logits/rejected": 1.6221849918365479, "loss": 1.3267, "step": 115 }, { "beta_dpo/beta_used": 0.10833384841680527, "beta_dpo/beta_used_raw": 0.10833384841680527, "beta_dpo/gap_mean": 0.7924877405166626, "beta_dpo/gap_std": 1.9609473943710327, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.17535903250188964, "grad_norm": 22.43212890625, "learning_rate": 4.91987175349089e-07, "logits/chosen": 1.73817777633667, "logits/rejected": 1.499211072921753, "loss": 1.2895, "step": 116 }, { "beta_dpo/beta_used": 0.09760797768831253, "beta_dpo/beta_used_raw": 0.09760797768831253, "beta_dpo/gap_mean": 0.9041982889175415, "beta_dpo/gap_std": 1.9778673648834229, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.17687074829931973, "grad_norm": 18.086820602416992, "learning_rate": 4.916517197732933e-07, "logits/chosen": 1.8457000255584717, "logits/rejected": 1.6579217910766602, "loss": 1.3005, "step": 117 }, { "beta_dpo/beta_used": 0.08526713401079178, "beta_dpo/beta_used_raw": 0.08526713401079178, "beta_dpo/gap_mean": 0.8945071697235107, "beta_dpo/gap_std": 2.001413345336914, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.17838246409674982, "grad_norm": 17.47612190246582, "learning_rate": 4.913095046794281e-07, "logits/chosen": 0.9872667789459229, "logits/rejected": 0.9681127071380615, "loss": 1.3328, "step": 118 }, { "beta_dpo/beta_used": 0.09429244697093964, "beta_dpo/beta_used_raw": 0.09429244697093964, "beta_dpo/gap_mean": 0.8825496435165405, "beta_dpo/gap_std": 2.0767202377319336, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.17989417989417988, "grad_norm": 17.861783981323242, "learning_rate": 4.909605396399855e-07, "logits/chosen": 1.8808125257492065, "logits/rejected": 2.0650906562805176, "loss": 1.3181, "step": 119 }, { "beta_dpo/beta_used": 0.1025141030550003, "beta_dpo/beta_used_raw": 0.1025141030550003, "beta_dpo/gap_mean": 0.9791843891143799, "beta_dpo/gap_std": 2.105668067932129, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.18140589569160998, "grad_norm": 21.278732299804688, "learning_rate": 4.906048344162676e-07, "logits/chosen": 1.837113380432129, "logits/rejected": 1.619814395904541, "loss": 1.292, "step": 120 }, { "beta_dpo/beta_used": 0.08260353654623032, "beta_dpo/beta_used_raw": 0.08260353654623032, "beta_dpo/gap_mean": 1.0040740966796875, "beta_dpo/gap_std": 2.16635799407959, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.18291761148904007, "grad_norm": 18.3269100189209, "learning_rate": 4.902423989581143e-07, "logits/chosen": 1.975892186164856, "logits/rejected": 1.6966898441314697, "loss": 1.335, "step": 121 }, { "beta_dpo/beta_used": 0.08021458238363266, "beta_dpo/beta_used_raw": 0.08021458238363266, "beta_dpo/gap_mean": 0.9787734746932983, "beta_dpo/gap_std": 2.237492561340332, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.18442932728647016, "grad_norm": 17.738651275634766, "learning_rate": 4.898732434036243e-07, "logits/chosen": 1.569380283355713, "logits/rejected": 1.292022466659546, "loss": 1.336, "step": 122 }, { "beta_dpo/beta_used": 0.10076497495174408, "beta_dpo/beta_used_raw": 0.10076497495174408, "beta_dpo/gap_mean": 0.9931870698928833, "beta_dpo/gap_std": 2.2701330184936523, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.18594104308390022, "grad_norm": 18.512353897094727, "learning_rate": 4.894973780788722e-07, "logits/chosen": 1.728874921798706, "logits/rejected": 1.3382471799850464, "loss": 1.2965, "step": 123 }, { "beta_dpo/beta_used": 0.10337992012500763, "beta_dpo/beta_used_raw": 0.10337992012500763, "beta_dpo/gap_mean": 1.021672010421753, "beta_dpo/gap_std": 2.3348677158355713, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1874527588813303, "grad_norm": 20.14508628845215, "learning_rate": 4.89114813497619e-07, "logits/chosen": 1.8601114749908447, "logits/rejected": 1.4402656555175781, "loss": 1.2728, "step": 124 }, { "beta_dpo/beta_used": 0.11034771800041199, "beta_dpo/beta_used_raw": 0.11034771800041199, "beta_dpo/gap_mean": 1.1416581869125366, "beta_dpo/gap_std": 2.3627383708953857, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.1889644746787604, "grad_norm": 26.62055778503418, "learning_rate": 4.887255603610184e-07, "logits/chosen": 2.262009382247925, "logits/rejected": 1.918278694152832, "loss": 1.2408, "step": 125 }, { "beta_dpo/beta_used": 0.07098745554685593, "beta_dpo/beta_used_raw": 0.07098745554685593, "beta_dpo/gap_mean": 1.104027271270752, "beta_dpo/gap_std": 2.409133195877075, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.19047619047619047, "grad_norm": 15.002470970153809, "learning_rate": 4.883296295573176e-07, "logits/chosen": 1.2601641416549683, "logits/rejected": 1.2013548612594604, "loss": 1.3446, "step": 126 }, { "beta_dpo/beta_used": 0.11030158400535583, "beta_dpo/beta_used_raw": 0.11030158400535583, "beta_dpo/gap_mean": 1.2010130882263184, "beta_dpo/gap_std": 2.4333302974700928, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.19198790627362056, "grad_norm": 19.414600372314453, "learning_rate": 4.87927032161552e-07, "logits/chosen": 2.3014814853668213, "logits/rejected": 2.174217939376831, "loss": 1.2273, "step": 127 }, { "beta_dpo/beta_used": 0.10572034865617752, "beta_dpo/beta_used_raw": 0.10572034865617752, "beta_dpo/gap_mean": 1.1918036937713623, "beta_dpo/gap_std": 2.564605236053467, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.19349962207105065, "grad_norm": 23.319074630737305, "learning_rate": 4.875177794352363e-07, "logits/chosen": 1.6076852083206177, "logits/rejected": 1.4918580055236816, "loss": 1.26, "step": 128 }, { "beta_dpo/beta_used": 0.0822620838880539, "beta_dpo/beta_used_raw": 0.0822620838880539, "beta_dpo/gap_mean": 1.131927251815796, "beta_dpo/gap_std": 2.708618640899658, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.19501133786848074, "grad_norm": 16.706241607666016, "learning_rate": 4.871018828260491e-07, "logits/chosen": 1.3592138290405273, "logits/rejected": 1.2817442417144775, "loss": 1.3246, "step": 129 }, { "beta_dpo/beta_used": 0.08625729382038116, "beta_dpo/beta_used_raw": 0.08625729382038116, "beta_dpo/gap_mean": 1.1664378643035889, "beta_dpo/gap_std": 2.7734792232513428, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1965230536659108, "grad_norm": 17.94251823425293, "learning_rate": 4.866793539675126e-07, "logits/chosen": 1.725219488143921, "logits/rejected": 1.631973147392273, "loss": 1.3189, "step": 130 }, { "beta_dpo/beta_used": 0.11515168845653534, "beta_dpo/beta_used_raw": 0.11515168845653534, "beta_dpo/gap_mean": 1.2549471855163574, "beta_dpo/gap_std": 2.857564687728882, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1980347694633409, "grad_norm": 22.736597061157227, "learning_rate": 4.86250204678667e-07, "logits/chosen": 1.5887306928634644, "logits/rejected": 1.3573846817016602, "loss": 1.2451, "step": 131 }, { "beta_dpo/beta_used": 0.11845074594020844, "beta_dpo/beta_used_raw": 0.11845074594020844, "beta_dpo/gap_mean": 1.2605340480804443, "beta_dpo/gap_std": 2.817857265472412, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.19954648526077098, "grad_norm": 25.156320571899414, "learning_rate": 4.858144469637408e-07, "logits/chosen": 1.8204238414764404, "logits/rejected": 1.8674492835998535, "loss": 1.2091, "step": 132 }, { "beta_dpo/beta_used": 0.08995058387517929, "beta_dpo/beta_used_raw": 0.08995058387517929, "beta_dpo/gap_mean": 1.248791217803955, "beta_dpo/gap_std": 2.8684797286987305, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20105820105820105, "grad_norm": 17.265727996826172, "learning_rate": 4.853720930118138e-07, "logits/chosen": 1.4804385900497437, "logits/rejected": 1.4534518718719482, "loss": 1.3045, "step": 133 }, { "beta_dpo/beta_used": 0.08332835137844086, "beta_dpo/beta_used_raw": 0.08332835137844086, "beta_dpo/gap_mean": 1.3323745727539062, "beta_dpo/gap_std": 2.947547435760498, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.20256991685563114, "grad_norm": 16.170001983642578, "learning_rate": 4.849231551964771e-07, "logits/chosen": 2.1145153045654297, "logits/rejected": 2.052708148956299, "loss": 1.31, "step": 134 }, { "beta_dpo/beta_used": 0.10449250787496567, "beta_dpo/beta_used_raw": 0.10449250787496567, "beta_dpo/gap_mean": 1.3598275184631348, "beta_dpo/gap_std": 2.9727349281311035, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.20408163265306123, "grad_norm": 20.1523380279541, "learning_rate": 4.844676460754862e-07, "logits/chosen": 1.7255396842956543, "logits/rejected": 1.7618924379348755, "loss": 1.2582, "step": 135 }, { "beta_dpo/beta_used": 0.10133585333824158, "beta_dpo/beta_used_raw": 0.10133585333824158, "beta_dpo/gap_mean": 1.4881207942962646, "beta_dpo/gap_std": 3.178489923477173, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.20559334845049132, "grad_norm": 22.073606491088867, "learning_rate": 4.840055783904106e-07, "logits/chosen": 1.3234667778015137, "logits/rejected": 1.4829561710357666, "loss": 1.2474, "step": 136 }, { "beta_dpo/beta_used": 0.09466598182916641, "beta_dpo/beta_used_raw": 0.09466598182916641, "beta_dpo/gap_mean": 1.4787802696228027, "beta_dpo/gap_std": 3.2747902870178223, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20710506424792138, "grad_norm": 18.009923934936523, "learning_rate": 4.835369650662767e-07, "logits/chosen": 1.520973801612854, "logits/rejected": 1.3727699518203735, "loss": 1.2673, "step": 137 }, { "beta_dpo/beta_used": 0.08019311726093292, "beta_dpo/beta_used_raw": 0.08019311726093292, "beta_dpo/gap_mean": 1.5378533601760864, "beta_dpo/gap_std": 3.3426733016967773, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20861678004535147, "grad_norm": 15.670638084411621, "learning_rate": 4.830618192112065e-07, "logits/chosen": 1.321858286857605, "logits/rejected": 1.368009090423584, "loss": 1.3035, "step": 138 }, { "beta_dpo/beta_used": 0.10574564337730408, "beta_dpo/beta_used_raw": 0.10574564337730408, "beta_dpo/gap_mean": 1.5016133785247803, "beta_dpo/gap_std": 3.4950404167175293, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.21012849584278157, "grad_norm": 24.677949905395508, "learning_rate": 4.825801541160509e-07, "logits/chosen": 1.1677018404006958, "logits/rejected": 1.1444388628005981, "loss": 1.2488, "step": 139 }, { "beta_dpo/beta_used": 0.12155772745609283, "beta_dpo/beta_used_raw": 0.12155772745609283, "beta_dpo/gap_mean": 1.6673638820648193, "beta_dpo/gap_std": 3.619114398956299, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.21164021164021163, "grad_norm": 26.427644729614258, "learning_rate": 4.820919832540181e-07, "logits/chosen": 1.3720524311065674, "logits/rejected": 1.4061660766601562, "loss": 1.2148, "step": 140 }, { "beta_dpo/beta_used": 0.12430672347545624, "beta_dpo/beta_used_raw": 0.12430672347545624, "beta_dpo/gap_mean": 1.8043220043182373, "beta_dpo/gap_std": 3.798015594482422, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.21315192743764172, "grad_norm": 22.859867095947266, "learning_rate": 4.815973202802966e-07, "logits/chosen": 1.7817519903182983, "logits/rejected": 1.6559662818908691, "loss": 1.1736, "step": 141 }, { "beta_dpo/beta_used": 0.0646064430475235, "beta_dpo/beta_used_raw": 0.0646064430475235, "beta_dpo/gap_mean": 1.7735939025878906, "beta_dpo/gap_std": 3.847339630126953, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2146636432350718, "grad_norm": 13.155664443969727, "learning_rate": 4.810961790316729e-07, "logits/chosen": 1.7617301940917969, "logits/rejected": 1.6993064880371094, "loss": 1.3175, "step": 142 }, { "beta_dpo/beta_used": 0.10804080963134766, "beta_dpo/beta_used_raw": 0.10804080963134766, "beta_dpo/gap_mean": 1.6947863101959229, "beta_dpo/gap_std": 3.9340009689331055, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2161753590325019, "grad_norm": 21.003841400146484, "learning_rate": 4.805885735261454e-07, "logits/chosen": 1.9638588428497314, "logits/rejected": 1.8217556476593018, "loss": 1.2281, "step": 143 }, { "beta_dpo/beta_used": 0.08677056431770325, "beta_dpo/beta_used_raw": 0.08677056431770325, "beta_dpo/gap_mean": 1.650296926498413, "beta_dpo/gap_std": 4.212867259979248, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.21768707482993196, "grad_norm": 18.106487274169922, "learning_rate": 4.800745179625307e-07, "logits/chosen": 1.9297895431518555, "logits/rejected": 1.861382246017456, "loss": 1.2954, "step": 144 }, { "beta_dpo/beta_used": 0.16803482174873352, "beta_dpo/beta_used_raw": 0.16803482174873352, "beta_dpo/gap_mean": 1.6556284427642822, "beta_dpo/gap_std": 4.449127674102783, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.21919879062736206, "grad_norm": 31.1252384185791, "learning_rate": 4.795540267200686e-07, "logits/chosen": 1.0947413444519043, "logits/rejected": 1.0834863185882568, "loss": 1.0266, "step": 145 }, { "beta_dpo/beta_used": 0.12755730748176575, "beta_dpo/beta_used_raw": 0.12755730748176575, "beta_dpo/gap_mean": 1.6483957767486572, "beta_dpo/gap_std": 4.460909366607666, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.22071050642479215, "grad_norm": 23.488847732543945, "learning_rate": 4.790271143580173e-07, "logits/chosen": 1.5351951122283936, "logits/rejected": 1.6117818355560303, "loss": 1.1962, "step": 146 }, { "beta_dpo/beta_used": 0.06719569861888885, "beta_dpo/beta_used_raw": 0.06719569861888885, "beta_dpo/gap_mean": 1.6245243549346924, "beta_dpo/gap_std": 4.596627235412598, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2222222222222222, "grad_norm": 15.262714385986328, "learning_rate": 4.784937956152489e-07, "logits/chosen": 1.4650212526321411, "logits/rejected": 1.396628737449646, "loss": 1.3401, "step": 147 }, { "beta_dpo/beta_used": 0.15461790561676025, "beta_dpo/beta_used_raw": 0.15461790561676025, "beta_dpo/gap_mean": 1.712737798690796, "beta_dpo/gap_std": 4.725405216217041, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2237339380196523, "grad_norm": 24.29132080078125, "learning_rate": 4.779540854098347e-07, "logits/chosen": 2.417107105255127, "logits/rejected": 2.175968647003174, "loss": 1.0703, "step": 148 }, { "beta_dpo/beta_used": 0.09070044755935669, "beta_dpo/beta_used_raw": 0.09070044755935669, "beta_dpo/gap_mean": 1.810120701789856, "beta_dpo/gap_std": 4.730660438537598, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2252456538170824, "grad_norm": 18.492767333984375, "learning_rate": 4.774079988386296e-07, "logits/chosen": 1.2221202850341797, "logits/rejected": 1.3723053932189941, "loss": 1.2579, "step": 149 }, { "beta_dpo/beta_used": 0.11454713344573975, "beta_dpo/beta_used_raw": 0.11454713344573975, "beta_dpo/gap_mean": 2.0056028366088867, "beta_dpo/gap_std": 5.036479949951172, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.22675736961451248, "grad_norm": 26.087398529052734, "learning_rate": 4.768555511768486e-07, "logits/chosen": 1.4887826442718506, "logits/rejected": 1.5462052822113037, "loss": 1.2657, "step": 150 }, { "beta_dpo/beta_used": 0.1551978588104248, "beta_dpo/beta_used_raw": 0.1551978588104248, "beta_dpo/gap_mean": 2.2529079914093018, "beta_dpo/gap_std": 5.154142379760742, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.22826908541194255, "grad_norm": 25.759302139282227, "learning_rate": 4.762967578776406e-07, "logits/chosen": 1.5137187242507935, "logits/rejected": 1.3097262382507324, "loss": 1.0226, "step": 151 }, { "beta_dpo/beta_used": 0.10738147795200348, "beta_dpo/beta_used_raw": 0.10738147795200348, "beta_dpo/gap_mean": 2.308027744293213, "beta_dpo/gap_std": 5.2502970695495605, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.22978080120937264, "grad_norm": 22.590726852416992, "learning_rate": 4.757316345716553e-07, "logits/chosen": 1.6765403747558594, "logits/rejected": 1.669187068939209, "loss": 1.1676, "step": 152 }, { "beta_dpo/beta_used": 0.10762692987918854, "beta_dpo/beta_used_raw": 0.10762692987918854, "beta_dpo/gap_mean": 2.3354625701904297, "beta_dpo/gap_std": 5.303244590759277, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.23129251700680273, "grad_norm": 19.168458938598633, "learning_rate": 4.751601970666064e-07, "logits/chosen": 0.8450142741203308, "logits/rejected": 0.7212068438529968, "loss": 1.1869, "step": 153 }, { "beta_dpo/beta_used": 0.12070707976818085, "beta_dpo/beta_used_raw": 0.12070707976818085, "beta_dpo/gap_mean": 2.360574960708618, "beta_dpo/gap_std": 5.460031986236572, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2328042328042328, "grad_norm": 21.795913696289062, "learning_rate": 4.745824613468292e-07, "logits/chosen": 1.0570695400238037, "logits/rejected": 1.2983663082122803, "loss": 1.1894, "step": 154 }, { "beta_dpo/beta_used": 0.1528300940990448, "beta_dpo/beta_used_raw": 0.1528300940990448, "beta_dpo/gap_mean": 2.3790721893310547, "beta_dpo/gap_std": 5.66038703918457, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.23431594860166288, "grad_norm": 39.34039306640625, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 1.4706714153289795, "logits/rejected": 1.307586431503296, "loss": 1.1006, "step": 155 }, { "beta_dpo/beta_used": 0.14839857816696167, "beta_dpo/beta_used_raw": 0.14839857816696167, "beta_dpo/gap_mean": 2.5533552169799805, "beta_dpo/gap_std": 5.758601188659668, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.23582766439909297, "grad_norm": 29.02472686767578, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 1.2706935405731201, "logits/rejected": 1.512930154800415, "loss": 1.0439, "step": 156 }, { "beta_dpo/beta_used": 0.09339036047458649, "beta_dpo/beta_used_raw": 0.09339036047458649, "beta_dpo/gap_mean": 2.6657767295837402, "beta_dpo/gap_std": 5.844965934753418, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.23733938019652306, "grad_norm": 16.238609313964844, "learning_rate": 4.728116273823847e-07, "logits/chosen": 1.0520925521850586, "logits/rejected": 1.0182958841323853, "loss": 1.1508, "step": 157 }, { "beta_dpo/beta_used": 0.168321892619133, "beta_dpo/beta_used_raw": 0.168321892619133, "beta_dpo/gap_mean": 2.6134791374206543, "beta_dpo/gap_std": 6.089890480041504, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.23885109599395313, "grad_norm": 28.28655242919922, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 1.3058767318725586, "logits/rejected": 1.2950568199157715, "loss": 1.0635, "step": 158 }, { "beta_dpo/beta_used": 0.08723060041666031, "beta_dpo/beta_used_raw": 0.08723060041666031, "beta_dpo/gap_mean": 2.738887310028076, "beta_dpo/gap_std": 6.282135963439941, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.24036281179138322, "grad_norm": 18.19098472595215, "learning_rate": 4.715998812855304e-07, "logits/chosen": 1.397586464881897, "logits/rejected": 1.3978208303451538, "loss": 1.2233, "step": 159 }, { "beta_dpo/beta_used": 0.09871069341897964, "beta_dpo/beta_used_raw": 0.09871069341897964, "beta_dpo/gap_mean": 2.721683979034424, "beta_dpo/gap_std": 6.240549087524414, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2418745275888133, "grad_norm": 23.070819854736328, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 1.0362298488616943, "logits/rejected": 0.9539611339569092, "loss": 1.1959, "step": 160 }, { "beta_dpo/beta_used": 0.13182277977466583, "beta_dpo/beta_used_raw": 0.13182277977466583, "beta_dpo/gap_mean": 2.772425413131714, "beta_dpo/gap_std": 6.313028335571289, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.24338624338624337, "grad_norm": 25.8764591217041, "learning_rate": 4.703633408618955e-07, "logits/chosen": 1.6497914791107178, "logits/rejected": 1.4926035404205322, "loss": 1.0769, "step": 161 }, { "beta_dpo/beta_used": 0.15703752636909485, "beta_dpo/beta_used_raw": 0.15703752636909485, "beta_dpo/gap_mean": 3.0325098037719727, "beta_dpo/gap_std": 6.246161460876465, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.24489795918367346, "grad_norm": 25.955524444580078, "learning_rate": 4.697358159051549e-07, "logits/chosen": 1.6864545345306396, "logits/rejected": 1.713794231414795, "loss": 0.9487, "step": 162 }, { "beta_dpo/beta_used": 0.034870997071266174, "beta_dpo/beta_used_raw": 0.02417636662721634, "beta_dpo/gap_mean": 3.10768723487854, "beta_dpo/gap_std": 6.307939529418945, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24640967498110355, "grad_norm": 10.59152603149414, "learning_rate": 4.691021444652876e-07, "logits/chosen": 1.5043668746948242, "logits/rejected": 1.0593593120574951, "loss": 1.3254, "step": 163 }, { "beta_dpo/beta_used": 0.1477426290512085, "beta_dpo/beta_used_raw": 0.1477426290512085, "beta_dpo/gap_mean": 3.3669991493225098, "beta_dpo/gap_std": 6.525307655334473, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.24792139077853365, "grad_norm": 33.83066177368164, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 1.4280340671539307, "logits/rejected": 1.1328227519989014, "loss": 1.0885, "step": 164 }, { "beta_dpo/beta_used": 0.12049752473831177, "beta_dpo/beta_used_raw": 0.12049752473831177, "beta_dpo/gap_mean": 3.529249429702759, "beta_dpo/gap_std": 6.616786956787109, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2494331065759637, "grad_norm": 20.40849494934082, "learning_rate": 4.678164332082175e-07, "logits/chosen": 1.7118687629699707, "logits/rejected": 1.7932038307189941, "loss": 1.0097, "step": 165 }, { "beta_dpo/beta_used": 0.0652805045247078, "beta_dpo/beta_used_raw": 0.0652805045247078, "beta_dpo/gap_mean": 3.6699838638305664, "beta_dpo/gap_std": 6.657036781311035, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2509448223733938, "grad_norm": 16.213890075683594, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 1.7215876579284668, "logits/rejected": 1.584639310836792, "loss": 1.2227, "step": 166 }, { "beta_dpo/beta_used": 0.0965694785118103, "beta_dpo/beta_used_raw": 0.0965694785118103, "beta_dpo/gap_mean": 3.741748809814453, "beta_dpo/gap_std": 6.662418365478516, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.25245653817082386, "grad_norm": 17.435684204101562, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 1.4423127174377441, "logits/rejected": 1.248117446899414, "loss": 1.1171, "step": 167 }, { "beta_dpo/beta_used": 0.08136512339115143, "beta_dpo/beta_used_raw": 0.06723477691411972, "beta_dpo/gap_mean": 3.637478828430176, "beta_dpo/gap_std": 6.633077621459961, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.25396825396825395, "grad_norm": 16.939830780029297, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 1.5010664463043213, "logits/rejected": 1.5989562273025513, "loss": 1.1493, "step": 168 }, { "beta_dpo/beta_used": 0.10735826194286346, "beta_dpo/beta_used_raw": 0.10735826194286346, "beta_dpo/gap_mean": 3.6373238563537598, "beta_dpo/gap_std": 6.84861421585083, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.25547996976568405, "grad_norm": 22.096202850341797, "learning_rate": 4.651720442612075e-07, "logits/chosen": 1.2934666872024536, "logits/rejected": 1.2724759578704834, "loss": 1.1346, "step": 169 }, { "beta_dpo/beta_used": 0.09798265993595123, "beta_dpo/beta_used_raw": 0.09798265993595123, "beta_dpo/gap_mean": 3.4399917125701904, "beta_dpo/gap_std": 7.241048812866211, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.25699168556311414, "grad_norm": 20.32015037536621, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 1.449697494506836, "logits/rejected": 1.5872085094451904, "loss": 1.182, "step": 170 }, { "beta_dpo/beta_used": 0.09298588335514069, "beta_dpo/beta_used_raw": 0.09298588335514069, "beta_dpo/gap_mean": 3.642791271209717, "beta_dpo/gap_std": 7.622129440307617, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2585034013605442, "grad_norm": 21.328685760498047, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 2.529806613922119, "logits/rejected": 2.442068099975586, "loss": 1.1731, "step": 171 }, { "beta_dpo/beta_used": 0.053223028779029846, "beta_dpo/beta_used_raw": 0.053223028779029846, "beta_dpo/gap_mean": 3.6746082305908203, "beta_dpo/gap_std": 7.658779144287109, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2600151171579743, "grad_norm": 11.974435806274414, "learning_rate": 4.631254907558365e-07, "logits/chosen": 2.2132601737976074, "logits/rejected": 2.062042474746704, "loss": 1.2614, "step": 172 }, { "beta_dpo/beta_used": 0.07246831804513931, "beta_dpo/beta_used_raw": 0.056058838963508606, "beta_dpo/gap_mean": 3.904388904571533, "beta_dpo/gap_std": 7.858163833618164, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2615268329554044, "grad_norm": 16.666893005371094, "learning_rate": 4.624313574873786e-07, "logits/chosen": 1.2529126405715942, "logits/rejected": 1.277488112449646, "loss": 1.2057, "step": 173 }, { "beta_dpo/beta_used": 0.14423680305480957, "beta_dpo/beta_used_raw": 0.14423680305480957, "beta_dpo/gap_mean": 4.094144344329834, "beta_dpo/gap_std": 7.942702293395996, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.26303854875283444, "grad_norm": 29.772438049316406, "learning_rate": 4.61731282057198e-07, "logits/chosen": 1.4726738929748535, "logits/rejected": 1.160088062286377, "loss": 0.9648, "step": 174 }, { "beta_dpo/beta_used": 0.1600879430770874, "beta_dpo/beta_used_raw": 0.1600879430770874, "beta_dpo/gap_mean": 4.209630012512207, "beta_dpo/gap_std": 8.244287490844727, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.26455026455026454, "grad_norm": 27.950191497802734, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 2.0793564319610596, "logits/rejected": 1.8890061378479004, "loss": 1.0063, "step": 175 }, { "beta_dpo/beta_used": 0.05491591989994049, "beta_dpo/beta_used_raw": -0.007800232619047165, "beta_dpo/gap_mean": 4.055308818817139, "beta_dpo/gap_std": 8.47665023803711, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.2660619803476946, "grad_norm": 16.41081428527832, "learning_rate": 4.603133832077953e-07, "logits/chosen": 1.8980469703674316, "logits/rejected": 1.8360246419906616, "loss": 1.2205, "step": 176 }, { "beta_dpo/beta_used": 0.18383970856666565, "beta_dpo/beta_used_raw": 0.18383970856666565, "beta_dpo/gap_mean": 4.529065132141113, "beta_dpo/gap_std": 8.682202339172363, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2675736961451247, "grad_norm": 38.8032112121582, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 1.898195743560791, "logits/rejected": 1.8038549423217773, "loss": 0.9231, "step": 177 }, { "beta_dpo/beta_used": 0.19010929763317108, "beta_dpo/beta_used_raw": 0.19010929763317108, "beta_dpo/gap_mean": 4.975588798522949, "beta_dpo/gap_std": 8.499929428100586, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2690854119425548, "grad_norm": 33.64447021484375, "learning_rate": 4.588719528532341e-07, "logits/chosen": 1.8813047409057617, "logits/rejected": 1.6573269367218018, "loss": 0.7533, "step": 178 }, { "beta_dpo/beta_used": 0.09621996432542801, "beta_dpo/beta_used_raw": 0.09621996432542801, "beta_dpo/gap_mean": 4.808865070343018, "beta_dpo/gap_std": 8.72989273071289, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2705971277399849, "grad_norm": 27.32986831665039, "learning_rate": 4.581424636586928e-07, "logits/chosen": 1.7015210390090942, "logits/rejected": 1.7635328769683838, "loss": 1.068, "step": 179 }, { "beta_dpo/beta_used": 0.025560760870575905, "beta_dpo/beta_used_raw": 0.006768429651856422, "beta_dpo/gap_mean": 4.6550421714782715, "beta_dpo/gap_std": 8.802743911743164, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.272108843537415, "grad_norm": 6.477420806884766, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 1.3054771423339844, "logits/rejected": 0.9867875576019287, "loss": 1.3093, "step": 180 }, { "beta_dpo/beta_used": 0.08936936408281326, "beta_dpo/beta_used_raw": 0.08936936408281326, "beta_dpo/gap_mean": 4.8378005027771, "beta_dpo/gap_std": 8.81364631652832, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.273620559334845, "grad_norm": 22.288198471069336, "learning_rate": 4.566660392614228e-07, "logits/chosen": 1.4038888216018677, "logits/rejected": 1.3131110668182373, "loss": 1.1065, "step": 181 }, { "beta_dpo/beta_used": 0.1858215630054474, "beta_dpo/beta_used_raw": 0.1858215630054474, "beta_dpo/gap_mean": 5.14285945892334, "beta_dpo/gap_std": 8.877325057983398, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2751322751322751, "grad_norm": 33.52143859863281, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 1.562524676322937, "logits/rejected": 1.2491695880889893, "loss": 0.8402, "step": 182 }, { "beta_dpo/beta_used": 0.04903354123234749, "beta_dpo/beta_used_raw": 0.020485244691371918, "beta_dpo/gap_mean": 4.961426734924316, "beta_dpo/gap_std": 9.019545555114746, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2766439909297052, "grad_norm": 12.803986549377441, "learning_rate": 4.551664914523433e-07, "logits/chosen": 1.5661842823028564, "logits/rejected": 1.6295418739318848, "loss": 1.2587, "step": 183 }, { "beta_dpo/beta_used": 0.03347941115498543, "beta_dpo/beta_used_raw": 0.03347941115498543, "beta_dpo/gap_mean": 4.973166465759277, "beta_dpo/gap_std": 8.975120544433594, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2781557067271353, "grad_norm": 6.735782623291016, "learning_rate": 4.544080985994258e-07, "logits/chosen": 1.7749900817871094, "logits/rejected": 1.7194840908050537, "loss": 1.278, "step": 184 }, { "beta_dpo/beta_used": 0.1335582137107849, "beta_dpo/beta_used_raw": 0.09271209686994553, "beta_dpo/gap_mean": 5.115813255310059, "beta_dpo/gap_std": 9.346285820007324, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.2796674225245654, "grad_norm": 23.257200241088867, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 1.6902034282684326, "logits/rejected": 1.6307165622711182, "loss": 1.0535, "step": 185 }, { "beta_dpo/beta_used": 0.08969143778085709, "beta_dpo/beta_used_raw": 0.08969143778085709, "beta_dpo/gap_mean": 5.149080753326416, "beta_dpo/gap_std": 9.851451873779297, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2811791383219955, "grad_norm": 18.274940490722656, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 1.2696905136108398, "logits/rejected": 1.034300446510315, "loss": 1.1545, "step": 186 }, { "beta_dpo/beta_used": 0.19267341494560242, "beta_dpo/beta_used_raw": 0.19267341494560242, "beta_dpo/gap_mean": 5.213037490844727, "beta_dpo/gap_std": 10.063613891601562, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.28269085411942557, "grad_norm": 36.83070755004883, "learning_rate": 4.520986992917297e-07, "logits/chosen": 1.6270906925201416, "logits/rejected": 1.3702093362808228, "loss": 0.868, "step": 187 }, { "beta_dpo/beta_used": 0.07237689942121506, "beta_dpo/beta_used_raw": 0.011760570108890533, "beta_dpo/gap_mean": 5.006385326385498, "beta_dpo/gap_std": 9.972711563110352, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2842025699168556, "grad_norm": 18.037521362304688, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 1.9290728569030762, "logits/rejected": 1.627413034439087, "loss": 1.2184, "step": 188 }, { "beta_dpo/beta_used": 0.13262778520584106, "beta_dpo/beta_used_raw": 0.08808554708957672, "beta_dpo/gap_mean": 5.106810092926025, "beta_dpo/gap_std": 9.941263198852539, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2857142857142857, "grad_norm": 49.00212478637695, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 1.581903338432312, "logits/rejected": 1.6713547706604004, "loss": 1.1338, "step": 189 }, { "beta_dpo/beta_used": 0.2871774435043335, "beta_dpo/beta_used_raw": 0.2871774435043335, "beta_dpo/gap_mean": 5.393362522125244, "beta_dpo/gap_std": 9.98210620880127, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2872260015117158, "grad_norm": 62.98942947387695, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 1.2159141302108765, "logits/rejected": 0.8774590492248535, "loss": 0.6566, "step": 190 }, { "beta_dpo/beta_used": 0.07902415096759796, "beta_dpo/beta_used_raw": 0.07902415096759796, "beta_dpo/gap_mean": 5.325117588043213, "beta_dpo/gap_std": 10.09085750579834, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2887377173091459, "grad_norm": 17.34073257446289, "learning_rate": 4.48940460132708e-07, "logits/chosen": 1.7142860889434814, "logits/rejected": 1.5624032020568848, "loss": 1.1187, "step": 191 }, { "beta_dpo/beta_used": 0.022713923826813698, "beta_dpo/beta_used_raw": 0.022713923826813698, "beta_dpo/gap_mean": 4.850440502166748, "beta_dpo/gap_std": 9.887323379516602, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.29024943310657597, "grad_norm": 4.582393169403076, "learning_rate": 4.481369327558329e-07, "logits/chosen": 1.753859281539917, "logits/rejected": 1.7524299621582031, "loss": 1.306, "step": 192 }, { "beta_dpo/beta_used": 0.03702099993824959, "beta_dpo/beta_used_raw": 0.03702099993824959, "beta_dpo/gap_mean": 4.930882453918457, "beta_dpo/gap_std": 9.861164093017578, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.29176114890400606, "grad_norm": 10.907903671264648, "learning_rate": 4.47327863063023e-07, "logits/chosen": 1.5122041702270508, "logits/rejected": 1.3613293170928955, "loss": 1.2766, "step": 193 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.030461229383945465, "beta_dpo/gap_mean": 4.714853286743164, "beta_dpo/gap_std": 10.037712097167969, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.29327286470143615, "grad_norm": 0.24061015248298645, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 1.5545375347137451, "logits/rejected": 1.6280193328857422, "loss": 1.3836, "step": 194 }, { "beta_dpo/beta_used": 0.06694042682647705, "beta_dpo/beta_used_raw": 0.06694042682647705, "beta_dpo/gap_mean": 4.624754905700684, "beta_dpo/gap_std": 9.953048706054688, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2947845804988662, "grad_norm": 16.01689338684082, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.9506034851074219, "logits/rejected": 1.1181230545043945, "loss": 1.1767, "step": 195 }, { "beta_dpo/beta_used": 0.1468542218208313, "beta_dpo/beta_used_raw": 0.1468542218208313, "beta_dpo/gap_mean": 4.510015487670898, "beta_dpo/gap_std": 9.975471496582031, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2962962962962963, "grad_norm": 29.909286499023438, "learning_rate": 4.448676271745197e-07, "logits/chosen": 1.5411239862442017, "logits/rejected": 1.5550258159637451, "loss": 1.0114, "step": 196 }, { "beta_dpo/beta_used": 0.1864607036113739, "beta_dpo/beta_used_raw": 0.1864607036113739, "beta_dpo/gap_mean": 4.715234756469727, "beta_dpo/gap_std": 10.445943832397461, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.29780801209372637, "grad_norm": 40.97339630126953, "learning_rate": 4.440366160729392e-07, "logits/chosen": 2.425356149673462, "logits/rejected": 1.9260857105255127, "loss": 1.0392, "step": 197 }, { "beta_dpo/beta_used": 0.1677129566669464, "beta_dpo/beta_used_raw": 0.1677129566669464, "beta_dpo/gap_mean": 4.945888996124268, "beta_dpo/gap_std": 10.412927627563477, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.29931972789115646, "grad_norm": 28.744064331054688, "learning_rate": 4.432001773500957e-07, "logits/chosen": 1.90618896484375, "logits/rejected": 1.743265151977539, "loss": 0.9825, "step": 198 }, { "beta_dpo/beta_used": 0.14470118284225464, "beta_dpo/beta_used_raw": 0.14254896342754364, "beta_dpo/gap_mean": 4.955746650695801, "beta_dpo/gap_std": 10.591995239257812, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.30083144368858655, "grad_norm": 35.50462341308594, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 1.899355411529541, "logits/rejected": 1.5145988464355469, "loss": 0.9911, "step": 199 }, { "beta_dpo/beta_used": 0.19559510052204132, "beta_dpo/beta_used_raw": 0.19559510052204132, "beta_dpo/gap_mean": 5.2142181396484375, "beta_dpo/gap_std": 10.851507186889648, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.30234315948601664, "grad_norm": 38.11675262451172, "learning_rate": 4.415111107797445e-07, "logits/chosen": 1.70686674118042, "logits/rejected": 1.0834190845489502, "loss": 1.0452, "step": 200 }, { "epoch": 0.30234315948601664, "eval_beta_dpo/beta_used": 0.1302367001771927, "eval_beta_dpo/beta_used_raw": 0.12496456503868103, "eval_beta_dpo/gap_mean": 5.1820478439331055, "eval_beta_dpo/gap_std": 10.96353816986084, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.6393016576766968, "eval_logits/rejected": 1.5120911598205566, "eval_loss": 0.6041610240936279, "eval_runtime": 42.6034, "eval_samples_per_second": 54.057, "eval_steps_per_second": 1.69, "step": 200 }, { "beta_dpo/beta_used": 0.05950671434402466, "beta_dpo/beta_used_raw": 0.05950671434402466, "beta_dpo/gap_mean": 5.228536605834961, "beta_dpo/gap_std": 10.815942764282227, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.30385487528344673, "grad_norm": 18.87432861328125, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 2.147465229034424, "logits/rejected": 2.1975698471069336, "loss": 1.1811, "step": 201 }, { "beta_dpo/beta_used": 0.1301541030406952, "beta_dpo/beta_used_raw": 0.1301541030406952, "beta_dpo/gap_mean": 5.4303483963012695, "beta_dpo/gap_std": 10.73287582397461, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.30536659108087677, "grad_norm": 34.74480056762695, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 1.184483289718628, "logits/rejected": 0.7701964378356934, "loss": 1.0818, "step": 202 }, { "beta_dpo/beta_used": 0.10333988070487976, "beta_dpo/beta_used_raw": 0.10333988070487976, "beta_dpo/gap_mean": 5.64778995513916, "beta_dpo/gap_std": 10.710124969482422, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.30687830687830686, "grad_norm": 24.69707679748535, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 2.1665844917297363, "logits/rejected": 1.642435073852539, "loss": 1.0076, "step": 203 }, { "beta_dpo/beta_used": 0.05128917843103409, "beta_dpo/beta_used_raw": 0.05128917843103409, "beta_dpo/gap_mean": 5.986999988555908, "beta_dpo/gap_std": 10.805131912231445, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.30839002267573695, "grad_norm": 18.80857276916504, "learning_rate": 4.380688857426449e-07, "logits/chosen": 1.4700884819030762, "logits/rejected": 1.0781567096710205, "loss": 1.2208, "step": 204 }, { "beta_dpo/beta_used": 0.10645169019699097, "beta_dpo/beta_used_raw": 0.09335462003946304, "beta_dpo/gap_mean": 5.741988182067871, "beta_dpo/gap_std": 10.960041046142578, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.30990173847316704, "grad_norm": 24.979455947875977, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 2.1651041507720947, "logits/rejected": 1.954960823059082, "loss": 1.2116, "step": 205 }, { "beta_dpo/beta_used": 0.019261833280324936, "beta_dpo/beta_used_raw": -0.05605250597000122, "beta_dpo/gap_mean": 5.41782808303833, "beta_dpo/gap_std": 11.143302917480469, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.31141345427059713, "grad_norm": 5.909642219543457, "learning_rate": 4.363161124189387e-07, "logits/chosen": 2.501114845275879, "logits/rejected": 2.2312614917755127, "loss": 1.3189, "step": 206 }, { "beta_dpo/beta_used": 0.04499204084277153, "beta_dpo/beta_used_raw": 0.04499204084277153, "beta_dpo/gap_mean": 5.597379207611084, "beta_dpo/gap_std": 11.20595932006836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3129251700680272, "grad_norm": 16.016199111938477, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 1.4587275981903076, "logits/rejected": 1.1297156810760498, "loss": 1.2432, "step": 207 }, { "beta_dpo/beta_used": 0.15474805235862732, "beta_dpo/beta_used_raw": 0.15474805235862732, "beta_dpo/gap_mean": 5.2720513343811035, "beta_dpo/gap_std": 11.097529411315918, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3144368858654573, "grad_norm": 35.47798538208008, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 1.320824384689331, "logits/rejected": 1.2546792030334473, "loss": 1.0041, "step": 208 }, { "beta_dpo/beta_used": 0.24433788657188416, "beta_dpo/beta_used_raw": 0.24433788657188416, "beta_dpo/gap_mean": 5.672760486602783, "beta_dpo/gap_std": 11.376781463623047, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.31594860166288735, "grad_norm": 56.8420524597168, "learning_rate": 4.336479271643833e-07, "logits/chosen": 1.5092371702194214, "logits/rejected": 1.3591229915618896, "loss": 1.0248, "step": 209 }, { "beta_dpo/beta_used": 0.18511611223220825, "beta_dpo/beta_used_raw": 0.18511611223220825, "beta_dpo/gap_mean": 5.985712051391602, "beta_dpo/gap_std": 11.49533462524414, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.31746031746031744, "grad_norm": 31.574161529541016, "learning_rate": 4.327482247091679e-07, "logits/chosen": 1.6522598266601562, "logits/rejected": 1.1164844036102295, "loss": 0.9775, "step": 210 }, { "beta_dpo/beta_used": 0.015916820615530014, "beta_dpo/beta_used_raw": 0.015916820615530014, "beta_dpo/gap_mean": 6.245479106903076, "beta_dpo/gap_std": 11.601383209228516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.31897203325774753, "grad_norm": 5.433824062347412, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 2.0384957790374756, "logits/rejected": 1.6139662265777588, "loss": 1.3211, "step": 211 }, { "beta_dpo/beta_used": 0.27274635434150696, "beta_dpo/beta_used_raw": 0.27274635434150696, "beta_dpo/gap_mean": 6.564366340637207, "beta_dpo/gap_std": 11.817914962768555, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3204837490551776, "grad_norm": 52.9380989074707, "learning_rate": 4.309335095262675e-07, "logits/chosen": 1.7562899589538574, "logits/rejected": 1.825326919555664, "loss": 0.7789, "step": 212 }, { "beta_dpo/beta_used": 0.012187526561319828, "beta_dpo/beta_used_raw": 0.0010126382112503052, "beta_dpo/gap_mean": 6.627925872802734, "beta_dpo/gap_std": 12.03477954864502, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3219954648526077, "grad_norm": 4.3769941329956055, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 1.3175151348114014, "logits/rejected": 1.5719773769378662, "loss": 1.3265, "step": 213 }, { "beta_dpo/beta_used": 0.03768792375922203, "beta_dpo/beta_used_raw": 0.01986430399119854, "beta_dpo/gap_mean": 6.281346321105957, "beta_dpo/gap_std": 11.880584716796875, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3235071806500378, "grad_norm": 11.269039154052734, "learning_rate": 4.290985500881143e-07, "logits/chosen": 1.5611655712127686, "logits/rejected": 1.757429838180542, "loss": 1.2343, "step": 214 }, { "beta_dpo/beta_used": 0.17131496965885162, "beta_dpo/beta_used_raw": 0.17131496965885162, "beta_dpo/gap_mean": 6.383757591247559, "beta_dpo/gap_std": 11.635639190673828, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3250188964474679, "grad_norm": 48.6794548034668, "learning_rate": 4.281735428447157e-07, "logits/chosen": 1.0305719375610352, "logits/rejected": 0.8588269352912903, "loss": 1.1715, "step": 215 }, { "beta_dpo/beta_used": 0.09299275279045105, "beta_dpo/beta_used_raw": 0.09046853333711624, "beta_dpo/gap_mean": 6.680278301239014, "beta_dpo/gap_std": 11.714441299438477, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.32653061224489793, "grad_norm": 21.09217071533203, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 2.2120964527130127, "logits/rejected": 2.084207534790039, "loss": 1.1185, "step": 216 }, { "beta_dpo/beta_used": 0.04985278844833374, "beta_dpo/beta_used_raw": 0.04985278844833374, "beta_dpo/gap_mean": 6.69057559967041, "beta_dpo/gap_std": 11.938087463378906, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.328042328042328, "grad_norm": 11.565571784973145, "learning_rate": 4.26308602680756e-07, "logits/chosen": 2.1804494857788086, "logits/rejected": 1.817223072052002, "loss": 1.1928, "step": 217 }, { "beta_dpo/beta_used": 0.12828893959522247, "beta_dpo/beta_used_raw": 0.11110377311706543, "beta_dpo/gap_mean": 6.170825958251953, "beta_dpo/gap_std": 11.909095764160156, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3295540438397581, "grad_norm": 34.02565383911133, "learning_rate": 4.253687219265803e-07, "logits/chosen": 1.4786970615386963, "logits/rejected": 1.3133083581924438, "loss": 1.0952, "step": 218 }, { "beta_dpo/beta_used": 0.0129962507635355, "beta_dpo/beta_used_raw": 0.004371422342956066, "beta_dpo/gap_mean": 6.15762996673584, "beta_dpo/gap_std": 11.741506576538086, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3310657596371882, "grad_norm": 5.00832986831665, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 1.4401828050613403, "logits/rejected": 1.342416763305664, "loss": 1.3225, "step": 219 }, { "beta_dpo/beta_used": 0.028154663741588593, "beta_dpo/beta_used_raw": 0.028154663741588593, "beta_dpo/gap_mean": 6.027561187744141, "beta_dpo/gap_std": 11.516753196716309, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3325774754346183, "grad_norm": 7.318558692932129, "learning_rate": 4.234742705255272e-07, "logits/chosen": 1.6872892379760742, "logits/rejected": 1.3944776058197021, "loss": 1.2728, "step": 220 }, { "beta_dpo/beta_used": 0.09166809916496277, "beta_dpo/beta_used_raw": 0.09166809916496277, "beta_dpo/gap_mean": 6.0580058097839355, "beta_dpo/gap_std": 11.597650527954102, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3340891912320484, "grad_norm": 19.652095794677734, "learning_rate": 4.22519752870528e-07, "logits/chosen": 1.3477516174316406, "logits/rejected": 1.0663343667984009, "loss": 1.089, "step": 221 }, { "beta_dpo/beta_used": 0.16691642999649048, "beta_dpo/beta_used_raw": 0.16691642999649048, "beta_dpo/gap_mean": 6.380154609680176, "beta_dpo/gap_std": 11.571673393249512, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3356009070294785, "grad_norm": 42.29206085205078, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 2.014256477355957, "logits/rejected": 1.907914161682129, "loss": 1.0351, "step": 222 }, { "beta_dpo/beta_used": 0.06735613942146301, "beta_dpo/beta_used_raw": 0.06735613942146301, "beta_dpo/gap_mean": 6.677520751953125, "beta_dpo/gap_std": 11.566620826721191, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3371126228269085, "grad_norm": 16.786890029907227, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 1.4429056644439697, "logits/rejected": 1.3182603120803833, "loss": 1.1409, "step": 223 }, { "beta_dpo/beta_used": 0.06260553002357483, "beta_dpo/beta_used_raw": 0.06260553002357483, "beta_dpo/gap_mean": 6.619193077087402, "beta_dpo/gap_std": 11.379542350769043, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3386243386243386, "grad_norm": 15.259867668151855, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 2.385403633117676, "logits/rejected": 2.2249648571014404, "loss": 1.113, "step": 224 }, { "beta_dpo/beta_used": 0.04934084415435791, "beta_dpo/beta_used_raw": 0.038659606128931046, "beta_dpo/gap_mean": 6.746703147888184, "beta_dpo/gap_std": 11.56619644165039, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3401360544217687, "grad_norm": 13.229011535644531, "learning_rate": 4.186536937864752e-07, "logits/chosen": 1.5810472965240479, "logits/rejected": 1.0562224388122559, "loss": 1.1953, "step": 225 }, { "beta_dpo/beta_used": 0.07097682356834412, "beta_dpo/beta_used_raw": 0.07097682356834412, "beta_dpo/gap_mean": 6.604011535644531, "beta_dpo/gap_std": 11.779237747192383, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3416477702191988, "grad_norm": 16.87116241455078, "learning_rate": 4.176753170773052e-07, "logits/chosen": 1.5594934225082397, "logits/rejected": 1.3470158576965332, "loss": 1.1094, "step": 226 }, { "beta_dpo/beta_used": 0.16113229095935822, "beta_dpo/beta_used_raw": 0.16113229095935822, "beta_dpo/gap_mean": 6.743369102478027, "beta_dpo/gap_std": 12.084405899047852, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3431594860166289, "grad_norm": 35.86256790161133, "learning_rate": 4.166922501290729e-07, "logits/chosen": 1.400483250617981, "logits/rejected": 1.3121165037155151, "loss": 0.9626, "step": 227 }, { "beta_dpo/beta_used": 0.05211072787642479, "beta_dpo/beta_used_raw": 0.018288929015398026, "beta_dpo/gap_mean": 6.6497087478637695, "beta_dpo/gap_std": 12.260591506958008, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.34467120181405897, "grad_norm": 16.787290573120117, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 1.9922467470169067, "logits/rejected": 1.8599324226379395, "loss": 1.2106, "step": 228 }, { "beta_dpo/beta_used": 0.16892960667610168, "beta_dpo/beta_used_raw": 0.16892960667610168, "beta_dpo/gap_mean": 6.767346382141113, "beta_dpo/gap_std": 12.224922180175781, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.34618291761148906, "grad_norm": 29.138980865478516, "learning_rate": 4.147121556398312e-07, "logits/chosen": 1.9473903179168701, "logits/rejected": 1.702131748199463, "loss": 0.8738, "step": 229 }, { "beta_dpo/beta_used": 0.11095847934484482, "beta_dpo/beta_used_raw": 0.11095847934484482, "beta_dpo/gap_mean": 6.441825866699219, "beta_dpo/gap_std": 12.481451988220215, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3476946334089191, "grad_norm": 26.85580062866211, "learning_rate": 4.137151834863213e-07, "logits/chosen": 1.0509746074676514, "logits/rejected": 1.3630282878875732, "loss": 1.0661, "step": 230 }, { "beta_dpo/beta_used": 0.197315976023674, "beta_dpo/beta_used_raw": 0.197315976023674, "beta_dpo/gap_mean": 6.801876068115234, "beta_dpo/gap_std": 12.54133129119873, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3492063492063492, "grad_norm": 51.88249206542969, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 0.7952204942703247, "logits/rejected": 0.5976537466049194, "loss": 0.968, "step": 231 }, { "beta_dpo/beta_used": 0.04585336521267891, "beta_dpo/beta_used_raw": 0.04585336521267891, "beta_dpo/gap_mean": 6.546322345733643, "beta_dpo/gap_std": 12.779912948608398, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3507180650037793, "grad_norm": 11.413716316223145, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 1.2692692279815674, "logits/rejected": 1.3366895914077759, "loss": 1.2179, "step": 232 }, { "beta_dpo/beta_used": 0.1407492756843567, "beta_dpo/beta_used_raw": 0.046730317175388336, "beta_dpo/gap_mean": 6.603410720825195, "beta_dpo/gap_std": 12.996603012084961, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.35222978080120937, "grad_norm": 42.801273345947266, "learning_rate": 4.106969024216348e-07, "logits/chosen": 1.7770150899887085, "logits/rejected": 1.4710367918014526, "loss": 1.186, "step": 233 }, { "beta_dpo/beta_used": 0.12140364944934845, "beta_dpo/beta_used_raw": 0.10727863758802414, "beta_dpo/gap_mean": 6.156139373779297, "beta_dpo/gap_std": 13.207222938537598, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.35374149659863946, "grad_norm": 38.265342712402344, "learning_rate": 4.09681781007452e-07, "logits/chosen": 0.5206916332244873, "logits/rejected": 0.37996482849121094, "loss": 1.2569, "step": 234 }, { "beta_dpo/beta_used": 0.11484545469284058, "beta_dpo/beta_used_raw": 0.10733015090227127, "beta_dpo/gap_mean": 6.554360389709473, "beta_dpo/gap_std": 12.979118347167969, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.35525321239606955, "grad_norm": 50.358585357666016, "learning_rate": 4.08662192950594e-07, "logits/chosen": 1.2737979888916016, "logits/rejected": 1.3781143426895142, "loss": 1.2159, "step": 235 }, { "beta_dpo/beta_used": 0.19294582307338715, "beta_dpo/beta_used_raw": 0.18594704568386078, "beta_dpo/gap_mean": 6.644139289855957, "beta_dpo/gap_std": 13.24412727355957, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.35676492819349964, "grad_norm": 50.509952545166016, "learning_rate": 4.076381667711306e-07, "logits/chosen": 1.7458560466766357, "logits/rejected": 1.6359169483184814, "loss": 1.0928, "step": 236 }, { "beta_dpo/beta_used": 0.09629102051258087, "beta_dpo/beta_used_raw": 0.03170393407344818, "beta_dpo/gap_mean": 6.329275608062744, "beta_dpo/gap_std": 13.261556625366211, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.35827664399092973, "grad_norm": 18.643909454345703, "learning_rate": 4.066097311132753e-07, "logits/chosen": 1.4134365320205688, "logits/rejected": 1.3123092651367188, "loss": 1.0787, "step": 237 }, { "beta_dpo/beta_used": 0.11005407571792603, "beta_dpo/beta_used_raw": 0.11005407571792603, "beta_dpo/gap_mean": 6.265144348144531, "beta_dpo/gap_std": 13.122074127197266, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.35978835978835977, "grad_norm": 21.7652587890625, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 1.703669786453247, "logits/rejected": 1.675083875656128, "loss": 1.0768, "step": 238 }, { "beta_dpo/beta_used": 0.11901892721652985, "beta_dpo/beta_used_raw": 0.11901892721652985, "beta_dpo/gap_mean": 6.37298583984375, "beta_dpo/gap_std": 13.223270416259766, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.36130007558578986, "grad_norm": 27.12213134765625, "learning_rate": 4.045397465551513e-07, "logits/chosen": 1.5994396209716797, "logits/rejected": 1.4502242803573608, "loss": 1.1095, "step": 239 }, { "beta_dpo/beta_used": 0.3060862421989441, "beta_dpo/beta_used_raw": 0.3060862421989441, "beta_dpo/gap_mean": 7.014960289001465, "beta_dpo/gap_std": 13.332306861877441, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.36281179138321995, "grad_norm": 71.83600616455078, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 1.3177558183670044, "logits/rejected": 1.2535611391067505, "loss": 0.9217, "step": 240 }, { "beta_dpo/beta_used": 0.058323122560977936, "beta_dpo/beta_used_raw": 0.058323122560977936, "beta_dpo/gap_mean": 7.006235599517822, "beta_dpo/gap_std": 13.238981246948242, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.36432350718065004, "grad_norm": 15.872024536132812, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 1.207369327545166, "logits/rejected": 1.034806489944458, "loss": 1.1805, "step": 241 }, { "beta_dpo/beta_used": 0.11554928123950958, "beta_dpo/beta_used_raw": 0.10966099053621292, "beta_dpo/gap_mean": 7.3681416511535645, "beta_dpo/gap_std": 13.11304759979248, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.36583522297808013, "grad_norm": 21.17530059814453, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 0.5199865698814392, "logits/rejected": 0.37630772590637207, "loss": 1.0416, "step": 242 }, { "beta_dpo/beta_used": 0.0793665200471878, "beta_dpo/beta_used_raw": 0.06487854570150375, "beta_dpo/gap_mean": 7.23813533782959, "beta_dpo/gap_std": 12.889257431030273, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3673469387755102, "grad_norm": 24.303476333618164, "learning_rate": 4.003481376353596e-07, "logits/chosen": 1.7084109783172607, "logits/rejected": 1.7079315185546875, "loss": 1.2085, "step": 243 }, { "beta_dpo/beta_used": 0.11602146923542023, "beta_dpo/beta_used_raw": 0.11602146923542023, "beta_dpo/gap_mean": 7.612434387207031, "beta_dpo/gap_std": 12.60782241821289, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3688586545729403, "grad_norm": 25.81456184387207, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 1.3396780490875244, "logits/rejected": 1.2662789821624756, "loss": 0.9081, "step": 244 }, { "beta_dpo/beta_used": 0.17276377975940704, "beta_dpo/beta_used_raw": 0.17276377975940704, "beta_dpo/gap_mean": 8.035795211791992, "beta_dpo/gap_std": 12.561846733093262, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.37037037037037035, "grad_norm": 28.689769744873047, "learning_rate": 3.982269822636601e-07, "logits/chosen": 1.5368680953979492, "logits/rejected": 1.4403884410858154, "loss": 0.7303, "step": 245 }, { "beta_dpo/beta_used": 0.07997963577508926, "beta_dpo/beta_used_raw": 0.07323883473873138, "beta_dpo/gap_mean": 8.246414184570312, "beta_dpo/gap_std": 12.713071823120117, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.37188208616780044, "grad_norm": 24.240697860717773, "learning_rate": 3.971601703742932e-07, "logits/chosen": 1.8209779262542725, "logits/rejected": 1.883533239364624, "loss": 1.0866, "step": 246 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14714615046977997, "beta_dpo/gap_mean": 7.606928825378418, "beta_dpo/gap_std": 12.773536682128906, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.37339380196523053, "grad_norm": 0.31187567114830017, "learning_rate": 3.960892420986177e-07, "logits/chosen": 0.9519743323326111, "logits/rejected": 0.7347662448883057, "loss": 1.3828, "step": 247 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08996061980724335, "beta_dpo/gap_mean": 7.225895881652832, "beta_dpo/gap_std": 12.812955856323242, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3749055177626606, "grad_norm": 0.3030962347984314, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 1.2478711605072021, "logits/rejected": 1.4599595069885254, "loss": 1.3826, "step": 248 }, { "beta_dpo/beta_used": 0.038103874772787094, "beta_dpo/beta_used_raw": 0.012837713584303856, "beta_dpo/gap_mean": 6.602936744689941, "beta_dpo/gap_std": 12.938857078552246, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3764172335600907, "grad_norm": 11.848731994628906, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 2.3792171478271484, "logits/rejected": 2.3942737579345703, "loss": 1.2594, "step": 249 }, { "beta_dpo/beta_used": 0.195104718208313, "beta_dpo/beta_used_raw": 0.195104718208313, "beta_dpo/gap_mean": 6.806901931762695, "beta_dpo/gap_std": 13.18899917602539, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3779289493575208, "grad_norm": 35.285213470458984, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 1.5496623516082764, "logits/rejected": 1.4084426164627075, "loss": 0.8475, "step": 250 }, { "beta_dpo/beta_used": 0.03262628987431526, "beta_dpo/beta_used_raw": -0.007272530347108841, "beta_dpo/gap_mean": 6.856196880340576, "beta_dpo/gap_std": 13.041912078857422, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.3794406651549509, "grad_norm": 11.482399940490723, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 1.570683240890503, "logits/rejected": 1.6037306785583496, "loss": 1.2477, "step": 251 }, { "beta_dpo/beta_used": 0.009753710590302944, "beta_dpo/beta_used_raw": -0.043241649866104126, "beta_dpo/gap_mean": 6.3600311279296875, "beta_dpo/gap_std": 12.867057800292969, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.38095238095238093, "grad_norm": 2.8048999309539795, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 1.5606493949890137, "logits/rejected": 1.436813473701477, "loss": 1.3433, "step": 252 }, { "beta_dpo/beta_used": 0.011095372959971428, "beta_dpo/beta_used_raw": -0.00485480111092329, "beta_dpo/gap_mean": 6.311408996582031, "beta_dpo/gap_std": 12.606225967407227, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.382464096749811, "grad_norm": 3.6359784603118896, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 1.5644431114196777, "logits/rejected": 1.4657902717590332, "loss": 1.3373, "step": 253 }, { "beta_dpo/beta_used": 0.038476165384054184, "beta_dpo/beta_used_raw": 0.006513316184282303, "beta_dpo/gap_mean": 6.202248573303223, "beta_dpo/gap_std": 12.51596450805664, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3839758125472411, "grad_norm": 13.625349998474121, "learning_rate": 3.884800159665276e-07, "logits/chosen": 1.114762306213379, "logits/rejected": 0.9899729490280151, "loss": 1.2823, "step": 254 }, { "beta_dpo/beta_used": 0.21120937168598175, "beta_dpo/beta_used_raw": 0.16640348732471466, "beta_dpo/gap_mean": 6.46546745300293, "beta_dpo/gap_std": 12.447296142578125, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3854875283446712, "grad_norm": 41.72684097290039, "learning_rate": 3.873772445177015e-07, "logits/chosen": 1.5619221925735474, "logits/rejected": 1.3914833068847656, "loss": 1.0468, "step": 255 }, { "beta_dpo/beta_used": 0.10800749063491821, "beta_dpo/beta_used_raw": 0.05630078166723251, "beta_dpo/gap_mean": 6.492776870727539, "beta_dpo/gap_std": 12.558290481567383, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3869992441421013, "grad_norm": 33.22319030761719, "learning_rate": 3.862706303320329e-07, "logits/chosen": 1.068698525428772, "logits/rejected": 0.9799892902374268, "loss": 1.2679, "step": 256 }, { "beta_dpo/beta_used": 0.023676693439483643, "beta_dpo/beta_used_raw": 0.015448857098817825, "beta_dpo/gap_mean": 6.576137065887451, "beta_dpo/gap_std": 12.788522720336914, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3885109599395314, "grad_norm": 7.665389537811279, "learning_rate": 3.851602043638994e-07, "logits/chosen": 1.521716594696045, "logits/rejected": 1.2976162433624268, "loss": 1.2872, "step": 257 }, { "beta_dpo/beta_used": 0.127933531999588, "beta_dpo/beta_used_raw": 0.11033637076616287, "beta_dpo/gap_mean": 6.729240417480469, "beta_dpo/gap_std": 12.479969024658203, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3900226757369615, "grad_norm": 31.811519622802734, "learning_rate": 3.840459976743023e-07, "logits/chosen": 1.6439337730407715, "logits/rejected": 1.4948757886886597, "loss": 0.9875, "step": 258 }, { "beta_dpo/beta_used": 0.2876676619052887, "beta_dpo/beta_used_raw": 0.2876676619052887, "beta_dpo/gap_mean": 7.496322154998779, "beta_dpo/gap_std": 12.738725662231445, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3915343915343915, "grad_norm": 69.39122772216797, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 1.76267409324646, "logits/rejected": 1.7653789520263672, "loss": 0.8932, "step": 259 }, { "beta_dpo/beta_used": 0.003968134988099337, "beta_dpo/beta_used_raw": -0.08447183668613434, "beta_dpo/gap_mean": 7.647377014160156, "beta_dpo/gap_std": 12.91828441619873, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3930461073318216, "grad_norm": 1.4290848970413208, "learning_rate": 3.818063669026256e-07, "logits/chosen": 1.6024353504180908, "logits/rejected": 1.4254289865493774, "loss": 1.3641, "step": 260 }, { "beta_dpo/beta_used": 0.09260217845439911, "beta_dpo/beta_used_raw": 0.09260217845439911, "beta_dpo/gap_mean": 7.191786766052246, "beta_dpo/gap_std": 12.985448837280273, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3945578231292517, "grad_norm": 23.355159759521484, "learning_rate": 3.806810054678331e-07, "logits/chosen": 1.637736201286316, "logits/rejected": 2.0598158836364746, "loss": 1.1513, "step": 261 }, { "beta_dpo/beta_used": 0.055604852735996246, "beta_dpo/beta_used_raw": 0.020321451127529144, "beta_dpo/gap_mean": 6.929043769836426, "beta_dpo/gap_std": 12.48460578918457, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3960695389266818, "grad_norm": 14.879576683044434, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 1.6142950057983398, "logits/rejected": 1.5240156650543213, "loss": 1.1614, "step": 262 }, { "beta_dpo/beta_used": 0.06638128310441971, "beta_dpo/beta_used_raw": 0.0491025447845459, "beta_dpo/gap_mean": 7.156874179840088, "beta_dpo/gap_std": 12.549823760986328, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.3975812547241119, "grad_norm": 17.282392501831055, "learning_rate": 3.784193478933516e-07, "logits/chosen": 1.7361516952514648, "logits/rejected": 1.5868427753448486, "loss": 1.1135, "step": 263 }, { "beta_dpo/beta_used": 0.0455821193754673, "beta_dpo/beta_used_raw": 0.009301692247390747, "beta_dpo/gap_mean": 7.250235557556152, "beta_dpo/gap_std": 12.345619201660156, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.39909297052154197, "grad_norm": 15.046016693115234, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 1.4044766426086426, "logits/rejected": 1.309002161026001, "loss": 1.1949, "step": 264 }, { "beta_dpo/beta_used": 0.27329275012016296, "beta_dpo/beta_used_raw": 0.27329275012016296, "beta_dpo/gap_mean": 7.598145961761475, "beta_dpo/gap_std": 12.671415328979492, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.40060468631897206, "grad_norm": 47.54646301269531, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 1.227288007736206, "logits/rejected": 1.1892151832580566, "loss": 0.7855, "step": 265 }, { "beta_dpo/beta_used": 0.09071945399045944, "beta_dpo/beta_used_raw": 0.09071945399045944, "beta_dpo/gap_mean": 7.997687816619873, "beta_dpo/gap_std": 12.852567672729492, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4021164021164021, "grad_norm": 20.935165405273438, "learning_rate": 3.75e-07, "logits/chosen": 2.1720075607299805, "logits/rejected": 1.9046260118484497, "loss": 1.0269, "step": 266 }, { "beta_dpo/beta_used": 0.1523996889591217, "beta_dpo/beta_used_raw": 0.1523996889591217, "beta_dpo/gap_mean": 7.685354709625244, "beta_dpo/gap_std": 12.811508178710938, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4036281179138322, "grad_norm": 20.654251098632812, "learning_rate": 3.738531817228131e-07, "logits/chosen": 1.4944243431091309, "logits/rejected": 1.2840352058410645, "loss": 0.8099, "step": 267 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14010438323020935, "beta_dpo/gap_mean": 7.252220630645752, "beta_dpo/gap_std": 12.721076965332031, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4051398337112623, "grad_norm": 0.2638804614543915, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 1.4701387882232666, "logits/rejected": 1.3832581043243408, "loss": 1.3831, "step": 268 }, { "beta_dpo/beta_used": 0.16782352328300476, "beta_dpo/beta_used_raw": 0.16782352328300476, "beta_dpo/gap_mean": 7.114346504211426, "beta_dpo/gap_std": 12.626228332519531, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.40665154950869237, "grad_norm": 31.832950592041016, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 1.55270516872406, "logits/rejected": 1.756973147392273, "loss": 0.9503, "step": 269 }, { "beta_dpo/beta_used": 0.00175630790181458, "beta_dpo/beta_used_raw": -0.00642303517088294, "beta_dpo/gap_mean": 7.145930767059326, "beta_dpo/gap_std": 12.572525024414062, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.40816326530612246, "grad_norm": 0.5824019312858582, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 1.472477674484253, "logits/rejected": 1.4838604927062988, "loss": 1.3767, "step": 270 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.030945777893066406, "beta_dpo/gap_mean": 6.868946075439453, "beta_dpo/gap_std": 12.882951736450195, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.40967498110355255, "grad_norm": 0.305041640996933, "learning_rate": 3.692315864546635e-07, "logits/chosen": 1.6203490495681763, "logits/rejected": 1.2593576908111572, "loss": 1.382, "step": 271 }, { "beta_dpo/beta_used": 0.26175782084465027, "beta_dpo/beta_used_raw": 0.26175782084465027, "beta_dpo/gap_mean": 7.0912322998046875, "beta_dpo/gap_std": 12.92785358428955, "beta_dpo/mask_keep_frac": 0.5, "epoch": 0.41118669690098264, "grad_norm": 46.45206069946289, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 2.0279994010925293, "logits/rejected": 2.015707492828369, "loss": 0.7908, "step": 272 }, { "beta_dpo/beta_used": 0.12874624133110046, "beta_dpo/beta_used_raw": 0.12874624133110046, "beta_dpo/gap_mean": 7.3807525634765625, "beta_dpo/gap_std": 13.080790519714355, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4126984126984127, "grad_norm": 32.41847610473633, "learning_rate": 3.669006483223828e-07, "logits/chosen": 1.844411849975586, "logits/rejected": 1.5684620141983032, "loss": 1.0028, "step": 273 }, { "beta_dpo/beta_used": 0.14629867672920227, "beta_dpo/beta_used_raw": 0.14629867672920227, "beta_dpo/gap_mean": 7.499863624572754, "beta_dpo/gap_std": 13.270782470703125, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.41421012849584277, "grad_norm": 31.732133865356445, "learning_rate": 3.657302579891656e-07, "logits/chosen": 1.6515002250671387, "logits/rejected": 1.8607064485549927, "loss": 0.9586, "step": 274 }, { "beta_dpo/beta_used": 0.09432569891214371, "beta_dpo/beta_used_raw": 0.06229160353541374, "beta_dpo/gap_mean": 7.779719829559326, "beta_dpo/gap_std": 13.27018928527832, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.41572184429327286, "grad_norm": 23.149499893188477, "learning_rate": 3.645566304318526e-07, "logits/chosen": 1.7068809270858765, "logits/rejected": 1.6265830993652344, "loss": 1.0502, "step": 275 }, { "beta_dpo/beta_used": 0.1937197744846344, "beta_dpo/beta_used_raw": 0.1937197744846344, "beta_dpo/gap_mean": 8.17389965057373, "beta_dpo/gap_std": 13.258670806884766, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.41723356009070295, "grad_norm": 36.68490982055664, "learning_rate": 3.633797984793294e-07, "logits/chosen": 1.0973702669143677, "logits/rejected": 1.1315345764160156, "loss": 0.8111, "step": 276 }, { "beta_dpo/beta_used": 0.04128948226571083, "beta_dpo/beta_used_raw": -0.01279013603925705, "beta_dpo/gap_mean": 7.719527721405029, "beta_dpo/gap_std": 13.315411567687988, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.41874527588813304, "grad_norm": 13.151788711547852, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 0.9874433875083923, "logits/rejected": 0.9136591553688049, "loss": 1.2261, "step": 277 }, { "beta_dpo/beta_used": 0.08919985592365265, "beta_dpo/beta_used_raw": 0.03976030275225639, "beta_dpo/gap_mean": 7.159448623657227, "beta_dpo/gap_std": 13.33280086517334, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.42025699168556313, "grad_norm": 36.67196273803711, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 1.772204875946045, "logits/rejected": 1.61760413646698, "loss": 1.3171, "step": 278 }, { "beta_dpo/beta_used": 0.2875370979309082, "beta_dpo/beta_used_raw": 0.2875370979309082, "beta_dpo/gap_mean": 7.724452495574951, "beta_dpo/gap_std": 13.336029052734375, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.4217687074829932, "grad_norm": 42.04952621459961, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 1.4997611045837402, "logits/rejected": 1.4169633388519287, "loss": 0.6457, "step": 279 }, { "beta_dpo/beta_used": 0.1273547261953354, "beta_dpo/beta_used_raw": 0.1273547261953354, "beta_dpo/gap_mean": 8.448027610778809, "beta_dpo/gap_std": 13.09150505065918, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42328042328042326, "grad_norm": 22.6016902923584, "learning_rate": 3.586410864126781e-07, "logits/chosen": 1.3335264921188354, "logits/rejected": 1.205428123474121, "loss": 0.789, "step": 280 }, { "beta_dpo/beta_used": 0.09392253309488297, "beta_dpo/beta_used_raw": 0.09392253309488297, "beta_dpo/gap_mean": 8.494600296020508, "beta_dpo/gap_std": 13.018985748291016, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.42479213907785335, "grad_norm": 17.760608673095703, "learning_rate": 3.574487280222929e-07, "logits/chosen": 1.7236804962158203, "logits/rejected": 1.7596588134765625, "loss": 0.9312, "step": 281 }, { "beta_dpo/beta_used": 0.07099371403455734, "beta_dpo/beta_used_raw": 0.0519629567861557, "beta_dpo/gap_mean": 8.544626235961914, "beta_dpo/gap_std": 13.091127395629883, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42630385487528344, "grad_norm": 19.166406631469727, "learning_rate": 3.562533640600075e-07, "logits/chosen": 1.3285274505615234, "logits/rejected": 1.0681095123291016, "loss": 1.123, "step": 282 }, { "beta_dpo/beta_used": 0.05208796635270119, "beta_dpo/beta_used_raw": 0.05208796635270119, "beta_dpo/gap_mean": 8.585311889648438, "beta_dpo/gap_std": 13.22861099243164, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42781557067271353, "grad_norm": 14.542259216308594, "learning_rate": 3.550550279627215e-07, "logits/chosen": 1.4741332530975342, "logits/rejected": 1.3183352947235107, "loss": 1.1539, "step": 283 }, { "beta_dpo/beta_used": 0.09346778690814972, "beta_dpo/beta_used_raw": 0.09346778690814972, "beta_dpo/gap_mean": 8.709510803222656, "beta_dpo/gap_std": 13.263925552368164, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4293272864701436, "grad_norm": 23.085359573364258, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 1.9889543056488037, "logits/rejected": 1.9929530620574951, "loss": 0.9258, "step": 284 }, { "beta_dpo/beta_used": 0.03014732524752617, "beta_dpo/beta_used_raw": -0.04297472536563873, "beta_dpo/gap_mean": 8.226021766662598, "beta_dpo/gap_std": 13.281122207641602, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4308390022675737, "grad_norm": 13.746954917907715, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 1.8868443965911865, "logits/rejected": 1.9052425622940063, "loss": 1.2401, "step": 285 }, { "beta_dpo/beta_used": 0.11650238931179047, "beta_dpo/beta_used_raw": 0.11650238931179047, "beta_dpo/gap_mean": 8.26309585571289, "beta_dpo/gap_std": 13.527618408203125, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.4323507180650038, "grad_norm": 27.496360778808594, "learning_rate": 3.514425224712835e-07, "logits/chosen": 1.469580888748169, "logits/rejected": 1.596300482749939, "loss": 0.9446, "step": 286 }, { "beta_dpo/beta_used": 0.07653540372848511, "beta_dpo/beta_used_raw": 0.07653540372848511, "beta_dpo/gap_mean": 8.673506736755371, "beta_dpo/gap_std": 13.553279876708984, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.43386243386243384, "grad_norm": 22.05826187133789, "learning_rate": 3.502326338516534e-07, "logits/chosen": 1.3630534410476685, "logits/rejected": 0.8927639126777649, "loss": 1.0864, "step": 287 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.024330193176865578, "beta_dpo/gap_mean": 8.601228713989258, "beta_dpo/gap_std": 13.768001556396484, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.43537414965986393, "grad_norm": 0.32395094633102417, "learning_rate": 3.490199415097892e-07, "logits/chosen": 0.8396840691566467, "logits/rejected": 0.8194477558135986, "loss": 1.3798, "step": 288 }, { "beta_dpo/beta_used": 0.0935235470533371, "beta_dpo/beta_used_raw": 0.0935235470533371, "beta_dpo/gap_mean": 8.546222686767578, "beta_dpo/gap_std": 13.85302448272705, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.436885865457294, "grad_norm": 19.63606071472168, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 1.1697039604187012, "logits/rejected": 1.3744932413101196, "loss": 0.9199, "step": 289 }, { "beta_dpo/beta_used": 0.0780460461974144, "beta_dpo/beta_used_raw": 0.0780460461974144, "beta_dpo/gap_mean": 8.599958419799805, "beta_dpo/gap_std": 13.72342300415039, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4383975812547241, "grad_norm": 17.42572593688965, "learning_rate": 3.465862814232821e-07, "logits/chosen": 1.5614783763885498, "logits/rejected": 1.4205409288406372, "loss": 1.0388, "step": 290 }, { "beta_dpo/beta_used": 0.1943441778421402, "beta_dpo/beta_used_raw": 0.17343299090862274, "beta_dpo/gap_mean": 8.35032844543457, "beta_dpo/gap_std": 13.765388488769531, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4399092970521542, "grad_norm": 57.41442108154297, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 1.446760892868042, "logits/rejected": 1.3593605756759644, "loss": 1.1068, "step": 291 }, { "beta_dpo/beta_used": 0.09613867104053497, "beta_dpo/beta_used_raw": 0.09613867104053497, "beta_dpo/gap_mean": 8.555745124816895, "beta_dpo/gap_std": 14.046996116638184, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4414210128495843, "grad_norm": 27.844497680664062, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 1.3435033559799194, "logits/rejected": 1.224219560623169, "loss": 1.0311, "step": 292 }, { "beta_dpo/beta_used": 0.275066614151001, "beta_dpo/beta_used_raw": 0.275066614151001, "beta_dpo/gap_mean": 8.830119132995605, "beta_dpo/gap_std": 14.660975456237793, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4429327286470144, "grad_norm": 62.09123992919922, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 1.338533878326416, "logits/rejected": 1.4490954875946045, "loss": 0.8841, "step": 293 }, { "beta_dpo/beta_used": 0.14097994565963745, "beta_dpo/beta_used_raw": 0.07972858846187592, "beta_dpo/gap_mean": 9.071542739868164, "beta_dpo/gap_std": 14.639909744262695, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.4444444444444444, "grad_norm": 29.380847930908203, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 1.6176725625991821, "logits/rejected": 1.4952876567840576, "loss": 0.9302, "step": 294 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07594194263219833, "beta_dpo/gap_mean": 8.659149169921875, "beta_dpo/gap_std": 14.577923774719238, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4459561602418745, "grad_norm": 0.29677557945251465, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 1.670243501663208, "logits/rejected": 1.410202980041504, "loss": 1.3802, "step": 295 }, { "beta_dpo/beta_used": 0.034500446170568466, "beta_dpo/beta_used_raw": 0.024874616414308548, "beta_dpo/gap_mean": 8.963502883911133, "beta_dpo/gap_std": 14.65359115600586, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4474678760393046, "grad_norm": 12.611300468444824, "learning_rate": 3.392215553979679e-07, "logits/chosen": 1.4267054796218872, "logits/rejected": 1.1156208515167236, "loss": 1.2078, "step": 296 }, { "beta_dpo/beta_used": 0.087165467441082, "beta_dpo/beta_used_raw": 0.087165467441082, "beta_dpo/gap_mean": 8.99653148651123, "beta_dpo/gap_std": 14.433828353881836, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.4489795918367347, "grad_norm": 37.467220306396484, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 1.572485089302063, "logits/rejected": 1.3337376117706299, "loss": 1.0566, "step": 297 }, { "beta_dpo/beta_used": 0.10362537205219269, "beta_dpo/beta_used_raw": 0.10362537205219269, "beta_dpo/gap_mean": 9.2714262008667, "beta_dpo/gap_std": 14.870027542114258, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4504913076341648, "grad_norm": 23.68560028076172, "learning_rate": 3.367463137189156e-07, "logits/chosen": 2.0254149436950684, "logits/rejected": 2.0223116874694824, "loss": 0.9876, "step": 298 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07137240469455719, "beta_dpo/gap_mean": 8.997974395751953, "beta_dpo/gap_std": 14.866207122802734, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4520030234315949, "grad_norm": 0.31590133905410767, "learning_rate": 3.355050358314172e-07, "logits/chosen": 1.4713513851165771, "logits/rejected": 1.425032138824463, "loss": 1.3806, "step": 299 }, { "beta_dpo/beta_used": 0.044669389724731445, "beta_dpo/beta_used_raw": 0.044669389724731445, "beta_dpo/gap_mean": 8.637369155883789, "beta_dpo/gap_std": 15.14453125, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.45351473922902497, "grad_norm": 13.89785099029541, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 1.490415334701538, "logits/rejected": 1.5728942155838013, "loss": 1.1502, "step": 300 }, { "epoch": 0.45351473922902497, "eval_beta_dpo/beta_used": 0.14202851057052612, "eval_beta_dpo/beta_used_raw": 0.12426428496837616, "eval_beta_dpo/gap_mean": 8.545414924621582, "eval_beta_dpo/gap_std": 15.385650634765625, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.4621630907058716, "eval_logits/rejected": 1.3383522033691406, "eval_loss": 0.6282562613487244, "eval_runtime": 42.7236, "eval_samples_per_second": 53.905, "eval_steps_per_second": 1.685, "step": 300 }, { "beta_dpo/beta_used": 0.062314994633197784, "beta_dpo/beta_used_raw": 0.053756166249513626, "beta_dpo/gap_mean": 8.569022178649902, "beta_dpo/gap_std": 15.335243225097656, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.455026455026455, "grad_norm": 26.874900817871094, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 1.6456646919250488, "logits/rejected": 1.4892668724060059, "loss": 1.2159, "step": 301 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09743010997772217, "beta_dpo/gap_mean": 7.805597305297852, "beta_dpo/gap_std": 15.338693618774414, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4565381708238851, "grad_norm": 0.30480095744132996, "learning_rate": 3.317669908293554e-07, "logits/chosen": 0.9524801969528198, "logits/rejected": 0.733474612236023, "loss": 1.382, "step": 302 }, { "beta_dpo/beta_used": 0.13179755210876465, "beta_dpo/beta_used_raw": 0.13179755210876465, "beta_dpo/gap_mean": 8.191202163696289, "beta_dpo/gap_std": 15.557994842529297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4580498866213152, "grad_norm": 35.628971099853516, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 1.696607232093811, "logits/rejected": 1.6582088470458984, "loss": 1.0721, "step": 303 }, { "beta_dpo/beta_used": 0.11351722478866577, "beta_dpo/beta_used_raw": 0.11351722478866577, "beta_dpo/gap_mean": 8.502336502075195, "beta_dpo/gap_std": 15.554780960083008, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4595616024187453, "grad_norm": 18.954330444335938, "learning_rate": 3.292634667444117e-07, "logits/chosen": 1.550957202911377, "logits/rejected": 1.3838510513305664, "loss": 0.8149, "step": 304 }, { "beta_dpo/beta_used": 0.15219593048095703, "beta_dpo/beta_used_raw": 0.15219593048095703, "beta_dpo/gap_mean": 8.879709243774414, "beta_dpo/gap_std": 15.658042907714844, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.46107331821617537, "grad_norm": 49.46084213256836, "learning_rate": 3.280083614246217e-07, "logits/chosen": 1.1397596597671509, "logits/rejected": 1.192775011062622, "loss": 1.0138, "step": 305 }, { "beta_dpo/beta_used": 0.0675729289650917, "beta_dpo/beta_used_raw": -0.029155783355236053, "beta_dpo/gap_mean": 8.949745178222656, "beta_dpo/gap_std": 15.603281021118164, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.46258503401360546, "grad_norm": 14.935590744018555, "learning_rate": 3.267510740432719e-07, "logits/chosen": 1.4761111736297607, "logits/rejected": 1.2490260601043701, "loss": 1.0791, "step": 306 }, { "beta_dpo/beta_used": 0.06433078646659851, "beta_dpo/beta_used_raw": 0.032508764415979385, "beta_dpo/gap_mean": 8.469474792480469, "beta_dpo/gap_std": 15.57625961303711, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.46409674981103555, "grad_norm": 17.987796783447266, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 1.498106598854065, "logits/rejected": 1.4667487144470215, "loss": 1.2054, "step": 307 }, { "beta_dpo/beta_used": 0.10494223982095718, "beta_dpo/beta_used_raw": 0.08697421848773956, "beta_dpo/gap_mean": 8.70065975189209, "beta_dpo/gap_std": 15.584355354309082, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4656084656084656, "grad_norm": 33.857913970947266, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 1.12371826171875, "logits/rejected": 1.2730367183685303, "loss": 1.1937, "step": 308 }, { "beta_dpo/beta_used": 0.05916978791356087, "beta_dpo/beta_used_raw": 0.05702737346291542, "beta_dpo/gap_mean": 8.90184211730957, "beta_dpo/gap_std": 15.37250804901123, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4671201814058957, "grad_norm": 15.79340648651123, "learning_rate": 3.229664715194511e-07, "logits/chosen": 1.3931810855865479, "logits/rejected": 1.3838417530059814, "loss": 1.1177, "step": 309 }, { "beta_dpo/beta_used": 0.07162805646657944, "beta_dpo/beta_used_raw": 0.029773060232400894, "beta_dpo/gap_mean": 8.199593544006348, "beta_dpo/gap_std": 15.24665641784668, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.46863189720332576, "grad_norm": 95.10002899169922, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 1.5242080688476562, "logits/rejected": 1.6771780252456665, "loss": 1.1889, "step": 310 }, { "beta_dpo/beta_used": 0.11595961451530457, "beta_dpo/beta_used_raw": 0.06024138256907463, "beta_dpo/gap_mean": 8.104015350341797, "beta_dpo/gap_std": 15.395170211791992, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.47014361300075586, "grad_norm": 29.715646743774414, "learning_rate": 3.204331392103574e-07, "logits/chosen": 1.322619915008545, "logits/rejected": 1.3766727447509766, "loss": 1.0216, "step": 311 }, { "beta_dpo/beta_used": 0.047369327396154404, "beta_dpo/beta_used_raw": 0.038489848375320435, "beta_dpo/gap_mean": 8.375673294067383, "beta_dpo/gap_std": 15.524256706237793, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.47165532879818595, "grad_norm": 14.85205078125, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 1.7834012508392334, "logits/rejected": 1.7123432159423828, "loss": 1.192, "step": 312 }, { "beta_dpo/beta_used": 0.17071092128753662, "beta_dpo/beta_used_raw": 0.17071092128753662, "beta_dpo/gap_mean": 8.300642967224121, "beta_dpo/gap_std": 15.795055389404297, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.47316704459561604, "grad_norm": 38.206764221191406, "learning_rate": 3.178919262911314e-07, "logits/chosen": 1.3894063234329224, "logits/rejected": 1.470247507095337, "loss": 0.9714, "step": 313 }, { "beta_dpo/beta_used": 0.19798541069030762, "beta_dpo/beta_used_raw": 0.19798541069030762, "beta_dpo/gap_mean": 8.686609268188477, "beta_dpo/gap_std": 15.914053916931152, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.47467876039304613, "grad_norm": 43.56782531738281, "learning_rate": 3.166184534225087e-07, "logits/chosen": 1.5680770874023438, "logits/rejected": 1.4430394172668457, "loss": 0.9567, "step": 314 }, { "beta_dpo/beta_used": 0.03711218759417534, "beta_dpo/beta_used_raw": -0.0003358498215675354, "beta_dpo/gap_mean": 8.705648422241211, "beta_dpo/gap_std": 15.65288257598877, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.47619047619047616, "grad_norm": 12.367003440856934, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 1.3505761623382568, "logits/rejected": 1.321984052658081, "loss": 1.2002, "step": 315 }, { "beta_dpo/beta_used": 0.13538314402103424, "beta_dpo/beta_used_raw": 0.08412972092628479, "beta_dpo/gap_mean": 8.707979202270508, "beta_dpo/gap_std": 15.149721145629883, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.47770219198790626, "grad_norm": 23.324026107788086, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 1.2432329654693604, "logits/rejected": 0.7631069421768188, "loss": 0.9318, "step": 316 }, { "beta_dpo/beta_used": 0.09405739605426788, "beta_dpo/beta_used_raw": 0.09405739605426788, "beta_dpo/gap_mean": 9.40979290008545, "beta_dpo/gap_std": 15.358884811401367, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.47921390778533635, "grad_norm": 24.697673797607422, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 1.263979196548462, "logits/rejected": 1.1331511735916138, "loss": 1.0274, "step": 317 }, { "beta_dpo/beta_used": 0.01103425957262516, "beta_dpo/beta_used_raw": -0.07546316087245941, "beta_dpo/gap_mean": 9.228424072265625, "beta_dpo/gap_std": 15.684703826904297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.48072562358276644, "grad_norm": 3.974104642868042, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 1.6318895816802979, "logits/rejected": 1.7524826526641846, "loss": 1.3091, "step": 318 }, { "beta_dpo/beta_used": 0.059235621243715286, "beta_dpo/beta_used_raw": 0.03933485597372055, "beta_dpo/gap_mean": 9.031213760375977, "beta_dpo/gap_std": 15.591960906982422, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.48223733938019653, "grad_norm": 15.467484474182129, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 1.1446490287780762, "logits/rejected": 0.9336162805557251, "loss": 1.1188, "step": 319 }, { "beta_dpo/beta_used": 0.039282217621803284, "beta_dpo/beta_used_raw": -0.03736276924610138, "beta_dpo/gap_mean": 8.335182189941406, "beta_dpo/gap_std": 15.608378410339355, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4837490551776266, "grad_norm": 13.889800071716309, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 1.0464937686920166, "logits/rejected": 0.97780442237854, "loss": 1.2383, "step": 320 }, { "beta_dpo/beta_used": 0.15540650486946106, "beta_dpo/beta_used_raw": 0.0922960415482521, "beta_dpo/gap_mean": 8.612838745117188, "beta_dpo/gap_std": 15.559803009033203, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4852607709750567, "grad_norm": 41.25529098510742, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 1.2193944454193115, "logits/rejected": 1.1687374114990234, "loss": 0.9509, "step": 321 }, { "beta_dpo/beta_used": 0.2929460108280182, "beta_dpo/beta_used_raw": 0.2929460108280182, "beta_dpo/gap_mean": 8.876199722290039, "beta_dpo/gap_std": 15.44024658203125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.48677248677248675, "grad_norm": 59.753841400146484, "learning_rate": 3.063665887884511e-07, "logits/chosen": 1.8868658542633057, "logits/rejected": 1.554215431213379, "loss": 0.7392, "step": 322 }, { "beta_dpo/beta_used": 0.11718940734863281, "beta_dpo/beta_used_raw": 0.11718940734863281, "beta_dpo/gap_mean": 8.979276657104492, "beta_dpo/gap_std": 15.77180290222168, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.48828420256991684, "grad_norm": 33.23453903198242, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 1.370314121246338, "logits/rejected": 1.3247946500778198, "loss": 0.969, "step": 323 }, { "beta_dpo/beta_used": 0.1240943893790245, "beta_dpo/beta_used_raw": 0.1240943893790245, "beta_dpo/gap_mean": 9.192289352416992, "beta_dpo/gap_std": 16.003286361694336, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4897959183673469, "grad_norm": 24.16273307800293, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 1.34813392162323, "logits/rejected": 1.3388067483901978, "loss": 0.9585, "step": 324 }, { "beta_dpo/beta_used": 0.18239615857601166, "beta_dpo/beta_used_raw": 0.17694588005542755, "beta_dpo/gap_mean": 9.277650833129883, "beta_dpo/gap_std": 15.912029266357422, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.491307634164777, "grad_norm": 62.159393310546875, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 1.887860655784607, "logits/rejected": 1.854949951171875, "loss": 1.0449, "step": 325 }, { "beta_dpo/beta_used": 0.05071749910712242, "beta_dpo/beta_used_raw": -0.0035161487758159637, "beta_dpo/gap_mean": 8.763511657714844, "beta_dpo/gap_std": 15.70359992980957, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4928193499622071, "grad_norm": 21.598426818847656, "learning_rate": 3.012016670162977e-07, "logits/chosen": 1.5172902345657349, "logits/rejected": 1.4291995763778687, "loss": 1.2608, "step": 326 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13349460065364838, "beta_dpo/gap_mean": 8.288639068603516, "beta_dpo/gap_std": 15.90619945526123, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4943310657596372, "grad_norm": 0.3719576895236969, "learning_rate": 2.99906765620341e-07, "logits/chosen": 0.9552278518676758, "logits/rejected": 0.8223298788070679, "loss": 1.3816, "step": 327 }, { "beta_dpo/beta_used": 0.03803815692663193, "beta_dpo/beta_used_raw": -0.006140265613794327, "beta_dpo/gap_mean": 8.157548904418945, "beta_dpo/gap_std": 15.70623779296875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4958427815570673, "grad_norm": 14.157182693481445, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 1.2303402423858643, "logits/rejected": 1.089834451675415, "loss": 1.2513, "step": 328 }, { "beta_dpo/beta_used": 0.030700990930199623, "beta_dpo/beta_used_raw": -0.0006328783929347992, "beta_dpo/gap_mean": 8.243142127990723, "beta_dpo/gap_std": 15.664817810058594, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4973544973544973, "grad_norm": 11.445138931274414, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 1.5622519254684448, "logits/rejected": 1.2833863496780396, "loss": 1.2369, "step": 329 }, { "beta_dpo/beta_used": 0.1286730319261551, "beta_dpo/beta_used_raw": 0.10966426879167557, "beta_dpo/gap_mean": 8.6826171875, "beta_dpo/gap_std": 16.049409866333008, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.4988662131519274, "grad_norm": 29.91504669189453, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 1.5348610877990723, "logits/rejected": 1.484304666519165, "loss": 0.9868, "step": 330 }, { "beta_dpo/beta_used": 0.12080587446689606, "beta_dpo/beta_used_raw": 0.12080587446689606, "beta_dpo/gap_mean": 9.420382499694824, "beta_dpo/gap_std": 16.184450149536133, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5003779289493575, "grad_norm": 27.50879669189453, "learning_rate": 2.947135628327544e-07, "logits/chosen": 1.2042312622070312, "logits/rejected": 0.9803166389465332, "loss": 0.8804, "step": 331 }, { "beta_dpo/beta_used": 0.02131238579750061, "beta_dpo/beta_used_raw": -0.05307789891958237, "beta_dpo/gap_mean": 9.491806030273438, "beta_dpo/gap_std": 16.1578369140625, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5018896447467877, "grad_norm": 8.015897750854492, "learning_rate": 2.934120444167326e-07, "logits/chosen": 1.0138837099075317, "logits/rejected": 0.9243895411491394, "loss": 1.2392, "step": 332 }, { "beta_dpo/beta_used": 0.08504879474639893, "beta_dpo/beta_used_raw": 0.07206695526838303, "beta_dpo/gap_mean": 9.9852294921875, "beta_dpo/gap_std": 15.994035720825195, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5034013605442177, "grad_norm": 20.979957580566406, "learning_rate": 2.921093116725076e-07, "logits/chosen": 1.406402349472046, "logits/rejected": 1.2631025314331055, "loss": 0.9934, "step": 333 }, { "beta_dpo/beta_used": 0.07348217070102692, "beta_dpo/beta_used_raw": 0.04673399776220322, "beta_dpo/gap_mean": 9.785324096679688, "beta_dpo/gap_std": 16.334577560424805, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.5049130763416477, "grad_norm": 17.75541877746582, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 1.5521423816680908, "logits/rejected": 1.1226956844329834, "loss": 1.1227, "step": 334 }, { "beta_dpo/beta_used": 0.08314938098192215, "beta_dpo/beta_used_raw": 0.08314938098192215, "beta_dpo/gap_mean": 9.443593978881836, "beta_dpo/gap_std": 17.190528869628906, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5064247921390779, "grad_norm": 21.105512619018555, "learning_rate": 2.895003489933375e-07, "logits/chosen": 1.9064218997955322, "logits/rejected": 1.5738611221313477, "loss": 1.169, "step": 335 }, { "beta_dpo/beta_used": 0.02810182236135006, "beta_dpo/beta_used_raw": -0.02267879620194435, "beta_dpo/gap_mean": 9.678264617919922, "beta_dpo/gap_std": 17.16312026977539, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5079365079365079, "grad_norm": 10.10340404510498, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 1.4943532943725586, "logits/rejected": 1.4970781803131104, "loss": 1.2165, "step": 336 }, { "beta_dpo/beta_used": 0.06397874653339386, "beta_dpo/beta_used_raw": 0.06397874653339386, "beta_dpo/gap_mean": 9.417184829711914, "beta_dpo/gap_std": 17.194931030273438, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.509448223733938, "grad_norm": 17.00838851928711, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 1.0973150730133057, "logits/rejected": 1.0407588481903076, "loss": 1.1465, "step": 337 }, { "beta_dpo/beta_used": 0.007970977574586868, "beta_dpo/beta_used_raw": -0.026147443801164627, "beta_dpo/gap_mean": 8.84959602355957, "beta_dpo/gap_std": 16.847551345825195, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5109599395313681, "grad_norm": 3.7325870990753174, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 1.1141670942306519, "logits/rejected": 1.0108463764190674, "loss": 1.3312, "step": 338 }, { "beta_dpo/beta_used": 0.21938511729240417, "beta_dpo/beta_used_raw": 0.21938511729240417, "beta_dpo/gap_mean": 9.198416709899902, "beta_dpo/gap_std": 16.58599090576172, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5124716553287982, "grad_norm": 52.04912567138672, "learning_rate": 2.842694572172736e-07, "logits/chosen": 1.6343212127685547, "logits/rejected": 1.2077702283859253, "loss": 1.0037, "step": 339 }, { "beta_dpo/beta_used": 0.01670524850487709, "beta_dpo/beta_used_raw": 0.01670524850487709, "beta_dpo/gap_mean": 9.110252380371094, "beta_dpo/gap_std": 16.77898406982422, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5139833711262283, "grad_norm": 5.165824890136719, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 1.2082417011260986, "logits/rejected": 1.0188452005386353, "loss": 1.2743, "step": 340 }, { "beta_dpo/beta_used": 0.3238562047481537, "beta_dpo/beta_used_raw": 0.24568364024162292, "beta_dpo/gap_mean": 9.692229270935059, "beta_dpo/gap_std": 16.962947845458984, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5154950869236583, "grad_norm": 52.99232864379883, "learning_rate": 2.816481133934373e-07, "logits/chosen": 1.5995168685913086, "logits/rejected": 1.4972131252288818, "loss": 0.9498, "step": 341 }, { "beta_dpo/beta_used": 0.15345560014247894, "beta_dpo/beta_used_raw": 0.07748877257108688, "beta_dpo/gap_mean": 9.49228286743164, "beta_dpo/gap_std": 16.987442016601562, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5170068027210885, "grad_norm": 44.00324630737305, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 1.4441843032836914, "logits/rejected": 1.491701364517212, "loss": 1.3024, "step": 342 }, { "beta_dpo/beta_used": 0.08163314312696457, "beta_dpo/beta_used_raw": -0.1123107373714447, "beta_dpo/gap_mean": 9.09318733215332, "beta_dpo/gap_std": 16.66823387145996, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5185185185185185, "grad_norm": 20.944377899169922, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 1.295729637145996, "logits/rejected": 1.4093396663665771, "loss": 1.0645, "step": 343 }, { "beta_dpo/beta_used": 0.09156259149312973, "beta_dpo/beta_used_raw": 0.09156259149312973, "beta_dpo/gap_mean": 9.121692657470703, "beta_dpo/gap_std": 16.57402229309082, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5200302343159486, "grad_norm": 24.541534423828125, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 1.721388339996338, "logits/rejected": 1.4250373840332031, "loss": 1.1417, "step": 344 }, { "beta_dpo/beta_used": 0.19141808152198792, "beta_dpo/beta_used_raw": 0.19141808152198792, "beta_dpo/gap_mean": 9.139965057373047, "beta_dpo/gap_std": 16.651588439941406, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5215419501133787, "grad_norm": 40.367469787597656, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 1.4737789630889893, "logits/rejected": 1.4216864109039307, "loss": 0.7941, "step": 345 }, { "beta_dpo/beta_used": 0.09768233448266983, "beta_dpo/beta_used_raw": 0.09768233448266983, "beta_dpo/gap_mean": 9.070549011230469, "beta_dpo/gap_std": 16.83832550048828, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5230536659108088, "grad_norm": 27.266891479492188, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 1.2734328508377075, "logits/rejected": 1.149863839149475, "loss": 1.2551, "step": 346 }, { "beta_dpo/beta_used": 0.02022167667746544, "beta_dpo/beta_used_raw": -0.12329346686601639, "beta_dpo/gap_mean": 8.993759155273438, "beta_dpo/gap_std": 17.17070198059082, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5245653817082389, "grad_norm": 7.088632583618164, "learning_rate": 2.737640108260456e-07, "logits/chosen": 1.9005743265151978, "logits/rejected": 1.7757856845855713, "loss": 1.2606, "step": 347 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09922761470079422, "beta_dpo/gap_mean": 9.317571640014648, "beta_dpo/gap_std": 17.419286727905273, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5260770975056689, "grad_norm": 0.33556678891181946, "learning_rate": 2.724474525774229e-07, "logits/chosen": 1.6129817962646484, "logits/rejected": 1.5542514324188232, "loss": 1.3806, "step": 348 }, { "beta_dpo/beta_used": 0.17093956470489502, "beta_dpo/beta_used_raw": 0.17093956470489502, "beta_dpo/gap_mean": 9.49870777130127, "beta_dpo/gap_std": 17.64126205444336, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.527588813303099, "grad_norm": 42.406131744384766, "learning_rate": 2.711302664252973e-07, "logits/chosen": 1.3581299781799316, "logits/rejected": 1.0760269165039062, "loss": 1.1208, "step": 349 }, { "beta_dpo/beta_used": 0.16534699499607086, "beta_dpo/beta_used_raw": 0.16534699499607086, "beta_dpo/gap_mean": 10.416524887084961, "beta_dpo/gap_std": 17.57219886779785, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5291005291005291, "grad_norm": 27.851696014404297, "learning_rate": 2.698124892141971e-07, "logits/chosen": 1.536478042602539, "logits/rejected": 1.4709566831588745, "loss": 0.8406, "step": 350 }, { "beta_dpo/beta_used": 0.19340933859348297, "beta_dpo/beta_used_raw": 0.19340933859348297, "beta_dpo/gap_mean": 10.431373596191406, "beta_dpo/gap_std": 17.373821258544922, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5306122448979592, "grad_norm": 49.59145736694336, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 1.3625681400299072, "logits/rejected": 0.9931057691574097, "loss": 0.7915, "step": 351 }, { "beta_dpo/beta_used": 0.06435239315032959, "beta_dpo/beta_used_raw": 0.02895892783999443, "beta_dpo/gap_mean": 10.19567584991455, "beta_dpo/gap_std": 17.5516357421875, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5321239606953893, "grad_norm": 24.568038940429688, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 1.1959900856018066, "logits/rejected": 1.2047438621520996, "loss": 1.2503, "step": 352 }, { "beta_dpo/beta_used": 0.20792317390441895, "beta_dpo/beta_used_raw": 0.20792317390441895, "beta_dpo/gap_mean": 9.921865463256836, "beta_dpo/gap_std": 17.66985321044922, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5336356764928194, "grad_norm": 59.17499923706055, "learning_rate": 2.658559799141411e-07, "logits/chosen": 1.4057915210723877, "logits/rejected": 1.0512161254882812, "loss": 0.9536, "step": 353 }, { "beta_dpo/beta_used": 0.11257414519786835, "beta_dpo/beta_used_raw": 0.11257414519786835, "beta_dpo/gap_mean": 10.141077041625977, "beta_dpo/gap_std": 17.43115997314453, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5351473922902494, "grad_norm": 26.042173385620117, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 1.1837090253829956, "logits/rejected": 1.1246318817138672, "loss": 0.9353, "step": 354 }, { "beta_dpo/beta_used": 0.05702915042638779, "beta_dpo/beta_used_raw": 0.05702915042638779, "beta_dpo/gap_mean": 10.287775039672852, "beta_dpo/gap_std": 17.543479919433594, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.5366591080876795, "grad_norm": 12.683091163635254, "learning_rate": 2.632160279321328e-07, "logits/chosen": 1.833913803100586, "logits/rejected": 1.4571876525878906, "loss": 1.0623, "step": 355 }, { "beta_dpo/beta_used": 0.1175190806388855, "beta_dpo/beta_used_raw": 0.1175190806388855, "beta_dpo/gap_mean": 10.312297821044922, "beta_dpo/gap_std": 17.510921478271484, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5381708238851096, "grad_norm": 24.64614486694336, "learning_rate": 2.618954789559356e-07, "logits/chosen": 1.7809038162231445, "logits/rejected": 1.408195972442627, "loss": 0.9348, "step": 356 }, { "beta_dpo/beta_used": 0.027612989768385887, "beta_dpo/beta_used_raw": -0.05110103636980057, "beta_dpo/gap_mean": 10.247565269470215, "beta_dpo/gap_std": 17.473857879638672, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.5396825396825397, "grad_norm": 10.66695499420166, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 1.309868335723877, "logits/rejected": 0.9117208123207092, "loss": 1.2033, "step": 357 }, { "beta_dpo/beta_used": 0.10322414338588715, "beta_dpo/beta_used_raw": 0.10322414338588715, "beta_dpo/gap_mean": 10.111295700073242, "beta_dpo/gap_std": 17.269624710083008, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5411942554799698, "grad_norm": 29.352130889892578, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 0.8893525004386902, "logits/rejected": 0.9518415927886963, "loss": 0.9401, "step": 358 }, { "beta_dpo/beta_used": 0.011187026277184486, "beta_dpo/beta_used_raw": -0.0717354491353035, "beta_dpo/gap_mean": 9.60980224609375, "beta_dpo/gap_std": 16.95635986328125, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5427059712773998, "grad_norm": 6.215968608856201, "learning_rate": 2.579319833745169e-07, "logits/chosen": 1.6677427291870117, "logits/rejected": 1.8107473850250244, "loss": 1.3071, "step": 359 }, { "beta_dpo/beta_used": 0.052680958062410355, "beta_dpo/beta_used_raw": -0.013556074351072311, "beta_dpo/gap_mean": 9.709366798400879, "beta_dpo/gap_std": 16.987445831298828, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.54421768707483, "grad_norm": 16.22953987121582, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 1.0773403644561768, "logits/rejected": 0.6769781708717346, "loss": 1.1578, "step": 360 }, { "beta_dpo/beta_used": 0.08674684911966324, "beta_dpo/beta_used_raw": 0.0780373364686966, "beta_dpo/gap_mean": 9.90941333770752, "beta_dpo/gap_std": 17.054189682006836, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.54572940287226, "grad_norm": 20.603309631347656, "learning_rate": 2.552884820191154e-07, "logits/chosen": 1.3290486335754395, "logits/rejected": 1.2161469459533691, "loss": 1.0395, "step": 361 }, { "beta_dpo/beta_used": 0.13358384370803833, "beta_dpo/beta_used_raw": 0.09427288174629211, "beta_dpo/gap_mean": 9.608295440673828, "beta_dpo/gap_std": 16.78176498413086, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.54724111866969, "grad_norm": 25.390155792236328, "learning_rate": 2.53966490958702e-07, "logits/chosen": 1.6379358768463135, "logits/rejected": 1.2990127801895142, "loss": 1.0946, "step": 362 }, { "beta_dpo/beta_used": 0.12207494676113129, "beta_dpo/beta_used_raw": 0.12207494676113129, "beta_dpo/gap_mean": 10.08150577545166, "beta_dpo/gap_std": 16.658737182617188, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5487528344671202, "grad_norm": 32.736759185791016, "learning_rate": 2.526443889470099e-07, "logits/chosen": 1.4099113941192627, "logits/rejected": 0.814749002456665, "loss": 0.843, "step": 363 }, { "beta_dpo/beta_used": 0.13381703197956085, "beta_dpo/beta_used_raw": 0.13381703197956085, "beta_dpo/gap_mean": 10.593009948730469, "beta_dpo/gap_std": 17.01114845275879, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5502645502645502, "grad_norm": 44.58602523803711, "learning_rate": 2.513222129660744e-07, "logits/chosen": 1.493070125579834, "logits/rejected": 1.1450066566467285, "loss": 1.0415, "step": 364 }, { "beta_dpo/beta_used": 0.04936742037534714, "beta_dpo/beta_used_raw": 0.04936742037534714, "beta_dpo/gap_mean": 10.426152229309082, "beta_dpo/gap_std": 16.404680252075195, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5517762660619804, "grad_norm": 12.130597114562988, "learning_rate": 2.5e-07, "logits/chosen": 1.6267120838165283, "logits/rejected": 1.6452577114105225, "loss": 1.0652, "step": 365 }, { "beta_dpo/beta_used": 0.07397685199975967, "beta_dpo/beta_used_raw": 0.07397685199975967, "beta_dpo/gap_mean": 10.14107894897461, "beta_dpo/gap_std": 16.657485961914062, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5532879818594104, "grad_norm": 18.45604133605957, "learning_rate": 2.486777870339255e-07, "logits/chosen": 1.6944191455841064, "logits/rejected": 1.6772571802139282, "loss": 1.0363, "step": 366 }, { "beta_dpo/beta_used": 0.07414257526397705, "beta_dpo/beta_used_raw": 0.0723227709531784, "beta_dpo/gap_mean": 9.737115859985352, "beta_dpo/gap_std": 16.248645782470703, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5547996976568406, "grad_norm": 17.768054962158203, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 1.6562645435333252, "logits/rejected": 1.3367321491241455, "loss": 1.044, "step": 367 }, { "beta_dpo/beta_used": 0.12580986320972443, "beta_dpo/beta_used_raw": 0.1125003844499588, "beta_dpo/gap_mean": 9.598726272583008, "beta_dpo/gap_std": 16.235118865966797, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5563114134542706, "grad_norm": 30.600473403930664, "learning_rate": 2.46033509041298e-07, "logits/chosen": 1.194272518157959, "logits/rejected": 1.42368483543396, "loss": 0.9831, "step": 368 }, { "beta_dpo/beta_used": 0.022077616304159164, "beta_dpo/beta_used_raw": 0.022077616304159164, "beta_dpo/gap_mean": 9.324756622314453, "beta_dpo/gap_std": 16.485767364501953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5578231292517006, "grad_norm": 7.461435794830322, "learning_rate": 2.447115179808846e-07, "logits/chosen": 1.2820395231246948, "logits/rejected": 0.9497278332710266, "loss": 1.2487, "step": 369 }, { "beta_dpo/beta_used": 0.23217812180519104, "beta_dpo/beta_used_raw": 0.23217812180519104, "beta_dpo/gap_mean": 9.887712478637695, "beta_dpo/gap_std": 16.934602737426758, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5593348450491308, "grad_norm": 57.79582977294922, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 1.8189184665679932, "logits/rejected": 1.7541735172271729, "loss": 0.9118, "step": 370 }, { "beta_dpo/beta_used": 0.11088813841342926, "beta_dpo/beta_used_raw": 0.09978704899549484, "beta_dpo/gap_mean": 9.728986740112305, "beta_dpo/gap_std": 17.203359603881836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5608465608465608, "grad_norm": 31.766374588012695, "learning_rate": 2.420680166254831e-07, "logits/chosen": 2.12520432472229, "logits/rejected": 2.003981113433838, "loss": 1.1124, "step": 371 }, { "beta_dpo/beta_used": 0.12042045593261719, "beta_dpo/beta_used_raw": 0.08532939851284027, "beta_dpo/gap_mean": 9.579994201660156, "beta_dpo/gap_std": 17.36013412475586, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.562358276643991, "grad_norm": 43.5960578918457, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 1.1307945251464844, "logits/rejected": 0.8450255990028381, "loss": 1.3893, "step": 372 }, { "beta_dpo/beta_used": 0.015285984613001347, "beta_dpo/beta_used_raw": 0.011251095682382584, "beta_dpo/gap_mean": 9.891489028930664, "beta_dpo/gap_std": 17.593887329101562, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.563869992441421, "grad_norm": 6.121160507202148, "learning_rate": 2.394254027623792e-07, "logits/chosen": 1.755456805229187, "logits/rejected": 1.4722647666931152, "loss": 1.2721, "step": 373 }, { "beta_dpo/beta_used": 0.3823484182357788, "beta_dpo/beta_used_raw": 0.3823484182357788, "beta_dpo/gap_mean": 10.362098693847656, "beta_dpo/gap_std": 17.728212356567383, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5653817082388511, "grad_norm": 103.98133087158203, "learning_rate": 2.381045210440644e-07, "logits/chosen": 1.1939418315887451, "logits/rejected": 0.8221108913421631, "loss": 1.0492, "step": 374 }, { "beta_dpo/beta_used": 0.10127855837345123, "beta_dpo/beta_used_raw": 0.10127855837345123, "beta_dpo/gap_mean": 10.388755798339844, "beta_dpo/gap_std": 17.460376739501953, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5668934240362812, "grad_norm": 22.008392333984375, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 1.6911749839782715, "logits/rejected": 1.3473389148712158, "loss": 0.8548, "step": 375 }, { "beta_dpo/beta_used": 0.07813645899295807, "beta_dpo/beta_used_raw": 0.07813645899295807, "beta_dpo/gap_mean": 10.727385520935059, "beta_dpo/gap_std": 17.87335205078125, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5684051398337112, "grad_norm": 21.66082000732422, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 0.9075419902801514, "logits/rejected": 1.0352015495300293, "loss": 1.15, "step": 376 }, { "beta_dpo/beta_used": 0.043544746935367584, "beta_dpo/beta_used_raw": 0.043544746935367584, "beta_dpo/gap_mean": 10.423905372619629, "beta_dpo/gap_std": 17.556154251098633, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5699168556311414, "grad_norm": 10.022980690002441, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 1.7869625091552734, "logits/rejected": 1.7410027980804443, "loss": 1.1213, "step": 377 }, { "beta_dpo/beta_used": 0.028150945901870728, "beta_dpo/beta_used_raw": 0.023979444056749344, "beta_dpo/gap_mean": 9.919803619384766, "beta_dpo/gap_std": 17.070709228515625, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5714285714285714, "grad_norm": 9.40400505065918, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 1.5019217729568481, "logits/rejected": 1.3452924489974976, "loss": 1.2086, "step": 378 }, { "beta_dpo/beta_used": 0.29144594073295593, "beta_dpo/beta_used_raw": 0.29144594073295593, "beta_dpo/gap_mean": 10.127754211425781, "beta_dpo/gap_std": 17.333236694335938, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5729402872260015, "grad_norm": 100.93724060058594, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 1.4389901161193848, "logits/rejected": 1.281882882118225, "loss": 0.9985, "step": 379 }, { "beta_dpo/beta_used": 0.37376725673675537, "beta_dpo/beta_used_raw": 0.37376725673675537, "beta_dpo/gap_mean": 10.577753067016602, "beta_dpo/gap_std": 17.541908264160156, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5744520030234316, "grad_norm": 66.6102523803711, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 1.1558235883712769, "logits/rejected": 1.2484815120697021, "loss": 0.8812, "step": 380 }, { "beta_dpo/beta_used": 0.06865327805280685, "beta_dpo/beta_used_raw": 0.010333731770515442, "beta_dpo/gap_mean": 10.033830642700195, "beta_dpo/gap_std": 17.42238998413086, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5759637188208617, "grad_norm": 19.617332458496094, "learning_rate": 2.288697335747027e-07, "logits/chosen": 1.6343696117401123, "logits/rejected": 1.4790246486663818, "loss": 1.1353, "step": 381 }, { "beta_dpo/beta_used": 0.06595531105995178, "beta_dpo/beta_used_raw": -0.021138787269592285, "beta_dpo/gap_mean": 9.706841468811035, "beta_dpo/gap_std": 17.24261474609375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5774754346182918, "grad_norm": 16.492521286010742, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 1.7128905057907104, "logits/rejected": 1.4657937288284302, "loss": 1.1008, "step": 382 }, { "beta_dpo/beta_used": 0.19009645283222198, "beta_dpo/beta_used_raw": 0.19009645283222198, "beta_dpo/gap_mean": 9.920913696289062, "beta_dpo/gap_std": 17.623497009277344, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5789871504157218, "grad_norm": 43.94319534301758, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 1.3148654699325562, "logits/rejected": 1.2381043434143066, "loss": 0.926, "step": 383 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06760846078395844, "beta_dpo/gap_mean": 10.044574737548828, "beta_dpo/gap_std": 17.557830810546875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5804988662131519, "grad_norm": 0.30568212270736694, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 1.3191230297088623, "logits/rejected": 1.470552921295166, "loss": 1.379, "step": 384 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08156859129667282, "beta_dpo/gap_mean": 10.185813903808594, "beta_dpo/gap_std": 17.263328552246094, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.582010582010582, "grad_norm": 0.2859506607055664, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 1.271431565284729, "logits/rejected": 1.160420298576355, "loss": 1.3792, "step": 385 }, { "beta_dpo/beta_used": 0.0168894175440073, "beta_dpo/beta_used_raw": 0.012856299057602882, "beta_dpo/gap_mean": 9.815888404846191, "beta_dpo/gap_std": 17.33496856689453, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5835222978080121, "grad_norm": 6.143118858337402, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 1.4685890674591064, "logits/rejected": 1.161041259765625, "loss": 1.2644, "step": 386 }, { "beta_dpo/beta_used": 0.03634551912546158, "beta_dpo/beta_used_raw": -0.024081122130155563, "beta_dpo/gap_mean": 9.894031524658203, "beta_dpo/gap_std": 17.30755615234375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5850340136054422, "grad_norm": 15.074189186096191, "learning_rate": 2.209767714686924e-07, "logits/chosen": 1.6589672565460205, "logits/rejected": 1.4474884271621704, "loss": 1.2562, "step": 387 }, { "beta_dpo/beta_used": 0.16004110872745514, "beta_dpo/beta_used_raw": 0.08152688294649124, "beta_dpo/gap_mean": 9.766632080078125, "beta_dpo/gap_std": 17.252300262451172, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5865457294028723, "grad_norm": 66.86634063720703, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 1.8072469234466553, "logits/rejected": 1.4911160469055176, "loss": 1.1245, "step": 388 }, { "beta_dpo/beta_used": 0.09485035389661789, "beta_dpo/beta_used_raw": 0.050514545291662216, "beta_dpo/gap_mean": 9.639822006225586, "beta_dpo/gap_std": 16.98550796508789, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5880574452003023, "grad_norm": 32.758907318115234, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 1.597560167312622, "logits/rejected": 1.378977656364441, "loss": 1.3376, "step": 389 }, { "beta_dpo/beta_used": 0.02094288542866707, "beta_dpo/beta_used_raw": -0.003135114908218384, "beta_dpo/gap_mean": 9.789543151855469, "beta_dpo/gap_std": 16.734346389770508, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5895691609977324, "grad_norm": 7.311458587646484, "learning_rate": 2.170407537241599e-07, "logits/chosen": 1.0474040508270264, "logits/rejected": 0.9567930698394775, "loss": 1.2509, "step": 390 }, { "beta_dpo/beta_used": 0.20918205380439758, "beta_dpo/beta_used_raw": 0.20918205380439758, "beta_dpo/gap_mean": 10.458446502685547, "beta_dpo/gap_std": 17.252222061157227, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5910808767951625, "grad_norm": 55.30534362792969, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 1.5581355094909668, "logits/rejected": 1.4264538288116455, "loss": 1.0945, "step": 391 }, { "beta_dpo/beta_used": 0.16335958242416382, "beta_dpo/beta_used_raw": 0.16335958242416382, "beta_dpo/gap_mean": 11.235108375549316, "beta_dpo/gap_std": 17.644351959228516, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5925925925925926, "grad_norm": 34.14118576049805, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 1.8860807418823242, "logits/rejected": 1.8984272480010986, "loss": 0.841, "step": 392 }, { "beta_dpo/beta_used": 0.006513515952974558, "beta_dpo/beta_used_raw": -0.04768542945384979, "beta_dpo/gap_mean": 11.507149696350098, "beta_dpo/gap_std": 17.389968872070312, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5941043083900227, "grad_norm": 2.505648612976074, "learning_rate": 2.131130332936195e-07, "logits/chosen": 0.96453857421875, "logits/rejected": 0.9612942934036255, "loss": 1.3245, "step": 393 }, { "beta_dpo/beta_used": 0.09355347603559494, "beta_dpo/beta_used_raw": 0.07451394945383072, "beta_dpo/gap_mean": 11.174118041992188, "beta_dpo/gap_std": 16.89433479309082, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5956160241874527, "grad_norm": 21.494396209716797, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 1.8636196851730347, "logits/rejected": 1.477508783340454, "loss": 0.964, "step": 394 }, { "beta_dpo/beta_used": 0.055694933980703354, "beta_dpo/beta_used_raw": -0.04353347793221474, "beta_dpo/gap_mean": 10.723880767822266, "beta_dpo/gap_std": 16.56464385986328, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5971277399848829, "grad_norm": 14.470325469970703, "learning_rate": 2.104996510066625e-07, "logits/chosen": 1.576015830039978, "logits/rejected": 1.2373056411743164, "loss": 1.1004, "step": 395 }, { "beta_dpo/beta_used": 0.1036173403263092, "beta_dpo/beta_used_raw": 0.1036173403263092, "beta_dpo/gap_mean": 10.984663963317871, "beta_dpo/gap_std": 16.140155792236328, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5986394557823129, "grad_norm": 19.261430740356445, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 1.497571587562561, "logits/rejected": 1.4676814079284668, "loss": 0.6805, "step": 396 }, { "beta_dpo/beta_used": 0.04165812209248543, "beta_dpo/beta_used_raw": 0.029148761183023453, "beta_dpo/gap_mean": 10.157581329345703, "beta_dpo/gap_std": 15.98454475402832, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.600151171579743, "grad_norm": 14.16849136352539, "learning_rate": 2.078906883274924e-07, "logits/chosen": 1.41060209274292, "logits/rejected": 1.3652551174163818, "loss": 1.1823, "step": 397 }, { "beta_dpo/beta_used": 0.07533486187458038, "beta_dpo/beta_used_raw": 0.07533486187458038, "beta_dpo/gap_mean": 10.367633819580078, "beta_dpo/gap_std": 16.406509399414062, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6016628873771731, "grad_norm": 20.259742736816406, "learning_rate": 2.065879555832674e-07, "logits/chosen": 1.3536689281463623, "logits/rejected": 0.9888167381286621, "loss": 1.118, "step": 398 }, { "beta_dpo/beta_used": 0.042631130665540695, "beta_dpo/beta_used_raw": -0.033679697662591934, "beta_dpo/gap_mean": 10.726426124572754, "beta_dpo/gap_std": 16.663000106811523, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6031746031746031, "grad_norm": 14.0131254196167, "learning_rate": 2.052864371672457e-07, "logits/chosen": 1.7245910167694092, "logits/rejected": 1.4539391994476318, "loss": 1.1571, "step": 399 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17764988541603088, "beta_dpo/gap_mean": 10.223685264587402, "beta_dpo/gap_std": 16.621475219726562, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6046863189720333, "grad_norm": 0.3649911880493164, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 1.7496578693389893, "logits/rejected": 1.7105956077575684, "loss": 1.3806, "step": 400 }, { "epoch": 0.6046863189720333, "eval_beta_dpo/beta_used": 0.1405337005853653, "eval_beta_dpo/beta_used_raw": 0.1188855767250061, "eval_beta_dpo/gap_mean": 9.9655179977417, "eval_beta_dpo/gap_std": 16.57029914855957, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.4474022388458252, "eval_logits/rejected": 1.3215140104293823, "eval_loss": 0.6464195251464844, "eval_runtime": 42.5646, "eval_samples_per_second": 54.106, "eval_steps_per_second": 1.692, "step": 400 }, { "beta_dpo/beta_used": 0.17774954438209534, "beta_dpo/beta_used_raw": 0.17774954438209534, "beta_dpo/gap_mean": 10.540786743164062, "beta_dpo/gap_std": 16.417646408081055, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6061980347694633, "grad_norm": 23.48154640197754, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 0.7174030542373657, "logits/rejected": 0.683144211769104, "loss": 0.6649, "step": 401 }, { "beta_dpo/beta_used": 0.14793431758880615, "beta_dpo/beta_used_raw": 0.14793431758880615, "beta_dpo/gap_mean": 10.927159309387207, "beta_dpo/gap_std": 16.666133880615234, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6077097505668935, "grad_norm": 36.50631332397461, "learning_rate": 2.013895317751323e-07, "logits/chosen": 1.9912834167480469, "logits/rejected": 1.5349533557891846, "loss": 0.8941, "step": 402 }, { "beta_dpo/beta_used": 0.07585098594427109, "beta_dpo/beta_used_raw": -0.03452427685260773, "beta_dpo/gap_mean": 11.218865394592285, "beta_dpo/gap_std": 17.455312728881836, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6092214663643235, "grad_norm": 17.68917465209961, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 1.6365692615509033, "logits/rejected": 1.36814284324646, "loss": 1.0721, "step": 403 }, { "beta_dpo/beta_used": 0.16760532557964325, "beta_dpo/beta_used_raw": 0.16760532557964325, "beta_dpo/gap_mean": 11.889843940734863, "beta_dpo/gap_std": 17.469863891601562, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6107331821617535, "grad_norm": 30.121400833129883, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 1.6164308786392212, "logits/rejected": 1.756433129310608, "loss": 0.8641, "step": 404 }, { "beta_dpo/beta_used": 0.05700894072651863, "beta_dpo/beta_used_raw": -0.1042499840259552, "beta_dpo/gap_mean": 11.69076156616211, "beta_dpo/gap_std": 17.205629348754883, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6122448979591837, "grad_norm": 18.09614372253418, "learning_rate": 1.975048638084379e-07, "logits/chosen": 1.9099351167678833, "logits/rejected": 1.6809766292572021, "loss": 1.0989, "step": 405 }, { "beta_dpo/beta_used": 0.2577747702598572, "beta_dpo/beta_used_raw": 0.2577747702598572, "beta_dpo/gap_mean": 11.609317779541016, "beta_dpo/gap_std": 17.342086791992188, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6137566137566137, "grad_norm": 38.84489059448242, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 1.4694623947143555, "logits/rejected": 0.9219260215759277, "loss": 0.7517, "step": 406 }, { "beta_dpo/beta_used": 0.09797775745391846, "beta_dpo/beta_used_raw": -0.01702454686164856, "beta_dpo/gap_mean": 11.227970123291016, "beta_dpo/gap_std": 17.10640525817871, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6152683295540439, "grad_norm": 30.631189346313477, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 1.7332323789596558, "logits/rejected": 1.6841402053833008, "loss": 1.0343, "step": 407 }, { "beta_dpo/beta_used": 0.020529722794890404, "beta_dpo/beta_used_raw": -0.0022693034261465073, "beta_dpo/gap_mean": 11.435039520263672, "beta_dpo/gap_std": 16.896324157714844, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6167800453514739, "grad_norm": 7.630633354187012, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 1.5824682712554932, "logits/rejected": 1.3653960227966309, "loss": 1.2268, "step": 408 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.31050288677215576, "beta_dpo/gap_mean": 11.112115859985352, "beta_dpo/gap_std": 17.006160736083984, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.618291761148904, "grad_norm": 0.287369966506958, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 1.9061976671218872, "logits/rejected": 2.0974600315093994, "loss": 1.3822, "step": 409 }, { "beta_dpo/beta_used": 0.07312033325433731, "beta_dpo/beta_used_raw": 0.04204032942652702, "beta_dpo/gap_mean": 11.199286460876465, "beta_dpo/gap_std": 16.997974395751953, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6198034769463341, "grad_norm": 22.923328399658203, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 1.3687880039215088, "logits/rejected": 1.413557529449463, "loss": 1.08, "step": 410 }, { "beta_dpo/beta_used": 0.07877589762210846, "beta_dpo/beta_used_raw": 0.04541406035423279, "beta_dpo/gap_mean": 11.319759368896484, "beta_dpo/gap_std": 17.231704711914062, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6213151927437641, "grad_norm": 25.793495178222656, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 1.318861961364746, "logits/rejected": 1.349341869354248, "loss": 1.1873, "step": 411 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09030976891517639, "beta_dpo/gap_mean": 11.505082130432129, "beta_dpo/gap_std": 17.38372802734375, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6228269085411943, "grad_norm": 0.42393141984939575, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 1.7599655389785767, "logits/rejected": 1.9180841445922852, "loss": 1.378, "step": 412 }, { "beta_dpo/beta_used": 0.05450423061847687, "beta_dpo/beta_used_raw": -0.029762066900730133, "beta_dpo/gap_mean": 11.014404296875, "beta_dpo/gap_std": 17.597949981689453, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6243386243386243, "grad_norm": 19.526386260986328, "learning_rate": 1.872130032047302e-07, "logits/chosen": 0.9658557772636414, "logits/rejected": 0.658934473991394, "loss": 1.1757, "step": 413 }, { "beta_dpo/beta_used": 0.0588601678609848, "beta_dpo/beta_used_raw": 0.021088402718305588, "beta_dpo/gap_mean": 11.073143005371094, "beta_dpo/gap_std": 17.767539978027344, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6258503401360545, "grad_norm": 17.184946060180664, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 1.2476868629455566, "logits/rejected": 1.127249002456665, "loss": 1.1053, "step": 414 }, { "beta_dpo/beta_used": 0.11692694574594498, "beta_dpo/beta_used_raw": 0.06981240957975388, "beta_dpo/gap_mean": 11.076013565063477, "beta_dpo/gap_std": 18.022043228149414, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6273620559334845, "grad_norm": 56.64924621582031, "learning_rate": 1.846568829074628e-07, "logits/chosen": 1.4461565017700195, "logits/rejected": 1.6651735305786133, "loss": 1.5016, "step": 415 }, { "beta_dpo/beta_used": 0.01583676040172577, "beta_dpo/beta_used_raw": -0.06813767552375793, "beta_dpo/gap_mean": 10.323577880859375, "beta_dpo/gap_std": 18.21762466430664, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6288737717309146, "grad_norm": 5.505179405212402, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 1.4125094413757324, "logits/rejected": 1.162198781967163, "loss": 1.2861, "step": 416 }, { "beta_dpo/beta_used": 0.15743154287338257, "beta_dpo/beta_used_raw": 0.11534958332777023, "beta_dpo/gap_mean": 10.844054222106934, "beta_dpo/gap_std": 18.56551742553711, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6303854875283447, "grad_norm": 34.31191635131836, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 1.694962501525879, "logits/rejected": 1.656688928604126, "loss": 1.2438, "step": 417 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11418265104293823, "beta_dpo/gap_mean": 10.605875015258789, "beta_dpo/gap_std": 18.41242790222168, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6318972033257747, "grad_norm": 0.30966585874557495, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 2.298833131790161, "logits/rejected": 2.1432628631591797, "loss": 1.3801, "step": 418 }, { "beta_dpo/beta_used": 0.22758902609348297, "beta_dpo/beta_used_raw": 0.22758902609348297, "beta_dpo/gap_mean": 11.043691635131836, "beta_dpo/gap_std": 18.33002281188965, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.6334089191232048, "grad_norm": 28.69251823425293, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 1.6633756160736084, "logits/rejected": 1.2387137413024902, "loss": 0.7578, "step": 419 }, { "beta_dpo/beta_used": 0.038611479103565216, "beta_dpo/beta_used_raw": -0.06987833231687546, "beta_dpo/gap_mean": 10.993532180786133, "beta_dpo/gap_std": 18.756433486938477, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6349206349206349, "grad_norm": 11.58281421661377, "learning_rate": 1.782991918222275e-07, "logits/chosen": 1.3712520599365234, "logits/rejected": 1.3558213710784912, "loss": 1.2225, "step": 420 }, { "beta_dpo/beta_used": 0.2914969325065613, "beta_dpo/beta_used_raw": 0.2914969325065613, "beta_dpo/gap_mean": 10.698333740234375, "beta_dpo/gap_std": 19.29578399658203, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.636432350718065, "grad_norm": 79.03998565673828, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 2.0994133949279785, "logits/rejected": 1.548837661743164, "loss": 1.6998, "step": 421 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10875105112791061, "beta_dpo/gap_mean": 10.545480728149414, "beta_dpo/gap_std": 19.398765563964844, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6379440665154951, "grad_norm": 0.35578790307044983, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 1.771589756011963, "logits/rejected": 1.5337142944335938, "loss": 1.3794, "step": 422 }, { "beta_dpo/beta_used": 0.20271146297454834, "beta_dpo/beta_used_raw": 0.20271146297454834, "beta_dpo/gap_mean": 10.905920028686523, "beta_dpo/gap_std": 19.073719024658203, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.6394557823129252, "grad_norm": 37.11328125, "learning_rate": 1.745083602306071e-07, "logits/chosen": 1.7254886627197266, "logits/rejected": 1.634531021118164, "loss": 0.8132, "step": 423 }, { "beta_dpo/beta_used": 0.08102002739906311, "beta_dpo/beta_used_raw": 0.05924910679459572, "beta_dpo/gap_mean": 11.04400634765625, "beta_dpo/gap_std": 18.683273315429688, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6409674981103552, "grad_norm": 20.051164627075195, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 1.4589695930480957, "logits/rejected": 1.436366319656372, "loss": 1.0276, "step": 424 }, { "beta_dpo/beta_used": 0.19659112393856049, "beta_dpo/beta_used_raw": 0.15749159455299377, "beta_dpo/gap_mean": 11.374787330627441, "beta_dpo/gap_std": 18.452198028564453, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6424792139077853, "grad_norm": 60.02362823486328, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 1.6291189193725586, "logits/rejected": 1.6020748615264893, "loss": 1.4147, "step": 425 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.274090051651001, "beta_dpo/gap_mean": 10.909863471984863, "beta_dpo/gap_std": 18.784690856933594, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6439909297052154, "grad_norm": 0.31475549936294556, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 1.2150731086730957, "logits/rejected": 1.204681634902954, "loss": 1.3823, "step": 426 }, { "beta_dpo/beta_used": 0.022039199247956276, "beta_dpo/beta_used_raw": -0.037459395825862885, "beta_dpo/gap_mean": 10.606595039367676, "beta_dpo/gap_std": 18.83213996887207, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6455026455026455, "grad_norm": 8.438140869140625, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 0.9421446919441223, "logits/rejected": 0.9893728494644165, "loss": 1.2392, "step": 427 }, { "beta_dpo/beta_used": 0.09935323894023895, "beta_dpo/beta_used_raw": 0.09935323894023895, "beta_dpo/gap_mean": 10.691535949707031, "beta_dpo/gap_std": 18.80581283569336, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6470143613000756, "grad_norm": 27.139223098754883, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 1.3903647661209106, "logits/rejected": 1.6309527158737183, "loss": 1.0183, "step": 428 }, { "beta_dpo/beta_used": 0.30730801820755005, "beta_dpo/beta_used_raw": 0.30730801820755005, "beta_dpo/gap_mean": 10.742720603942871, "beta_dpo/gap_std": 18.884178161621094, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6485260770975056, "grad_norm": 71.05332946777344, "learning_rate": 1.669846604344412e-07, "logits/chosen": 1.3571021556854248, "logits/rejected": 1.5711731910705566, "loss": 1.1584, "step": 429 }, { "beta_dpo/beta_used": 0.19743552803993225, "beta_dpo/beta_used_raw": 0.19743552803993225, "beta_dpo/gap_mean": 11.363592147827148, "beta_dpo/gap_std": 19.35413360595703, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6500377928949358, "grad_norm": 39.574615478515625, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 1.2930231094360352, "logits/rejected": 1.2674870491027832, "loss": 0.8395, "step": 430 }, { "beta_dpo/beta_used": 0.060200098901987076, "beta_dpo/beta_used_raw": 0.05172666907310486, "beta_dpo/gap_mean": 11.444803237915039, "beta_dpo/gap_std": 19.580089569091797, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6515495086923658, "grad_norm": 18.198490142822266, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 0.9209311604499817, "logits/rejected": 0.6936602592468262, "loss": 1.1647, "step": 431 }, { "beta_dpo/beta_used": 0.1908065676689148, "beta_dpo/beta_used_raw": 0.1908065676689148, "beta_dpo/gap_mean": 11.477436065673828, "beta_dpo/gap_std": 19.80697250366211, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6530612244897959, "grad_norm": 57.51634979248047, "learning_rate": 1.632536862810844e-07, "logits/chosen": 1.5439039468765259, "logits/rejected": 1.8144121170043945, "loss": 0.8417, "step": 432 }, { "beta_dpo/beta_used": 0.15694357454776764, "beta_dpo/beta_used_raw": 0.15694357454776764, "beta_dpo/gap_mean": 11.754386901855469, "beta_dpo/gap_std": 20.23770523071289, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.654572940287226, "grad_norm": 51.7684440612793, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 1.8874328136444092, "logits/rejected": 1.811736822128296, "loss": 1.1361, "step": 433 }, { "beta_dpo/beta_used": 0.17589187622070312, "beta_dpo/beta_used_raw": 0.17589187622070312, "beta_dpo/gap_mean": 12.395316123962402, "beta_dpo/gap_std": 20.479772567749023, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.656084656084656, "grad_norm": 76.38113403320312, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 1.633279800415039, "logits/rejected": 1.4938979148864746, "loss": 1.4175, "step": 434 }, { "beta_dpo/beta_used": 0.05628956854343414, "beta_dpo/beta_used_raw": 0.011582344770431519, "beta_dpo/gap_mean": 11.693523406982422, "beta_dpo/gap_std": 20.01717758178711, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6575963718820862, "grad_norm": 19.633525848388672, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 1.6384367942810059, "logits/rejected": 1.6007449626922607, "loss": 1.1721, "step": 435 }, { "beta_dpo/beta_used": 0.11185856908559799, "beta_dpo/beta_used_raw": 0.11185856908559799, "beta_dpo/gap_mean": 11.502742767333984, "beta_dpo/gap_std": 19.742431640625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6591080876795162, "grad_norm": 28.773508071899414, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 1.2981607913970947, "logits/rejected": 1.4774749279022217, "loss": 1.1959, "step": 436 }, { "beta_dpo/beta_used": 0.0465235635638237, "beta_dpo/beta_used_raw": 0.0465235635638237, "beta_dpo/gap_mean": 11.57576847076416, "beta_dpo/gap_std": 19.905479431152344, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6606198034769464, "grad_norm": 11.688758850097656, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 1.605779767036438, "logits/rejected": 1.3303096294403076, "loss": 1.0708, "step": 437 }, { "beta_dpo/beta_used": 0.17932583391666412, "beta_dpo/beta_used_raw": 0.11518719792366028, "beta_dpo/gap_mean": 11.271271705627441, "beta_dpo/gap_std": 19.939411163330078, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6621315192743764, "grad_norm": 51.175689697265625, "learning_rate": 1.558581854913253e-07, "logits/chosen": 1.687558889389038, "logits/rejected": 1.3356046676635742, "loss": 1.0644, "step": 438 }, { "beta_dpo/beta_used": 0.04887852445244789, "beta_dpo/beta_used_raw": 0.04887852445244789, "beta_dpo/gap_mean": 11.624393463134766, "beta_dpo/gap_std": 19.695316314697266, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6636432350718064, "grad_norm": 12.64301872253418, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 1.8754761219024658, "logits/rejected": 1.7036700248718262, "loss": 1.0783, "step": 439 }, { "beta_dpo/beta_used": 0.194175124168396, "beta_dpo/beta_used_raw": 0.194175124168396, "beta_dpo/gap_mean": 12.504231452941895, "beta_dpo/gap_std": 19.355581283569336, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6651549508692366, "grad_norm": 42.490901947021484, "learning_rate": 1.534137185767178e-07, "logits/chosen": 1.1825112104415894, "logits/rejected": 0.6874880790710449, "loss": 0.7751, "step": 440 }, { "beta_dpo/beta_used": 0.1130920946598053, "beta_dpo/beta_used_raw": -0.008399426937103271, "beta_dpo/gap_mean": 12.945587158203125, "beta_dpo/gap_std": 19.07444953918457, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6666666666666666, "grad_norm": 29.434162139892578, "learning_rate": 1.521955206326976e-07, "logits/chosen": 1.2224631309509277, "logits/rejected": 0.8368812799453735, "loss": 1.2321, "step": 441 }, { "beta_dpo/beta_used": 0.12853366136550903, "beta_dpo/beta_used_raw": -0.03535076975822449, "beta_dpo/gap_mean": 12.509725570678711, "beta_dpo/gap_std": 18.713966369628906, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.6681783824640968, "grad_norm": 40.52631759643555, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 1.8482825756072998, "logits/rejected": 1.715338945388794, "loss": 1.0151, "step": 442 }, { "beta_dpo/beta_used": 0.02449873648583889, "beta_dpo/beta_used_raw": 0.022881096228957176, "beta_dpo/gap_mean": 12.855720520019531, "beta_dpo/gap_std": 19.116792678833008, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6696900982615268, "grad_norm": 9.385747909545898, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 1.205538272857666, "logits/rejected": 1.0337432622909546, "loss": 1.1801, "step": 443 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18675100803375244, "beta_dpo/gap_mean": 12.065677642822266, "beta_dpo/gap_std": 19.102123260498047, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.671201814058957, "grad_norm": 0.38620585203170776, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 1.5884000062942505, "logits/rejected": 1.4578423500061035, "loss": 1.3789, "step": 444 }, { "beta_dpo/beta_used": 0.09681466966867447, "beta_dpo/beta_used_raw": 0.09337137639522552, "beta_dpo/gap_mean": 12.104242324829102, "beta_dpo/gap_std": 19.160350799560547, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.672713529856387, "grad_norm": 29.804115295410156, "learning_rate": 1.473504264745062e-07, "logits/chosen": 1.7354657649993896, "logits/rejected": 1.8415591716766357, "loss": 1.1427, "step": 445 }, { "beta_dpo/beta_used": 0.2682816982269287, "beta_dpo/beta_used_raw": 0.2269507795572281, "beta_dpo/gap_mean": 12.445560455322266, "beta_dpo/gap_std": 18.5366153717041, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.674225245653817, "grad_norm": 55.44277572631836, "learning_rate": 1.461462467495284e-07, "logits/chosen": 1.4882698059082031, "logits/rejected": 1.455931544303894, "loss": 1.0786, "step": 446 }, { "beta_dpo/beta_used": 0.07030583918094635, "beta_dpo/beta_used_raw": 0.07030583918094635, "beta_dpo/gap_mean": 12.943078994750977, "beta_dpo/gap_std": 18.398422241210938, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6757369614512472, "grad_norm": 16.960407257080078, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 1.478388786315918, "logits/rejected": 1.0553447008132935, "loss": 0.918, "step": 447 }, { "beta_dpo/beta_used": 0.11920321732759476, "beta_dpo/beta_used_raw": 0.11920321732759476, "beta_dpo/gap_mean": 12.699634552001953, "beta_dpo/gap_std": 18.55364227294922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6772486772486772, "grad_norm": 20.711158752441406, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 1.8860962390899658, "logits/rejected": 1.671408772468567, "loss": 0.9085, "step": 448 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20916791260242462, "beta_dpo/gap_mean": 11.895469665527344, "beta_dpo/gap_std": 18.483585357666016, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6787603930461074, "grad_norm": 0.3559426963329315, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 0.9508600831031799, "logits/rejected": 0.7793235182762146, "loss": 1.3797, "step": 449 }, { "beta_dpo/beta_used": 0.12843473255634308, "beta_dpo/beta_used_raw": 0.12843473255634308, "beta_dpo/gap_mean": 11.255586624145508, "beta_dpo/gap_std": 18.451894760131836, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6802721088435374, "grad_norm": 21.258712768554688, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 1.2503652572631836, "logits/rejected": 0.761991560459137, "loss": 0.8848, "step": 450 }, { "beta_dpo/beta_used": 0.0015382266137748957, "beta_dpo/beta_used_raw": -0.05390516668558121, "beta_dpo/gap_mean": 10.862272262573242, "beta_dpo/gap_std": 18.184444427490234, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6817838246409675, "grad_norm": 0.5277694463729858, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 1.491100549697876, "logits/rejected": 1.0314542055130005, "loss": 1.3738, "step": 451 }, { "beta_dpo/beta_used": 0.033183373510837555, "beta_dpo/beta_used_raw": -0.02967868000268936, "beta_dpo/gap_mean": 10.793105125427246, "beta_dpo/gap_std": 18.46420669555664, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6832955404383976, "grad_norm": 12.034192085266113, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 1.0540153980255127, "logits/rejected": 0.7840179204940796, "loss": 1.2231, "step": 452 }, { "beta_dpo/beta_used": 0.06273314356803894, "beta_dpo/beta_used_raw": 0.06273314356803894, "beta_dpo/gap_mean": 10.497642517089844, "beta_dpo/gap_std": 18.56969451904297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6848072562358276, "grad_norm": 16.02577781677246, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 1.416991949081421, "logits/rejected": 1.2110953330993652, "loss": 1.1344, "step": 453 }, { "beta_dpo/beta_used": 0.1319224089384079, "beta_dpo/beta_used_raw": 0.1319224089384079, "beta_dpo/gap_mean": 10.963911056518555, "beta_dpo/gap_std": 19.008258819580078, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6863189720332578, "grad_norm": 30.299396514892578, "learning_rate": 1.366202015206706e-07, "logits/chosen": 1.5142550468444824, "logits/rejected": 1.539805293083191, "loss": 1.1613, "step": 454 }, { "beta_dpo/beta_used": 0.0735812857747078, "beta_dpo/beta_used_raw": 0.0735812857747078, "beta_dpo/gap_mean": 11.458322525024414, "beta_dpo/gap_std": 18.766376495361328, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.6878306878306878, "grad_norm": 19.040212631225586, "learning_rate": 1.354433695681474e-07, "logits/chosen": 1.2673579454421997, "logits/rejected": 1.1849486827850342, "loss": 0.9185, "step": 455 }, { "beta_dpo/beta_used": 0.09154469519853592, "beta_dpo/beta_used_raw": 0.04548892751336098, "beta_dpo/gap_mean": 11.764167785644531, "beta_dpo/gap_std": 18.304719924926758, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6893424036281179, "grad_norm": 17.924081802368164, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 1.1485925912857056, "logits/rejected": 0.867099404335022, "loss": 0.9742, "step": 456 }, { "beta_dpo/beta_used": 0.002911860356107354, "beta_dpo/beta_used_raw": -0.021841388195753098, "beta_dpo/gap_mean": 11.507458686828613, "beta_dpo/gap_std": 17.799766540527344, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.690854119425548, "grad_norm": 1.2298791408538818, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 1.556707501411438, "logits/rejected": 1.3103752136230469, "loss": 1.3574, "step": 457 }, { "beta_dpo/beta_used": 0.06613724678754807, "beta_dpo/beta_used_raw": 0.06613724678754807, "beta_dpo/gap_mean": 11.640986442565918, "beta_dpo/gap_std": 17.569747924804688, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6923658352229781, "grad_norm": 15.145212173461914, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 1.1925835609436035, "logits/rejected": 0.9567406177520752, "loss": 0.9984, "step": 458 }, { "beta_dpo/beta_used": 0.1462525725364685, "beta_dpo/beta_used_raw": 0.06357168406248093, "beta_dpo/gap_mean": 11.50616455078125, "beta_dpo/gap_std": 17.23088836669922, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6938775510204082, "grad_norm": 33.99520492553711, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 1.9023911952972412, "logits/rejected": 1.8868764638900757, "loss": 0.9019, "step": 459 }, { "beta_dpo/beta_used": 0.05953259766101837, "beta_dpo/beta_used_raw": 0.035434067249298096, "beta_dpo/gap_mean": 12.219179153442383, "beta_dpo/gap_std": 17.305801391601562, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6953892668178382, "grad_norm": 20.19550895690918, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 1.5630019903182983, "logits/rejected": 1.032307505607605, "loss": 1.1575, "step": 460 }, { "beta_dpo/beta_used": 0.15666146576404572, "beta_dpo/beta_used_raw": 0.05879899859428406, "beta_dpo/gap_mean": 12.488428115844727, "beta_dpo/gap_std": 17.192520141601562, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6969009826152683, "grad_norm": 33.026939392089844, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 1.8508528470993042, "logits/rejected": 1.592889428138733, "loss": 0.8799, "step": 461 }, { "beta_dpo/beta_used": 0.25797462463378906, "beta_dpo/beta_used_raw": 0.25797462463378906, "beta_dpo/gap_mean": 12.744247436523438, "beta_dpo/gap_std": 17.581214904785156, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6984126984126984, "grad_norm": 68.12858581542969, "learning_rate": 1.27297100994108e-07, "logits/chosen": 1.4536336660385132, "logits/rejected": 1.4048317670822144, "loss": 0.9012, "step": 462 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.036650676280260086, "beta_dpo/gap_mean": 12.505157470703125, "beta_dpo/gap_std": 17.675983428955078, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6999244142101285, "grad_norm": 0.3852461278438568, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 1.7947087287902832, "logits/rejected": 1.8371453285217285, "loss": 1.3769, "step": 463 }, { "beta_dpo/beta_used": 0.10625768452882767, "beta_dpo/beta_used_raw": 0.10625768452882767, "beta_dpo/gap_mean": 12.234790802001953, "beta_dpo/gap_std": 18.48796272277832, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7014361300075586, "grad_norm": 24.380142211914062, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 1.3279081583023071, "logits/rejected": 1.2735958099365234, "loss": 0.9684, "step": 464 }, { "beta_dpo/beta_used": 0.0729127824306488, "beta_dpo/beta_used_raw": 0.0729127824306488, "beta_dpo/gap_mean": 12.408645629882812, "beta_dpo/gap_std": 18.752695083618164, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7029478458049887, "grad_norm": 20.065946578979492, "learning_rate": 1.238566782415197e-07, "logits/chosen": 1.4167413711547852, "logits/rejected": 1.2189738750457764, "loss": 1.037, "step": 465 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14980760216712952, "beta_dpo/gap_mean": 11.742490768432617, "beta_dpo/gap_std": 18.520854949951172, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7044595616024187, "grad_norm": 0.4699816405773163, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 1.2016394138336182, "logits/rejected": 1.34425950050354, "loss": 1.3787, "step": 466 }, { "beta_dpo/beta_used": 0.0710422620177269, "beta_dpo/beta_used_raw": 0.07022541761398315, "beta_dpo/gap_mean": 11.67038345336914, "beta_dpo/gap_std": 18.223949432373047, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7059712773998488, "grad_norm": 20.220792770385742, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 0.9855274558067322, "logits/rejected": 0.5498029589653015, "loss": 1.1138, "step": 467 }, { "beta_dpo/beta_used": 0.13923662900924683, "beta_dpo/beta_used_raw": 0.037644751369953156, "beta_dpo/gap_mean": 11.826444625854492, "beta_dpo/gap_std": 18.259021759033203, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7074829931972789, "grad_norm": 93.8460693359375, "learning_rate": 1.204480113956011e-07, "logits/chosen": 1.6846306324005127, "logits/rejected": 1.574007511138916, "loss": 1.3031, "step": 468 }, { "beta_dpo/beta_used": 0.1386784166097641, "beta_dpo/beta_used_raw": 0.0496968999505043, "beta_dpo/gap_mean": 11.931732177734375, "beta_dpo/gap_std": 17.935604095458984, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.708994708994709, "grad_norm": 50.79655456542969, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 1.7242679595947266, "logits/rejected": 1.4564831256866455, "loss": 1.1023, "step": 469 }, { "beta_dpo/beta_used": 0.12113356590270996, "beta_dpo/beta_used_raw": 0.10899336636066437, "beta_dpo/gap_mean": 11.499549865722656, "beta_dpo/gap_std": 17.669376373291016, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7105064247921391, "grad_norm": 30.518417358398438, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 1.467283844947815, "logits/rejected": 1.138906478881836, "loss": 1.0102, "step": 470 }, { "beta_dpo/beta_used": 0.2756751775741577, "beta_dpo/beta_used_raw": 0.2756751775741577, "beta_dpo/gap_mean": 11.841851234436035, "beta_dpo/gap_std": 17.771278381347656, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7120181405895691, "grad_norm": 46.136444091796875, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 1.4691767692565918, "logits/rejected": 1.3501659631729126, "loss": 0.7718, "step": 471 }, { "beta_dpo/beta_used": 0.04809433966875076, "beta_dpo/beta_used_raw": -0.059414975345134735, "beta_dpo/gap_mean": 12.08486557006836, "beta_dpo/gap_std": 18.567523956298828, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7135298563869993, "grad_norm": 19.517423629760742, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 1.2809163331985474, "logits/rejected": 1.3538299798965454, "loss": 1.215, "step": 472 }, { "beta_dpo/beta_used": 0.16872605681419373, "beta_dpo/beta_used_raw": 0.16872605681419373, "beta_dpo/gap_mean": 12.269996643066406, "beta_dpo/gap_std": 19.072513580322266, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7150415721844293, "grad_norm": 46.318756103515625, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 1.4568700790405273, "logits/rejected": 0.9656409025192261, "loss": 0.8288, "step": 473 }, { "beta_dpo/beta_used": 0.01852474734187126, "beta_dpo/beta_used_raw": -0.02408505789935589, "beta_dpo/gap_mean": 11.981610298156738, "beta_dpo/gap_std": 18.998138427734375, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7165532879818595, "grad_norm": 7.194610118865967, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 2.111870288848877, "logits/rejected": 1.9162969589233398, "loss": 1.234, "step": 474 }, { "beta_dpo/beta_used": 0.2989564538002014, "beta_dpo/beta_used_raw": 0.2989564538002014, "beta_dpo/gap_mean": 12.381606101989746, "beta_dpo/gap_std": 18.669628143310547, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.7180650037792895, "grad_norm": 51.27621841430664, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.9997602701187134, "logits/rejected": 1.075520396232605, "loss": 0.5417, "step": 475 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08130905032157898, "beta_dpo/gap_mean": 12.294593811035156, "beta_dpo/gap_std": 18.475555419921875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7195767195767195, "grad_norm": 0.35033127665519714, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 1.256063461303711, "logits/rejected": 1.1085220575332642, "loss": 1.3772, "step": 476 }, { "beta_dpo/beta_used": 0.05871342495083809, "beta_dpo/beta_used_raw": -0.007116403430700302, "beta_dpo/gap_mean": 11.625802040100098, "beta_dpo/gap_std": 18.592151641845703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7210884353741497, "grad_norm": 20.247365951538086, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 1.5608341693878174, "logits/rejected": 1.4778110980987549, "loss": 1.1472, "step": 477 }, { "beta_dpo/beta_used": 0.012469051405787468, "beta_dpo/beta_used_raw": 0.011093566194176674, "beta_dpo/gap_mean": 11.136640548706055, "beta_dpo/gap_std": 19.246501922607422, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7226001511715797, "grad_norm": 4.416438579559326, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 1.4383872747421265, "logits/rejected": 1.3677079677581787, "loss": 1.2937, "step": 478 }, { "beta_dpo/beta_used": 0.07337600737810135, "beta_dpo/beta_used_raw": 0.07337600737810135, "beta_dpo/gap_mean": 10.765281677246094, "beta_dpo/gap_std": 19.35091209411621, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7241118669690099, "grad_norm": 19.005462646484375, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 0.7993252277374268, "logits/rejected": 0.21372252702713013, "loss": 1.0513, "step": 479 }, { "beta_dpo/beta_used": 0.2798859179019928, "beta_dpo/beta_used_raw": 0.2798859179019928, "beta_dpo/gap_mean": 10.71806526184082, "beta_dpo/gap_std": 19.444379806518555, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7256235827664399, "grad_norm": 71.94794464111328, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 1.1766527891159058, "logits/rejected": 1.1873457431793213, "loss": 1.2252, "step": 480 }, { "beta_dpo/beta_used": 0.058205485343933105, "beta_dpo/beta_used_raw": 0.058205485343933105, "beta_dpo/gap_mean": 10.897520065307617, "beta_dpo/gap_std": 19.466140747070312, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.72713529856387, "grad_norm": 15.312066078186035, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 1.0759093761444092, "logits/rejected": 1.222165584564209, "loss": 1.0585, "step": 481 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.010358155705034733, "beta_dpo/gap_mean": 10.749324798583984, "beta_dpo/gap_std": 19.947269439697266, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7286470143613001, "grad_norm": 0.4036189615726471, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 1.2716319561004639, "logits/rejected": 1.175731897354126, "loss": 1.3772, "step": 482 }, { "beta_dpo/beta_used": 0.0638606920838356, "beta_dpo/beta_used_raw": 0.057676542550325394, "beta_dpo/gap_mean": 11.183818817138672, "beta_dpo/gap_std": 19.975425720214844, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7301587301587301, "grad_norm": 22.85965347290039, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 1.5366387367248535, "logits/rejected": 1.613889217376709, "loss": 1.1919, "step": 483 }, { "beta_dpo/beta_used": 0.040944814682006836, "beta_dpo/beta_used_raw": 0.019066521897912025, "beta_dpo/gap_mean": 11.034358978271484, "beta_dpo/gap_std": 19.370399475097656, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7316704459561603, "grad_norm": 15.710288047790527, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 1.4977807998657227, "logits/rejected": 1.473586082458496, "loss": 1.1728, "step": 484 }, { "beta_dpo/beta_used": 0.22265413403511047, "beta_dpo/beta_used_raw": 0.2221376746892929, "beta_dpo/gap_mean": 11.262885093688965, "beta_dpo/gap_std": 18.92700958251953, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7331821617535903, "grad_norm": 79.26393127441406, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 1.5991811752319336, "logits/rejected": 1.406498908996582, "loss": 1.0811, "step": 485 }, { "beta_dpo/beta_used": 0.18111911416053772, "beta_dpo/beta_used_raw": 0.13571885228157043, "beta_dpo/gap_mean": 11.03097152709961, "beta_dpo/gap_std": 19.104738235473633, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7346938775510204, "grad_norm": 62.48919677734375, "learning_rate": 1.007103520743035e-07, "logits/chosen": 1.260819911956787, "logits/rejected": 0.939678430557251, "loss": 1.2927, "step": 486 }, { "beta_dpo/beta_used": 0.16297666728496552, "beta_dpo/beta_used_raw": 0.16297666728496552, "beta_dpo/gap_mean": 11.155773162841797, "beta_dpo/gap_std": 19.169147491455078, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7362055933484505, "grad_norm": 59.966793060302734, "learning_rate": 9.965186236464046e-08, "logits/chosen": 1.0779931545257568, "logits/rejected": 1.3338478803634644, "loss": 1.0975, "step": 487 }, { "beta_dpo/beta_used": 0.2435801774263382, "beta_dpo/beta_used_raw": 0.23413166403770447, "beta_dpo/gap_mean": 11.18185806274414, "beta_dpo/gap_std": 19.392467498779297, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.7377173091458806, "grad_norm": 92.96924591064453, "learning_rate": 9.859757821558337e-08, "logits/chosen": 1.9946186542510986, "logits/rejected": 1.7588841915130615, "loss": 1.2888, "step": 488 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1315448135137558, "beta_dpo/gap_mean": 10.578689575195312, "beta_dpo/gap_std": 18.989961624145508, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7392290249433107, "grad_norm": 0.4756178855895996, "learning_rate": 9.754752911772615e-08, "logits/chosen": 1.6091415882110596, "logits/rejected": 1.572596549987793, "loss": 1.3803, "step": 489 }, { "beta_dpo/beta_used": 0.27745407819747925, "beta_dpo/beta_used_raw": 0.27745407819747925, "beta_dpo/gap_mean": 10.56434440612793, "beta_dpo/gap_std": 19.33395767211914, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.7407407407407407, "grad_norm": 55.287017822265625, "learning_rate": 9.650174444319956e-08, "logits/chosen": 2.1048507690429688, "logits/rejected": 2.1106972694396973, "loss": 0.9254, "step": 490 }, { "beta_dpo/beta_used": 0.15799355506896973, "beta_dpo/beta_used_raw": 0.05918142944574356, "beta_dpo/gap_mean": 11.070003509521484, "beta_dpo/gap_std": 19.092586517333984, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7422524565381708, "grad_norm": 38.35470199584961, "learning_rate": 9.546025344484868e-08, "logits/chosen": 1.3452179431915283, "logits/rejected": 1.4264013767242432, "loss": 1.0867, "step": 491 }, { "beta_dpo/beta_used": 0.09264776110649109, "beta_dpo/beta_used_raw": -0.04411589354276657, "beta_dpo/gap_mean": 10.30017375946045, "beta_dpo/gap_std": 18.976940155029297, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7437641723356009, "grad_norm": 41.40193557739258, "learning_rate": 9.442308525541589e-08, "logits/chosen": 1.6056393384933472, "logits/rejected": 1.0043350458145142, "loss": 1.148, "step": 492 }, { "beta_dpo/beta_used": 0.22295749187469482, "beta_dpo/beta_used_raw": 0.22295749187469482, "beta_dpo/gap_mean": 10.42473030090332, "beta_dpo/gap_std": 19.106109619140625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.745275888133031, "grad_norm": 57.83147430419922, "learning_rate": 9.339026888672468e-08, "logits/chosen": 1.8143576383590698, "logits/rejected": 1.6363078355789185, "loss": 1.0346, "step": 493 }, { "beta_dpo/beta_used": 0.07032950222492218, "beta_dpo/beta_used_raw": 0.07032950222492218, "beta_dpo/gap_mean": 10.655120849609375, "beta_dpo/gap_std": 19.331012725830078, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7467876039304611, "grad_norm": 29.111852645874023, "learning_rate": 9.236183322886945e-08, "logits/chosen": 0.8167870044708252, "logits/rejected": 0.7540128231048584, "loss": 1.1794, "step": 494 }, { "beta_dpo/beta_used": 0.1115037351846695, "beta_dpo/beta_used_raw": 0.028124667704105377, "beta_dpo/gap_mean": 10.31930160522461, "beta_dpo/gap_std": 19.684932708740234, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7482993197278912, "grad_norm": 31.925792694091797, "learning_rate": 9.133780704940594e-08, "logits/chosen": 1.4023044109344482, "logits/rejected": 1.3672239780426025, "loss": 1.1475, "step": 495 }, { "beta_dpo/beta_used": 0.08872908353805542, "beta_dpo/beta_used_raw": 0.08872908353805542, "beta_dpo/gap_mean": 10.631675720214844, "beta_dpo/gap_std": 19.648696899414062, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7498110355253212, "grad_norm": 24.766550064086914, "learning_rate": 9.031821899254797e-08, "logits/chosen": 1.710012435913086, "logits/rejected": 1.3257718086242676, "loss": 0.9777, "step": 496 }, { "beta_dpo/beta_used": 0.10120611637830734, "beta_dpo/beta_used_raw": 0.07404671609401703, "beta_dpo/gap_mean": 10.990039825439453, "beta_dpo/gap_std": 19.455825805664062, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7513227513227513, "grad_norm": 28.4753475189209, "learning_rate": 8.930309757836516e-08, "logits/chosen": 1.7872216701507568, "logits/rejected": 1.4520567655563354, "loss": 1.1345, "step": 497 }, { "beta_dpo/beta_used": 0.12970580160617828, "beta_dpo/beta_used_raw": 0.07501335442066193, "beta_dpo/gap_mean": 11.042081832885742, "beta_dpo/gap_std": 19.401321411132812, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7528344671201814, "grad_norm": 53.85708236694336, "learning_rate": 8.829247120198563e-08, "logits/chosen": 1.6962220668792725, "logits/rejected": 1.4640264511108398, "loss": 1.1085, "step": 498 }, { "beta_dpo/beta_used": 0.22401860356330872, "beta_dpo/beta_used_raw": 0.16938845813274384, "beta_dpo/gap_mean": 11.08438491821289, "beta_dpo/gap_std": 19.733095169067383, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7543461829176115, "grad_norm": 50.10066223144531, "learning_rate": 8.728636813280163e-08, "logits/chosen": 1.4561519622802734, "logits/rejected": 1.1343849897384644, "loss": 1.5875, "step": 499 }, { "beta_dpo/beta_used": 0.10568296164274216, "beta_dpo/beta_used_raw": 0.09129762649536133, "beta_dpo/gap_mean": 10.93176555633545, "beta_dpo/gap_std": 19.578086853027344, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7558578987150416, "grad_norm": 42.66399383544922, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.674248218536377, "logits/rejected": 0.703514814376831, "loss": 1.3396, "step": 500 }, { "epoch": 0.7558578987150416, "eval_beta_dpo/beta_used": 0.1532546579837799, "eval_beta_dpo/beta_used_raw": 0.12686675786972046, "eval_beta_dpo/gap_mean": 11.020356178283691, "eval_beta_dpo/gap_std": 19.520551681518555, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.398350715637207, "eval_logits/rejected": 1.2734830379486084, "eval_loss": 0.6756347417831421, "eval_runtime": 42.7215, "eval_samples_per_second": 53.907, "eval_steps_per_second": 1.685, "step": 500 }, { "beta_dpo/beta_used": 0.10934046655893326, "beta_dpo/beta_used_raw": 0.01821669191122055, "beta_dpo/gap_mean": 10.712957382202148, "beta_dpo/gap_std": 18.95332908630371, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7573696145124716, "grad_norm": 32.02262496948242, "learning_rate": 8.528784436016878e-08, "logits/chosen": 1.116631269454956, "logits/rejected": 1.147378921508789, "loss": 1.1377, "step": 501 }, { "beta_dpo/beta_used": 0.0046331086196005344, "beta_dpo/beta_used_raw": -0.068320132791996, "beta_dpo/gap_mean": 10.554143905639648, "beta_dpo/gap_std": 18.39632225036621, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7588813303099018, "grad_norm": 2.11391019821167, "learning_rate": 8.4295479559726e-08, "logits/chosen": 1.6380161046981812, "logits/rejected": 1.426564335823059, "loss": 1.3467, "step": 502 }, { "beta_dpo/beta_used": 0.2738369405269623, "beta_dpo/beta_used_raw": 0.2738369405269623, "beta_dpo/gap_mean": 10.669290542602539, "beta_dpo/gap_std": 18.408435821533203, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7603930461073318, "grad_norm": 65.43000793457031, "learning_rate": 8.330774987092712e-08, "logits/chosen": 1.4922473430633545, "logits/rejected": 1.5633766651153564, "loss": 1.2771, "step": 503 }, { "beta_dpo/beta_used": 0.13454070687294006, "beta_dpo/beta_used_raw": 0.13454070687294006, "beta_dpo/gap_mean": 11.21607780456543, "beta_dpo/gap_std": 18.295108795166016, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7619047619047619, "grad_norm": 33.310768127441406, "learning_rate": 8.232468292269479e-08, "logits/chosen": 1.6152197122573853, "logits/rejected": 1.4100103378295898, "loss": 0.7788, "step": 504 }, { "beta_dpo/beta_used": 0.11502599716186523, "beta_dpo/beta_used_raw": 0.09766162186861038, "beta_dpo/gap_mean": 11.323755264282227, "beta_dpo/gap_std": 18.809341430664062, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.763416477702192, "grad_norm": 64.08597564697266, "learning_rate": 8.134630621352483e-08, "logits/chosen": 1.4845128059387207, "logits/rejected": 1.2584877014160156, "loss": 1.5933, "step": 505 }, { "beta_dpo/beta_used": 0.15312659740447998, "beta_dpo/beta_used_raw": 0.15312659740447998, "beta_dpo/gap_mean": 10.811168670654297, "beta_dpo/gap_std": 19.353378295898438, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.764928193499622, "grad_norm": 45.99757385253906, "learning_rate": 8.037264711071698e-08, "logits/chosen": 1.3323204517364502, "logits/rejected": 1.4588356018066406, "loss": 1.3287, "step": 506 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04676675423979759, "beta_dpo/gap_mean": 10.543416976928711, "beta_dpo/gap_std": 19.518844604492188, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7664399092970522, "grad_norm": 0.33119961619377136, "learning_rate": 7.940373284960933e-08, "logits/chosen": 1.1214375495910645, "logits/rejected": 1.2219690084457397, "loss": 1.378, "step": 507 }, { "beta_dpo/beta_used": 0.13913913071155548, "beta_dpo/beta_used_raw": 0.006052389740943909, "beta_dpo/gap_mean": 10.641485214233398, "beta_dpo/gap_std": 19.354970932006836, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7679516250944822, "grad_norm": 53.24369430541992, "learning_rate": 7.843959053281663e-08, "logits/chosen": 1.505273699760437, "logits/rejected": 1.071250081062317, "loss": 1.2156, "step": 508 }, { "beta_dpo/beta_used": 0.06729910522699356, "beta_dpo/beta_used_raw": 0.06729910522699356, "beta_dpo/gap_mean": 11.085270881652832, "beta_dpo/gap_std": 18.95514678955078, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7694633408919124, "grad_norm": 21.316387176513672, "learning_rate": 7.748024712947204e-08, "logits/chosen": 0.9079450368881226, "logits/rejected": 1.0837373733520508, "loss": 1.0235, "step": 509 }, { "beta_dpo/beta_used": 0.09236538410186768, "beta_dpo/beta_used_raw": 0.035030972212553024, "beta_dpo/gap_mean": 11.523921012878418, "beta_dpo/gap_std": 18.953319549560547, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7709750566893424, "grad_norm": 39.52785110473633, "learning_rate": 7.652572947447272e-08, "logits/chosen": 1.3003780841827393, "logits/rejected": 1.1664865016937256, "loss": 1.1283, "step": 510 }, { "beta_dpo/beta_used": 0.13791456818580627, "beta_dpo/beta_used_raw": 0.13791456818580627, "beta_dpo/gap_mean": 11.82453441619873, "beta_dpo/gap_std": 19.1085205078125, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7724867724867724, "grad_norm": 33.120235443115234, "learning_rate": 7.557606426772961e-08, "logits/chosen": 1.7288661003112793, "logits/rejected": 1.3858253955841064, "loss": 0.9967, "step": 511 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3171396851539612, "beta_dpo/gap_mean": 11.691259384155273, "beta_dpo/gap_std": 19.083370208740234, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7739984882842026, "grad_norm": 0.3699307441711426, "learning_rate": 7.463127807341966e-08, "logits/chosen": 1.2584737539291382, "logits/rejected": 1.4007148742675781, "loss": 1.3816, "step": 512 }, { "beta_dpo/beta_used": 0.05727185308933258, "beta_dpo/beta_used_raw": 0.05727185308933258, "beta_dpo/gap_mean": 11.749606132507324, "beta_dpo/gap_std": 18.947818756103516, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7755102040816326, "grad_norm": 17.115013122558594, "learning_rate": 7.369139731924401e-08, "logits/chosen": 1.8472414016723633, "logits/rejected": 1.7471710443496704, "loss": 1.0532, "step": 513 }, { "beta_dpo/beta_used": 0.16688939929008484, "beta_dpo/beta_used_raw": 0.16688939929008484, "beta_dpo/gap_mean": 11.778924942016602, "beta_dpo/gap_std": 18.789505004882812, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.7770219198790628, "grad_norm": 32.289146423339844, "learning_rate": 7.275644829568747e-08, "logits/chosen": 1.782091498374939, "logits/rejected": 1.713914394378662, "loss": 0.8214, "step": 514 }, { "beta_dpo/beta_used": 0.00235101324506104, "beta_dpo/beta_used_raw": -0.041592229157686234, "beta_dpo/gap_mean": 11.654163360595703, "beta_dpo/gap_std": 18.874595642089844, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7785336356764928, "grad_norm": 0.9957330226898193, "learning_rate": 7.182645715528435e-08, "logits/chosen": 2.1653988361358643, "logits/rejected": 1.9160587787628174, "loss": 1.3631, "step": 515 }, { "beta_dpo/beta_used": 0.020925289019942284, "beta_dpo/beta_used_raw": 0.004274457693099976, "beta_dpo/gap_mean": 11.362092018127441, "beta_dpo/gap_std": 18.48135757446289, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.780045351473923, "grad_norm": 9.73312759399414, "learning_rate": 7.090144991188568e-08, "logits/chosen": 1.586578369140625, "logits/rejected": 1.3872929811477661, "loss": 1.2347, "step": 516 }, { "beta_dpo/beta_used": 0.06535185873508453, "beta_dpo/beta_used_raw": -0.04223699867725372, "beta_dpo/gap_mean": 11.007303237915039, "beta_dpo/gap_std": 18.780548095703125, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.781557067271353, "grad_norm": 22.2569637298584, "learning_rate": 6.998145243993284e-08, "logits/chosen": 1.4606246948242188, "logits/rejected": 1.458913803100586, "loss": 1.1673, "step": 517 }, { "beta_dpo/beta_used": 0.039747219532728195, "beta_dpo/beta_used_raw": 0.017948148772120476, "beta_dpo/gap_mean": 10.854924201965332, "beta_dpo/gap_std": 18.901779174804688, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.783068783068783, "grad_norm": 13.430063247680664, "learning_rate": 6.906649047373245e-08, "logits/chosen": 1.23757004737854, "logits/rejected": 1.2449841499328613, "loss": 1.1996, "step": 518 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16081054508686066, "beta_dpo/gap_mean": 10.096181869506836, "beta_dpo/gap_std": 19.545801162719727, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7845804988662132, "grad_norm": 0.32356882095336914, "learning_rate": 6.815658960673781e-08, "logits/chosen": 1.456130862236023, "logits/rejected": 1.342390537261963, "loss": 1.3808, "step": 519 }, { "beta_dpo/beta_used": 0.17143839597702026, "beta_dpo/beta_used_raw": 0.1322009265422821, "beta_dpo/gap_mean": 10.353086471557617, "beta_dpo/gap_std": 18.939144134521484, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7860922146636432, "grad_norm": 22.188499450683594, "learning_rate": 6.725177529083209e-08, "logits/chosen": 1.7241880893707275, "logits/rejected": 1.1929926872253418, "loss": 0.8542, "step": 520 }, { "beta_dpo/beta_used": 0.1159067451953888, "beta_dpo/beta_used_raw": 0.1159067451953888, "beta_dpo/gap_mean": 10.21092414855957, "beta_dpo/gap_std": 18.545848846435547, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7876039304610734, "grad_norm": 26.739612579345703, "learning_rate": 6.63520728356167e-08, "logits/chosen": 1.4143714904785156, "logits/rejected": 1.3485612869262695, "loss": 1.0487, "step": 521 }, { "beta_dpo/beta_used": 0.12564007937908173, "beta_dpo/beta_used_raw": 0.12564007937908173, "beta_dpo/gap_mean": 10.022201538085938, "beta_dpo/gap_std": 18.765472412109375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7891156462585034, "grad_norm": 59.06966018676758, "learning_rate": 6.545750740770336e-08, "logits/chosen": 1.6190658807754517, "logits/rejected": 1.4547877311706543, "loss": 1.3958, "step": 522 }, { "beta_dpo/beta_used": 0.23686088621616364, "beta_dpo/beta_used_raw": 0.23686088621616364, "beta_dpo/gap_mean": 10.224916458129883, "beta_dpo/gap_std": 18.703235626220703, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7906273620559335, "grad_norm": 61.67593765258789, "learning_rate": 6.456810403001012e-08, "logits/chosen": 1.7736190557479858, "logits/rejected": 0.9705901145935059, "loss": 0.9546, "step": 523 }, { "beta_dpo/beta_used": 0.10322294384241104, "beta_dpo/beta_used_raw": 0.10322294384241104, "beta_dpo/gap_mean": 9.980840682983398, "beta_dpo/gap_std": 18.50246810913086, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7921390778533636, "grad_norm": 37.48660659790039, "learning_rate": 6.368388758106134e-08, "logits/chosen": 1.1455576419830322, "logits/rejected": 1.1552529335021973, "loss": 1.0655, "step": 524 }, { "beta_dpo/beta_used": 0.003437028033658862, "beta_dpo/beta_used_raw": -0.013644227758049965, "beta_dpo/gap_mean": 9.956314086914062, "beta_dpo/gap_std": 18.658798217773438, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7936507936507936, "grad_norm": 1.9367986917495728, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.9969067573547363, "logits/rejected": 0.8848774433135986, "loss": 1.3582, "step": 525 }, { "beta_dpo/beta_used": 0.11064125597476959, "beta_dpo/beta_used_raw": 0.008903838694095612, "beta_dpo/gap_mean": 9.922897338867188, "beta_dpo/gap_std": 18.31914520263672, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7951625094482238, "grad_norm": 26.800683975219727, "learning_rate": 6.193111425735515e-08, "logits/chosen": 1.173614740371704, "logits/rejected": 0.848638653755188, "loss": 1.1171, "step": 526 }, { "beta_dpo/beta_used": 0.06301558017730713, "beta_dpo/beta_used_raw": 0.04940624535083771, "beta_dpo/gap_mean": 9.1787691116333, "beta_dpo/gap_std": 17.924055099487305, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7966742252456538, "grad_norm": 24.338993072509766, "learning_rate": 6.106260641143546e-08, "logits/chosen": 1.8160502910614014, "logits/rejected": 1.425750970840454, "loss": 1.192, "step": 527 }, { "beta_dpo/beta_used": 0.06259048730134964, "beta_dpo/beta_used_raw": 0.06259048730134964, "beta_dpo/gap_mean": 9.066558837890625, "beta_dpo/gap_std": 18.2850399017334, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7981859410430839, "grad_norm": 18.2799129486084, "learning_rate": 6.019938355056422e-08, "logits/chosen": 1.1618304252624512, "logits/rejected": 1.5348981618881226, "loss": 1.1168, "step": 528 }, { "beta_dpo/beta_used": 0.35698869824409485, "beta_dpo/beta_used_raw": 0.35698869824409485, "beta_dpo/gap_mean": 10.158662796020508, "beta_dpo/gap_std": 18.37487030029297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.799697656840514, "grad_norm": 77.24592590332031, "learning_rate": 5.934146982094049e-08, "logits/chosen": 1.2812597751617432, "logits/rejected": 1.2239878177642822, "loss": 1.1347, "step": 529 }, { "beta_dpo/beta_used": 0.14690837264060974, "beta_dpo/beta_used_raw": 0.11383026838302612, "beta_dpo/gap_mean": 10.558483123779297, "beta_dpo/gap_std": 18.502506256103516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8012093726379441, "grad_norm": 49.202369689941406, "learning_rate": 5.848888922025552e-08, "logits/chosen": 1.660964012145996, "logits/rejected": 1.2829644680023193, "loss": 1.172, "step": 530 }, { "beta_dpo/beta_used": 0.11664751917123795, "beta_dpo/beta_used_raw": 0.05959582328796387, "beta_dpo/gap_mean": 10.151885032653809, "beta_dpo/gap_std": 17.962663650512695, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8027210884353742, "grad_norm": 28.00320053100586, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 2.1570699214935303, "logits/rejected": 1.9092918634414673, "loss": 0.9562, "step": 531 }, { "beta_dpo/beta_used": 0.0765802264213562, "beta_dpo/beta_used_raw": 0.05343026667833328, "beta_dpo/gap_mean": 10.470291137695312, "beta_dpo/gap_std": 17.93800926208496, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8042328042328042, "grad_norm": 29.33611297607422, "learning_rate": 5.679982264990424e-08, "logits/chosen": 1.5656516551971436, "logits/rejected": 1.12631356716156, "loss": 1.306, "step": 532 }, { "beta_dpo/beta_used": 0.01579258404672146, "beta_dpo/beta_used_raw": -0.00625237263739109, "beta_dpo/gap_mean": 10.281692504882812, "beta_dpo/gap_std": 18.202903747558594, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8057445200302343, "grad_norm": 5.443710803985596, "learning_rate": 5.596338392706076e-08, "logits/chosen": 1.9019914865493774, "logits/rejected": 1.5987591743469238, "loss": 1.2761, "step": 533 }, { "beta_dpo/beta_used": 0.25557124614715576, "beta_dpo/beta_used_raw": 0.1648273766040802, "beta_dpo/gap_mean": 10.804718971252441, "beta_dpo/gap_std": 18.391155242919922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8072562358276644, "grad_norm": 96.26758575439453, "learning_rate": 5.513237282548033e-08, "logits/chosen": 1.1472864151000977, "logits/rejected": 0.6411304473876953, "loss": 1.3409, "step": 534 }, { "beta_dpo/beta_used": 0.15714676678180695, "beta_dpo/beta_used_raw": 0.1257384568452835, "beta_dpo/gap_mean": 10.44320011138916, "beta_dpo/gap_std": 18.42446517944336, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8087679516250945, "grad_norm": 38.07860565185547, "learning_rate": 5.430681259032957e-08, "logits/chosen": 1.16520094871521, "logits/rejected": 0.858991801738739, "loss": 1.0452, "step": 535 }, { "beta_dpo/beta_used": 0.17811733484268188, "beta_dpo/beta_used_raw": 0.1541111320257187, "beta_dpo/gap_mean": 10.847288131713867, "beta_dpo/gap_std": 18.489604949951172, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8102796674225246, "grad_norm": 51.901405334472656, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 1.6397348642349243, "logits/rejected": 1.5184638500213623, "loss": 1.1259, "step": 536 }, { "beta_dpo/beta_used": 0.06095781922340393, "beta_dpo/beta_used_raw": 0.06095781922340393, "beta_dpo/gap_mean": 10.72732162475586, "beta_dpo/gap_std": 18.210926055908203, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8117913832199547, "grad_norm": 14.593372344970703, "learning_rate": 5.267213693697695e-08, "logits/chosen": 1.3957045078277588, "logits/rejected": 1.092875599861145, "loss": 1.0809, "step": 537 }, { "beta_dpo/beta_used": 0.09433559328317642, "beta_dpo/beta_used_raw": 0.06990790367126465, "beta_dpo/gap_mean": 10.813741683959961, "beta_dpo/gap_std": 18.126432418823242, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8133030990173847, "grad_norm": 22.37622833251953, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 1.5632539987564087, "logits/rejected": 1.6953120231628418, "loss": 1.0197, "step": 538 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04334234446287155, "beta_dpo/gap_mean": 10.80185317993164, "beta_dpo/gap_std": 17.788105010986328, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8148148148148148, "grad_norm": 0.34440314769744873, "learning_rate": 5.105953986729195e-08, "logits/chosen": 1.4430513381958008, "logits/rejected": 1.2813853025436401, "loss": 1.3784, "step": 539 }, { "beta_dpo/beta_used": 0.2005300521850586, "beta_dpo/beta_used_raw": 0.2005300521850586, "beta_dpo/gap_mean": 11.087736129760742, "beta_dpo/gap_std": 17.963241577148438, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8163265306122449, "grad_norm": 67.64325714111328, "learning_rate": 5.026157728273966e-08, "logits/chosen": 1.9035638570785522, "logits/rejected": 1.7133615016937256, "loss": 0.9523, "step": 540 }, { "beta_dpo/beta_used": 0.1668683886528015, "beta_dpo/beta_used_raw": 0.13656221330165863, "beta_dpo/gap_mean": 11.58251667022705, "beta_dpo/gap_std": 17.98819351196289, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.817838246409675, "grad_norm": 57.01007843017578, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 1.6449933052062988, "logits/rejected": 1.851159930229187, "loss": 1.1774, "step": 541 }, { "beta_dpo/beta_used": 0.2715034484863281, "beta_dpo/beta_used_raw": 0.2715034484863281, "beta_dpo/gap_mean": 12.08657455444336, "beta_dpo/gap_std": 18.468975067138672, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8193499622071051, "grad_norm": 71.46245574951172, "learning_rate": 4.868243561723534e-08, "logits/chosen": 1.407960057258606, "logits/rejected": 1.1887967586517334, "loss": 1.0508, "step": 542 }, { "beta_dpo/beta_used": 0.12943723797798157, "beta_dpo/beta_used_raw": 0.12943723797798157, "beta_dpo/gap_mean": 12.076602935791016, "beta_dpo/gap_std": 18.590787887573242, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8208616780045351, "grad_norm": 29.349206924438477, "learning_rate": 4.790130070827028e-08, "logits/chosen": 1.7124477624893188, "logits/rejected": 1.4872949123382568, "loss": 0.9852, "step": 543 }, { "beta_dpo/beta_used": 0.029429566115140915, "beta_dpo/beta_used_raw": 0.023324094712734222, "beta_dpo/gap_mean": 12.406301498413086, "beta_dpo/gap_std": 19.229873657226562, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8223733938019653, "grad_norm": 13.987939834594727, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 1.2513970136642456, "logits/rejected": 1.3183088302612305, "loss": 1.1907, "step": 544 }, { "beta_dpo/beta_used": 0.1011621505022049, "beta_dpo/beta_used_raw": 0.1011621505022049, "beta_dpo/gap_mean": 12.334243774414062, "beta_dpo/gap_std": 19.540382385253906, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8238851095993953, "grad_norm": 15.821413040161133, "learning_rate": 4.635601198741607e-08, "logits/chosen": 1.6381149291992188, "logits/rejected": 1.4946880340576172, "loss": 0.823, "step": 545 }, { "beta_dpo/beta_used": 0.1270730048418045, "beta_dpo/beta_used_raw": 0.11782974004745483, "beta_dpo/gap_mean": 12.052278518676758, "beta_dpo/gap_std": 19.115442276000977, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8253968253968254, "grad_norm": 42.763160705566406, "learning_rate": 4.559190140057428e-08, "logits/chosen": 1.3137729167938232, "logits/rejected": 1.331726312637329, "loss": 1.117, "step": 546 }, { "beta_dpo/beta_used": 0.11487125605344772, "beta_dpo/beta_used_raw": 0.11487125605344772, "beta_dpo/gap_mean": 12.46994400024414, "beta_dpo/gap_std": 19.053627014160156, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8269085411942555, "grad_norm": 18.704116821289062, "learning_rate": 4.483350854765672e-08, "logits/chosen": 1.0267034769058228, "logits/rejected": 0.6374800801277161, "loss": 0.8122, "step": 547 }, { "beta_dpo/beta_used": 0.03800983354449272, "beta_dpo/beta_used_raw": 0.00829574279487133, "beta_dpo/gap_mean": 11.772872924804688, "beta_dpo/gap_std": 18.622325897216797, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8284202569916855, "grad_norm": 19.206010818481445, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 1.4546637535095215, "logits/rejected": 1.3802220821380615, "loss": 1.2243, "step": 548 }, { "beta_dpo/beta_used": 0.09710898995399475, "beta_dpo/beta_used_raw": 0.07338780164718628, "beta_dpo/gap_mean": 11.424276351928711, "beta_dpo/gap_std": 18.64669418334961, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8299319727891157, "grad_norm": 38.650184631347656, "learning_rate": 4.333396073857723e-08, "logits/chosen": 1.831252098083496, "logits/rejected": 1.8664486408233643, "loss": 1.1348, "step": 549 }, { "beta_dpo/beta_used": 0.1007058173418045, "beta_dpo/beta_used_raw": 0.1007058173418045, "beta_dpo/gap_mean": 11.098159790039062, "beta_dpo/gap_std": 18.92105484008789, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8314436885865457, "grad_norm": 33.382164001464844, "learning_rate": 4.259284772799099e-08, "logits/chosen": 1.819729208946228, "logits/rejected": 1.856877088546753, "loss": 1.0106, "step": 550 }, { "beta_dpo/beta_used": 0.044430945068597794, "beta_dpo/beta_used_raw": 0.03116544708609581, "beta_dpo/gap_mean": 10.615936279296875, "beta_dpo/gap_std": 18.659568786621094, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8329554043839759, "grad_norm": 14.599547386169434, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 1.8848488330841064, "logits/rejected": 1.6397110223770142, "loss": 1.1842, "step": 551 }, { "beta_dpo/beta_used": 0.19954092800617218, "beta_dpo/beta_used_raw": 0.19954092800617218, "beta_dpo/gap_mean": 10.359419822692871, "beta_dpo/gap_std": 18.463096618652344, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8344671201814059, "grad_norm": 42.67924118041992, "learning_rate": 4.112804714676593e-08, "logits/chosen": 1.7389799356460571, "logits/rejected": 1.3862097263336182, "loss": 1.0244, "step": 552 }, { "beta_dpo/beta_used": 0.2873495817184448, "beta_dpo/beta_used_raw": 0.2873495817184448, "beta_dpo/gap_mean": 10.555915832519531, "beta_dpo/gap_std": 18.88970184326172, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8359788359788359, "grad_norm": 65.49226379394531, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 1.8026375770568848, "logits/rejected": 1.2519030570983887, "loss": 1.1918, "step": 553 }, { "beta_dpo/beta_used": 0.05840389430522919, "beta_dpo/beta_used_raw": 0.033750779926776886, "beta_dpo/gap_mean": 10.615468978881836, "beta_dpo/gap_std": 18.631549835205078, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8374905517762661, "grad_norm": 17.503585815429688, "learning_rate": 3.968661679220467e-08, "logits/chosen": 1.0402394533157349, "logits/rejected": 0.9546246528625488, "loss": 1.1337, "step": 554 }, { "beta_dpo/beta_used": 0.13094915449619293, "beta_dpo/beta_used_raw": 0.13006287813186646, "beta_dpo/gap_mean": 10.578245162963867, "beta_dpo/gap_std": 18.960113525390625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8390022675736961, "grad_norm": 42.747737884521484, "learning_rate": 3.89747159520904e-08, "logits/chosen": 1.5776338577270508, "logits/rejected": 1.3985557556152344, "loss": 1.1577, "step": 555 }, { "beta_dpo/beta_used": 0.005854336079210043, "beta_dpo/beta_used_raw": -0.03035161830484867, "beta_dpo/gap_mean": 10.757535934448242, "beta_dpo/gap_std": 18.839813232421875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8405139833711263, "grad_norm": 2.7324094772338867, "learning_rate": 3.826871794280192e-08, "logits/chosen": 1.3301312923431396, "logits/rejected": 1.2104971408843994, "loss": 1.339, "step": 556 }, { "beta_dpo/beta_used": 0.12279447913169861, "beta_dpo/beta_used_raw": 0.09184837341308594, "beta_dpo/gap_mean": 11.148019790649414, "beta_dpo/gap_std": 18.934059143066406, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8420256991685563, "grad_norm": 28.103437423706055, "learning_rate": 3.756864251262143e-08, "logits/chosen": 1.197859525680542, "logits/rejected": 0.6767659187316895, "loss": 1.0637, "step": 557 }, { "beta_dpo/beta_used": 0.08440352976322174, "beta_dpo/beta_used_raw": 0.05450304225087166, "beta_dpo/gap_mean": 11.180231094360352, "beta_dpo/gap_std": 19.118072509765625, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8435374149659864, "grad_norm": 24.796546936035156, "learning_rate": 3.687450924416341e-08, "logits/chosen": 1.7133653163909912, "logits/rejected": 1.6142797470092773, "loss": 1.1398, "step": 558 }, { "beta_dpo/beta_used": 0.10217013955116272, "beta_dpo/beta_used_raw": 0.0785236731171608, "beta_dpo/gap_mean": 11.172683715820312, "beta_dpo/gap_std": 19.399667739868164, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8450491307634165, "grad_norm": 28.550260543823242, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 1.3882708549499512, "logits/rejected": 0.937119722366333, "loss": 1.0391, "step": 559 }, { "beta_dpo/beta_used": 0.1818804293870926, "beta_dpo/beta_used_raw": 0.07039390504360199, "beta_dpo/gap_mean": 11.01672649383545, "beta_dpo/gap_std": 19.380664825439453, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8465608465608465, "grad_norm": 51.63716125488281, "learning_rate": 3.550414669125573e-08, "logits/chosen": 1.3455142974853516, "logits/rejected": 1.3270020484924316, "loss": 1.1142, "step": 560 }, { "beta_dpo/beta_used": 0.08334767073392868, "beta_dpo/beta_used_raw": 0.012797832489013672, "beta_dpo/gap_mean": 11.576347351074219, "beta_dpo/gap_std": 19.41046905517578, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8480725623582767, "grad_norm": 28.351787567138672, "learning_rate": 3.482795573879241e-08, "logits/chosen": 1.6244086027145386, "logits/rejected": 1.520763874053955, "loss": 1.0719, "step": 561 }, { "beta_dpo/beta_used": 0.053987935185432434, "beta_dpo/beta_used_raw": -0.038065314292907715, "beta_dpo/gap_mean": 11.818717956542969, "beta_dpo/gap_std": 19.462326049804688, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8495842781557067, "grad_norm": 18.966503143310547, "learning_rate": 3.415778361095226e-08, "logits/chosen": 1.7821249961853027, "logits/rejected": 1.3919886350631714, "loss": 1.1546, "step": 562 }, { "beta_dpo/beta_used": 0.11155681312084198, "beta_dpo/beta_used_raw": 0.11155681312084198, "beta_dpo/gap_mean": 11.847363471984863, "beta_dpo/gap_std": 20.0205020904541, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8510959939531368, "grad_norm": 25.363344192504883, "learning_rate": 3.349364905389032e-08, "logits/chosen": 1.2508881092071533, "logits/rejected": 0.9743169546127319, "loss": 0.9877, "step": 563 }, { "beta_dpo/beta_used": 0.16263367235660553, "beta_dpo/beta_used_raw": 0.16263367235660553, "beta_dpo/gap_mean": 11.87989616394043, "beta_dpo/gap_std": 20.273239135742188, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8526077097505669, "grad_norm": 38.15361785888672, "learning_rate": 3.283557064487785e-08, "logits/chosen": 1.650681734085083, "logits/rejected": 1.6767246723175049, "loss": 0.9964, "step": 564 }, { "beta_dpo/beta_used": 0.03271957114338875, "beta_dpo/beta_used_raw": -0.13078060746192932, "beta_dpo/gap_mean": 11.701870918273926, "beta_dpo/gap_std": 19.98330307006836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.854119425547997, "grad_norm": 13.035989761352539, "learning_rate": 3.218356679178252e-08, "logits/chosen": 1.7115111351013184, "logits/rejected": 1.2233140468597412, "loss": 1.1906, "step": 565 }, { "beta_dpo/beta_used": 0.061468079686164856, "beta_dpo/beta_used_raw": -0.002200111746788025, "beta_dpo/gap_mean": 11.254524230957031, "beta_dpo/gap_std": 19.918685913085938, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8556311413454271, "grad_norm": 36.61579132080078, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 1.8470666408538818, "logits/rejected": 1.5994932651519775, "loss": 1.2811, "step": 566 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07913199067115784, "beta_dpo/gap_mean": 11.579656600952148, "beta_dpo/gap_std": 19.583362579345703, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8571428571428571, "grad_norm": 0.510140597820282, "learning_rate": 3.089785553471233e-08, "logits/chosen": 0.953148603439331, "logits/rejected": 1.038599967956543, "loss": 1.3768, "step": 567 }, { "beta_dpo/beta_used": 0.05987339839339256, "beta_dpo/beta_used_raw": -0.008187372237443924, "beta_dpo/gap_mean": 11.487663269042969, "beta_dpo/gap_std": 18.96971893310547, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8586545729402872, "grad_norm": 23.292221069335938, "learning_rate": 3.026418409484513e-08, "logits/chosen": 1.5052483081817627, "logits/rejected": 1.313591718673706, "loss": 1.1959, "step": 568 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.05208010971546173, "beta_dpo/gap_mean": 11.568532943725586, "beta_dpo/gap_std": 18.769332885742188, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8601662887377173, "grad_norm": 0.33875489234924316, "learning_rate": 2.963665913810451e-08, "logits/chosen": 1.5558233261108398, "logits/rejected": 1.5793402194976807, "loss": 1.378, "step": 569 }, { "beta_dpo/beta_used": 0.2890852391719818, "beta_dpo/beta_used_raw": 0.2890852391719818, "beta_dpo/gap_mean": 11.699023246765137, "beta_dpo/gap_std": 18.902652740478516, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8616780045351474, "grad_norm": 49.38574981689453, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 1.6536730527877808, "logits/rejected": 1.5479531288146973, "loss": 0.6619, "step": 570 }, { "beta_dpo/beta_used": 0.12220169603824615, "beta_dpo/beta_used_raw": 0.07312116771936417, "beta_dpo/gap_mean": 11.507964134216309, "beta_dpo/gap_std": 18.88650894165039, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8631897203325775, "grad_norm": 22.84241485595703, "learning_rate": 2.840011871446962e-08, "logits/chosen": 1.3720738887786865, "logits/rejected": 1.0536761283874512, "loss": 1.0365, "step": 571 }, { "beta_dpo/beta_used": 0.07529482990503311, "beta_dpo/beta_used_raw": 0.016804847866296768, "beta_dpo/gap_mean": 11.208440780639648, "beta_dpo/gap_std": 18.8841495513916, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.8647014361300076, "grad_norm": 22.246524810791016, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 1.4899077415466309, "logits/rejected": 1.3142718076705933, "loss": 1.0893, "step": 572 }, { "beta_dpo/beta_used": 0.10679773986339569, "beta_dpo/beta_used_raw": 0.10679773986339569, "beta_dpo/gap_mean": 11.175505638122559, "beta_dpo/gap_std": 19.131608963012695, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8662131519274376, "grad_norm": 25.517454147338867, "learning_rate": 2.718837261761528e-08, "logits/chosen": 1.6858450174331665, "logits/rejected": 1.5363482236862183, "loss": 0.9872, "step": 573 }, { "beta_dpo/beta_used": 0.3487897515296936, "beta_dpo/beta_used_raw": 0.3487897515296936, "beta_dpo/gap_mean": 11.519730567932129, "beta_dpo/gap_std": 19.185972213745117, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8677248677248677, "grad_norm": 61.2857780456543, "learning_rate": 2.659183991914696e-08, "logits/chosen": 1.2357861995697021, "logits/rejected": 1.3313536643981934, "loss": 0.7915, "step": 574 }, { "beta_dpo/beta_used": 0.12381540983915329, "beta_dpo/beta_used_raw": -0.13067613542079926, "beta_dpo/gap_mean": 11.860994338989258, "beta_dpo/gap_std": 19.543277740478516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8692365835222978, "grad_norm": 35.53083038330078, "learning_rate": 2.600155642716606e-08, "logits/chosen": 1.5488557815551758, "logits/rejected": 1.1586174964904785, "loss": 1.3028, "step": 575 }, { "beta_dpo/beta_used": 0.3507198095321655, "beta_dpo/beta_used_raw": 0.3507198095321655, "beta_dpo/gap_mean": 11.940589904785156, "beta_dpo/gap_std": 19.488418579101562, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8707482993197279, "grad_norm": 90.53150177001953, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 1.8404762744903564, "logits/rejected": 1.5911169052124023, "loss": 1.0244, "step": 576 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2546420693397522, "beta_dpo/gap_mean": 11.400903701782227, "beta_dpo/gap_std": 19.448040008544922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.872260015117158, "grad_norm": 0.31355878710746765, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 2.165390968322754, "logits/rejected": 1.9945690631866455, "loss": 1.3799, "step": 577 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14779648184776306, "beta_dpo/gap_mean": 10.787927627563477, "beta_dpo/gap_std": 19.160215377807617, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.873771730914588, "grad_norm": 0.3057783842086792, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 1.7347502708435059, "logits/rejected": 1.4005095958709717, "loss": 1.38, "step": 578 }, { "beta_dpo/beta_used": 0.04637397825717926, "beta_dpo/beta_used_raw": 0.03942590579390526, "beta_dpo/gap_mean": 10.95724105834961, "beta_dpo/gap_std": 18.829822540283203, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8752834467120182, "grad_norm": 12.994908332824707, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 1.5472080707550049, "logits/rejected": 1.3895456790924072, "loss": 1.1182, "step": 579 }, { "beta_dpo/beta_used": 0.0774238333106041, "beta_dpo/beta_used_raw": 0.02635762467980385, "beta_dpo/gap_mean": 10.798433303833008, "beta_dpo/gap_std": 19.00153350830078, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8767951625094482, "grad_norm": 18.991615295410156, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 1.3897788524627686, "logits/rejected": 1.2619503736495972, "loss": 1.0359, "step": 580 }, { "beta_dpo/beta_used": 0.20503893494606018, "beta_dpo/beta_used_raw": 0.20503893494606018, "beta_dpo/gap_mean": 10.974782943725586, "beta_dpo/gap_std": 18.892879486083984, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8783068783068783, "grad_norm": 46.59199142456055, "learning_rate": 2.259200116137039e-08, "logits/chosen": 1.6775740385055542, "logits/rejected": 1.5639266967773438, "loss": 1.0779, "step": 581 }, { "beta_dpo/beta_used": 0.08953151851892471, "beta_dpo/beta_used_raw": 0.08953151851892471, "beta_dpo/gap_mean": 10.719525337219238, "beta_dpo/gap_std": 18.856456756591797, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8798185941043084, "grad_norm": 28.192134857177734, "learning_rate": 2.204591459016525e-08, "logits/chosen": 1.0942572355270386, "logits/rejected": 0.8079568147659302, "loss": 1.3575, "step": 582 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.047244954854249954, "beta_dpo/gap_mean": 10.52918529510498, "beta_dpo/gap_std": 19.494892120361328, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8813303099017384, "grad_norm": 0.4001644253730774, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 1.7894879579544067, "logits/rejected": 1.7086610794067383, "loss": 1.3775, "step": 583 }, { "beta_dpo/beta_used": 0.226608008146286, "beta_dpo/beta_used_raw": 0.226608008146286, "beta_dpo/gap_mean": 10.642799377441406, "beta_dpo/gap_std": 19.788650512695312, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8828420256991686, "grad_norm": 68.65081787109375, "learning_rate": 2.09728856419826e-08, "logits/chosen": 1.6787614822387695, "logits/rejected": 1.416142225265503, "loss": 1.4486, "step": 584 }, { "beta_dpo/beta_used": 0.147002711892128, "beta_dpo/beta_used_raw": 0.09757278859615326, "beta_dpo/gap_mean": 10.093536376953125, "beta_dpo/gap_std": 19.318706512451172, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8843537414965986, "grad_norm": 46.095481872558594, "learning_rate": 2.044597327993153e-08, "logits/chosen": 1.482553243637085, "logits/rejected": 1.1475740671157837, "loss": 1.0405, "step": 585 }, { "beta_dpo/beta_used": 0.23156246542930603, "beta_dpo/beta_used_raw": 0.23156246542930603, "beta_dpo/gap_mean": 10.390697479248047, "beta_dpo/gap_std": 18.974735260009766, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8858654572940288, "grad_norm": 73.39409637451172, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 1.423257827758789, "logits/rejected": 1.4906511306762695, "loss": 1.2219, "step": 586 }, { "beta_dpo/beta_used": 0.12944234907627106, "beta_dpo/beta_used_raw": 0.08352088928222656, "beta_dpo/gap_mean": 11.080392837524414, "beta_dpo/gap_std": 19.111534118652344, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8873771730914588, "grad_norm": 83.24421691894531, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 1.5286226272583008, "logits/rejected": 1.362818956375122, "loss": 1.2369, "step": 587 }, { "beta_dpo/beta_used": 0.20540329813957214, "beta_dpo/beta_used_raw": 0.20540329813957214, "beta_dpo/gap_mean": 11.472732543945312, "beta_dpo/gap_std": 18.702571868896484, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8888888888888888, "grad_norm": 45.81542205810547, "learning_rate": 1.890382096832699e-08, "logits/chosen": 1.0323173999786377, "logits/rejected": 0.911257266998291, "loss": 0.8494, "step": 588 }, { "beta_dpo/beta_used": 0.2612083852291107, "beta_dpo/beta_used_raw": 0.2612083852291107, "beta_dpo/gap_mean": 11.75611686706543, "beta_dpo/gap_std": 18.558521270751953, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.890400604686319, "grad_norm": 55.26761245727539, "learning_rate": 1.840267971970344e-08, "logits/chosen": 1.2208616733551025, "logits/rejected": 1.25350022315979, "loss": 0.6308, "step": 589 }, { "beta_dpo/beta_used": 0.06067529320716858, "beta_dpo/beta_used_raw": 0.06067529320716858, "beta_dpo/gap_mean": 12.026023864746094, "beta_dpo/gap_std": 18.19622802734375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.891912320483749, "grad_norm": 14.301695823669434, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 1.392610788345337, "logits/rejected": 1.33760404586792, "loss": 0.9256, "step": 590 }, { "beta_dpo/beta_used": 0.1715593785047531, "beta_dpo/beta_used_raw": 0.06642448157072067, "beta_dpo/gap_mean": 12.463174819946289, "beta_dpo/gap_std": 18.169431686401367, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8934240362811792, "grad_norm": 47.96595001220703, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 1.46830153465271, "logits/rejected": 1.4073936939239502, "loss": 1.2754, "step": 591 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3008629381656647, "beta_dpo/gap_mean": 12.13792610168457, "beta_dpo/gap_std": 18.62552833557129, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8949357520786092, "grad_norm": 0.33993443846702576, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 1.4184048175811768, "logits/rejected": 1.281282663345337, "loss": 1.3805, "step": 592 }, { "beta_dpo/beta_used": 0.01834931969642639, "beta_dpo/beta_used_raw": 0.01834931969642639, "beta_dpo/gap_mean": 12.079296112060547, "beta_dpo/gap_std": 18.619632720947266, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8964474678760394, "grad_norm": 9.732373237609863, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 1.4756786823272705, "logits/rejected": 1.439645528793335, "loss": 1.2391, "step": 593 }, { "beta_dpo/beta_used": 0.03766559436917305, "beta_dpo/beta_used_raw": -0.07916873693466187, "beta_dpo/gap_mean": 11.784311294555664, "beta_dpo/gap_std": 18.663883209228516, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8979591836734694, "grad_norm": 13.725388526916504, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 1.5952414274215698, "logits/rejected": 1.5322446823120117, "loss": 1.1311, "step": 594 }, { "beta_dpo/beta_used": 0.29605233669281006, "beta_dpo/beta_used_raw": 0.29605233669281006, "beta_dpo/gap_mean": 11.810873031616211, "beta_dpo/gap_std": 19.1055850982666, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8994708994708994, "grad_norm": 77.941650390625, "learning_rate": 1.553235392451377e-08, "logits/chosen": 1.9404058456420898, "logits/rejected": 1.6339752674102783, "loss": 1.0055, "step": 595 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19658523797988892, "beta_dpo/gap_mean": 11.020740509033203, "beta_dpo/gap_std": 19.33443832397461, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9009826152683296, "grad_norm": 0.28924560546875, "learning_rate": 1.507684480352292e-08, "logits/chosen": 1.3958216905593872, "logits/rejected": 1.2835626602172852, "loss": 1.3809, "step": 596 }, { "beta_dpo/beta_used": 0.11626582592725754, "beta_dpo/beta_used_raw": 0.09599099308252335, "beta_dpo/gap_mean": 10.842538833618164, "beta_dpo/gap_std": 19.066997528076172, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9024943310657596, "grad_norm": 29.83753776550293, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 1.3235628604888916, "logits/rejected": 1.3475373983383179, "loss": 1.0947, "step": 597 }, { "beta_dpo/beta_used": 0.00658452557399869, "beta_dpo/beta_used_raw": -0.0820087194442749, "beta_dpo/gap_mean": 10.465547561645508, "beta_dpo/gap_std": 18.86764907836914, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9040060468631897, "grad_norm": 2.736207962036133, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 0.8764083385467529, "logits/rejected": 0.8989740610122681, "loss": 1.3336, "step": 598 }, { "beta_dpo/beta_used": 0.01312843058258295, "beta_dpo/beta_used_raw": -0.017385948449373245, "beta_dpo/gap_mean": 10.36957836151123, "beta_dpo/gap_std": 18.702917098999023, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9055177626606198, "grad_norm": 5.408941268920898, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 1.087230920791626, "logits/rejected": 1.0524613857269287, "loss": 1.2818, "step": 599 }, { "beta_dpo/beta_used": 0.10121805220842361, "beta_dpo/beta_used_raw": 0.10121805220842361, "beta_dpo/gap_mean": 9.870223045349121, "beta_dpo/gap_std": 18.549135208129883, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9070294784580499, "grad_norm": 29.112049102783203, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 1.4981776475906372, "logits/rejected": 1.579132080078125, "loss": 1.0636, "step": 600 }, { "epoch": 0.9070294784580499, "eval_beta_dpo/beta_used": 0.19951747357845306, "eval_beta_dpo/beta_used_raw": 0.18094317615032196, "eval_beta_dpo/gap_mean": 9.920242309570312, "eval_beta_dpo/gap_std": 18.34697914123535, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.5448524951934814, "eval_logits/rejected": 1.4136770963668823, "eval_loss": 0.725577175617218, "eval_runtime": 42.6427, "eval_samples_per_second": 54.007, "eval_steps_per_second": 1.688, "step": 600 }, { "beta_dpo/beta_used": 0.30988115072250366, "beta_dpo/beta_used_raw": 0.30988115072250366, "beta_dpo/gap_mean": 10.584239959716797, "beta_dpo/gap_std": 18.33379364013672, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.90854119425548, "grad_norm": 72.30387878417969, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 1.4453303813934326, "logits/rejected": 1.3735606670379639, "loss": 1.0263, "step": 601 }, { "beta_dpo/beta_used": 0.22164717316627502, "beta_dpo/beta_used_raw": 0.22164717316627502, "beta_dpo/gap_mean": 10.797483444213867, "beta_dpo/gap_std": 17.95302963256836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.91005291005291, "grad_norm": 64.73311614990234, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 1.5381059646606445, "logits/rejected": 1.3643220663070679, "loss": 0.875, "step": 602 }, { "beta_dpo/beta_used": 0.046099040657281876, "beta_dpo/beta_used_raw": 0.046099040657281876, "beta_dpo/gap_mean": 11.128530502319336, "beta_dpo/gap_std": 17.995418548583984, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9115646258503401, "grad_norm": 12.024964332580566, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 1.3041894435882568, "logits/rejected": 0.8876796960830688, "loss": 1.0542, "step": 603 }, { "beta_dpo/beta_used": 0.10406889021396637, "beta_dpo/beta_used_raw": 0.10406889021396637, "beta_dpo/gap_mean": 10.929356575012207, "beta_dpo/gap_std": 18.385833740234375, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9130763416477702, "grad_norm": 30.560834884643555, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 1.3623418807983398, "logits/rejected": 1.2177022695541382, "loss": 1.1108, "step": 604 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1141221672296524, "beta_dpo/gap_mean": 10.722838401794434, "beta_dpo/gap_std": 18.810199737548828, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9145880574452003, "grad_norm": 0.3671630620956421, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 1.7869694232940674, "logits/rejected": 1.4546797275543213, "loss": 1.3779, "step": 605 }, { "beta_dpo/beta_used": 0.06800004839897156, "beta_dpo/beta_used_raw": -0.004727482795715332, "beta_dpo/gap_mean": 10.593547821044922, "beta_dpo/gap_std": 18.50115394592285, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9160997732426304, "grad_norm": 18.27583885192871, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 1.588325023651123, "logits/rejected": 1.3476459980010986, "loss": 1.0867, "step": 606 }, { "beta_dpo/beta_used": 0.08275961130857468, "beta_dpo/beta_used_raw": 0.021806050091981888, "beta_dpo/gap_mean": 11.124649047851562, "beta_dpo/gap_std": 18.29534912109375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9176114890400605, "grad_norm": 28.003469467163086, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 0.8255990743637085, "logits/rejected": 1.3248374462127686, "loss": 1.0785, "step": 607 }, { "beta_dpo/beta_used": 0.05329656973481178, "beta_dpo/beta_used_raw": 0.00821135938167572, "beta_dpo/gap_mean": 10.249990463256836, "beta_dpo/gap_std": 18.143394470214844, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9191232048374905, "grad_norm": 18.02731704711914, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 1.4278472661972046, "logits/rejected": 1.3889408111572266, "loss": 1.1801, "step": 608 }, { "beta_dpo/beta_used": 0.20283672213554382, "beta_dpo/beta_used_raw": 0.20283672213554382, "beta_dpo/gap_mean": 10.51877212524414, "beta_dpo/gap_std": 17.66318702697754, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9206349206349206, "grad_norm": 51.436180114746094, "learning_rate": 9.757601041885694e-09, "logits/chosen": 1.7704598903656006, "logits/rejected": 1.4652361869812012, "loss": 0.9272, "step": 609 }, { "beta_dpo/beta_used": 0.06131238117814064, "beta_dpo/beta_used_raw": 0.020395085215568542, "beta_dpo/gap_mean": 10.665861129760742, "beta_dpo/gap_std": 17.976917266845703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9221466364323507, "grad_norm": 21.912073135375977, "learning_rate": 9.395165583732379e-09, "logits/chosen": 1.5744285583496094, "logits/rejected": 1.679150104522705, "loss": 1.1984, "step": 610 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09681444615125656, "beta_dpo/gap_mean": 10.473505973815918, "beta_dpo/gap_std": 18.203094482421875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9236583522297808, "grad_norm": 0.31549084186553955, "learning_rate": 9.03946036001449e-09, "logits/chosen": 1.1862019300460815, "logits/rejected": 1.1827809810638428, "loss": 1.3791, "step": 611 }, { "beta_dpo/beta_used": 0.1381537914276123, "beta_dpo/beta_used_raw": 0.13283728063106537, "beta_dpo/gap_mean": 10.559354782104492, "beta_dpo/gap_std": 18.345184326171875, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9251700680272109, "grad_norm": 45.5057258605957, "learning_rate": 8.690495320571839e-09, "logits/chosen": 0.7239348292350769, "logits/rejected": 0.553448498249054, "loss": 1.0332, "step": 612 }, { "beta_dpo/beta_used": 0.44979095458984375, "beta_dpo/beta_used_raw": 0.44979095458984375, "beta_dpo/gap_mean": 11.126469612121582, "beta_dpo/gap_std": 18.617183685302734, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.926681783824641, "grad_norm": 74.70843505859375, "learning_rate": 8.348280226706722e-09, "logits/chosen": 0.8377181887626648, "logits/rejected": 0.8131814002990723, "loss": 0.5414, "step": 613 }, { "beta_dpo/beta_used": 0.15282967686653137, "beta_dpo/beta_used_raw": 0.15282967686653137, "beta_dpo/gap_mean": 11.11674690246582, "beta_dpo/gap_std": 18.060022354125977, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9281934996220711, "grad_norm": 37.34785842895508, "learning_rate": 8.012824650910937e-09, "logits/chosen": 2.0333778858184814, "logits/rejected": 1.645104169845581, "loss": 0.8374, "step": 614 }, { "beta_dpo/beta_used": 0.09008196741342545, "beta_dpo/beta_used_raw": 0.08506174385547638, "beta_dpo/gap_mean": 11.391464233398438, "beta_dpo/gap_std": 18.11256980895996, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9297052154195011, "grad_norm": 30.425193786621094, "learning_rate": 7.684137976598088e-09, "logits/chosen": 1.6812443733215332, "logits/rejected": 1.6477621793746948, "loss": 1.2128, "step": 615 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07090554386377335, "beta_dpo/gap_mean": 11.225221633911133, "beta_dpo/gap_std": 18.369062423706055, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9312169312169312, "grad_norm": 0.36726945638656616, "learning_rate": 7.36222939784098e-09, "logits/chosen": 1.408949851989746, "logits/rejected": 1.2780832052230835, "loss": 1.3783, "step": 616 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.047676198184490204, "beta_dpo/gap_mean": 10.978787422180176, "beta_dpo/gap_std": 18.129138946533203, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9327286470143613, "grad_norm": 0.35759875178337097, "learning_rate": 7.047107919114586e-09, "logits/chosen": 1.7381243705749512, "logits/rejected": 1.6514561176300049, "loss": 1.3779, "step": 617 }, { "beta_dpo/beta_used": 0.048610154539346695, "beta_dpo/beta_used_raw": 0.02857622131705284, "beta_dpo/gap_mean": 10.871879577636719, "beta_dpo/gap_std": 18.12830352783203, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9342403628117913, "grad_norm": 19.95886993408203, "learning_rate": 6.738782355044048e-09, "logits/chosen": 1.4191548824310303, "logits/rejected": 1.0452890396118164, "loss": 1.1654, "step": 618 }, { "beta_dpo/beta_used": 0.24798354506492615, "beta_dpo/beta_used_raw": 0.24798354506492615, "beta_dpo/gap_mean": 11.172018051147461, "beta_dpo/gap_std": 18.40379524230957, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9357520786092215, "grad_norm": 59.49594497680664, "learning_rate": 6.437261330158206e-09, "logits/chosen": 1.8096110820770264, "logits/rejected": 1.7222764492034912, "loss": 0.9241, "step": 619 }, { "beta_dpo/beta_used": 0.10477973520755768, "beta_dpo/beta_used_raw": 0.04265592247247696, "beta_dpo/gap_mean": 10.613101959228516, "beta_dpo/gap_std": 18.306976318359375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9372637944066515, "grad_norm": 33.00358581542969, "learning_rate": 6.142553278648238e-09, "logits/chosen": 1.4938249588012695, "logits/rejected": 1.4289090633392334, "loss": 1.0332, "step": 620 }, { "beta_dpo/beta_used": 0.02596830204129219, "beta_dpo/beta_used_raw": 0.004421204328536987, "beta_dpo/gap_mean": 10.533326148986816, "beta_dpo/gap_std": 18.14520263671875, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.9387755102040817, "grad_norm": 8.95301342010498, "learning_rate": 5.854666444131934e-09, "logits/chosen": 1.3044579029083252, "logits/rejected": 1.5364813804626465, "loss": 1.2235, "step": 621 }, { "beta_dpo/beta_used": 0.303781121969223, "beta_dpo/beta_used_raw": 0.303781121969223, "beta_dpo/gap_mean": 10.607638359069824, "beta_dpo/gap_std": 17.87635040283203, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9402872260015117, "grad_norm": 52.88166046142578, "learning_rate": 5.573608879422875e-09, "logits/chosen": 1.4276492595672607, "logits/rejected": 1.4481043815612793, "loss": 0.6664, "step": 622 }, { "beta_dpo/beta_used": 0.08703246712684631, "beta_dpo/beta_used_raw": 0.06431964039802551, "beta_dpo/gap_mean": 10.673839569091797, "beta_dpo/gap_std": 17.585182189941406, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9417989417989417, "grad_norm": 20.131181716918945, "learning_rate": 5.299388446305342e-09, "logits/chosen": 1.686722993850708, "logits/rejected": 1.3298940658569336, "loss": 0.9207, "step": 623 }, { "beta_dpo/beta_used": 0.2075013518333435, "beta_dpo/beta_used_raw": 0.2075013518333435, "beta_dpo/gap_mean": 11.12516975402832, "beta_dpo/gap_std": 17.868022918701172, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9433106575963719, "grad_norm": 44.760963439941406, "learning_rate": 5.03201281531429e-09, "logits/chosen": 1.1061675548553467, "logits/rejected": 1.0347222089767456, "loss": 0.9306, "step": 624 }, { "beta_dpo/beta_used": 0.0016511206049472094, "beta_dpo/beta_used_raw": -0.07338549196720123, "beta_dpo/gap_mean": 10.489020347595215, "beta_dpo/gap_std": 17.979766845703125, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.9448223733938019, "grad_norm": 0.6475630402565002, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 1.7902281284332275, "logits/rejected": 1.5662107467651367, "loss": 1.3732, "step": 625 }, { "beta_dpo/beta_used": 0.11615270376205444, "beta_dpo/beta_used_raw": 0.046706706285476685, "beta_dpo/gap_mean": 10.757328987121582, "beta_dpo/gap_std": 18.39080238342285, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9463340891912321, "grad_norm": 60.5959587097168, "learning_rate": 4.517825684323323e-09, "logits/chosen": 2.184417486190796, "logits/rejected": 1.984039545059204, "loss": 1.4281, "step": 626 }, { "beta_dpo/beta_used": 0.07429970800876617, "beta_dpo/beta_used_raw": 0.005273900926113129, "beta_dpo/gap_mean": 11.423591613769531, "beta_dpo/gap_std": 18.419429779052734, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9478458049886621, "grad_norm": 16.982189178466797, "learning_rate": 4.271028567242818e-09, "logits/chosen": 1.4129197597503662, "logits/rejected": 1.0711102485656738, "loss": 0.9599, "step": 627 }, { "beta_dpo/beta_used": 0.342271089553833, "beta_dpo/beta_used_raw": 0.342271089553833, "beta_dpo/gap_mean": 11.820939064025879, "beta_dpo/gap_std": 19.3472957611084, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.9493575207860923, "grad_norm": 105.21951293945312, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 1.5985822677612305, "logits/rejected": 1.7508151531219482, "loss": 1.1523, "step": 628 }, { "beta_dpo/beta_used": 0.012321592308580875, "beta_dpo/beta_used_raw": -0.017206501215696335, "beta_dpo/gap_mean": 12.163355827331543, "beta_dpo/gap_std": 18.93172836303711, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9508692365835223, "grad_norm": 4.553313255310059, "learning_rate": 3.798061746947995e-09, "logits/chosen": 1.690114974975586, "logits/rejected": 1.825777530670166, "loss": 1.2731, "step": 629 }, { "beta_dpo/beta_used": 0.11094523966312408, "beta_dpo/beta_used_raw": 0.11094523966312408, "beta_dpo/gap_mean": 12.257745742797852, "beta_dpo/gap_std": 18.752323150634766, "beta_dpo/mask_keep_frac": 0.4375, "epoch": 0.9523809523809523, "grad_norm": 21.555618286132812, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 1.6641654968261719, "logits/rejected": 1.4665703773498535, "loss": 0.8095, "step": 630 }, { "beta_dpo/beta_used": 0.12940296530723572, "beta_dpo/beta_used_raw": 0.12940296530723572, "beta_dpo/gap_mean": 12.77896499633789, "beta_dpo/gap_std": 18.95907974243164, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9538926681783825, "grad_norm": 33.42890548706055, "learning_rate": 3.352641923861144e-09, "logits/chosen": 2.0447068214416504, "logits/rejected": 1.8289787769317627, "loss": 0.9098, "step": 631 }, { "beta_dpo/beta_used": 0.24121464788913727, "beta_dpo/beta_used_raw": 0.24121464788913727, "beta_dpo/gap_mean": 12.694576263427734, "beta_dpo/gap_std": 19.064842224121094, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9554043839758125, "grad_norm": 85.89191436767578, "learning_rate": 3.140277830901428e-09, "logits/chosen": 2.1072683334350586, "logits/rejected": 1.948132038116455, "loss": 1.1471, "step": 632 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03531520441174507, "beta_dpo/gap_mean": 12.826020240783691, "beta_dpo/gap_std": 19.142948150634766, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9569160997732427, "grad_norm": 0.4302322268486023, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 1.1234136819839478, "logits/rejected": 0.9836254715919495, "loss": 1.3753, "step": 633 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.29016977548599243, "beta_dpo/gap_mean": 11.909151077270508, "beta_dpo/gap_std": 19.207143783569336, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9584278155706727, "grad_norm": 0.31137824058532715, "learning_rate": 2.736270983384276e-09, "logits/chosen": 1.444591999053955, "logits/rejected": 1.4112733602523804, "loss": 1.3815, "step": 634 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12229815125465393, "beta_dpo/gap_mean": 11.218611717224121, "beta_dpo/gap_std": 18.949787139892578, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.9599395313681028, "grad_norm": 0.361990749835968, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 1.7448804378509521, "logits/rejected": 1.665621280670166, "loss": 1.3786, "step": 635 }, { "beta_dpo/beta_used": 0.21137313544750214, "beta_dpo/beta_used_raw": 0.21137313544750214, "beta_dpo/gap_mean": 11.505717277526855, "beta_dpo/gap_std": 18.636005401611328, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9614512471655329, "grad_norm": 66.32644653320312, "learning_rate": 2.359929934524829e-09, "logits/chosen": 1.6981236934661865, "logits/rejected": 1.3314778804779053, "loss": 0.9321, "step": 636 }, { "beta_dpo/beta_used": 0.017410093918442726, "beta_dpo/beta_used_raw": -0.047742683440446854, "beta_dpo/gap_mean": 11.64794921875, "beta_dpo/gap_std": 18.616622924804688, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9629629629629629, "grad_norm": 5.9650092124938965, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 1.6464264392852783, "logits/rejected": 1.3672943115234375, "loss": 1.2547, "step": 637 }, { "beta_dpo/beta_used": 0.11326654255390167, "beta_dpo/beta_used_raw": 0.11326654255390167, "beta_dpo/gap_mean": 11.702375411987305, "beta_dpo/gap_std": 18.63088607788086, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9644746787603931, "grad_norm": 34.071075439453125, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 0.9265055656433105, "logits/rejected": 0.8722898364067078, "loss": 0.888, "step": 638 }, { "beta_dpo/beta_used": 0.09828314930200577, "beta_dpo/beta_used_raw": 0.07606379687786102, "beta_dpo/gap_mean": 11.771888732910156, "beta_dpo/gap_std": 18.478546142578125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9659863945578231, "grad_norm": 26.12000274658203, "learning_rate": 1.847382997337943e-09, "logits/chosen": 1.293177604675293, "logits/rejected": 0.9671316146850586, "loss": 1.0725, "step": 639 }, { "beta_dpo/beta_used": 0.08824127167463303, "beta_dpo/beta_used_raw": 0.05229192599654198, "beta_dpo/gap_mean": 11.435354232788086, "beta_dpo/gap_std": 18.631763458251953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9674981103552532, "grad_norm": 26.251401901245117, "learning_rate": 1.690410564514244e-09, "logits/chosen": 1.726629376411438, "logits/rejected": 1.3301911354064941, "loss": 1.0781, "step": 640 }, { "beta_dpo/beta_used": 0.09193282574415207, "beta_dpo/beta_used_raw": 0.03977712616324425, "beta_dpo/gap_mean": 11.201648712158203, "beta_dpo/gap_std": 18.50255584716797, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9690098261526833, "grad_norm": 32.0823860168457, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 1.4812743663787842, "logits/rejected": 1.6599016189575195, "loss": 1.0413, "step": 641 }, { "beta_dpo/beta_used": 0.040066223591566086, "beta_dpo/beta_used_raw": -0.04489295557141304, "beta_dpo/gap_mean": 11.188066482543945, "beta_dpo/gap_std": 18.518863677978516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9705215419501134, "grad_norm": 13.173298835754395, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 1.4061027765274048, "logits/rejected": 1.4609074592590332, "loss": 1.1875, "step": 642 }, { "beta_dpo/beta_used": 0.10887844115495682, "beta_dpo/beta_used_raw": 0.10887844115495682, "beta_dpo/gap_mean": 11.167619705200195, "beta_dpo/gap_std": 18.79291534423828, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9720332577475435, "grad_norm": 31.67186164855957, "learning_rate": 1.261184375888541e-09, "logits/chosen": 1.539642572402954, "logits/rejected": 1.2710895538330078, "loss": 1.042, "step": 643 }, { "beta_dpo/beta_used": 0.20992937684059143, "beta_dpo/beta_used_raw": 0.20992937684059143, "beta_dpo/gap_mean": 10.766650199890137, "beta_dpo/gap_std": 19.097431182861328, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9735449735449735, "grad_norm": 53.034210205078125, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 1.723166584968567, "logits/rejected": 1.595113754272461, "loss": 1.1572, "step": 644 }, { "beta_dpo/beta_used": 0.2538291811943054, "beta_dpo/beta_used_raw": 0.2538291811943054, "beta_dpo/gap_mean": 11.259082794189453, "beta_dpo/gap_std": 18.895366668701172, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9750566893424036, "grad_norm": 50.3277702331543, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 1.4872803688049316, "logits/rejected": 1.5161464214324951, "loss": 0.6968, "step": 645 }, { "beta_dpo/beta_used": 0.02309798076748848, "beta_dpo/beta_used_raw": -0.013568395748734474, "beta_dpo/gap_mean": 11.416717529296875, "beta_dpo/gap_std": 18.746578216552734, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9765684051398337, "grad_norm": 8.568411827087402, "learning_rate": 8.945768539031783e-10, "logits/chosen": 1.5456962585449219, "logits/rejected": 1.2485579252243042, "loss": 1.1993, "step": 646 }, { "beta_dpo/beta_used": 0.26212525367736816, "beta_dpo/beta_used_raw": 0.26212525367736816, "beta_dpo/gap_mean": 12.01045036315918, "beta_dpo/gap_std": 18.542598724365234, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9780801209372638, "grad_norm": 39.779212951660156, "learning_rate": 7.863060120144316e-10, "logits/chosen": 1.6833382844924927, "logits/rejected": 1.4121618270874023, "loss": 0.4523, "step": 647 }, { "beta_dpo/beta_used": 0.12411724776029587, "beta_dpo/beta_used_raw": 0.1003761738538742, "beta_dpo/gap_mean": 12.072593688964844, "beta_dpo/gap_std": 18.08414077758789, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9795918367346939, "grad_norm": 34.08300018310547, "learning_rate": 6.850062128694045e-10, "logits/chosen": 1.20827054977417, "logits/rejected": 1.0931397676467896, "loss": 0.9287, "step": 648 }, { "beta_dpo/beta_used": 0.2505728006362915, "beta_dpo/beta_used_raw": 0.2505728006362915, "beta_dpo/gap_mean": 12.009123802185059, "beta_dpo/gap_std": 18.238069534301758, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.981103552532124, "grad_norm": 70.92848205566406, "learning_rate": 5.906802900412788e-10, "logits/chosen": 1.342724084854126, "logits/rejected": 1.1755287647247314, "loss": 0.704, "step": 649 }, { "beta_dpo/beta_used": 0.23393958806991577, "beta_dpo/beta_used_raw": 0.23393958806991577, "beta_dpo/gap_mean": 11.982925415039062, "beta_dpo/gap_std": 18.271129608154297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.982615268329554, "grad_norm": 65.76042938232422, "learning_rate": 5.033308820289184e-10, "logits/chosen": 1.873583197593689, "logits/rejected": 1.529442310333252, "loss": 1.1006, "step": 650 }, { "beta_dpo/beta_used": 0.011995721608400345, "beta_dpo/beta_used_raw": -0.07203161716461182, "beta_dpo/gap_mean": 11.841169357299805, "beta_dpo/gap_std": 17.955848693847656, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9841269841269841, "grad_norm": 6.071944713592529, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 1.3133518695831299, "logits/rejected": 1.2405388355255127, "loss": 1.2771, "step": 651 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07919080555438995, "beta_dpo/gap_mean": 11.827400207519531, "beta_dpo/gap_std": 18.362974166870117, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9856386999244142, "grad_norm": 0.3452511429786682, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 1.775701642036438, "logits/rejected": 1.7335072755813599, "loss": 1.3775, "step": 652 }, { "beta_dpo/beta_used": 0.11402089893817902, "beta_dpo/beta_used_raw": 0.11402089893817902, "beta_dpo/gap_mean": 11.791513442993164, "beta_dpo/gap_std": 18.576650619506836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9871504157218443, "grad_norm": 22.66193962097168, "learning_rate": 2.831652042480093e-10, "logits/chosen": 1.2139712572097778, "logits/rejected": 1.2871928215026855, "loss": 0.8905, "step": 653 }, { "beta_dpo/beta_used": 0.09395039081573486, "beta_dpo/beta_used_raw": -0.025495566427707672, "beta_dpo/gap_mean": 11.91246223449707, "beta_dpo/gap_std": 18.69017791748047, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9886621315192744, "grad_norm": 33.95069885253906, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 1.3610148429870605, "logits/rejected": 1.0752047300338745, "loss": 1.0777, "step": 654 }, { "beta_dpo/beta_used": 0.01329093612730503, "beta_dpo/beta_used_raw": -0.07511409372091293, "beta_dpo/gap_mean": 11.890439987182617, "beta_dpo/gap_std": 18.40912628173828, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.9901738473167044, "grad_norm": 4.47454309463501, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 1.2066938877105713, "logits/rejected": 1.2044000625610352, "loss": 1.2717, "step": 655 }, { "beta_dpo/beta_used": 0.13312599062919617, "beta_dpo/beta_used_raw": 0.13312599062919617, "beta_dpo/gap_mean": 11.72813892364502, "beta_dpo/gap_std": 18.383424758911133, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.9916855631141346, "grad_norm": 26.90215301513672, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 1.0210485458374023, "logits/rejected": 1.2208218574523926, "loss": 0.7741, "step": 656 }, { "beta_dpo/beta_used": 0.2219039648771286, "beta_dpo/beta_used_raw": 0.2219039648771286, "beta_dpo/gap_mean": 12.457931518554688, "beta_dpo/gap_std": 18.564159393310547, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9931972789115646, "grad_norm": 55.28135681152344, "learning_rate": 8.740807750345913e-11, "logits/chosen": 1.7017799615859985, "logits/rejected": 1.3097267150878906, "loss": 0.6586, "step": 657 }, { "beta_dpo/beta_used": 0.10924780368804932, "beta_dpo/beta_used_raw": 0.10924780368804932, "beta_dpo/gap_mean": 12.133644104003906, "beta_dpo/gap_std": 18.873802185058594, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9947089947089947, "grad_norm": 25.044599533081055, "learning_rate": 5.594234322453539e-11, "logits/chosen": 1.8211889266967773, "logits/rejected": 1.6856117248535156, "loss": 0.8992, "step": 658 }, { "beta_dpo/beta_used": 0.03363973647356033, "beta_dpo/beta_used_raw": -0.06083240360021591, "beta_dpo/gap_mean": 11.746139526367188, "beta_dpo/gap_std": 19.16648292541504, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9962207105064248, "grad_norm": 10.613242149353027, "learning_rate": 3.146808153123293e-11, "logits/chosen": 1.6447190046310425, "logits/rejected": 1.2653954029083252, "loss": 1.1776, "step": 659 }, { "beta_dpo/beta_used": 0.3293432593345642, "beta_dpo/beta_used_raw": 0.3293432593345642, "beta_dpo/gap_mean": 11.700296401977539, "beta_dpo/gap_std": 18.658584594726562, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9977324263038548, "grad_norm": 89.82917022705078, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 1.4930264949798584, "logits/rejected": 1.5581485033035278, "loss": 1.0381, "step": 660 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07245530188083649, "beta_dpo/gap_mean": 11.2869873046875, "beta_dpo/gap_std": 18.77579689025879, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.999244142101285, "grad_norm": 0.3315927982330322, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 1.457210898399353, "logits/rejected": 1.4589345455169678, "loss": 1.3771, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.1663504292943294, "train_runtime": 3087.1314, "train_samples_per_second": 13.714, "train_steps_per_second": 0.214 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }