{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 100, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta": 0.1015101969242096, "beta_dpo/beta_margin_grad_mean": -0.5000330209732056, "beta_dpo/beta_margin_grad_std": 0.006504404824227095, "beta_dpo/beta_margin_mean": -0.0001328021171502769, "beta_dpo/beta_margin_std": 0.02602243982255459, "beta_dpo/beta_used": 0.1015101969242096, "beta_dpo/beta_used_raw": 0.1015101969242096, "beta_dpo/gap_mean": 0.00012409687042236328, "beta_dpo/gap_std": 0.03724822774529457, "beta_dpo/loss_margin_mean": -0.0013527870178222656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0015117157974300832, "grad_norm": 31.01732063293457, "learning_rate": 0.0, "logits/chosen": 0.13337239623069763, "logits/rejected": 0.12492948770523071, "loss": 1.3838, "step": 1 }, { "beta_dpo/beta": 0.1012001484632492, "beta_dpo/beta_margin_grad_mean": -0.49902671575546265, "beta_dpo/beta_margin_grad_std": 0.007323236670345068, "beta_dpo/beta_margin_mean": 0.0038939444348216057, "beta_dpo/beta_margin_std": 0.029300615191459656, "beta_dpo/beta_used": 0.1012001484632492, "beta_dpo/beta_used_raw": 0.1012001484632492, "beta_dpo/gap_mean": 0.0026843957602977753, "beta_dpo/gap_std": 0.08112463355064392, "beta_dpo/loss_margin_mean": 0.03744968771934509, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0030234315948601664, "grad_norm": 31.891300201416016, "learning_rate": 7.462686567164179e-09, "logits/chosen": 0.09414851665496826, "logits/rejected": 0.07363267242908478, "loss": 1.384, "step": 2 }, { "beta_dpo/beta": 0.10067637264728546, "beta_dpo/beta_margin_grad_mean": -0.500410258769989, "beta_dpo/beta_margin_grad_std": 0.009178511798381805, "beta_dpo/beta_margin_mean": -0.0016434545395895839, "beta_dpo/beta_margin_std": 0.03673430159687996, "beta_dpo/beta_used": 0.10067637264728546, "beta_dpo/beta_used_raw": 0.10067637264728546, "beta_dpo/gap_mean": 0.00046056427527219057, "beta_dpo/gap_std": 0.1343999207019806, "beta_dpo/loss_margin_mean": -0.016742348670959473, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0045351473922902496, "grad_norm": 32.93205261230469, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 0.09402679651975632, "logits/rejected": 0.056407660245895386, "loss": 1.3852, "step": 3 }, { "beta_dpo/beta": 0.10021987557411194, "beta_dpo/beta_margin_grad_mean": -0.5003681182861328, "beta_dpo/beta_margin_grad_std": 0.006939771119505167, "beta_dpo/beta_margin_mean": -0.0014724871143698692, "beta_dpo/beta_margin_std": 0.027768485248088837, "beta_dpo/beta_used": 0.10021987557411194, "beta_dpo/beta_used_raw": 0.10021987557411194, "beta_dpo/gap_mean": -0.0026244998443871737, "beta_dpo/gap_std": 0.1629766970872879, "beta_dpo/loss_margin_mean": -0.01646369695663452, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.006046863189720333, "grad_norm": 38.14013671875, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 0.10362987220287323, "logits/rejected": 0.0876229926943779, "loss": 1.3862, "step": 4 }, { "beta_dpo/beta": 0.0995863676071167, "beta_dpo/beta_margin_grad_mean": -0.5007702112197876, "beta_dpo/beta_margin_grad_std": 0.007658351678401232, "beta_dpo/beta_margin_mean": -0.003081433940678835, "beta_dpo/beta_margin_std": 0.030642056837677956, "beta_dpo/beta_used": 0.0995863676071167, "beta_dpo/beta_used_raw": 0.0995863676071167, "beta_dpo/gap_mean": -0.004291285760700703, "beta_dpo/gap_std": 0.18804097175598145, "beta_dpo/loss_margin_mean": -0.03093475103378296, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.007558578987150416, "grad_norm": 31.054513931274414, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.04905558377504349, "logits/rejected": 0.011664441786706448, "loss": 1.3875, "step": 5 }, { "beta_dpo/beta": 0.1000620573759079, "beta_dpo/beta_margin_grad_mean": -0.5015894174575806, "beta_dpo/beta_margin_grad_std": 0.007443729788064957, "beta_dpo/beta_margin_mean": -0.006359330844134092, "beta_dpo/beta_margin_std": 0.029781756922602654, "beta_dpo/beta_used": 0.1000620573759079, "beta_dpo/beta_used_raw": 0.1000620573759079, "beta_dpo/gap_mean": -0.012156388722360134, "beta_dpo/gap_std": 0.21093741059303284, "beta_dpo/loss_margin_mean": -0.06457433104515076, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.009070294784580499, "grad_norm": 32.63312911987305, "learning_rate": 3.731343283582089e-08, "logits/chosen": 0.12783631682395935, "logits/rejected": 0.08931756764650345, "loss": 1.3874, "step": 6 }, { "beta_dpo/beta": 0.10030151158571243, "beta_dpo/beta_margin_grad_mean": -0.500143826007843, "beta_dpo/beta_margin_grad_std": 0.005725794937461615, "beta_dpo/beta_margin_mean": -0.0005755546153523028, "beta_dpo/beta_margin_std": 0.02290569618344307, "beta_dpo/beta_used": 0.10030151158571243, "beta_dpo/beta_used_raw": 0.10030151158571243, "beta_dpo/gap_mean": -0.013967369683086872, "beta_dpo/gap_std": 0.21781358122825623, "beta_dpo/loss_margin_mean": -0.005991309881210327, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.010582010582010581, "grad_norm": 29.61240005493164, "learning_rate": 4.477611940298507e-08, "logits/chosen": 0.007831894792616367, "logits/rejected": -0.035432279109954834, "loss": 1.3873, "step": 7 }, { "beta_dpo/beta": 0.09883703291416168, "beta_dpo/beta_margin_grad_mean": -0.4992273151874542, "beta_dpo/beta_margin_grad_std": 0.00927159283310175, "beta_dpo/beta_margin_mean": 0.003093534614890814, "beta_dpo/beta_margin_std": 0.03710191324353218, "beta_dpo/beta_used": 0.09883703291416168, "beta_dpo/beta_used_raw": 0.09883703291416168, "beta_dpo/gap_mean": -0.009043242782354355, "beta_dpo/gap_std": 0.24341917037963867, "beta_dpo/loss_margin_mean": 0.031346142292022705, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.012093726379440665, "grad_norm": 30.12750244140625, "learning_rate": 5.223880597014925e-08, "logits/chosen": 0.08428698778152466, "logits/rejected": 0.061857692897319794, "loss": 1.3892, "step": 8 }, { "beta_dpo/beta": 0.10056120157241821, "beta_dpo/beta_margin_grad_mean": -0.49964460730552673, "beta_dpo/beta_margin_grad_std": 0.00834520161151886, "beta_dpo/beta_margin_mean": 0.0014211406232789159, "beta_dpo/beta_margin_std": 0.033395376056432724, "beta_dpo/beta_used": 0.10056120157241821, "beta_dpo/beta_used_raw": 0.10056120157241821, "beta_dpo/gap_mean": -0.0020596692338585854, "beta_dpo/gap_std": 0.26306188106536865, "beta_dpo/loss_margin_mean": 0.01439550518989563, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.013605442176870748, "grad_norm": 34.624351501464844, "learning_rate": 5.970149253731343e-08, "logits/chosen": 0.15184298157691956, "logits/rejected": 0.09390458464622498, "loss": 1.3857, "step": 9 }, { "beta_dpo/beta": 0.10067851096391678, "beta_dpo/beta_margin_grad_mean": -0.5004361867904663, "beta_dpo/beta_margin_grad_std": 0.009121380746364594, "beta_dpo/beta_margin_mean": -0.0017426289850845933, "beta_dpo/beta_margin_std": 0.036507148295640945, "beta_dpo/beta_used": 0.10067851096391678, "beta_dpo/beta_used_raw": 0.10067851096391678, "beta_dpo/gap_mean": -0.00195028493180871, "beta_dpo/gap_std": 0.2801288366317749, "beta_dpo/loss_margin_mean": -0.01787543296813965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.015117157974300832, "grad_norm": 30.904678344726562, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.1457955241203308, "logits/rejected": 0.1140444278717041, "loss": 1.3855, "step": 10 }, { "beta_dpo/beta": 0.09911488741636276, "beta_dpo/beta_margin_grad_mean": -0.4999541640281677, "beta_dpo/beta_margin_grad_std": 0.008431542664766312, "beta_dpo/beta_margin_mean": 0.00018064059258904308, "beta_dpo/beta_margin_std": 0.03374219313263893, "beta_dpo/beta_used": 0.09911488741636276, "beta_dpo/beta_used_raw": 0.09911488741636276, "beta_dpo/gap_mean": -0.0015360511606559157, "beta_dpo/gap_std": 0.28815028071403503, "beta_dpo/loss_margin_mean": 0.0008325278759002686, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.016628873771730914, "grad_norm": 31.81167221069336, "learning_rate": 7.462686567164178e-08, "logits/chosen": 0.09544117748737335, "logits/rejected": 0.0887732282280922, "loss": 1.388, "step": 11 }, { "beta_dpo/beta": 0.10047206282615662, "beta_dpo/beta_margin_grad_mean": -0.5000085830688477, "beta_dpo/beta_margin_grad_std": 0.006006320007145405, "beta_dpo/beta_margin_mean": -3.400599962333217e-05, "beta_dpo/beta_margin_std": 0.02402997761964798, "beta_dpo/beta_used": 0.10047206282615662, "beta_dpo/beta_used_raw": 0.10047206282615662, "beta_dpo/gap_mean": -0.005030278116464615, "beta_dpo/gap_std": 0.2830773591995239, "beta_dpo/loss_margin_mean": -0.00034230947494506836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.018140589569160998, "grad_norm": 33.425296783447266, "learning_rate": 8.208955223880596e-08, "logits/chosen": 0.03307221084833145, "logits/rejected": 0.016807714477181435, "loss": 1.3861, "step": 12 }, { "beta_dpo/beta": 0.09951455891132355, "beta_dpo/beta_margin_grad_mean": -0.5016134977340698, "beta_dpo/beta_margin_grad_std": 0.007461505476385355, "beta_dpo/beta_margin_mean": -0.006455389317125082, "beta_dpo/beta_margin_std": 0.02985336445271969, "beta_dpo/beta_used": 0.09951455891132355, "beta_dpo/beta_used_raw": 0.09951455891132355, "beta_dpo/gap_mean": -0.013505849055945873, "beta_dpo/gap_std": 0.28792649507522583, "beta_dpo/loss_margin_mean": -0.06482848525047302, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.019652305366591082, "grad_norm": 30.061500549316406, "learning_rate": 8.955223880597014e-08, "logits/chosen": 0.10790680348873138, "logits/rejected": 0.045588694512844086, "loss": 1.3886, "step": 13 }, { "beta_dpo/beta": 0.1006593257188797, "beta_dpo/beta_margin_grad_mean": -0.5000956654548645, "beta_dpo/beta_margin_grad_std": 0.009061355143785477, "beta_dpo/beta_margin_mean": -0.0003827259934041649, "beta_dpo/beta_margin_std": 0.03626179322600365, "beta_dpo/beta_used": 0.1006593257188797, "beta_dpo/beta_used_raw": 0.1006593257188797, "beta_dpo/gap_mean": -0.013502835296094418, "beta_dpo/gap_std": 0.29340142011642456, "beta_dpo/loss_margin_mean": -0.004083991050720215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.021164021164021163, "grad_norm": 31.050495147705078, "learning_rate": 9.701492537313432e-08, "logits/chosen": 0.09658454358577728, "logits/rejected": 0.07864398509263992, "loss": 1.3867, "step": 14 }, { "beta_dpo/beta": 0.10151919722557068, "beta_dpo/beta_margin_grad_mean": -0.500207245349884, "beta_dpo/beta_margin_grad_std": 0.00833460595458746, "beta_dpo/beta_margin_mean": -0.0008312489953823388, "beta_dpo/beta_margin_std": 0.03335753455758095, "beta_dpo/beta_used": 0.10151919722557068, "beta_dpo/beta_used_raw": 0.10151919722557068, "beta_dpo/gap_mean": -0.013929645530879498, "beta_dpo/gap_std": 0.30176180601119995, "beta_dpo/loss_margin_mean": -0.008538007736206055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.022675736961451247, "grad_norm": 38.498233795166016, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.05931752920150757, "logits/rejected": 0.01677715964615345, "loss": 1.3852, "step": 15 }, { "beta_dpo/beta": 0.1004377156496048, "beta_dpo/beta_margin_grad_mean": -0.4998069405555725, "beta_dpo/beta_margin_grad_std": 0.006501946598291397, "beta_dpo/beta_margin_mean": 0.0007726156036369503, "beta_dpo/beta_margin_std": 0.02601255662739277, "beta_dpo/beta_used": 0.1004377156496048, "beta_dpo/beta_used_raw": 0.1004377156496048, "beta_dpo/gap_mean": -0.011293605901300907, "beta_dpo/gap_std": 0.2983771562576294, "beta_dpo/loss_margin_mean": 0.007234424352645874, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02418745275888133, "grad_norm": 28.71015739440918, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 0.13399405777454376, "logits/rejected": 0.09560250490903854, "loss": 1.3868, "step": 16 }, { "beta_dpo/beta": 0.10274288803339005, "beta_dpo/beta_margin_grad_mean": -0.5008566379547119, "beta_dpo/beta_margin_grad_std": 0.008705493062734604, "beta_dpo/beta_margin_mean": -0.0034312044735997915, "beta_dpo/beta_margin_std": 0.034844666719436646, "beta_dpo/beta_used": 0.10274288803339005, "beta_dpo/beta_used_raw": 0.10274288803339005, "beta_dpo/gap_mean": -0.011791063472628593, "beta_dpo/gap_std": 0.30600911378860474, "beta_dpo/loss_margin_mean": -0.03330141305923462, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.025699168556311415, "grad_norm": 36.072509765625, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 0.06134633719921112, "logits/rejected": 0.04228682443499565, "loss": 1.3829, "step": 17 }, { "beta_dpo/beta": 0.10039804875850677, "beta_dpo/beta_margin_grad_mean": -0.49987974762916565, "beta_dpo/beta_margin_grad_std": 0.006708572618663311, "beta_dpo/beta_margin_mean": 0.0004805707139894366, "beta_dpo/beta_margin_std": 0.026840215548872948, "beta_dpo/beta_used": 0.10039804875850677, "beta_dpo/beta_used_raw": 0.10039804875850677, "beta_dpo/gap_mean": -0.013461226597428322, "beta_dpo/gap_std": 0.29960107803344727, "beta_dpo/loss_margin_mean": 0.003337264060974121, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.027210884353741496, "grad_norm": 30.88655662536621, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 0.10294324904680252, "logits/rejected": 0.05656662583351135, "loss": 1.3869, "step": 18 }, { "beta_dpo/beta": 0.10079077631235123, "beta_dpo/beta_margin_grad_mean": -0.49877893924713135, "beta_dpo/beta_margin_grad_std": 0.008953831158578396, "beta_dpo/beta_margin_mean": 0.00488430866971612, "beta_dpo/beta_margin_std": 0.03583861514925957, "beta_dpo/beta_used": 0.10079077631235123, "beta_dpo/beta_used_raw": 0.10079077631235123, "beta_dpo/gap_mean": -0.003621111623942852, "beta_dpo/gap_std": 0.3067810535430908, "beta_dpo/loss_margin_mean": 0.049518659710884094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02872260015117158, "grad_norm": 30.9554386138916, "learning_rate": 1.343283582089552e-07, "logits/chosen": 0.08978766202926636, "logits/rejected": 0.07586466521024704, "loss": 1.3854, "step": 19 }, { "beta_dpo/beta": 0.09676133096218109, "beta_dpo/beta_margin_grad_mean": -0.5004621148109436, "beta_dpo/beta_margin_grad_std": 0.008276589214801788, "beta_dpo/beta_margin_mean": -0.0018479095306247473, "beta_dpo/beta_margin_std": 0.0331173837184906, "beta_dpo/beta_used": 0.09676133096218109, "beta_dpo/beta_used_raw": 0.09676133096218109, "beta_dpo/gap_mean": -0.002426680875942111, "beta_dpo/gap_std": 0.31554800271987915, "beta_dpo/loss_margin_mean": -0.019725129008293152, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.030234315948601664, "grad_norm": 30.733718872070312, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.13740913569927216, "logits/rejected": 0.11428765952587128, "loss": 1.3918, "step": 20 }, { "beta_dpo/beta": 0.1000765711069107, "beta_dpo/beta_margin_grad_mean": -0.5001194477081299, "beta_dpo/beta_margin_grad_std": 0.00721387006342411, "beta_dpo/beta_margin_mean": -0.0004777438298333436, "beta_dpo/beta_margin_std": 0.028863035142421722, "beta_dpo/beta_used": 0.1000765711069107, "beta_dpo/beta_used_raw": 0.1000765711069107, "beta_dpo/gap_mean": -0.0026407158002257347, "beta_dpo/gap_std": 0.3122379183769226, "beta_dpo/loss_margin_mean": -0.0048004984855651855, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.031746031746031744, "grad_norm": 30.7608699798584, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 0.11362561583518982, "logits/rejected": 0.08779008686542511, "loss": 1.3866, "step": 21 }, { "beta_dpo/beta": 0.10177205502986908, "beta_dpo/beta_margin_grad_mean": -0.49977150559425354, "beta_dpo/beta_margin_grad_std": 0.006581050809472799, "beta_dpo/beta_margin_mean": 0.0009137458400800824, "beta_dpo/beta_margin_std": 0.026328111067414284, "beta_dpo/beta_used": 0.10177205502986908, "beta_dpo/beta_used_raw": 0.10177205502986908, "beta_dpo/gap_mean": -0.0007555157062597573, "beta_dpo/gap_std": 0.30382484197616577, "beta_dpo/loss_margin_mean": 0.008973121643066406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03325774754346183, "grad_norm": 31.633691787719727, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 0.02553943172097206, "logits/rejected": 0.0035544242709875107, "loss": 1.3835, "step": 22 }, { "beta_dpo/beta": 0.10188088566064835, "beta_dpo/beta_margin_grad_mean": -0.4981038272380829, "beta_dpo/beta_margin_grad_std": 0.008175314404070377, "beta_dpo/beta_margin_mean": 0.007585472427308559, "beta_dpo/beta_margin_std": 0.032713212072849274, "beta_dpo/beta_used": 0.10188088566064835, "beta_dpo/beta_used_raw": 0.10188088566064835, "beta_dpo/gap_mean": 0.01067442912608385, "beta_dpo/gap_std": 0.3037213087081909, "beta_dpo/loss_margin_mean": 0.07446223497390747, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03476946334089191, "grad_norm": 35.18233108520508, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 0.14777152240276337, "logits/rejected": 0.12199492752552032, "loss": 1.3821, "step": 23 }, { "beta_dpo/beta": 0.10047954320907593, "beta_dpo/beta_margin_grad_mean": -0.5006471276283264, "beta_dpo/beta_margin_grad_std": 0.006264388095587492, "beta_dpo/beta_margin_mean": -0.0025889482349157333, "beta_dpo/beta_margin_std": 0.025060316547751427, "beta_dpo/beta_used": 0.10047954320907593, "beta_dpo/beta_used_raw": 0.10047954320907593, "beta_dpo/gap_mean": 0.0053078667260706425, "beta_dpo/gap_std": 0.2973956763744354, "beta_dpo/loss_margin_mean": -0.028083205223083496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.036281179138321996, "grad_norm": 29.64365005493164, "learning_rate": 1.716417910447761e-07, "logits/chosen": 0.14769330620765686, "logits/rejected": 0.11441967636346817, "loss": 1.3848, "step": 24 }, { "beta_dpo/beta": 0.09820385277271271, "beta_dpo/beta_margin_grad_mean": -0.49843770265579224, "beta_dpo/beta_margin_grad_std": 0.008783336728811264, "beta_dpo/beta_margin_mean": 0.006256488151848316, "beta_dpo/beta_margin_std": 0.03516627103090286, "beta_dpo/beta_used": 0.09820385277271271, "beta_dpo/beta_used_raw": 0.09820385277271271, "beta_dpo/gap_mean": 0.012951113283634186, "beta_dpo/gap_std": 0.2982524633407593, "beta_dpo/loss_margin_mean": 0.06369102001190186, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03779289493575208, "grad_norm": 32.16172790527344, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.10487603396177292, "logits/rejected": 0.05346622318029404, "loss": 1.388, "step": 25 }, { "beta_dpo/beta": 0.0991971343755722, "beta_dpo/beta_margin_grad_mean": -0.5002507567405701, "beta_dpo/beta_margin_grad_std": 0.008068394847214222, "beta_dpo/beta_margin_mean": -0.0010022378992289305, "beta_dpo/beta_margin_std": 0.03228890150785446, "beta_dpo/beta_used": 0.0991971343755722, "beta_dpo/beta_used_raw": 0.0991971343755722, "beta_dpo/gap_mean": 0.014728277921676636, "beta_dpo/gap_std": 0.3074049949645996, "beta_dpo/loss_margin_mean": -0.010433167219161987, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.039304610733182165, "grad_norm": 30.883846282958984, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 0.12802426517009735, "logits/rejected": 0.1090678796172142, "loss": 1.3862, "step": 26 }, { "beta_dpo/beta": 0.10119029879570007, "beta_dpo/beta_margin_grad_mean": -0.4989292025566101, "beta_dpo/beta_margin_grad_std": 0.007677622605115175, "beta_dpo/beta_margin_mean": 0.004283021669834852, "beta_dpo/beta_margin_std": 0.030718592926859856, "beta_dpo/beta_used": 0.10119029879570007, "beta_dpo/beta_used_raw": 0.10119029879570007, "beta_dpo/gap_mean": 0.018781719729304314, "beta_dpo/gap_std": 0.30886706709861755, "beta_dpo/loss_margin_mean": 0.041504472494125366, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04081632653061224, "grad_norm": 30.726280212402344, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 0.1042436882853508, "logits/rejected": 0.07938185334205627, "loss": 1.3825, "step": 27 }, { "beta_dpo/beta": 0.10044533759355545, "beta_dpo/beta_margin_grad_mean": -0.5000773072242737, "beta_dpo/beta_margin_grad_std": 0.008852328173816204, "beta_dpo/beta_margin_mean": -0.00030975634581409395, "beta_dpo/beta_margin_std": 0.03541991114616394, "beta_dpo/beta_used": 0.10044533759355545, "beta_dpo/beta_used_raw": 0.10044533759355545, "beta_dpo/gap_mean": 0.018769798800349236, "beta_dpo/gap_std": 0.31470662355422974, "beta_dpo/loss_margin_mean": -0.0038602352142333984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.042328042328042326, "grad_norm": 30.597108840942383, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 0.06483221799135208, "logits/rejected": 0.055861227214336395, "loss": 1.3838, "step": 28 }, { "beta_dpo/beta": 0.09717325866222382, "beta_dpo/beta_margin_grad_mean": -0.5017023086547852, "beta_dpo/beta_margin_grad_std": 0.007220883388072252, "beta_dpo/beta_margin_mean": -0.0068104080855846405, "beta_dpo/beta_margin_std": 0.028891343623399734, "beta_dpo/beta_used": 0.09717325866222382, "beta_dpo/beta_used_raw": 0.09717325866222382, "beta_dpo/gap_mean": -0.00046668609138578176, "beta_dpo/gap_std": 0.31308144330978394, "beta_dpo/loss_margin_mean": -0.07001826167106628, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04383975812547241, "grad_norm": 34.134544372558594, "learning_rate": 2.08955223880597e-07, "logits/chosen": 0.1364022195339203, "logits/rejected": 0.11881529539823532, "loss": 1.391, "step": 29 }, { "beta_dpo/beta": 0.09732764959335327, "beta_dpo/beta_margin_grad_mean": -0.5010040998458862, "beta_dpo/beta_margin_grad_std": 0.008385173976421356, "beta_dpo/beta_margin_mean": -0.00401653815060854, "beta_dpo/beta_margin_std": 0.03355298191308975, "beta_dpo/beta_used": 0.09732764959335327, "beta_dpo/beta_used_raw": 0.09732764959335327, "beta_dpo/gap_mean": -0.007357730530202389, "beta_dpo/gap_std": 0.31982719898223877, "beta_dpo/loss_margin_mean": -0.04151433706283569, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.045351473922902494, "grad_norm": 31.0594539642334, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.13864251971244812, "logits/rejected": 0.08308613300323486, "loss": 1.3915, "step": 30 }, { "beta_dpo/beta": 0.1001332476735115, "beta_dpo/beta_margin_grad_mean": -0.4992826581001282, "beta_dpo/beta_margin_grad_std": 0.006594392936676741, "beta_dpo/beta_margin_mean": 0.0028706123121082783, "beta_dpo/beta_margin_std": 0.02638271264731884, "beta_dpo/beta_used": 0.1001332476735115, "beta_dpo/beta_used_raw": 0.1001332476735115, "beta_dpo/gap_mean": -0.00842762179672718, "beta_dpo/gap_std": 0.3111611604690552, "beta_dpo/loss_margin_mean": 0.025867611169815063, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04686318972033258, "grad_norm": 37.38663101196289, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 0.12416146695613861, "logits/rejected": 0.07688345015048981, "loss": 1.3867, "step": 31 }, { "beta_dpo/beta": 0.10006989538669586, "beta_dpo/beta_margin_grad_mean": -0.49960097670555115, "beta_dpo/beta_margin_grad_std": 0.009591592475771904, "beta_dpo/beta_margin_mean": 0.0015974619891494513, "beta_dpo/beta_margin_std": 0.038378216326236725, "beta_dpo/beta_used": 0.10006989538669586, "beta_dpo/beta_used_raw": 0.10006989538669586, "beta_dpo/gap_mean": -0.007976244203746319, "beta_dpo/gap_std": 0.3170028030872345, "beta_dpo/loss_margin_mean": 0.007406115531921387, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04837490551776266, "grad_norm": 33.643096923828125, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 0.05975925922393799, "logits/rejected": 0.05218929052352905, "loss": 1.3866, "step": 32 }, { "beta_dpo/beta": 0.10326246917247772, "beta_dpo/beta_margin_grad_mean": -0.4977298080921173, "beta_dpo/beta_margin_grad_std": 0.008520841598510742, "beta_dpo/beta_margin_mean": 0.009082864038646221, "beta_dpo/beta_margin_std": 0.034096550196409225, "beta_dpo/beta_used": 0.10326246917247772, "beta_dpo/beta_used_raw": 0.10326246917247772, "beta_dpo/gap_mean": 0.010345546528697014, "beta_dpo/gap_std": 0.3211923837661743, "beta_dpo/loss_margin_mean": 0.08736389875411987, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.049886621315192746, "grad_norm": 33.94365310668945, "learning_rate": 2.388059701492537e-07, "logits/chosen": 0.03907015174627304, "logits/rejected": 0.012794176116585732, "loss": 1.3797, "step": 33 }, { "beta_dpo/beta": 0.1025274470448494, "beta_dpo/beta_margin_grad_mean": -0.4979722797870636, "beta_dpo/beta_margin_grad_std": 0.008004739880561829, "beta_dpo/beta_margin_mean": 0.008112696930766106, "beta_dpo/beta_margin_std": 0.032029684633016586, "beta_dpo/beta_used": 0.1025274470448494, "beta_dpo/beta_used_raw": 0.1025274470448494, "beta_dpo/gap_mean": 0.02443467453122139, "beta_dpo/gap_std": 0.3166555166244507, "beta_dpo/loss_margin_mean": 0.07865563035011292, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05139833711262283, "grad_norm": 29.62582015991211, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 0.08650554716587067, "logits/rejected": 0.05725545063614845, "loss": 1.3796, "step": 34 }, { "beta_dpo/beta": 0.09984398633241653, "beta_dpo/beta_margin_grad_mean": -0.4997754693031311, "beta_dpo/beta_margin_grad_std": 0.006669824477285147, "beta_dpo/beta_margin_mean": 0.000898701255209744, "beta_dpo/beta_margin_std": 0.026683101430535316, "beta_dpo/beta_used": 0.09984398633241653, "beta_dpo/beta_used_raw": 0.09984398633241653, "beta_dpo/gap_mean": 0.024442963302135468, "beta_dpo/gap_std": 0.31088265776634216, "beta_dpo/loss_margin_mean": 0.0072622597217559814, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05291005291005291, "grad_norm": 31.378681182861328, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.07245808839797974, "logits/rejected": 0.04945196956396103, "loss": 1.384, "step": 35 }, { "beta_dpo/beta": 0.09638850390911102, "beta_dpo/beta_margin_grad_mean": -0.49970918893814087, "beta_dpo/beta_margin_grad_std": 0.008332891389727592, "beta_dpo/beta_margin_mean": 0.0011656360002234578, "beta_dpo/beta_margin_std": 0.033343665301799774, "beta_dpo/beta_used": 0.09638850390911102, "beta_dpo/beta_used_raw": 0.09638850390911102, "beta_dpo/gap_mean": 0.02393309585750103, "beta_dpo/gap_std": 0.3164275884628296, "beta_dpo/loss_margin_mean": 0.012125849723815918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05442176870748299, "grad_norm": 31.100717544555664, "learning_rate": 2.611940298507462e-07, "logits/chosen": 0.10495918989181519, "logits/rejected": 0.05185706540942192, "loss": 1.3899, "step": 36 }, { "beta_dpo/beta": 0.10284903645515442, "beta_dpo/beta_margin_grad_mean": -0.497913122177124, "beta_dpo/beta_margin_grad_std": 0.008537176996469498, "beta_dpo/beta_margin_mean": 0.00834999606013298, "beta_dpo/beta_margin_std": 0.03416427597403526, "beta_dpo/beta_used": 0.10284903645515442, "beta_dpo/beta_used_raw": 0.10284903645515442, "beta_dpo/gap_mean": 0.02981198951601982, "beta_dpo/gap_std": 0.31800198554992676, "beta_dpo/loss_margin_mean": 0.0813802182674408, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.055933484504913075, "grad_norm": 37.75326919555664, "learning_rate": 2.686567164179104e-07, "logits/chosen": 0.10214823484420776, "logits/rejected": 0.022578764706850052, "loss": 1.3785, "step": 37 }, { "beta_dpo/beta": 0.09838944673538208, "beta_dpo/beta_margin_grad_mean": -0.4993027150630951, "beta_dpo/beta_margin_grad_std": 0.00942269992083311, "beta_dpo/beta_margin_mean": 0.0027929130010306835, "beta_dpo/beta_margin_std": 0.03771368786692619, "beta_dpo/beta_used": 0.09838944673538208, "beta_dpo/beta_used_raw": 0.09838944673538208, "beta_dpo/gap_mean": 0.037786953151226044, "beta_dpo/gap_std": 0.32929813861846924, "beta_dpo/loss_margin_mean": 0.024533838033676147, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05744520030234316, "grad_norm": 31.314617156982422, "learning_rate": 2.761194029850746e-07, "logits/chosen": 0.06979146599769592, "logits/rejected": 0.05586238577961922, "loss": 1.3852, "step": 38 }, { "beta_dpo/beta": 0.09809637069702148, "beta_dpo/beta_margin_grad_mean": -0.4992535710334778, "beta_dpo/beta_margin_grad_std": 0.010051256977021694, "beta_dpo/beta_margin_mean": 0.0029852152802050114, "beta_dpo/beta_margin_std": 0.040224362164735794, "beta_dpo/beta_used": 0.09809637069702148, "beta_dpo/beta_used_raw": 0.09809637069702148, "beta_dpo/gap_mean": 0.029398782178759575, "beta_dpo/gap_std": 0.33756715059280396, "beta_dpo/loss_margin_mean": 0.02950763702392578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05895691609977324, "grad_norm": 29.13448715209961, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 0.16034747660160065, "logits/rejected": 0.1335568130016327, "loss": 1.3866, "step": 39 }, { "beta_dpo/beta": 0.100420743227005, "beta_dpo/beta_margin_grad_mean": -0.499179869890213, "beta_dpo/beta_margin_grad_std": 0.007784340064972639, "beta_dpo/beta_margin_mean": 0.0032807684037834406, "beta_dpo/beta_margin_std": 0.031147774308919907, "beta_dpo/beta_used": 0.100420743227005, "beta_dpo/beta_used_raw": 0.100420743227005, "beta_dpo/gap_mean": 0.03405720740556717, "beta_dpo/gap_std": 0.334450364112854, "beta_dpo/loss_margin_mean": 0.031633391976356506, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06046863189720333, "grad_norm": 35.88178253173828, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.12306384742259979, "logits/rejected": 0.07282181829214096, "loss": 1.3822, "step": 40 }, { "beta_dpo/beta": 0.10087525099515915, "beta_dpo/beta_margin_grad_mean": -0.497925728559494, "beta_dpo/beta_margin_grad_std": 0.008461805991828442, "beta_dpo/beta_margin_mean": 0.008300484158098698, "beta_dpo/beta_margin_std": 0.03385802358388901, "beta_dpo/beta_used": 0.10087525099515915, "beta_dpo/beta_used_raw": 0.10087525099515915, "beta_dpo/gap_mean": 0.036321260035037994, "beta_dpo/gap_std": 0.3385712802410126, "beta_dpo/loss_margin_mean": 0.080975741147995, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06198034769463341, "grad_norm": 35.11070251464844, "learning_rate": 2.985074626865671e-07, "logits/chosen": 0.05649389326572418, "logits/rejected": 0.03625918924808502, "loss": 1.3812, "step": 41 }, { "beta_dpo/beta": 0.09852074086666107, "beta_dpo/beta_margin_grad_mean": -0.4987524747848511, "beta_dpo/beta_margin_grad_std": 0.009575996547937393, "beta_dpo/beta_margin_mean": 0.00499336002394557, "beta_dpo/beta_margin_std": 0.0383220799267292, "beta_dpo/beta_used": 0.09852074086666107, "beta_dpo/beta_used_raw": 0.09852074086666107, "beta_dpo/gap_mean": 0.04520569369196892, "beta_dpo/gap_std": 0.3435010612010956, "beta_dpo/loss_margin_mean": 0.047529637813568115, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06349206349206349, "grad_norm": 31.087583541870117, "learning_rate": 3.059701492537313e-07, "logits/chosen": 0.05465045943856239, "logits/rejected": 0.03304405137896538, "loss": 1.3841, "step": 42 }, { "beta_dpo/beta": 0.10309203714132309, "beta_dpo/beta_margin_grad_mean": -0.49921998381614685, "beta_dpo/beta_margin_grad_std": 0.010388917289674282, "beta_dpo/beta_margin_mean": 0.0031178132630884647, "beta_dpo/beta_margin_std": 0.041573185473680496, "beta_dpo/beta_used": 0.10309203714132309, "beta_dpo/beta_used_raw": 0.10309203714132309, "beta_dpo/gap_mean": 0.04160957783460617, "beta_dpo/gap_std": 0.35253000259399414, "beta_dpo/loss_margin_mean": 0.0297316312789917, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06500377928949358, "grad_norm": 39.25471115112305, "learning_rate": 3.134328358208955e-07, "logits/chosen": 0.06021273136138916, "logits/rejected": 0.0498313382267952, "loss": 1.3769, "step": 43 }, { "beta_dpo/beta": 0.0943903774023056, "beta_dpo/beta_margin_grad_mean": -0.5001733899116516, "beta_dpo/beta_margin_grad_std": 0.009070969186723232, "beta_dpo/beta_margin_mean": -0.0006922496249899268, "beta_dpo/beta_margin_std": 0.036296818405389786, "beta_dpo/beta_used": 0.0943903774023056, "beta_dpo/beta_used_raw": 0.0943903774023056, "beta_dpo/gap_mean": 0.0337846614420414, "beta_dpo/gap_std": 0.36235010623931885, "beta_dpo/loss_margin_mean": -0.0086250901222229, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06651549508692366, "grad_norm": 33.845481872558594, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 0.04900500550866127, "logits/rejected": 0.018543703481554985, "loss": 1.392, "step": 44 }, { "beta_dpo/beta": 0.10028617084026337, "beta_dpo/beta_margin_grad_mean": -0.4976131319999695, "beta_dpo/beta_margin_grad_std": 0.009892971254885197, "beta_dpo/beta_margin_mean": 0.009551994502544403, "beta_dpo/beta_margin_std": 0.03958762809634209, "beta_dpo/beta_used": 0.10028617084026337, "beta_dpo/beta_used_raw": 0.10028617084026337, "beta_dpo/gap_mean": 0.03637174516916275, "beta_dpo/gap_std": 0.36881065368652344, "beta_dpo/loss_margin_mean": 0.09402036666870117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06802721088435375, "grad_norm": 36.0006217956543, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.1208408921957016, "logits/rejected": 0.06802251189947128, "loss": 1.3823, "step": 45 }, { "beta_dpo/beta": 0.09875007718801498, "beta_dpo/beta_margin_grad_mean": -0.4974738359451294, "beta_dpo/beta_margin_grad_std": 0.010846122168004513, "beta_dpo/beta_margin_mean": 0.010113219730556011, "beta_dpo/beta_margin_std": 0.04341111332178116, "beta_dpo/beta_used": 0.09875007718801498, "beta_dpo/beta_used_raw": 0.09875007718801498, "beta_dpo/gap_mean": 0.050506845116615295, "beta_dpo/gap_std": 0.38154229521751404, "beta_dpo/loss_margin_mean": 0.10229560732841492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06953892668178382, "grad_norm": 33.66736602783203, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 0.12831750512123108, "logits/rejected": 0.08227770030498505, "loss": 1.3836, "step": 46 }, { "beta_dpo/beta": 0.10235325992107391, "beta_dpo/beta_margin_grad_mean": -0.49693551659584045, "beta_dpo/beta_margin_grad_std": 0.009976202622056007, "beta_dpo/beta_margin_mean": 0.01226241048425436, "beta_dpo/beta_margin_std": 0.039923615753650665, "beta_dpo/beta_used": 0.10235325992107391, "beta_dpo/beta_used_raw": 0.10235325992107391, "beta_dpo/gap_mean": 0.06335177272558212, "beta_dpo/gap_std": 0.3873853087425232, "beta_dpo/loss_margin_mean": 0.11981740593910217, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0710506424792139, "grad_norm": 29.64479637145996, "learning_rate": 3.432835820895522e-07, "logits/chosen": 0.0135899493470788, "logits/rejected": -0.028094399720430374, "loss": 1.376, "step": 47 }, { "beta_dpo/beta": 0.10037179291248322, "beta_dpo/beta_margin_grad_mean": -0.49941501021385193, "beta_dpo/beta_margin_grad_std": 0.011483574286103249, "beta_dpo/beta_margin_mean": 0.0023423591628670692, "beta_dpo/beta_margin_std": 0.04596313461661339, "beta_dpo/beta_used": 0.10037179291248322, "beta_dpo/beta_used_raw": 0.10037179291248322, "beta_dpo/gap_mean": 0.06056096404790878, "beta_dpo/gap_std": 0.39652693271636963, "beta_dpo/loss_margin_mean": 0.023324549198150635, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07256235827664399, "grad_norm": 28.561792373657227, "learning_rate": 3.507462686567164e-07, "logits/chosen": 0.1474730372428894, "logits/rejected": 0.11770699918270111, "loss": 1.3799, "step": 48 }, { "beta_dpo/beta": 0.10378497838973999, "beta_dpo/beta_margin_grad_mean": -0.497110515832901, "beta_dpo/beta_margin_grad_std": 0.012511258944869041, "beta_dpo/beta_margin_mean": 0.011552227661013603, "beta_dpo/beta_margin_std": 0.05013538524508476, "beta_dpo/beta_used": 0.10378497838973999, "beta_dpo/beta_used_raw": 0.10378497838973999, "beta_dpo/gap_mean": 0.06303433328866959, "beta_dpo/gap_std": 0.4152846336364746, "beta_dpo/loss_margin_mean": 0.11007669568061829, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07407407407407407, "grad_norm": 30.38995933532715, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 0.0344427265226841, "logits/rejected": 0.01285216398537159, "loss": 1.3734, "step": 49 }, { "beta_dpo/beta": 0.1018582433462143, "beta_dpo/beta_margin_grad_mean": -0.4980669319629669, "beta_dpo/beta_margin_grad_std": 0.010723302140831947, "beta_dpo/beta_margin_mean": 0.00773262232542038, "beta_dpo/beta_margin_std": 0.042918216437101364, "beta_dpo/beta_used": 0.1018582433462143, "beta_dpo/beta_used_raw": 0.1018582433462143, "beta_dpo/gap_mean": 0.07021433860063553, "beta_dpo/gap_std": 0.4182177782058716, "beta_dpo/loss_margin_mean": 0.07544875144958496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07558578987150416, "grad_norm": 30.36774253845215, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.11492250859737396, "logits/rejected": 0.08501888066530228, "loss": 1.3761, "step": 50 }, { "beta_dpo/beta": 0.10719744116067886, "beta_dpo/beta_margin_grad_mean": -0.4953405559062958, "beta_dpo/beta_margin_grad_std": 0.012494519352912903, "beta_dpo/beta_margin_mean": 0.018651319667696953, "beta_dpo/beta_margin_std": 0.05001518130302429, "beta_dpo/beta_used": 0.10719744116067886, "beta_dpo/beta_used_raw": 0.10719744116067886, "beta_dpo/gap_mean": 0.08684976398944855, "beta_dpo/gap_std": 0.4211902320384979, "beta_dpo/loss_margin_mean": 0.17378860712051392, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07709750566893424, "grad_norm": 37.90382385253906, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 0.08796243369579315, "logits/rejected": 0.060197874903678894, "loss": 1.3646, "step": 51 }, { "beta_dpo/beta": 0.10072717070579529, "beta_dpo/beta_margin_grad_mean": -0.49583399295806885, "beta_dpo/beta_margin_grad_std": 0.012241682037711143, "beta_dpo/beta_margin_mean": 0.016676129773259163, "beta_dpo/beta_margin_std": 0.049004100263118744, "beta_dpo/beta_used": 0.10072717070579529, "beta_dpo/beta_used_raw": 0.10072717070579529, "beta_dpo/gap_mean": 0.09611259400844574, "beta_dpo/gap_std": 0.43683481216430664, "beta_dpo/loss_margin_mean": 0.1634836196899414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07860922146636433, "grad_norm": 35.590213775634766, "learning_rate": 3.805970149253731e-07, "logits/chosen": 0.07440444082021713, "logits/rejected": 0.025928881019353867, "loss": 1.3756, "step": 52 }, { "beta_dpo/beta": 0.10719523578882217, "beta_dpo/beta_margin_grad_mean": -0.49494311213493347, "beta_dpo/beta_margin_grad_std": 0.012945474125444889, "beta_dpo/beta_margin_mean": 0.020237509161233902, "beta_dpo/beta_margin_std": 0.051830440759658813, "beta_dpo/beta_used": 0.10719523578882217, "beta_dpo/beta_used_raw": 0.10719523578882217, "beta_dpo/gap_mean": 0.11741121858358383, "beta_dpo/gap_std": 0.4450018107891083, "beta_dpo/loss_margin_mean": 0.1888570785522461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0801209372637944, "grad_norm": 30.624479293823242, "learning_rate": 3.880597014925373e-07, "logits/chosen": 0.08777962625026703, "logits/rejected": 0.07702084630727768, "loss": 1.3615, "step": 53 }, { "beta_dpo/beta": 0.10270829498767853, "beta_dpo/beta_margin_grad_mean": -0.49615994095802307, "beta_dpo/beta_margin_grad_std": 0.010877852328121662, "beta_dpo/beta_margin_mean": 0.015367364510893822, "beta_dpo/beta_margin_std": 0.04353713244199753, "beta_dpo/beta_used": 0.10270829498767853, "beta_dpo/beta_used_raw": 0.10270829498767853, "beta_dpo/gap_mean": 0.1302683800458908, "beta_dpo/gap_std": 0.44163817167282104, "beta_dpo/loss_margin_mean": 0.14893117547035217, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08163265306122448, "grad_norm": 29.564790725708008, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 0.05520813167095184, "logits/rejected": 0.039937350898981094, "loss": 1.3686, "step": 54 }, { "beta_dpo/beta": 0.10008847713470459, "beta_dpo/beta_margin_grad_mean": -0.49438750743865967, "beta_dpo/beta_margin_grad_std": 0.01513749547302723, "beta_dpo/beta_margin_mean": 0.022482367232441902, "beta_dpo/beta_margin_std": 0.060637783259153366, "beta_dpo/beta_used": 0.10008847713470459, "beta_dpo/beta_used_raw": 0.10008847713470459, "beta_dpo/gap_mean": 0.13691473007202148, "beta_dpo/gap_std": 0.46075886487960815, "beta_dpo/loss_margin_mean": 0.2233232706785202, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08314436885865457, "grad_norm": 33.72454833984375, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.02177221141755581, "logits/rejected": -0.05276140943169594, "loss": 1.3727, "step": 55 }, { "beta_dpo/beta": 0.10054030269384384, "beta_dpo/beta_margin_grad_mean": -0.495496928691864, "beta_dpo/beta_margin_grad_std": 0.013434402644634247, "beta_dpo/beta_margin_mean": 0.018017152324318886, "beta_dpo/beta_margin_std": 0.05379686877131462, "beta_dpo/beta_used": 0.10054030269384384, "beta_dpo/beta_used_raw": 0.10054030269384384, "beta_dpo/gap_mean": 0.1483607143163681, "beta_dpo/gap_std": 0.48542577028274536, "beta_dpo/loss_margin_mean": 0.17924529314041138, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08465608465608465, "grad_norm": 29.508190155029297, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 0.12270273268222809, "logits/rejected": 0.1003156453371048, "loss": 1.3708, "step": 56 }, { "beta_dpo/beta": 0.10854996740818024, "beta_dpo/beta_margin_grad_mean": -0.49215006828308105, "beta_dpo/beta_margin_grad_std": 0.017352448776364326, "beta_dpo/beta_margin_mean": 0.031447600573301315, "beta_dpo/beta_margin_std": 0.06956035643815994, "beta_dpo/beta_used": 0.10854996740818024, "beta_dpo/beta_used_raw": 0.10854996740818024, "beta_dpo/gap_mean": 0.17608734965324402, "beta_dpo/gap_std": 0.5068370699882507, "beta_dpo/loss_margin_mean": 0.29122018814086914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08616780045351474, "grad_norm": 35.71118927001953, "learning_rate": 4.17910447761194e-07, "logits/chosen": 0.11377115547657013, "logits/rejected": 0.04856497421860695, "loss": 1.3526, "step": 57 }, { "beta_dpo/beta": 0.09856951981782913, "beta_dpo/beta_margin_grad_mean": -0.4949773848056793, "beta_dpo/beta_margin_grad_std": 0.020041456446051598, "beta_dpo/beta_margin_mean": 0.020195821300148964, "beta_dpo/beta_margin_std": 0.08059463649988174, "beta_dpo/beta_used": 0.09856951981782913, "beta_dpo/beta_used_raw": 0.09856951981782913, "beta_dpo/gap_mean": 0.18698576092720032, "beta_dpo/gap_std": 0.5550523996353149, "beta_dpo/loss_margin_mean": 0.1905546486377716, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08767951625094482, "grad_norm": 34.71607971191406, "learning_rate": 4.253731343283582e-07, "logits/chosen": 0.06032850965857506, "logits/rejected": 0.044845663011074066, "loss": 1.3701, "step": 58 }, { "beta_dpo/beta": 0.09670780599117279, "beta_dpo/beta_margin_grad_mean": -0.4935970604419708, "beta_dpo/beta_margin_grad_std": 0.017941787838935852, "beta_dpo/beta_margin_mean": 0.02568780817091465, "beta_dpo/beta_margin_std": 0.07200151681900024, "beta_dpo/beta_used": 0.09670780599117279, "beta_dpo/beta_used_raw": 0.09670780599117279, "beta_dpo/gap_mean": 0.17569701373577118, "beta_dpo/gap_std": 0.5746784210205078, "beta_dpo/loss_margin_mean": 0.25069764256477356, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08919123204837491, "grad_norm": 27.714921951293945, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 0.12836335599422455, "logits/rejected": 0.06272809952497482, "loss": 1.3745, "step": 59 }, { "beta_dpo/beta": 0.09911298751831055, "beta_dpo/beta_margin_grad_mean": -0.497974693775177, "beta_dpo/beta_margin_grad_std": 0.01722402684390545, "beta_dpo/beta_margin_mean": 0.008100156672298908, "beta_dpo/beta_margin_std": 0.06897980719804764, "beta_dpo/beta_used": 0.09911298751831055, "beta_dpo/beta_used_raw": 0.09911298751831055, "beta_dpo/gap_mean": 0.18243734538555145, "beta_dpo/gap_std": 0.6014333963394165, "beta_dpo/loss_margin_mean": 0.07506400346755981, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09070294784580499, "grad_norm": 29.543716430664062, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.11510735005140305, "logits/rejected": 0.11371426284313202, "loss": 1.3699, "step": 60 }, { "beta_dpo/beta": 0.09410177916288376, "beta_dpo/beta_margin_grad_mean": -0.4979269206523895, "beta_dpo/beta_margin_grad_std": 0.019729407504200935, "beta_dpo/beta_margin_mean": 0.008308450691401958, "beta_dpo/beta_margin_std": 0.07905634492635727, "beta_dpo/beta_used": 0.09410177916288376, "beta_dpo/beta_used_raw": 0.09410177916288376, "beta_dpo/gap_mean": 0.16265742480754852, "beta_dpo/gap_std": 0.6380031108856201, "beta_dpo/loss_margin_mean": 0.08492028713226318, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09221466364323508, "grad_norm": 26.6069278717041, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 0.11286836862564087, "logits/rejected": 0.08159026503562927, "loss": 1.3808, "step": 61 }, { "beta_dpo/beta": 0.10248786211013794, "beta_dpo/beta_margin_grad_mean": -0.493076890707016, "beta_dpo/beta_margin_grad_std": 0.019803814589977264, "beta_dpo/beta_margin_mean": 0.027745075523853302, "beta_dpo/beta_margin_std": 0.07940506935119629, "beta_dpo/beta_used": 0.10248786211013794, "beta_dpo/beta_used_raw": 0.10248786211013794, "beta_dpo/gap_mean": 0.165227472782135, "beta_dpo/gap_std": 0.6631143689155579, "beta_dpo/loss_margin_mean": 0.26825329661369324, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09372637944066516, "grad_norm": 33.22285461425781, "learning_rate": 4.552238805970149e-07, "logits/chosen": 0.0910121276974678, "logits/rejected": 0.052763670682907104, "loss": 1.3659, "step": 62 }, { "beta_dpo/beta": 0.11158844828605652, "beta_dpo/beta_margin_grad_mean": -0.4859258234500885, "beta_dpo/beta_margin_grad_std": 0.021454254165291786, "beta_dpo/beta_margin_mean": 0.05640276148915291, "beta_dpo/beta_margin_std": 0.08617250621318817, "beta_dpo/beta_used": 0.11158844828605652, "beta_dpo/beta_used_raw": 0.11158844828605652, "beta_dpo/gap_mean": 0.22149600088596344, "beta_dpo/gap_std": 0.6870256066322327, "beta_dpo/loss_margin_mean": 0.5042006373405457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09523809523809523, "grad_norm": 35.34672927856445, "learning_rate": 4.626865671641791e-07, "logits/chosen": 0.1868915557861328, "logits/rejected": 0.16194406151771545, "loss": 1.3407, "step": 63 }, { "beta_dpo/beta": 0.09616056084632874, "beta_dpo/beta_margin_grad_mean": -0.4972890019416809, "beta_dpo/beta_margin_grad_std": 0.017049264162778854, "beta_dpo/beta_margin_mean": 0.01085972972214222, "beta_dpo/beta_margin_std": 0.06834981590509415, "beta_dpo/beta_used": 0.09616056084632874, "beta_dpo/beta_used_raw": 0.09616056084632874, "beta_dpo/gap_mean": 0.223545640707016, "beta_dpo/gap_std": 0.7015777826309204, "beta_dpo/loss_margin_mean": 0.11485102772712708, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09674981103552532, "grad_norm": 26.68852424621582, "learning_rate": 4.701492537313433e-07, "logits/chosen": 0.08099683374166489, "logits/rejected": 0.056533243507146835, "loss": 1.3714, "step": 64 }, { "beta_dpo/beta": 0.11661025881767273, "beta_dpo/beta_margin_grad_mean": -0.4880481958389282, "beta_dpo/beta_margin_grad_std": 0.02547621540725231, "beta_dpo/beta_margin_mean": 0.04795736074447632, "beta_dpo/beta_margin_std": 0.10229441523551941, "beta_dpo/beta_used": 0.11661025881767273, "beta_dpo/beta_used_raw": 0.11661025881767273, "beta_dpo/gap_mean": 0.24007107317447662, "beta_dpo/gap_std": 0.7217456698417664, "beta_dpo/loss_margin_mean": 0.4126676321029663, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0982615268329554, "grad_norm": 36.96430587768555, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.20523807406425476, "logits/rejected": 0.17545229196548462, "loss": 1.3282, "step": 65 }, { "beta_dpo/beta": 0.10083754360675812, "beta_dpo/beta_margin_grad_mean": -0.49288806319236755, "beta_dpo/beta_margin_grad_std": 0.024221470579504967, "beta_dpo/beta_margin_mean": 0.02851312793791294, "beta_dpo/beta_margin_std": 0.09726641327142715, "beta_dpo/beta_used": 0.10083754360675812, "beta_dpo/beta_used_raw": 0.10083754360675812, "beta_dpo/gap_mean": 0.26807379722595215, "beta_dpo/gap_std": 0.7676073908805847, "beta_dpo/loss_margin_mean": 0.28801459074020386, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09977324263038549, "grad_norm": 32.096954345703125, "learning_rate": 4.850746268656717e-07, "logits/chosen": 0.08074181526899338, "logits/rejected": 0.057395968586206436, "loss": 1.3589, "step": 66 }, { "beta_dpo/beta": 0.10163235664367676, "beta_dpo/beta_margin_grad_mean": -0.49344709515571594, "beta_dpo/beta_margin_grad_std": 0.01996057853102684, "beta_dpo/beta_margin_mean": 0.026278013363480568, "beta_dpo/beta_margin_std": 0.08008788526058197, "beta_dpo/beta_used": 0.10163235664367676, "beta_dpo/beta_used_raw": 0.10163235664367676, "beta_dpo/gap_mean": 0.2551182508468628, "beta_dpo/gap_std": 0.782842755317688, "beta_dpo/loss_margin_mean": 0.25911685824394226, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10128495842781557, "grad_norm": 32.75022888183594, "learning_rate": 4.925373134328357e-07, "logits/chosen": 0.08444076031446457, "logits/rejected": 0.02903038263320923, "loss": 1.3585, "step": 67 }, { "beta_dpo/beta": 0.10411373525857925, "beta_dpo/beta_margin_grad_mean": -0.49119579792022705, "beta_dpo/beta_margin_grad_std": 0.027670543640851974, "beta_dpo/beta_margin_mean": 0.03529214486479759, "beta_dpo/beta_margin_std": 0.11126258224248886, "beta_dpo/beta_used": 0.10411373525857925, "beta_dpo/beta_used_raw": 0.10411373525857925, "beta_dpo/gap_mean": 0.2772870659828186, "beta_dpo/gap_std": 0.8147046566009521, "beta_dpo/loss_margin_mean": 0.34844857454299927, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10279667422524566, "grad_norm": 35.180267333984375, "learning_rate": 5e-07, "logits/chosen": 0.08185356110334396, "logits/rejected": 0.05136846378445625, "loss": 1.351, "step": 68 }, { "beta_dpo/beta": 0.10379628837108612, "beta_dpo/beta_margin_grad_mean": -0.4831562936306, "beta_dpo/beta_margin_grad_std": 0.027017628774046898, "beta_dpo/beta_margin_mean": 0.06770786643028259, "beta_dpo/beta_margin_std": 0.10907536745071411, "beta_dpo/beta_used": 0.10379628837108612, "beta_dpo/beta_used_raw": 0.10379628837108612, "beta_dpo/gap_mean": 0.31204187870025635, "beta_dpo/gap_std": 0.8627355098724365, "beta_dpo/loss_margin_mean": 0.626896858215332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10430839002267574, "grad_norm": 33.93740463256836, "learning_rate": 4.999965034812934e-07, "logits/chosen": 0.10463707894086838, "logits/rejected": 0.061709824949502945, "loss": 1.3471, "step": 69 }, { "beta_dpo/beta": 0.09281350672245026, "beta_dpo/beta_margin_grad_mean": -0.49325621128082275, "beta_dpo/beta_margin_grad_std": 0.023107120767235756, "beta_dpo/beta_margin_mean": 0.027028771117329597, "beta_dpo/beta_margin_std": 0.09262385219335556, "beta_dpo/beta_used": 0.09281350672245026, "beta_dpo/beta_used_raw": 0.09281350672245026, "beta_dpo/gap_mean": 0.33571797609329224, "beta_dpo/gap_std": 0.8863239288330078, "beta_dpo/loss_margin_mean": 0.2911398708820343, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10582010582010581, "grad_norm": 29.002174377441406, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.15091601014137268, "logits/rejected": 0.1275625079870224, "loss": 1.3677, "step": 70 }, { "beta_dpo/beta": 0.09932013601064682, "beta_dpo/beta_margin_grad_mean": -0.4969637989997864, "beta_dpo/beta_margin_grad_std": 0.030911121517419815, "beta_dpo/beta_margin_mean": 0.012066809460520744, "beta_dpo/beta_margin_std": 0.12431972473859787, "beta_dpo/beta_used": 0.09932013601064682, "beta_dpo/beta_used_raw": 0.09932013601064682, "beta_dpo/gap_mean": 0.3090514540672302, "beta_dpo/gap_std": 0.9437848329544067, "beta_dpo/loss_margin_mean": 0.1241014152765274, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1073318216175359, "grad_norm": 30.64402198791504, "learning_rate": 4.999685319184688e-07, "logits/chosen": 0.0933336392045021, "logits/rejected": 0.07769975811243057, "loss": 1.3585, "step": 71 }, { "beta_dpo/beta": 0.10620521754026413, "beta_dpo/beta_margin_grad_mean": -0.4829525053501129, "beta_dpo/beta_margin_grad_std": 0.029280930757522583, "beta_dpo/beta_margin_mean": 0.06848964095115662, "beta_dpo/beta_margin_std": 0.11784832924604416, "beta_dpo/beta_used": 0.10620521754026413, "beta_dpo/beta_used_raw": 0.10620521754026413, "beta_dpo/gap_mean": 0.34875622391700745, "beta_dpo/gap_std": 0.9951860904693604, "beta_dpo/loss_margin_mean": 0.6475339531898499, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10884353741496598, "grad_norm": 34.11119842529297, "learning_rate": 4.999440576567755e-07, "logits/chosen": 0.10905547440052032, "logits/rejected": 0.04688937962055206, "loss": 1.3403, "step": 72 }, { "beta_dpo/beta": 0.08631128817796707, "beta_dpo/beta_margin_grad_mean": -0.49926990270614624, "beta_dpo/beta_margin_grad_std": 0.02832244336605072, "beta_dpo/beta_margin_mean": 0.0030645669903606176, "beta_dpo/beta_margin_std": 0.11412809789180756, "beta_dpo/beta_used": 0.08631128817796707, "beta_dpo/beta_used_raw": 0.08631128817796707, "beta_dpo/gap_mean": 0.2991209328174591, "beta_dpo/gap_std": 1.0169306993484497, "beta_dpo/loss_margin_mean": 0.025026828050613403, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11035525321239607, "grad_norm": 28.28575897216797, "learning_rate": 4.999125919224965e-07, "logits/chosen": 0.1120387464761734, "logits/rejected": 0.09779127687215805, "loss": 1.3813, "step": 73 }, { "beta_dpo/beta": 0.10641483962535858, "beta_dpo/beta_margin_grad_mean": -0.4837491810321808, "beta_dpo/beta_margin_grad_std": 0.030070235952734947, "beta_dpo/beta_margin_mean": 0.0653383806347847, "beta_dpo/beta_margin_std": 0.12112236768007278, "beta_dpo/beta_used": 0.10641483962535858, "beta_dpo/beta_used_raw": 0.10641483962535858, "beta_dpo/gap_mean": 0.33124732971191406, "beta_dpo/gap_std": 1.0620481967926025, "beta_dpo/loss_margin_mean": 0.6014441251754761, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11186696900982615, "grad_norm": 32.007293701171875, "learning_rate": 4.998741355957963e-07, "logits/chosen": 0.15119096636772156, "logits/rejected": 0.09727491438388824, "loss": 1.3415, "step": 74 }, { "beta_dpo/beta": 0.10607866197824478, "beta_dpo/beta_margin_grad_mean": -0.48331546783447266, "beta_dpo/beta_margin_grad_std": 0.03126617893576622, "beta_dpo/beta_margin_mean": 0.06704951822757721, "beta_dpo/beta_margin_std": 0.1258445382118225, "beta_dpo/beta_used": 0.10607866197824478, "beta_dpo/beta_used_raw": 0.10607866197824478, "beta_dpo/gap_mean": 0.40168869495391846, "beta_dpo/gap_std": 1.076457142829895, "beta_dpo/loss_margin_mean": 0.6295863389968872, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11337868480725624, "grad_norm": 27.578632354736328, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.1281864494085312, "logits/rejected": 0.0956953763961792, "loss": 1.3359, "step": 75 }, { "beta_dpo/beta": 0.09647442400455475, "beta_dpo/beta_margin_grad_mean": -0.4936937391757965, "beta_dpo/beta_margin_grad_std": 0.03319334238767624, "beta_dpo/beta_margin_mean": 0.025323159992694855, "beta_dpo/beta_margin_std": 0.1335090696811676, "beta_dpo/beta_used": 0.09647442400455475, "beta_dpo/beta_used_raw": 0.09647442400455475, "beta_dpo/gap_mean": 0.38828492164611816, "beta_dpo/gap_std": 1.133347988128662, "beta_dpo/loss_margin_mean": 0.2612752318382263, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11489040060468632, "grad_norm": 29.8311767578125, "learning_rate": 4.997762556634679e-07, "logits/chosen": 0.13822756707668304, "logits/rejected": 0.09391121566295624, "loss": 1.3565, "step": 76 }, { "beta_dpo/beta": 0.1073828637599945, "beta_dpo/beta_margin_grad_mean": -0.47993001341819763, "beta_dpo/beta_margin_grad_std": 0.03186849504709244, "beta_dpo/beta_margin_mean": 0.08071349561214447, "beta_dpo/beta_margin_std": 0.12836872041225433, "beta_dpo/beta_used": 0.1073828637599945, "beta_dpo/beta_used_raw": 0.1073828637599945, "beta_dpo/gap_mean": 0.4407605528831482, "beta_dpo/gap_std": 1.1482113599777222, "beta_dpo/loss_margin_mean": 0.722518801689148, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1164021164021164, "grad_norm": 33.737342834472656, "learning_rate": 4.99716834795752e-07, "logits/chosen": 0.13610659539699554, "logits/rejected": 0.09622259438037872, "loss": 1.3264, "step": 77 }, { "beta_dpo/beta": 0.09310435503721237, "beta_dpo/beta_margin_grad_mean": -0.49019360542297363, "beta_dpo/beta_margin_grad_std": 0.027901530265808105, "beta_dpo/beta_margin_mean": 0.03937191888689995, "beta_dpo/beta_margin_std": 0.11205900460481644, "beta_dpo/beta_used": 0.09310435503721237, "beta_dpo/beta_used_raw": 0.09310435503721237, "beta_dpo/gap_mean": 0.4376751780509949, "beta_dpo/gap_std": 1.1631581783294678, "beta_dpo/loss_margin_mean": 0.4201837480068207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11791383219954649, "grad_norm": 30.198745727539062, "learning_rate": 4.996504288113623e-07, "logits/chosen": 0.12471885979175568, "logits/rejected": 0.1042494922876358, "loss": 1.3574, "step": 78 }, { "beta_dpo/beta": 0.10304860770702362, "beta_dpo/beta_margin_grad_mean": -0.4760061204433441, "beta_dpo/beta_margin_grad_std": 0.04387785494327545, "beta_dpo/beta_margin_mean": 0.09730573743581772, "beta_dpo/beta_margin_std": 0.17926862835884094, "beta_dpo/beta_used": 0.10304860770702362, "beta_dpo/beta_used_raw": 0.10304860770702362, "beta_dpo/gap_mean": 0.5142155289649963, "beta_dpo/gap_std": 1.2540637254714966, "beta_dpo/loss_margin_mean": 0.9398516416549683, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11942554799697656, "grad_norm": 31.872982025146484, "learning_rate": 4.995770395678171e-07, "logits/chosen": 0.15904495120048523, "logits/rejected": 0.10070707648992538, "loss": 1.3309, "step": 79 }, { "beta_dpo/beta": 0.08738920837640762, "beta_dpo/beta_margin_grad_mean": -0.48728257417678833, "beta_dpo/beta_margin_grad_std": 0.03742121160030365, "beta_dpo/beta_margin_mean": 0.051270920783281326, "beta_dpo/beta_margin_std": 0.1506371945142746, "beta_dpo/beta_used": 0.08738920837640762, "beta_dpo/beta_used_raw": 0.08738920837640762, "beta_dpo/gap_mean": 0.5226389765739441, "beta_dpo/gap_std": 1.3193085193634033, "beta_dpo/loss_margin_mean": 0.5483418703079224, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12093726379440665, "grad_norm": 27.30435562133789, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.1488003134727478, "logits/rejected": 0.08947046101093292, "loss": 1.3597, "step": 80 }, { "beta_dpo/beta": 0.09669499099254608, "beta_dpo/beta_margin_grad_mean": -0.48155760765075684, "beta_dpo/beta_margin_grad_std": 0.03757631033658981, "beta_dpo/beta_margin_mean": 0.07457643747329712, "beta_dpo/beta_margin_std": 0.15281039476394653, "beta_dpo/beta_used": 0.09669499099254608, "beta_dpo/beta_used_raw": 0.09669499099254608, "beta_dpo/gap_mean": 0.5826159119606018, "beta_dpo/gap_std": 1.3762335777282715, "beta_dpo/loss_margin_mean": 0.7375200390815735, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12244897959183673, "grad_norm": 30.987957000732422, "learning_rate": 4.994093197099587e-07, "logits/chosen": 0.11368920654058456, "logits/rejected": 0.08077847957611084, "loss": 1.3366, "step": 81 }, { "beta_dpo/beta": 0.11626774817705154, "beta_dpo/beta_margin_grad_mean": -0.472137451171875, "beta_dpo/beta_margin_grad_std": 0.03789715841412544, "beta_dpo/beta_margin_mean": 0.11208187788724899, "beta_dpo/beta_margin_std": 0.15328720211982727, "beta_dpo/beta_used": 0.11626774817705154, "beta_dpo/beta_used_raw": 0.11626774817705154, "beta_dpo/gap_mean": 0.6363944411277771, "beta_dpo/gap_std": 1.3633242845535278, "beta_dpo/loss_margin_mean": 0.9711171388626099, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12396069538926682, "grad_norm": 31.890888214111328, "learning_rate": 4.993149937871306e-07, "logits/chosen": 0.12913721799850464, "logits/rejected": 0.06530788540840149, "loss": 1.2855, "step": 82 }, { "beta_dpo/beta": 0.11282624304294586, "beta_dpo/beta_margin_grad_mean": -0.4715110659599304, "beta_dpo/beta_margin_grad_std": 0.041592177003622055, "beta_dpo/beta_margin_mean": 0.11523565649986267, "beta_dpo/beta_margin_std": 0.1703273057937622, "beta_dpo/beta_used": 0.11282624304294586, "beta_dpo/beta_used_raw": 0.11282624304294586, "beta_dpo/gap_mean": 0.7063510417938232, "beta_dpo/gap_std": 1.3889222145080566, "beta_dpo/loss_margin_mean": 1.012617826461792, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1254724111866969, "grad_norm": 33.445735931396484, "learning_rate": 4.992136939879856e-07, "logits/chosen": 0.1792718917131424, "logits/rejected": 0.13057425618171692, "loss": 1.287, "step": 83 }, { "beta_dpo/beta": 0.10456011444330215, "beta_dpo/beta_margin_grad_mean": -0.4815867245197296, "beta_dpo/beta_margin_grad_std": 0.04160284623503685, "beta_dpo/beta_margin_mean": 0.07425189018249512, "beta_dpo/beta_margin_std": 0.16766425967216492, "beta_dpo/beta_used": 0.10456011444330215, "beta_dpo/beta_used_raw": 0.10456011444330215, "beta_dpo/gap_mean": 0.7209852933883667, "beta_dpo/gap_std": 1.4176113605499268, "beta_dpo/loss_margin_mean": 0.7021726369857788, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12698412698412698, "grad_norm": 34.64680862426758, "learning_rate": 4.991054231460969e-07, "logits/chosen": 0.11454911530017853, "logits/rejected": 0.07707634568214417, "loss": 1.31, "step": 84 }, { "beta_dpo/beta": 0.12070846557617188, "beta_dpo/beta_margin_grad_mean": -0.46796914935112, "beta_dpo/beta_margin_grad_std": 0.05074300616979599, "beta_dpo/beta_margin_mean": 0.12967704236507416, "beta_dpo/beta_margin_std": 0.20634624361991882, "beta_dpo/beta_used": 0.12070846557617188, "beta_dpo/beta_used_raw": 0.12070846557617188, "beta_dpo/gap_mean": 0.757364809513092, "beta_dpo/gap_std": 1.4538509845733643, "beta_dpo/loss_margin_mean": 1.0756046772003174, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12849584278155707, "grad_norm": 35.5852165222168, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.1759287714958191, "logits/rejected": 0.13176394999027252, "loss": 1.2621, "step": 85 }, { "beta_dpo/beta": 0.08451604843139648, "beta_dpo/beta_margin_grad_mean": -0.48312827944755554, "beta_dpo/beta_margin_grad_std": 0.03507286682724953, "beta_dpo/beta_margin_mean": 0.06791017949581146, "beta_dpo/beta_margin_std": 0.14122274518013, "beta_dpo/beta_used": 0.08451604843139648, "beta_dpo/beta_used_raw": 0.08451604843139648, "beta_dpo/gap_mean": 0.7781229615211487, "beta_dpo/gap_std": 1.5088303089141846, "beta_dpo/loss_margin_mean": 0.7961375713348389, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13000755857898716, "grad_norm": 24.137842178344727, "learning_rate": 4.988679806432711e-07, "logits/chosen": 0.18794915080070496, "logits/rejected": 0.16945751011371613, "loss": 1.3456, "step": 86 }, { "beta_dpo/beta": 0.10423195362091064, "beta_dpo/beta_margin_grad_mean": -0.47645512223243713, "beta_dpo/beta_margin_grad_std": 0.049108896404504776, "beta_dpo/beta_margin_mean": 0.09508252143859863, "beta_dpo/beta_margin_std": 0.19925162196159363, "beta_dpo/beta_used": 0.10423195362091064, "beta_dpo/beta_used_raw": 0.10423195362091064, "beta_dpo/gap_mean": 0.8129284381866455, "beta_dpo/gap_std": 1.571422815322876, "beta_dpo/loss_margin_mean": 0.8943271636962891, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13151927437641722, "grad_norm": 30.79345703125, "learning_rate": 4.987388156241114e-07, "logits/chosen": 0.1435273289680481, "logits/rejected": 0.0876278430223465, "loss": 1.3003, "step": 87 }, { "beta_dpo/beta": 0.08814011514186859, "beta_dpo/beta_margin_grad_mean": -0.4850381910800934, "beta_dpo/beta_margin_grad_std": 0.05164018273353577, "beta_dpo/beta_margin_mean": 0.06059926748275757, "beta_dpo/beta_margin_std": 0.20973937213420868, "beta_dpo/beta_used": 0.08814011514186859, "beta_dpo/beta_used_raw": 0.08814011514186859, "beta_dpo/gap_mean": 0.8027680516242981, "beta_dpo/gap_std": 1.6989638805389404, "beta_dpo/loss_margin_mean": 0.6227868795394897, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1330309901738473, "grad_norm": 25.85154914855957, "learning_rate": 4.986026928455767e-07, "logits/chosen": 0.24119189381599426, "logits/rejected": 0.21431989967823029, "loss": 1.3356, "step": 88 }, { "beta_dpo/beta": 0.10085612535476685, "beta_dpo/beta_margin_grad_mean": -0.4691811800003052, "beta_dpo/beta_margin_grad_std": 0.0591701865196228, "beta_dpo/beta_margin_mean": 0.12679243087768555, "beta_dpo/beta_margin_std": 0.2483137995004654, "beta_dpo/beta_used": 0.10085612535476685, "beta_dpo/beta_used_raw": 0.10085612535476685, "beta_dpo/gap_mean": 0.796160101890564, "beta_dpo/gap_std": 1.7879228591918945, "beta_dpo/loss_margin_mean": 1.1158039569854736, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1345427059712774, "grad_norm": 32.81768798828125, "learning_rate": 4.984596161153135e-07, "logits/chosen": 0.23640497028827667, "logits/rejected": 0.15615656971931458, "loss": 1.2946, "step": 89 }, { "beta_dpo/beta": 0.09922297298908234, "beta_dpo/beta_margin_grad_mean": -0.474229097366333, "beta_dpo/beta_margin_grad_std": 0.05109386146068573, "beta_dpo/beta_margin_mean": 0.10420099645853043, "beta_dpo/beta_margin_std": 0.20693768560886383, "beta_dpo/beta_used": 0.09922297298908234, "beta_dpo/beta_used_raw": 0.09922297298908234, "beta_dpo/gap_mean": 0.8837804794311523, "beta_dpo/gap_std": 1.8559458255767822, "beta_dpo/loss_margin_mean": 1.0424596071243286, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1360544217687075, "grad_norm": 32.41039276123047, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.16189423203468323, "logits/rejected": 0.10690204054117203, "loss": 1.3085, "step": 90 }, { "beta_dpo/beta": 0.09870962798595428, "beta_dpo/beta_margin_grad_mean": -0.47690755128860474, "beta_dpo/beta_margin_grad_std": 0.05698744207620621, "beta_dpo/beta_margin_mean": 0.09375400096178055, "beta_dpo/beta_margin_std": 0.23264114558696747, "beta_dpo/beta_used": 0.09870962798595428, "beta_dpo/beta_used_raw": 0.09870962798595428, "beta_dpo/gap_mean": 0.8797661662101746, "beta_dpo/gap_std": 1.9356528520584106, "beta_dpo/loss_margin_mean": 0.9830507636070251, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13756613756613756, "grad_norm": 27.203453063964844, "learning_rate": 4.98152617002662e-07, "logits/chosen": 0.14668001234531403, "logits/rejected": 0.10438361018896103, "loss": 1.3035, "step": 91 }, { "beta_dpo/beta": 0.09463343024253845, "beta_dpo/beta_margin_grad_mean": -0.4683462679386139, "beta_dpo/beta_margin_grad_std": 0.06076580286026001, "beta_dpo/beta_margin_mean": 0.12961620092391968, "beta_dpo/beta_margin_std": 0.25017645955085754, "beta_dpo/beta_used": 0.09463343024253845, "beta_dpo/beta_used_raw": 0.09463343024253845, "beta_dpo/gap_mean": 0.937788188457489, "beta_dpo/gap_std": 2.049354314804077, "beta_dpo/loss_margin_mean": 1.3172084093093872, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13907785336356765, "grad_norm": 27.962493896484375, "learning_rate": 4.979887032076988e-07, "logits/chosen": 0.19484901428222656, "logits/rejected": 0.15675948560237885, "loss": 1.3129, "step": 92 }, { "beta_dpo/beta": 0.08639341592788696, "beta_dpo/beta_margin_grad_mean": -0.48290809988975525, "beta_dpo/beta_margin_grad_std": 0.053895775228738785, "beta_dpo/beta_margin_mean": 0.07019705325365067, "beta_dpo/beta_margin_std": 0.2225038707256317, "beta_dpo/beta_used": 0.08639341592788696, "beta_dpo/beta_used_raw": 0.08639341592788696, "beta_dpo/gap_mean": 0.9476668834686279, "beta_dpo/gap_std": 2.1445741653442383, "beta_dpo/loss_margin_mean": 0.8015139102935791, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14058956916099774, "grad_norm": 24.30367088317871, "learning_rate": 4.978178526356172e-07, "logits/chosen": 0.19220688939094543, "logits/rejected": 0.1637325882911682, "loss": 1.3297, "step": 93 }, { "beta_dpo/beta": 0.12751072645187378, "beta_dpo/beta_margin_grad_mean": -0.4522378444671631, "beta_dpo/beta_margin_grad_std": 0.09412048012018204, "beta_dpo/beta_margin_mean": 0.19668596982955933, "beta_dpo/beta_margin_std": 0.4031871557235718, "beta_dpo/beta_used": 0.12751072645187378, "beta_dpo/beta_used_raw": 0.12751072645187378, "beta_dpo/gap_mean": 1.0329408645629883, "beta_dpo/gap_std": 2.292163372039795, "beta_dpo/loss_margin_mean": 1.5427203178405762, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1421012849584278, "grad_norm": 37.51604461669922, "learning_rate": 4.976400700654751e-07, "logits/chosen": 0.25582846999168396, "logits/rejected": 0.21623843908309937, "loss": 1.217, "step": 94 }, { "beta_dpo/beta": 0.11038395762443542, "beta_dpo/beta_margin_grad_mean": -0.46619194746017456, "beta_dpo/beta_margin_grad_std": 0.07264120876789093, "beta_dpo/beta_margin_mean": 0.13830190896987915, "beta_dpo/beta_margin_std": 0.297305703163147, "beta_dpo/beta_used": 0.11038395762443542, "beta_dpo/beta_used_raw": 0.11038395762443542, "beta_dpo/gap_mean": 1.1002323627471924, "beta_dpo/gap_std": 2.4173107147216797, "beta_dpo/loss_margin_mean": 1.278725028038025, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1436130007558579, "grad_norm": 36.0577278137207, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.1208517849445343, "logits/rejected": 0.06007538363337517, "loss": 1.2627, "step": 95 }, { "beta_dpo/beta": 0.07217492908239365, "beta_dpo/beta_margin_grad_mean": -0.4727436304092407, "beta_dpo/beta_margin_grad_std": 0.05908944830298424, "beta_dpo/beta_margin_mean": 0.11143878847360611, "beta_dpo/beta_margin_std": 0.24230976402759552, "beta_dpo/beta_used": 0.07217492908239365, "beta_dpo/beta_used_raw": 0.07217492908239365, "beta_dpo/gap_mean": 1.1477243900299072, "beta_dpo/gap_std": 2.6134121417999268, "beta_dpo/loss_margin_mean": 1.502234935760498, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14512471655328799, "grad_norm": 21.8409366607666, "learning_rate": 4.972637290166157e-07, "logits/chosen": 0.21506036818027496, "logits/rejected": 0.17100423574447632, "loss": 1.3403, "step": 96 }, { "beta_dpo/beta": 0.07082679867744446, "beta_dpo/beta_margin_grad_mean": -0.4855877757072449, "beta_dpo/beta_margin_grad_std": 0.06629322469234467, "beta_dpo/beta_margin_mean": 0.059007786214351654, "beta_dpo/beta_margin_std": 0.2750501334667206, "beta_dpo/beta_used": 0.07082679867744446, "beta_dpo/beta_used_raw": 0.07082679867744446, "beta_dpo/gap_mean": 1.14254891872406, "beta_dpo/gap_std": 2.7921524047851562, "beta_dpo/loss_margin_mean": 0.8266361951828003, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14663643235071808, "grad_norm": 24.70173454284668, "learning_rate": 4.970651810649666e-07, "logits/chosen": 0.12239768356084824, "logits/rejected": 0.07986140251159668, "loss": 1.3453, "step": 97 }, { "beta_dpo/beta": 0.09226500988006592, "beta_dpo/beta_margin_grad_mean": -0.48356226086616516, "beta_dpo/beta_margin_grad_std": 0.05798405781388283, "beta_dpo/beta_margin_mean": 0.06667763739824295, "beta_dpo/beta_margin_std": 0.23548774421215057, "beta_dpo/beta_used": 0.09226500988006592, "beta_dpo/beta_used_raw": 0.09226500988006592, "beta_dpo/gap_mean": 1.052299976348877, "beta_dpo/gap_std": 2.8152146339416504, "beta_dpo/loss_margin_mean": 0.6153003573417664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14814814814814814, "grad_norm": 27.817951202392578, "learning_rate": 4.968597221690985e-07, "logits/chosen": 0.21289318799972534, "logits/rejected": 0.1862494945526123, "loss": 1.3007, "step": 98 }, { "beta_dpo/beta": 0.07890302687883377, "beta_dpo/beta_margin_grad_mean": -0.48339250683784485, "beta_dpo/beta_margin_grad_std": 0.07327800989151001, "beta_dpo/beta_margin_mean": 0.06827875226736069, "beta_dpo/beta_margin_std": 0.3037020266056061, "beta_dpo/beta_used": 0.07890302687883377, "beta_dpo/beta_used_raw": 0.07890302687883377, "beta_dpo/gap_mean": 0.9998865723609924, "beta_dpo/gap_std": 2.9074761867523193, "beta_dpo/loss_margin_mean": 0.8886429071426392, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14965986394557823, "grad_norm": 25.86090660095215, "learning_rate": 4.966473580761389e-07, "logits/chosen": 0.247923344373703, "logits/rejected": 0.20911335945129395, "loss": 1.3426, "step": 99 }, { "beta_dpo/beta": 0.10054519027471542, "beta_dpo/beta_margin_grad_mean": -0.4732176661491394, "beta_dpo/beta_margin_grad_std": 0.11176663637161255, "beta_dpo/beta_margin_mean": 0.11648314446210861, "beta_dpo/beta_margin_std": 0.5131522417068481, "beta_dpo/beta_used": 0.10054519027471542, "beta_dpo/beta_used_raw": 0.10054519027471542, "beta_dpo/gap_mean": 0.979209303855896, "beta_dpo/gap_std": 3.17752742767334, "beta_dpo/loss_margin_mean": 1.1264538764953613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15117157974300832, "grad_norm": 35.79426956176758, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.2418309450149536, "logits/rejected": 0.2337619811296463, "loss": 1.3014, "step": 100 }, { "epoch": 0.15117157974300832, "eval_beta_dpo/beta": 0.11826334148645401, "eval_beta_dpo/beta_margin_grad_mean": -0.4594725966453552, "eval_beta_dpo/beta_margin_grad_std": 0.1056964099407196, "eval_beta_dpo/beta_margin_mean": 0.1789473295211792, "eval_beta_dpo/beta_margin_std": 0.47487327456474304, "eval_beta_dpo/beta_used": 0.11826334148645401, "eval_beta_dpo/beta_used_raw": 0.11826334148645401, "eval_beta_dpo/gap_mean": 1.0180015563964844, "eval_beta_dpo/gap_std": 3.335969924926758, "eval_beta_dpo/loss_margin_mean": 1.3223904371261597, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 0.25719377398490906, "eval_logits/rejected": 0.22068104147911072, "eval_loss": 0.6391391754150391, "eval_runtime": 39.093, "eval_samples_per_second": 58.911, "eval_steps_per_second": 1.842, "step": 100 }, { "beta_dpo/beta": 0.12229928374290466, "beta_dpo/beta_margin_grad_mean": -0.4488358199596405, "beta_dpo/beta_margin_grad_std": 0.0993746891617775, "beta_dpo/beta_margin_mean": 0.21102577447891235, "beta_dpo/beta_margin_std": 0.4214361906051636, "beta_dpo/beta_used": 0.12229928374290466, "beta_dpo/beta_used_raw": 0.12229928374290466, "beta_dpo/gap_mean": 1.1088917255401611, "beta_dpo/gap_std": 3.3312177658081055, "beta_dpo/loss_margin_mean": 1.7355849742889404, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15268329554043839, "grad_norm": 31.71246910095215, "learning_rate": 4.96201938253052e-07, "logits/chosen": 0.24451228976249695, "logits/rejected": 0.20558559894561768, "loss": 1.2362, "step": 101 }, { "beta_dpo/beta": 0.05900692939758301, "beta_dpo/beta_margin_grad_mean": -0.48880329728126526, "beta_dpo/beta_margin_grad_std": 0.07061181217432022, "beta_dpo/beta_margin_mean": 0.04781756550073624, "beta_dpo/beta_margin_std": 0.3020632565021515, "beta_dpo/beta_used": 0.05900692939758301, "beta_dpo/beta_used_raw": 0.05900692939758301, "beta_dpo/gap_mean": 1.0780134201049805, "beta_dpo/gap_std": 3.5245532989501953, "beta_dpo/loss_margin_mean": 0.6960800886154175, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15419501133786848, "grad_norm": 19.128576278686523, "learning_rate": 4.959688949822748e-07, "logits/chosen": 0.1611076295375824, "logits/rejected": 0.12269513309001923, "loss": 1.3618, "step": 102 }, { "beta_dpo/beta": 0.10784563422203064, "beta_dpo/beta_margin_grad_mean": -0.46517613530158997, "beta_dpo/beta_margin_grad_std": 0.0859994888305664, "beta_dpo/beta_margin_mean": 0.14561301469802856, "beta_dpo/beta_margin_std": 0.3582909107208252, "beta_dpo/beta_used": 0.10784563422203064, "beta_dpo/beta_used_raw": 0.10784563422203064, "beta_dpo/gap_mean": 1.0823962688446045, "beta_dpo/gap_std": 3.568239450454712, "beta_dpo/loss_margin_mean": 1.2876253128051758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15570672713529857, "grad_norm": 35.80467987060547, "learning_rate": 4.957289714327572e-07, "logits/chosen": 0.2758740782737732, "logits/rejected": 0.2419765442609787, "loss": 1.2792, "step": 103 }, { "beta_dpo/beta": 0.07326889038085938, "beta_dpo/beta_margin_grad_mean": -0.4682692885398865, "beta_dpo/beta_margin_grad_std": 0.08652123063802719, "beta_dpo/beta_margin_mean": 0.13173505663871765, "beta_dpo/beta_margin_std": 0.3622894287109375, "beta_dpo/beta_used": 0.07326889038085938, "beta_dpo/beta_used_raw": 0.07326889038085938, "beta_dpo/gap_mean": 1.222848892211914, "beta_dpo/gap_std": 3.759814500808716, "beta_dpo/loss_margin_mean": 1.9680804014205933, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15721844293272866, "grad_norm": 24.454832077026367, "learning_rate": 4.954821743156767e-07, "logits/chosen": 0.2790372967720032, "logits/rejected": 0.19069775938987732, "loss": 1.3294, "step": 104 }, { "beta_dpo/beta": 0.09441901743412018, "beta_dpo/beta_margin_grad_mean": -0.4641573131084442, "beta_dpo/beta_margin_grad_std": 0.09842989593744278, "beta_dpo/beta_margin_mean": 0.15091301500797272, "beta_dpo/beta_margin_std": 0.415006160736084, "beta_dpo/beta_used": 0.09441901743412018, "beta_dpo/beta_used_raw": 0.09441901743412018, "beta_dpo/gap_mean": 1.335235834121704, "beta_dpo/gap_std": 4.034293174743652, "beta_dpo/loss_margin_mean": 1.6171789169311523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15873015873015872, "grad_norm": 34.57042694091797, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.18213523924350739, "logits/rejected": 0.12961822748184204, "loss": 1.2843, "step": 105 }, { "beta_dpo/beta": 0.1123632863163948, "beta_dpo/beta_margin_grad_mean": -0.46128907799720764, "beta_dpo/beta_margin_grad_std": 0.12345249205827713, "beta_dpo/beta_margin_mean": 0.16074322164058685, "beta_dpo/beta_margin_std": 0.5476335287094116, "beta_dpo/beta_used": 0.1123632863163948, "beta_dpo/beta_used_raw": 0.1123632863163948, "beta_dpo/gap_mean": 1.3533254861831665, "beta_dpo/gap_std": 4.168464660644531, "beta_dpo/loss_margin_mean": 1.444665551185608, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1602418745275888, "grad_norm": 35.42899703979492, "learning_rate": 4.949679871846857e-07, "logits/chosen": 0.25271597504615784, "logits/rejected": 0.2385634183883667, "loss": 1.2505, "step": 106 }, { "beta_dpo/beta": 0.10497646033763885, "beta_dpo/beta_margin_grad_mean": -0.475864440202713, "beta_dpo/beta_margin_grad_std": 0.13249976933002472, "beta_dpo/beta_margin_mean": 0.09398960322141647, "beta_dpo/beta_margin_std": 0.5902509093284607, "beta_dpo/beta_used": 0.10497646033763885, "beta_dpo/beta_used_raw": 0.10497646033763885, "beta_dpo/gap_mean": 1.3109784126281738, "beta_dpo/gap_std": 4.361145973205566, "beta_dpo/loss_margin_mean": 0.7373759150505066, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1617535903250189, "grad_norm": 33.42430114746094, "learning_rate": 4.947006115536947e-07, "logits/chosen": 0.17871206998825073, "logits/rejected": 0.1560344398021698, "loss": 1.2682, "step": 107 }, { "beta_dpo/beta": 0.13177213072776794, "beta_dpo/beta_margin_grad_mean": -0.45206108689308167, "beta_dpo/beta_margin_grad_std": 0.14440619945526123, "beta_dpo/beta_margin_mean": 0.21699368953704834, "beta_dpo/beta_margin_std": 0.6584862470626831, "beta_dpo/beta_used": 0.13177213072776794, "beta_dpo/beta_used_raw": 0.13177213072776794, "beta_dpo/gap_mean": 1.2987751960754395, "beta_dpo/gap_std": 4.507687568664551, "beta_dpo/loss_margin_mean": 1.6458370685577393, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16326530612244897, "grad_norm": 37.21890640258789, "learning_rate": 4.944263911205772e-07, "logits/chosen": 0.19411234557628632, "logits/rejected": 0.16377675533294678, "loss": 1.2105, "step": 108 }, { "beta_dpo/beta": 0.08962151408195496, "beta_dpo/beta_margin_grad_mean": -0.44173574447631836, "beta_dpo/beta_margin_grad_std": 0.10800231993198395, "beta_dpo/beta_margin_mean": 0.2577720284461975, "beta_dpo/beta_margin_std": 0.49331575632095337, "beta_dpo/beta_used": 0.08962151408195496, "beta_dpo/beta_used_raw": 0.08962151408195496, "beta_dpo/gap_mean": 1.5422182083129883, "beta_dpo/gap_std": 4.703213691711426, "beta_dpo/loss_margin_mean": 2.883152961730957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16477702191987906, "grad_norm": 26.614168167114258, "learning_rate": 4.941453335558681e-07, "logits/chosen": 0.18414027988910675, "logits/rejected": 0.13302767276763916, "loss": 1.2804, "step": 109 }, { "beta_dpo/beta": 0.07986485213041306, "beta_dpo/beta_margin_grad_mean": -0.49511831998825073, "beta_dpo/beta_margin_grad_std": 0.10436120629310608, "beta_dpo/beta_margin_mean": 0.008938713930547237, "beta_dpo/beta_margin_std": 0.477845698595047, "beta_dpo/beta_used": 0.07986485213041306, "beta_dpo/beta_used_raw": 0.07986485213041306, "beta_dpo/gap_mean": 1.3971412181854248, "beta_dpo/gap_std": 4.875141143798828, "beta_dpo/loss_margin_mean": 0.1147775948047638, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16628873771730915, "grad_norm": 26.075336456298828, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.19698774814605713, "logits/rejected": 0.2059447467327118, "loss": 1.3093, "step": 110 }, { "beta_dpo/beta": 0.0918872281908989, "beta_dpo/beta_margin_grad_mean": -0.46057236194610596, "beta_dpo/beta_margin_grad_std": 0.11816360801458359, "beta_dpo/beta_margin_mean": 0.17831836640834808, "beta_dpo/beta_margin_std": 0.5313184857368469, "beta_dpo/beta_used": 0.0918872281908989, "beta_dpo/beta_used_raw": 0.0918872281908989, "beta_dpo/gap_mean": 1.4028397798538208, "beta_dpo/gap_std": 5.010308742523193, "beta_dpo/loss_margin_mean": 1.9166357517242432, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16780045351473924, "grad_norm": 26.0497989654541, "learning_rate": 4.935627386698418e-07, "logits/chosen": 0.28147315979003906, "logits/rejected": 0.24575236439704895, "loss": 1.3027, "step": 111 }, { "beta_dpo/beta": 0.1317475140094757, "beta_dpo/beta_margin_grad_mean": -0.43314915895462036, "beta_dpo/beta_margin_grad_std": 0.14976409077644348, "beta_dpo/beta_margin_mean": 0.2932818531990051, "beta_dpo/beta_margin_std": 0.6779165863990784, "beta_dpo/beta_used": 0.1317475140094757, "beta_dpo/beta_used_raw": 0.1317475140094757, "beta_dpo/gap_mean": 1.5489156246185303, "beta_dpo/gap_std": 5.076373100280762, "beta_dpo/loss_margin_mean": 2.225752353668213, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1693121693121693, "grad_norm": 42.69919967651367, "learning_rate": 4.932612176449559e-07, "logits/chosen": 0.22592046856880188, "logits/rejected": 0.16073720157146454, "loss": 1.1894, "step": 112 }, { "beta_dpo/beta": 0.09787484258413315, "beta_dpo/beta_margin_grad_mean": -0.47497114539146423, "beta_dpo/beta_margin_grad_std": 0.12046003341674805, "beta_dpo/beta_margin_mean": 0.10006435960531235, "beta_dpo/beta_margin_std": 0.5390375256538391, "beta_dpo/beta_used": 0.09787484258413315, "beta_dpo/beta_used_raw": 0.09787484258413315, "beta_dpo/gap_mean": 1.505769968032837, "beta_dpo/gap_std": 5.138182640075684, "beta_dpo/loss_margin_mean": 1.1710374355316162, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1708238851095994, "grad_norm": 42.932861328125, "learning_rate": 4.929528920808854e-07, "logits/chosen": 0.23544499278068542, "logits/rejected": 0.19660460948944092, "loss": 1.2689, "step": 113 }, { "beta_dpo/beta": 0.08954670280218124, "beta_dpo/beta_margin_grad_mean": -0.4552899897098541, "beta_dpo/beta_margin_grad_std": 0.11374201625585556, "beta_dpo/beta_margin_mean": 0.19355130195617676, "beta_dpo/beta_margin_std": 0.4905747175216675, "beta_dpo/beta_used": 0.08954670280218124, "beta_dpo/beta_used_raw": 0.08954670280218124, "beta_dpo/gap_mean": 1.615609884262085, "beta_dpo/gap_std": 5.165139198303223, "beta_dpo/loss_margin_mean": 2.1351757049560547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17233560090702948, "grad_norm": 31.22429656982422, "learning_rate": 4.92637770602159e-07, "logits/chosen": 0.2743056118488312, "logits/rejected": 0.21552340686321259, "loss": 1.2937, "step": 114 }, { "beta_dpo/beta": 0.09392043948173523, "beta_dpo/beta_margin_grad_mean": -0.45995837450027466, "beta_dpo/beta_margin_grad_std": 0.1096334159374237, "beta_dpo/beta_margin_mean": 0.17287258803844452, "beta_dpo/beta_margin_std": 0.4752873182296753, "beta_dpo/beta_used": 0.09392043948173523, "beta_dpo/beta_used_raw": 0.09392043948173523, "beta_dpo/gap_mean": 1.685424566268921, "beta_dpo/gap_std": 5.101430892944336, "beta_dpo/loss_margin_mean": 1.8901252746582031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17384731670445955, "grad_norm": 28.878787994384766, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.2532087564468384, "logits/rejected": 0.1925436407327652, "loss": 1.2787, "step": 115 }, { "beta_dpo/beta": 0.12234266102313995, "beta_dpo/beta_margin_grad_mean": -0.4119870066642761, "beta_dpo/beta_margin_grad_std": 0.1418372392654419, "beta_dpo/beta_margin_mean": 0.3979705572128296, "beta_dpo/beta_margin_std": 0.6543411016464233, "beta_dpo/beta_used": 0.12234266102313995, "beta_dpo/beta_used_raw": 0.12234266102313995, "beta_dpo/gap_mean": 1.851987600326538, "beta_dpo/gap_std": 5.066287994384766, "beta_dpo/loss_margin_mean": 3.1081788539886475, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17535903250188964, "grad_norm": 32.4793701171875, "learning_rate": 4.91987175349089e-07, "logits/chosen": 0.2658846974372864, "logits/rejected": 0.1990557312965393, "loss": 1.2118, "step": 116 }, { "beta_dpo/beta": 0.084126777946949, "beta_dpo/beta_margin_grad_mean": -0.44095876812934875, "beta_dpo/beta_margin_grad_std": 0.12159502506256104, "beta_dpo/beta_margin_mean": 0.27267566323280334, "beta_dpo/beta_margin_std": 0.5938247442245483, "beta_dpo/beta_used": 0.084126777946949, "beta_dpo/beta_used_raw": 0.084126777946949, "beta_dpo/gap_mean": 2.017143726348877, "beta_dpo/gap_std": 5.090367317199707, "beta_dpo/loss_margin_mean": 1.8112168312072754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17687074829931973, "grad_norm": 25.763986587524414, "learning_rate": 4.916517197732933e-07, "logits/chosen": 0.2703200876712799, "logits/rejected": 0.23447707295417786, "loss": 1.2039, "step": 117 }, { "beta_dpo/beta": 0.12284408509731293, "beta_dpo/beta_margin_grad_mean": -0.4303743243217468, "beta_dpo/beta_margin_grad_std": 0.14086388051509857, "beta_dpo/beta_margin_mean": 0.31298136711120605, "beta_dpo/beta_margin_std": 0.6518138647079468, "beta_dpo/beta_used": 0.12284408509731293, "beta_dpo/beta_used_raw": 0.12284408509731293, "beta_dpo/gap_mean": 2.001436710357666, "beta_dpo/gap_std": 5.041218280792236, "beta_dpo/loss_margin_mean": 2.388618230819702, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17838246409674982, "grad_norm": 36.57698059082031, "learning_rate": 4.913095046794281e-07, "logits/chosen": 0.3213784098625183, "logits/rejected": 0.2811124324798584, "loss": 1.1707, "step": 118 }, { "beta_dpo/beta": 0.09360114485025406, "beta_dpo/beta_margin_grad_mean": -0.4581930935382843, "beta_dpo/beta_margin_grad_std": 0.14400675892829895, "beta_dpo/beta_margin_mean": 0.19105002284049988, "beta_dpo/beta_margin_std": 0.678154706954956, "beta_dpo/beta_used": 0.09360114485025406, "beta_dpo/beta_used_raw": 0.09360114485025406, "beta_dpo/gap_mean": 1.9388705492019653, "beta_dpo/gap_std": 5.184415817260742, "beta_dpo/loss_margin_mean": 1.7741563320159912, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17989417989417988, "grad_norm": 29.395437240600586, "learning_rate": 4.909605396399855e-07, "logits/chosen": 0.24336868524551392, "logits/rejected": 0.2054998278617859, "loss": 1.2536, "step": 119 }, { "beta_dpo/beta": 0.11056067794561386, "beta_dpo/beta_margin_grad_mean": -0.42801910638809204, "beta_dpo/beta_margin_grad_std": 0.10624414682388306, "beta_dpo/beta_margin_mean": 0.30804508924484253, "beta_dpo/beta_margin_std": 0.4607044756412506, "beta_dpo/beta_used": 0.11056067794561386, "beta_dpo/beta_used_raw": 0.11056067794561386, "beta_dpo/gap_mean": 2.0835390090942383, "beta_dpo/gap_std": 5.089218616485596, "beta_dpo/loss_margin_mean": 2.8222334384918213, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18140589569160998, "grad_norm": 33.18598175048828, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.24057039618492126, "logits/rejected": 0.1823032796382904, "loss": 1.1735, "step": 120 }, { "beta_dpo/beta": 0.06963507831096649, "beta_dpo/beta_margin_grad_mean": -0.44736799597740173, "beta_dpo/beta_margin_grad_std": 0.0942336916923523, "beta_dpo/beta_margin_mean": 0.22871233522891998, "beta_dpo/beta_margin_std": 0.4231445789337158, "beta_dpo/beta_used": 0.06963507831096649, "beta_dpo/beta_used_raw": 0.06963507831096649, "beta_dpo/gap_mean": 2.3149490356445312, "beta_dpo/gap_std": 5.239107131958008, "beta_dpo/loss_margin_mean": 3.3256545066833496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18291761148904007, "grad_norm": 21.643383026123047, "learning_rate": 4.902423989581143e-07, "logits/chosen": 0.3411806523799896, "logits/rejected": 0.2576831877231598, "loss": 1.2754, "step": 121 }, { "beta_dpo/beta": 0.04715808108448982, "beta_dpo/beta_margin_grad_mean": -0.46957728266716003, "beta_dpo/beta_margin_grad_std": 0.09799901396036148, "beta_dpo/beta_margin_mean": 0.14232568442821503, "beta_dpo/beta_margin_std": 0.4737836420536041, "beta_dpo/beta_used": 0.04715808108448982, "beta_dpo/beta_used_raw": 0.04715808108448982, "beta_dpo/gap_mean": 2.383568048477173, "beta_dpo/gap_std": 5.513972282409668, "beta_dpo/loss_margin_mean": 2.6904125213623047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18442932728647016, "grad_norm": 18.69718360900879, "learning_rate": 4.898732434036243e-07, "logits/chosen": 0.27230706810951233, "logits/rejected": 0.23791071772575378, "loss": 1.3057, "step": 122 }, { "beta_dpo/beta": 0.04343951866030693, "beta_dpo/beta_margin_grad_mean": -0.4723133146762848, "beta_dpo/beta_margin_grad_std": 0.05662338435649872, "beta_dpo/beta_margin_mean": 0.11453551054000854, "beta_dpo/beta_margin_std": 0.2412138134241104, "beta_dpo/beta_used": 0.04343951866030693, "beta_dpo/beta_used_raw": 0.04343951866030693, "beta_dpo/gap_mean": 2.4602584838867188, "beta_dpo/gap_std": 5.645776748657227, "beta_dpo/loss_margin_mean": 2.6306912899017334, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18594104308390022, "grad_norm": 13.800962448120117, "learning_rate": 4.894973780788722e-07, "logits/chosen": 0.26904135942459106, "logits/rejected": 0.2292679101228714, "loss": 1.3275, "step": 123 }, { "beta_dpo/beta": 0.14997775852680206, "beta_dpo/beta_margin_grad_mean": -0.39104214310646057, "beta_dpo/beta_margin_grad_std": 0.1786145716905594, "beta_dpo/beta_margin_mean": 0.5140237808227539, "beta_dpo/beta_margin_std": 0.888462483882904, "beta_dpo/beta_used": 0.14997775852680206, "beta_dpo/beta_used_raw": 0.14997775852680206, "beta_dpo/gap_mean": 2.6044697761535645, "beta_dpo/gap_std": 5.672214508056641, "beta_dpo/loss_margin_mean": 3.4369006156921387, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1874527588813303, "grad_norm": 37.8648796081543, "learning_rate": 4.89114813497619e-07, "logits/chosen": 0.278278112411499, "logits/rejected": 0.22016727924346924, "loss": 1.0223, "step": 124 }, { "beta_dpo/beta": 0.052618607878685, "beta_dpo/beta_margin_grad_mean": -0.4515383243560791, "beta_dpo/beta_margin_grad_std": 0.09755320847034454, "beta_dpo/beta_margin_mean": 0.21529045701026917, "beta_dpo/beta_margin_std": 0.4597736895084381, "beta_dpo/beta_used": 0.052618607878685, "beta_dpo/beta_used_raw": 0.052618607878685, "beta_dpo/gap_mean": 2.7607717514038086, "beta_dpo/gap_std": 5.762345314025879, "beta_dpo/loss_margin_mean": 3.107497453689575, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1889644746787604, "grad_norm": 24.20044708251953, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.31541913747787476, "logits/rejected": 0.25735703110694885, "loss": 1.2743, "step": 125 }, { "beta_dpo/beta": 0.0405975878238678, "beta_dpo/beta_margin_grad_mean": -0.48580294847488403, "beta_dpo/beta_margin_grad_std": 0.0543222539126873, "beta_dpo/beta_margin_mean": 0.05759422481060028, "beta_dpo/beta_margin_std": 0.22094084322452545, "beta_dpo/beta_used": 0.0405975878238678, "beta_dpo/beta_used_raw": 0.0405975878238678, "beta_dpo/gap_mean": 2.5174307823181152, "beta_dpo/gap_std": 5.691348552703857, "beta_dpo/loss_margin_mean": 1.3669215440750122, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19047619047619047, "grad_norm": 14.997529029846191, "learning_rate": 4.883296295573176e-07, "logits/chosen": 0.1563548743724823, "logits/rejected": 0.15073931217193604, "loss": 1.33, "step": 126 }, { "beta_dpo/beta": 0.09659086167812347, "beta_dpo/beta_margin_grad_mean": -0.42623478174209595, "beta_dpo/beta_margin_grad_std": 0.11462006717920303, "beta_dpo/beta_margin_mean": 0.32010120153427124, "beta_dpo/beta_margin_std": 0.5070313811302185, "beta_dpo/beta_used": 0.09659086167812347, "beta_dpo/beta_used_raw": 0.09659086167812347, "beta_dpo/gap_mean": 2.602691650390625, "beta_dpo/gap_std": 5.580142021179199, "beta_dpo/loss_margin_mean": 3.193540573120117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19198790627362056, "grad_norm": 29.69304084777832, "learning_rate": 4.87927032161552e-07, "logits/chosen": 0.2605654001235962, "logits/rejected": 0.2306014895439148, "loss": 1.1924, "step": 127 }, { "beta_dpo/beta": 0.08403972536325455, "beta_dpo/beta_margin_grad_mean": -0.4287105202674866, "beta_dpo/beta_margin_grad_std": 0.14072571694850922, "beta_dpo/beta_margin_mean": 0.3516583740711212, "beta_dpo/beta_margin_std": 0.7548319101333618, "beta_dpo/beta_used": 0.08403972536325455, "beta_dpo/beta_used_raw": 0.08403972536325455, "beta_dpo/gap_mean": 2.808605670928955, "beta_dpo/gap_std": 5.833803176879883, "beta_dpo/loss_margin_mean": 3.8815970420837402, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19349962207105065, "grad_norm": 28.656200408935547, "learning_rate": 4.875177794352363e-07, "logits/chosen": 0.26921409368515015, "logits/rejected": 0.21398761868476868, "loss": 1.2019, "step": 128 }, { "beta_dpo/beta": 0.033706970512866974, "beta_dpo/beta_margin_grad_mean": -0.4868035614490509, "beta_dpo/beta_margin_grad_std": 0.060135770589113235, "beta_dpo/beta_margin_mean": 0.05301497131586075, "beta_dpo/beta_margin_std": 0.24528354406356812, "beta_dpo/beta_used": 0.033706970512866974, "beta_dpo/beta_used_raw": 0.033706970512866974, "beta_dpo/gap_mean": 2.6513404846191406, "beta_dpo/gap_std": 6.111024856567383, "beta_dpo/loss_margin_mean": 1.6255993843078613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19501133786848074, "grad_norm": 12.075440406799316, "learning_rate": 4.871018828260491e-07, "logits/chosen": 0.2676212191581726, "logits/rejected": 0.2616123557090759, "loss": 1.3404, "step": 129 }, { "beta_dpo/beta": 0.10842250287532806, "beta_dpo/beta_margin_grad_mean": -0.4172552824020386, "beta_dpo/beta_margin_grad_std": 0.1448642611503601, "beta_dpo/beta_margin_mean": 0.3701942563056946, "beta_dpo/beta_margin_std": 0.6463952660560608, "beta_dpo/beta_used": 0.10842250287532806, "beta_dpo/beta_used_raw": 0.10842250287532806, "beta_dpo/gap_mean": 2.757774829864502, "beta_dpo/gap_std": 6.210426330566406, "beta_dpo/loss_margin_mean": 3.5002007484436035, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1965230536659108, "grad_norm": 35.02021408081055, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.23043951392173767, "logits/rejected": 0.1808605194091797, "loss": 1.1603, "step": 130 }, { "beta_dpo/beta": 0.09228923916816711, "beta_dpo/beta_margin_grad_mean": -0.41555356979370117, "beta_dpo/beta_margin_grad_std": 0.1676713079214096, "beta_dpo/beta_margin_mean": 0.4410023093223572, "beta_dpo/beta_margin_std": 0.9455744028091431, "beta_dpo/beta_used": 0.09228923916816711, "beta_dpo/beta_used_raw": 0.09228923916816711, "beta_dpo/gap_mean": 3.0267586708068848, "beta_dpo/gap_std": 6.380265235900879, "beta_dpo/loss_margin_mean": 4.280169486999512, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1980347694633409, "grad_norm": 31.57815170288086, "learning_rate": 4.86250204678667e-07, "logits/chosen": 0.2520222067832947, "logits/rejected": 0.1882476657629013, "loss": 1.2054, "step": 131 }, { "beta_dpo/beta": 0.15973003208637238, "beta_dpo/beta_margin_grad_mean": -0.37915053963661194, "beta_dpo/beta_margin_grad_std": 0.18757346272468567, "beta_dpo/beta_margin_mean": 0.572222888469696, "beta_dpo/beta_margin_std": 0.9784995317459106, "beta_dpo/beta_used": 0.15973003208637238, "beta_dpo/beta_used_raw": 0.15973003208637238, "beta_dpo/gap_mean": 3.1300902366638184, "beta_dpo/gap_std": 6.296309471130371, "beta_dpo/loss_margin_mean": 3.5487518310546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19954648526077098, "grad_norm": 39.60453414916992, "learning_rate": 4.858144469637408e-07, "logits/chosen": 0.32432320713996887, "logits/rejected": 0.2931896150112152, "loss": 0.9575, "step": 132 }, { "beta_dpo/beta": 0.07123492658138275, "beta_dpo/beta_margin_grad_mean": -0.4693993330001831, "beta_dpo/beta_margin_grad_std": 0.13667266070842743, "beta_dpo/beta_margin_mean": 0.13288940489292145, "beta_dpo/beta_margin_std": 0.6505784392356873, "beta_dpo/beta_used": 0.07123492658138275, "beta_dpo/beta_used_raw": 0.07123492658138275, "beta_dpo/gap_mean": 2.9774646759033203, "beta_dpo/gap_std": 6.303676605224609, "beta_dpo/loss_margin_mean": 2.090972423553467, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20105820105820105, "grad_norm": 26.4464168548584, "learning_rate": 4.853720930118138e-07, "logits/chosen": 0.24306905269622803, "logits/rejected": 0.23324322700500488, "loss": 1.2258, "step": 133 }, { "beta_dpo/beta": 0.11694416403770447, "beta_dpo/beta_margin_grad_mean": -0.3908437192440033, "beta_dpo/beta_margin_grad_std": 0.16718235611915588, "beta_dpo/beta_margin_mean": 0.5406786799430847, "beta_dpo/beta_margin_std": 0.9178445935249329, "beta_dpo/beta_used": 0.11694416403770447, "beta_dpo/beta_used_raw": 0.11694416403770447, "beta_dpo/gap_mean": 3.1626126766204834, "beta_dpo/gap_std": 6.352079391479492, "beta_dpo/loss_margin_mean": 4.546454429626465, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20256991685563114, "grad_norm": 36.3588981628418, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.3457499146461487, "logits/rejected": 0.29014575481414795, "loss": 1.094, "step": 134 }, { "beta_dpo/beta": 0.07570680230855942, "beta_dpo/beta_margin_grad_mean": -0.44854554533958435, "beta_dpo/beta_margin_grad_std": 0.1104312390089035, "beta_dpo/beta_margin_mean": 0.2286955565214157, "beta_dpo/beta_margin_std": 0.5155693888664246, "beta_dpo/beta_used": 0.07570680230855942, "beta_dpo/beta_used_raw": 0.07570680230855942, "beta_dpo/gap_mean": 3.166677951812744, "beta_dpo/gap_std": 6.442228317260742, "beta_dpo/loss_margin_mean": 2.8143091201782227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20408163265306123, "grad_norm": 22.227096557617188, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.293022096157074, "logits/rejected": 0.2606502175331116, "loss": 1.2059, "step": 135 }, { "beta_dpo/beta": 0.14603909850120544, "beta_dpo/beta_margin_grad_mean": -0.39259859919548035, "beta_dpo/beta_margin_grad_std": 0.23135711252689362, "beta_dpo/beta_margin_mean": 0.6742245554924011, "beta_dpo/beta_margin_std": 1.4196631908416748, "beta_dpo/beta_used": 0.14603909850120544, "beta_dpo/beta_used_raw": 0.14603909850120544, "beta_dpo/gap_mean": 3.3520989418029785, "beta_dpo/gap_std": 6.7299604415893555, "beta_dpo/loss_margin_mean": 3.902292251586914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20559334845049132, "grad_norm": 55.59946823120117, "learning_rate": 4.840055783904106e-07, "logits/chosen": 0.26997774839401245, "logits/rejected": 0.1992480307817459, "loss": 1.0577, "step": 136 }, { "beta_dpo/beta": 0.06228652596473694, "beta_dpo/beta_margin_grad_mean": -0.4408150017261505, "beta_dpo/beta_margin_grad_std": 0.10646083205938339, "beta_dpo/beta_margin_mean": 0.2699366807937622, "beta_dpo/beta_margin_std": 0.5479843616485596, "beta_dpo/beta_used": 0.06228652596473694, "beta_dpo/beta_used_raw": 0.06228652596473694, "beta_dpo/gap_mean": 3.378662586212158, "beta_dpo/gap_std": 6.867051124572754, "beta_dpo/loss_margin_mean": 4.064416408538818, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20710506424792138, "grad_norm": 19.85221290588379, "learning_rate": 4.835369650662767e-07, "logits/chosen": 0.30249646306037903, "logits/rejected": 0.2738552689552307, "loss": 1.2387, "step": 137 }, { "beta_dpo/beta": 0.1027345061302185, "beta_dpo/beta_margin_grad_mean": -0.4208414554595947, "beta_dpo/beta_margin_grad_std": 0.17435140907764435, "beta_dpo/beta_margin_mean": 0.3642386198043823, "beta_dpo/beta_margin_std": 0.8839937448501587, "beta_dpo/beta_used": 0.1027345061302185, "beta_dpo/beta_used_raw": 0.1027345061302185, "beta_dpo/gap_mean": 3.448456287384033, "beta_dpo/gap_std": 7.06696891784668, "beta_dpo/loss_margin_mean": 3.50492525100708, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20861678004535147, "grad_norm": 28.683637619018555, "learning_rate": 4.830618192112065e-07, "logits/chosen": 0.3061959147453308, "logits/rejected": 0.2705356180667877, "loss": 1.1284, "step": 138 }, { "beta_dpo/beta": 0.1404586136341095, "beta_dpo/beta_margin_grad_mean": -0.38947877287864685, "beta_dpo/beta_margin_grad_std": 0.19302061200141907, "beta_dpo/beta_margin_mean": 0.5290707945823669, "beta_dpo/beta_margin_std": 1.0226430892944336, "beta_dpo/beta_used": 0.1404586136341095, "beta_dpo/beta_used_raw": 0.1404586136341095, "beta_dpo/gap_mean": 3.507274866104126, "beta_dpo/gap_std": 7.193138599395752, "beta_dpo/loss_margin_mean": 3.7624733448028564, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21012849584278157, "grad_norm": 43.497894287109375, "learning_rate": 4.825801541160509e-07, "logits/chosen": 0.2549942433834076, "logits/rejected": 0.2260269820690155, "loss": 0.9876, "step": 139 }, { "beta_dpo/beta": 0.14053229987621307, "beta_dpo/beta_margin_grad_mean": -0.3765046298503876, "beta_dpo/beta_margin_grad_std": 0.24486418068408966, "beta_dpo/beta_margin_mean": 0.8039000630378723, "beta_dpo/beta_margin_std": 1.6034449338912964, "beta_dpo/beta_used": 0.14053229987621307, "beta_dpo/beta_used_raw": 0.14053229987621307, "beta_dpo/gap_mean": 3.81668758392334, "beta_dpo/gap_std": 7.505254745483398, "beta_dpo/loss_margin_mean": 5.339648723602295, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21164021164021163, "grad_norm": 47.931549072265625, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.26516544818878174, "logits/rejected": 0.2187662273645401, "loss": 1.0943, "step": 140 }, { "beta_dpo/beta": 0.10588505864143372, "beta_dpo/beta_margin_grad_mean": -0.3871758282184601, "beta_dpo/beta_margin_grad_std": 0.2020079493522644, "beta_dpo/beta_margin_mean": 0.6185064911842346, "beta_dpo/beta_margin_std": 1.2682307958602905, "beta_dpo/beta_used": 0.10588505864143372, "beta_dpo/beta_used_raw": 0.10588505864143372, "beta_dpo/gap_mean": 4.12210750579834, "beta_dpo/gap_std": 7.9551897048950195, "beta_dpo/loss_margin_mean": 5.243870735168457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21315192743764172, "grad_norm": 29.578947067260742, "learning_rate": 4.815973202802966e-07, "logits/chosen": 0.3229524493217468, "logits/rejected": 0.27724385261535645, "loss": 1.0874, "step": 141 }, { "beta_dpo/beta": 0.027559388428926468, "beta_dpo/beta_margin_grad_mean": -0.4702756702899933, "beta_dpo/beta_margin_grad_std": 0.07220163941383362, "beta_dpo/beta_margin_mean": 0.1247277557849884, "beta_dpo/beta_margin_std": 0.30607911944389343, "beta_dpo/beta_used": 0.027559388428926468, "beta_dpo/beta_used_raw": 0.02024449221789837, "beta_dpo/gap_mean": 4.11570930480957, "beta_dpo/gap_std": 7.797653675079346, "beta_dpo/loss_margin_mean": 4.000240802764893, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2146636432350718, "grad_norm": 11.837271690368652, "learning_rate": 4.810961790316729e-07, "logits/chosen": 0.31370168924331665, "logits/rejected": 0.2887398898601532, "loss": 1.3053, "step": 142 }, { "beta_dpo/beta": 0.12018455564975739, "beta_dpo/beta_margin_grad_mean": -0.4178207814693451, "beta_dpo/beta_margin_grad_std": 0.2122364342212677, "beta_dpo/beta_margin_mean": 0.3589272201061249, "beta_dpo/beta_margin_std": 1.4373321533203125, "beta_dpo/beta_used": 0.12018455564975739, "beta_dpo/beta_used_raw": 0.12018455564975739, "beta_dpo/gap_mean": 3.8963608741760254, "beta_dpo/gap_std": 8.00349235534668, "beta_dpo/loss_margin_mean": 2.8756353855133057, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2161753590325019, "grad_norm": 34.828216552734375, "learning_rate": 4.805885735261454e-07, "logits/chosen": 0.3244457244873047, "logits/rejected": 0.30710387229919434, "loss": 1.0544, "step": 143 }, { "beta_dpo/beta": 0.03807094693183899, "beta_dpo/beta_margin_grad_mean": -0.4691088795661926, "beta_dpo/beta_margin_grad_std": 0.10766191780567169, "beta_dpo/beta_margin_mean": 0.13813741505146027, "beta_dpo/beta_margin_std": 0.4963976740837097, "beta_dpo/beta_used": 0.03807094693183899, "beta_dpo/beta_used_raw": 0.03807094693183899, "beta_dpo/gap_mean": 3.9589571952819824, "beta_dpo/gap_std": 8.451248168945312, "beta_dpo/loss_margin_mean": 4.306517124176025, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21768707482993196, "grad_norm": 15.574111938476562, "learning_rate": 4.800745179625307e-07, "logits/chosen": 0.2954272925853729, "logits/rejected": 0.26464635133743286, "loss": 1.2931, "step": 144 }, { "beta_dpo/beta": 0.1702955812215805, "beta_dpo/beta_margin_grad_mean": -0.3686128854751587, "beta_dpo/beta_margin_grad_std": 0.2736431062221527, "beta_dpo/beta_margin_mean": 0.7588291168212891, "beta_dpo/beta_margin_std": 1.8663489818572998, "beta_dpo/beta_used": 0.1702955812215805, "beta_dpo/beta_used_raw": 0.1702955812215805, "beta_dpo/gap_mean": 4.025421142578125, "beta_dpo/gap_std": 8.940174102783203, "beta_dpo/loss_margin_mean": 4.432011604309082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21919879062736206, "grad_norm": 57.19875717163086, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.26614999771118164, "logits/rejected": 0.2815524935722351, "loss": 0.9421, "step": 145 }, { "beta_dpo/beta": 0.08469145745038986, "beta_dpo/beta_margin_grad_mean": -0.4177340567111969, "beta_dpo/beta_margin_grad_std": 0.16123804450035095, "beta_dpo/beta_margin_mean": 0.3749273121356964, "beta_dpo/beta_margin_std": 0.7374047636985779, "beta_dpo/beta_used": 0.08469145745038986, "beta_dpo/beta_used_raw": 0.08469145745038986, "beta_dpo/gap_mean": 4.05902099609375, "beta_dpo/gap_std": 8.936864852905273, "beta_dpo/loss_margin_mean": 4.456746578216553, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22071050642479215, "grad_norm": 32.773834228515625, "learning_rate": 4.790271143580173e-07, "logits/chosen": 0.26889604330062866, "logits/rejected": 0.254508912563324, "loss": 1.187, "step": 146 }, { "beta_dpo/beta": 0.1501540094614029, "beta_dpo/beta_margin_grad_mean": -0.4042907953262329, "beta_dpo/beta_margin_grad_std": 0.24906255304813385, "beta_dpo/beta_margin_mean": 0.5283809900283813, "beta_dpo/beta_margin_std": 1.4151684045791626, "beta_dpo/beta_used": 0.1501540094614029, "beta_dpo/beta_used_raw": 0.1501540094614029, "beta_dpo/gap_mean": 4.006381988525391, "beta_dpo/gap_std": 9.127963066101074, "beta_dpo/loss_margin_mean": 3.4659228324890137, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2222222222222222, "grad_norm": 46.82968521118164, "learning_rate": 4.784937956152489e-07, "logits/chosen": 0.2905007600784302, "logits/rejected": 0.24713094532489777, "loss": 1.0305, "step": 147 }, { "beta_dpo/beta": 0.19147343933582306, "beta_dpo/beta_margin_grad_mean": -0.32735762000083923, "beta_dpo/beta_margin_grad_std": 0.26872846484184265, "beta_dpo/beta_margin_mean": 1.068808913230896, "beta_dpo/beta_margin_std": 1.695504069328308, "beta_dpo/beta_used": 0.19147343933582306, "beta_dpo/beta_used_raw": 0.19147343933582306, "beta_dpo/gap_mean": 4.158587455749512, "beta_dpo/gap_std": 9.099699974060059, "beta_dpo/loss_margin_mean": 5.4793381690979, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2237339380196523, "grad_norm": 47.708614349365234, "learning_rate": 4.779540854098347e-07, "logits/chosen": 0.41925525665283203, "logits/rejected": 0.3467964828014374, "loss": 0.8812, "step": 148 }, { "beta_dpo/beta": 0.10122452676296234, "beta_dpo/beta_margin_grad_mean": -0.3874892592430115, "beta_dpo/beta_margin_grad_std": 0.16896942257881165, "beta_dpo/beta_margin_mean": 0.563135027885437, "beta_dpo/beta_margin_std": 0.9112203121185303, "beta_dpo/beta_used": 0.10122452676296234, "beta_dpo/beta_used_raw": 0.10122452676296234, "beta_dpo/gap_mean": 4.419680595397949, "beta_dpo/gap_std": 8.921280860900879, "beta_dpo/loss_margin_mean": 5.370478630065918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2252456538170824, "grad_norm": 31.664949417114258, "learning_rate": 4.774079988386296e-07, "logits/chosen": 0.2955988049507141, "logits/rejected": 0.2491205632686615, "loss": 1.0825, "step": 149 }, { "beta_dpo/beta": 0.12354602664709091, "beta_dpo/beta_margin_grad_mean": -0.3436400294303894, "beta_dpo/beta_margin_grad_std": 0.21282432973384857, "beta_dpo/beta_margin_mean": 0.9323195815086365, "beta_dpo/beta_margin_std": 1.350142478942871, "beta_dpo/beta_used": 0.12354602664709091, "beta_dpo/beta_used_raw": 0.12354602664709091, "beta_dpo/gap_mean": 4.944735527038574, "beta_dpo/gap_std": 9.273387908935547, "beta_dpo/loss_margin_mean": 7.580916881561279, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22675736961451248, "grad_norm": 46.71488952636719, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.3046860694885254, "logits/rejected": 0.2608673572540283, "loss": 1.0507, "step": 150 }, { "beta_dpo/beta": 0.21111351251602173, "beta_dpo/beta_margin_grad_mean": -0.2585567235946655, "beta_dpo/beta_margin_grad_std": 0.25030454993247986, "beta_dpo/beta_margin_mean": 1.8264737129211426, "beta_dpo/beta_margin_std": 2.1160757541656494, "beta_dpo/beta_used": 0.21111351251602173, "beta_dpo/beta_used_raw": 0.21111351251602173, "beta_dpo/gap_mean": 5.5986528396606445, "beta_dpo/gap_std": 9.438240051269531, "beta_dpo/loss_margin_mean": 8.652307510375977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22826908541194255, "grad_norm": 55.5644645690918, "learning_rate": 4.762967578776406e-07, "logits/chosen": 0.313462495803833, "logits/rejected": 0.25910061597824097, "loss": 0.7352, "step": 151 }, { "beta_dpo/beta": 0.09550273418426514, "beta_dpo/beta_margin_grad_mean": -0.38633495569229126, "beta_dpo/beta_margin_grad_std": 0.20238004624843597, "beta_dpo/beta_margin_mean": 0.5673766136169434, "beta_dpo/beta_margin_std": 1.0258516073226929, "beta_dpo/beta_used": 0.09550273418426514, "beta_dpo/beta_used_raw": 0.09550273418426514, "beta_dpo/gap_mean": 5.716914176940918, "beta_dpo/gap_std": 9.608884811401367, "beta_dpo/loss_margin_mean": 5.932858943939209, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22978080120937264, "grad_norm": 29.89628791809082, "learning_rate": 4.757316345716553e-07, "logits/chosen": 0.3986983299255371, "logits/rejected": 0.3447926342487335, "loss": 1.0198, "step": 152 }, { "beta_dpo/beta": 0.11710208654403687, "beta_dpo/beta_margin_grad_mean": -0.35504305362701416, "beta_dpo/beta_margin_grad_std": 0.19991737604141235, "beta_dpo/beta_margin_mean": 0.740251898765564, "beta_dpo/beta_margin_std": 1.0371873378753662, "beta_dpo/beta_used": 0.11710208654403687, "beta_dpo/beta_used_raw": 0.11710208654403687, "beta_dpo/gap_mean": 5.847277641296387, "beta_dpo/gap_std": 9.527088165283203, "beta_dpo/loss_margin_mean": 6.285402297973633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23129251700680273, "grad_norm": 38.12724304199219, "learning_rate": 4.751601970666064e-07, "logits/chosen": 0.2800205647945404, "logits/rejected": 0.24503669142723083, "loss": 0.9413, "step": 153 }, { "beta_dpo/beta": 0.12367129325866699, "beta_dpo/beta_margin_grad_mean": -0.38842007517814636, "beta_dpo/beta_margin_grad_std": 0.24438461661338806, "beta_dpo/beta_margin_mean": 0.5933310985565186, "beta_dpo/beta_margin_std": 1.3632546663284302, "beta_dpo/beta_used": 0.12367129325866699, "beta_dpo/beta_used_raw": 0.12367129325866699, "beta_dpo/gap_mean": 5.747869968414307, "beta_dpo/gap_std": 9.679136276245117, "beta_dpo/loss_margin_mean": 4.650402545928955, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2328042328042328, "grad_norm": 34.96685028076172, "learning_rate": 4.745824613468292e-07, "logits/chosen": 0.3880733847618103, "logits/rejected": 0.38508933782577515, "loss": 0.9928, "step": 154 }, { "beta_dpo/beta": 0.1392279863357544, "beta_dpo/beta_margin_grad_mean": -0.3571988344192505, "beta_dpo/beta_margin_grad_std": 0.2581491768360138, "beta_dpo/beta_margin_mean": 0.9176416397094727, "beta_dpo/beta_margin_std": 1.843299150466919, "beta_dpo/beta_used": 0.1392279863357544, "beta_dpo/beta_used_raw": 0.1392279863357544, "beta_dpo/gap_mean": 5.793259620666504, "beta_dpo/gap_std": 10.092991828918457, "beta_dpo/loss_margin_mean": 6.254579544067383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23431594860166288, "grad_norm": 54.003902435302734, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.3745535910129547, "logits/rejected": 0.3542165756225586, "loss": 0.9385, "step": 155 }, { "beta_dpo/beta": 0.1403558999300003, "beta_dpo/beta_margin_grad_mean": -0.3198482096195221, "beta_dpo/beta_margin_grad_std": 0.23029401898384094, "beta_dpo/beta_margin_mean": 1.1250354051589966, "beta_dpo/beta_margin_std": 1.623459815979004, "beta_dpo/beta_used": 0.1403558999300003, "beta_dpo/beta_used_raw": 0.1403558999300003, "beta_dpo/gap_mean": 6.089911460876465, "beta_dpo/gap_std": 10.31209659576416, "beta_dpo/loss_margin_mean": 8.206597328186035, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23582766439909297, "grad_norm": 41.47368621826172, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 0.3209989070892334, "logits/rejected": 0.27650001645088196, "loss": 0.8772, "step": 156 }, { "beta_dpo/beta": 0.10112761706113815, "beta_dpo/beta_margin_grad_mean": -0.37136074900627136, "beta_dpo/beta_margin_grad_std": 0.22732508182525635, "beta_dpo/beta_margin_mean": 0.9316520690917969, "beta_dpo/beta_margin_std": 1.8109554052352905, "beta_dpo/beta_used": 0.10112761706113815, "beta_dpo/beta_used_raw": 0.06065015494823456, "beta_dpo/gap_mean": 6.362764358520508, "beta_dpo/gap_std": 10.283683776855469, "beta_dpo/loss_margin_mean": 6.171655654907227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23733938019652306, "grad_norm": 34.023895263671875, "learning_rate": 4.728116273823847e-07, "logits/chosen": 0.31635981798171997, "logits/rejected": 0.29805511236190796, "loss": 1.0313, "step": 157 }, { "beta_dpo/beta": 0.09633895754814148, "beta_dpo/beta_margin_grad_mean": -0.3940832018852234, "beta_dpo/beta_margin_grad_std": 0.21066348254680634, "beta_dpo/beta_margin_mean": 0.6347554326057434, "beta_dpo/beta_margin_std": 1.3230136632919312, "beta_dpo/beta_used": 0.09633895754814148, "beta_dpo/beta_used_raw": 0.09633895754814148, "beta_dpo/gap_mean": 6.245730400085449, "beta_dpo/gap_std": 10.449724197387695, "beta_dpo/loss_margin_mean": 6.507798194885254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23885109599395313, "grad_norm": 35.92675018310547, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 0.34792205691337585, "logits/rejected": 0.31133797764778137, "loss": 1.0874, "step": 158 }, { "beta_dpo/beta": 0.1303880512714386, "beta_dpo/beta_margin_grad_mean": -0.33156701922416687, "beta_dpo/beta_margin_grad_std": 0.23368042707443237, "beta_dpo/beta_margin_mean": 1.0686310529708862, "beta_dpo/beta_margin_std": 1.632267713546753, "beta_dpo/beta_used": 0.1303880512714386, "beta_dpo/beta_used_raw": 0.1303880512714386, "beta_dpo/gap_mean": 6.408536911010742, "beta_dpo/gap_std": 10.668962478637695, "beta_dpo/loss_margin_mean": 7.6960601806640625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24036281179138322, "grad_norm": 44.9423828125, "learning_rate": 4.715998812855304e-07, "logits/chosen": 0.3547423779964447, "logits/rejected": 0.3168538212776184, "loss": 0.9662, "step": 159 }, { "beta_dpo/beta": 0.043398354202508926, "beta_dpo/beta_margin_grad_mean": -0.44123727083206177, "beta_dpo/beta_margin_grad_std": 0.15287929773330688, "beta_dpo/beta_margin_mean": 0.29121875762939453, "beta_dpo/beta_margin_std": 0.7604160904884338, "beta_dpo/beta_used": 0.043398354202508926, "beta_dpo/beta_used_raw": 0.04327729716897011, "beta_dpo/gap_mean": 6.495538711547852, "beta_dpo/gap_std": 10.80352783203125, "beta_dpo/loss_margin_mean": 6.3689141273498535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2418745275888133, "grad_norm": 17.05034828186035, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.24205464124679565, "logits/rejected": 0.1971985101699829, "loss": 1.2066, "step": 160 }, { "beta_dpo/beta": 0.19278591871261597, "beta_dpo/beta_margin_grad_mean": -0.30224671959877014, "beta_dpo/beta_margin_grad_std": 0.27746814489364624, "beta_dpo/beta_margin_mean": 1.3429075479507446, "beta_dpo/beta_margin_std": 2.287931442260742, "beta_dpo/beta_used": 0.19278591871261597, "beta_dpo/beta_used_raw": 0.19278591871261597, "beta_dpo/gap_mean": 6.569012641906738, "beta_dpo/gap_std": 10.928247451782227, "beta_dpo/loss_margin_mean": 7.049097537994385, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24338624338624337, "grad_norm": 47.56407928466797, "learning_rate": 4.703633408618955e-07, "logits/chosen": 0.360584020614624, "logits/rejected": 0.3227540850639343, "loss": 0.7116, "step": 161 }, { "beta_dpo/beta": 0.2703293561935425, "beta_dpo/beta_margin_grad_mean": -0.23558209836483002, "beta_dpo/beta_margin_grad_std": 0.2916191816329956, "beta_dpo/beta_margin_mean": 2.5003085136413574, "beta_dpo/beta_margin_std": 3.345001220703125, "beta_dpo/beta_used": 0.2703293561935425, "beta_dpo/beta_used_raw": 0.2703293561935425, "beta_dpo/gap_mean": 6.996166229248047, "beta_dpo/gap_std": 10.952880859375, "beta_dpo/loss_margin_mean": 9.228057861328125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24489795918367346, "grad_norm": 70.73652648925781, "learning_rate": 4.697358159051549e-07, "logits/chosen": 0.4289015531539917, "logits/rejected": 0.3779623508453369, "loss": 0.7172, "step": 162 }, { "beta_dpo/beta": 0.12281934171915054, "beta_dpo/beta_margin_grad_mean": -0.3348690867424011, "beta_dpo/beta_margin_grad_std": 0.23672305047512054, "beta_dpo/beta_margin_mean": 1.490291953086853, "beta_dpo/beta_margin_std": 2.317763328552246, "beta_dpo/beta_used": 0.12281934171915054, "beta_dpo/beta_used_raw": 0.08148720860481262, "beta_dpo/gap_mean": 7.235516548156738, "beta_dpo/gap_std": 11.109076499938965, "beta_dpo/loss_margin_mean": 8.966986656188965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24640967498110355, "grad_norm": 42.12664794921875, "learning_rate": 4.691021444652876e-07, "logits/chosen": 0.34994274377822876, "logits/rejected": 0.30174481868743896, "loss": 0.9681, "step": 163 }, { "beta_dpo/beta": 0.15474146604537964, "beta_dpo/beta_margin_grad_mean": -0.28919804096221924, "beta_dpo/beta_margin_grad_std": 0.27007272839546204, "beta_dpo/beta_margin_mean": 1.4409639835357666, "beta_dpo/beta_margin_std": 1.9009385108947754, "beta_dpo/beta_used": 0.15474146604537964, "beta_dpo/beta_used_raw": 0.15474146604537964, "beta_dpo/gap_mean": 7.648365020751953, "beta_dpo/gap_std": 11.386979103088379, "beta_dpo/loss_margin_mean": 9.083514213562012, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24792139077853365, "grad_norm": 40.19808578491211, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 0.33891743421554565, "logits/rejected": 0.2750347852706909, "loss": 0.7839, "step": 164 }, { "beta_dpo/beta": 0.07602068781852722, "beta_dpo/beta_margin_grad_mean": -0.37450677156448364, "beta_dpo/beta_margin_grad_std": 0.19640876352787018, "beta_dpo/beta_margin_mean": 0.8033673167228699, "beta_dpo/beta_margin_std": 1.3503183126449585, "beta_dpo/beta_used": 0.07602068781852722, "beta_dpo/beta_used_raw": 0.07242593914270401, "beta_dpo/gap_mean": 7.7765655517578125, "beta_dpo/gap_std": 11.585585594177246, "beta_dpo/loss_margin_mean": 8.278116226196289, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2494331065759637, "grad_norm": 25.78687858581543, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.3837462067604065, "logits/rejected": 0.32918989658355713, "loss": 1.0428, "step": 165 }, { "beta_dpo/beta": 0.01979365386068821, "beta_dpo/beta_margin_grad_mean": -0.46458086371421814, "beta_dpo/beta_margin_grad_std": 0.07628511637449265, "beta_dpo/beta_margin_mean": 0.14900416135787964, "beta_dpo/beta_margin_std": 0.3222452998161316, "beta_dpo/beta_used": 0.01979365386068821, "beta_dpo/beta_used_raw": 0.015947217121720314, "beta_dpo/gap_mean": 7.783942222595215, "beta_dpo/gap_std": 11.442436218261719, "beta_dpo/loss_margin_mean": 6.6719818115234375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2509448223733938, "grad_norm": 10.664765357971191, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 0.3633756637573242, "logits/rejected": 0.27223867177963257, "loss": 1.2742, "step": 166 }, { "beta_dpo/beta": 0.13996389508247375, "beta_dpo/beta_margin_grad_mean": -0.3225991129875183, "beta_dpo/beta_margin_grad_std": 0.2330418974161148, "beta_dpo/beta_margin_mean": 1.5369998216629028, "beta_dpo/beta_margin_std": 2.3832523822784424, "beta_dpo/beta_used": 0.13996389508247375, "beta_dpo/beta_used_raw": 0.13996389508247375, "beta_dpo/gap_mean": 8.021318435668945, "beta_dpo/gap_std": 11.419715881347656, "beta_dpo/loss_margin_mean": 9.416756629943848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25245653817082386, "grad_norm": 44.863494873046875, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 0.29010146856307983, "logits/rejected": 0.2558974623680115, "loss": 0.8469, "step": 167 }, { "beta_dpo/beta": 0.12838992476463318, "beta_dpo/beta_margin_grad_mean": -0.35723546147346497, "beta_dpo/beta_margin_grad_std": 0.21953362226486206, "beta_dpo/beta_margin_mean": 1.0821810960769653, "beta_dpo/beta_margin_std": 1.8164141178131104, "beta_dpo/beta_used": 0.12838992476463318, "beta_dpo/beta_used_raw": 0.12838992476463318, "beta_dpo/gap_mean": 8.001766204833984, "beta_dpo/gap_std": 11.373407363891602, "beta_dpo/loss_margin_mean": 7.806889533996582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25396825396825395, "grad_norm": 37.25100326538086, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 0.316717267036438, "logits/rejected": 0.2821142077445984, "loss": 0.9152, "step": 168 }, { "beta_dpo/beta": 0.08584662526845932, "beta_dpo/beta_margin_grad_mean": -0.3747517466545105, "beta_dpo/beta_margin_grad_std": 0.22064180672168732, "beta_dpo/beta_margin_mean": 0.7022613883018494, "beta_dpo/beta_margin_std": 1.4712177515029907, "beta_dpo/beta_used": 0.08584662526845932, "beta_dpo/beta_used_raw": 0.08584662526845932, "beta_dpo/gap_mean": 7.975764274597168, "beta_dpo/gap_std": 11.88857364654541, "beta_dpo/loss_margin_mean": 8.126168251037598, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25547996976568405, "grad_norm": 37.95653533935547, "learning_rate": 4.651720442612075e-07, "logits/chosen": 0.38414955139160156, "logits/rejected": 0.3503633141517639, "loss": 1.0402, "step": 169 }, { "beta_dpo/beta": 0.1892240047454834, "beta_dpo/beta_margin_grad_mean": -0.35289227962493896, "beta_dpo/beta_margin_grad_std": 0.2945278286933899, "beta_dpo/beta_margin_mean": 1.7924374341964722, "beta_dpo/beta_margin_std": 4.277266979217529, "beta_dpo/beta_used": 0.1892240047454834, "beta_dpo/beta_used_raw": 0.1892240047454834, "beta_dpo/gap_mean": 8.009968757629395, "beta_dpo/gap_std": 12.184398651123047, "beta_dpo/loss_margin_mean": 8.559287071228027, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25699168556311414, "grad_norm": 62.05488967895508, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.33764785528182983, "logits/rejected": 0.33556270599365234, "loss": 1.0906, "step": 170 }, { "beta_dpo/beta": 0.060626909136772156, "beta_dpo/beta_margin_grad_mean": -0.38109874725341797, "beta_dpo/beta_margin_grad_std": 0.21113894879817963, "beta_dpo/beta_margin_mean": 0.7707568407058716, "beta_dpo/beta_margin_std": 1.3697214126586914, "beta_dpo/beta_used": 0.060626909136772156, "beta_dpo/beta_used_raw": 0.033127665519714355, "beta_dpo/gap_mean": 8.439079284667969, "beta_dpo/gap_std": 12.717405319213867, "beta_dpo/loss_margin_mean": 9.387589454650879, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2585034013605442, "grad_norm": 30.833776473999023, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 0.3928905725479126, "logits/rejected": 0.3435228765010834, "loss": 1.1744, "step": 171 }, { "beta_dpo/beta": 0.05887819081544876, "beta_dpo/beta_margin_grad_mean": -0.3980772793292999, "beta_dpo/beta_margin_grad_std": 0.1916777342557907, "beta_dpo/beta_margin_mean": 0.6465792059898376, "beta_dpo/beta_margin_std": 1.2853487730026245, "beta_dpo/beta_used": 0.05887819081544876, "beta_dpo/beta_used_raw": 0.015539560467004776, "beta_dpo/gap_mean": 8.218719482421875, "beta_dpo/gap_std": 12.945394515991211, "beta_dpo/loss_margin_mean": 8.467949867248535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2600151171579743, "grad_norm": 28.302942276000977, "learning_rate": 4.631254907558365e-07, "logits/chosen": 0.39802086353302, "logits/rejected": 0.34046101570129395, "loss": 1.1582, "step": 172 }, { "beta_dpo/beta": 0.14484825730323792, "beta_dpo/beta_margin_grad_mean": -0.38113656640052795, "beta_dpo/beta_margin_grad_std": 0.2808246612548828, "beta_dpo/beta_margin_mean": 1.866429328918457, "beta_dpo/beta_margin_std": 3.806476593017578, "beta_dpo/beta_used": 0.14484825730323792, "beta_dpo/beta_used_raw": 0.11751250922679901, "beta_dpo/gap_mean": 8.664399147033691, "beta_dpo/gap_std": 13.51144790649414, "beta_dpo/loss_margin_mean": 9.301438331604004, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2615268329554044, "grad_norm": 59.29345703125, "learning_rate": 4.624313574873786e-07, "logits/chosen": 0.3995574712753296, "logits/rejected": 0.3124532103538513, "loss": 1.2498, "step": 173 }, { "beta_dpo/beta": 0.06351954489946365, "beta_dpo/beta_margin_grad_mean": -0.3894718587398529, "beta_dpo/beta_margin_grad_std": 0.20763230323791504, "beta_dpo/beta_margin_mean": 0.6753450632095337, "beta_dpo/beta_margin_std": 1.3690553903579712, "beta_dpo/beta_used": 0.06351954489946365, "beta_dpo/beta_used_raw": 0.017194651067256927, "beta_dpo/gap_mean": 8.727994918823242, "beta_dpo/gap_std": 13.735832214355469, "beta_dpo/loss_margin_mean": 9.745848655700684, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26303854875283444, "grad_norm": 27.401628494262695, "learning_rate": 4.61731282057198e-07, "logits/chosen": 0.3719063401222229, "logits/rejected": 0.3064553737640381, "loss": 1.116, "step": 174 }, { "beta_dpo/beta": 0.17716515064239502, "beta_dpo/beta_margin_grad_mean": -0.30274447798728943, "beta_dpo/beta_margin_grad_std": 0.307978093624115, "beta_dpo/beta_margin_mean": 1.780694842338562, "beta_dpo/beta_margin_std": 2.6141295433044434, "beta_dpo/beta_used": 0.17716515064239502, "beta_dpo/beta_used_raw": 0.17716515064239502, "beta_dpo/gap_mean": 8.922683715820312, "beta_dpo/gap_std": 13.874872207641602, "beta_dpo/loss_margin_mean": 10.052102088928223, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26455026455026454, "grad_norm": 61.10188674926758, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.42207950353622437, "logits/rejected": 0.3914017975330353, "loss": 0.9454, "step": 175 }, { "beta_dpo/beta": 0.014193031936883926, "beta_dpo/beta_margin_grad_mean": -0.46745988726615906, "beta_dpo/beta_margin_grad_std": 0.06953144818544388, "beta_dpo/beta_margin_mean": 0.13555732369422913, "beta_dpo/beta_margin_std": 0.29187914729118347, "beta_dpo/beta_used": 0.014193031936883926, "beta_dpo/beta_used_raw": -0.012513306923210621, "beta_dpo/gap_mean": 8.849132537841797, "beta_dpo/gap_std": 14.094968795776367, "beta_dpo/loss_margin_mean": 7.293295860290527, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2660619803476946, "grad_norm": 8.840620040893555, "learning_rate": 4.603133832077953e-07, "logits/chosen": 0.3553032875061035, "logits/rejected": 0.32939857244491577, "loss": 1.2904, "step": 176 }, { "beta_dpo/beta": 0.23863446712493896, "beta_dpo/beta_margin_grad_mean": -0.2016785889863968, "beta_dpo/beta_margin_grad_std": 0.2579502761363983, "beta_dpo/beta_margin_mean": 3.2435951232910156, "beta_dpo/beta_margin_std": 3.862337589263916, "beta_dpo/beta_used": 0.23863446712493896, "beta_dpo/beta_used_raw": 0.23863446712493896, "beta_dpo/gap_mean": 9.347793579101562, "beta_dpo/gap_std": 14.189004898071289, "beta_dpo/loss_margin_mean": 13.310332298278809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2675736961451247, "grad_norm": 49.86887741088867, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 0.4710957705974579, "logits/rejected": 0.377672016620636, "loss": 0.5513, "step": 177 }, { "beta_dpo/beta": 0.1357753425836563, "beta_dpo/beta_margin_grad_mean": -0.35512369871139526, "beta_dpo/beta_margin_grad_std": 0.22710704803466797, "beta_dpo/beta_margin_mean": 1.5265370607376099, "beta_dpo/beta_margin_std": 2.7655467987060547, "beta_dpo/beta_used": 0.1357753425836563, "beta_dpo/beta_used_raw": 0.10194461047649384, "beta_dpo/gap_mean": 9.464914321899414, "beta_dpo/gap_std": 13.956880569458008, "beta_dpo/loss_margin_mean": 8.183725357055664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2690854119425548, "grad_norm": 28.31778335571289, "learning_rate": 4.588719528532341e-07, "logits/chosen": 0.3640734553337097, "logits/rejected": 0.3152313828468323, "loss": 0.8726, "step": 178 }, { "beta_dpo/beta": 0.07843155413866043, "beta_dpo/beta_margin_grad_mean": -0.3692859709262848, "beta_dpo/beta_margin_grad_std": 0.22836384177207947, "beta_dpo/beta_margin_mean": 0.7024539709091187, "beta_dpo/beta_margin_std": 1.2696468830108643, "beta_dpo/beta_used": 0.07843155413866043, "beta_dpo/beta_used_raw": 0.07843155413866043, "beta_dpo/gap_mean": 9.213695526123047, "beta_dpo/gap_std": 14.17374038696289, "beta_dpo/loss_margin_mean": 9.025459289550781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2705971277399849, "grad_norm": 31.640764236450195, "learning_rate": 4.581424636586928e-07, "logits/chosen": 0.388230562210083, "logits/rejected": 0.3721519112586975, "loss": 1.0333, "step": 179 }, { "beta_dpo/beta": 0.053619977086782455, "beta_dpo/beta_margin_grad_mean": -0.4183306396007538, "beta_dpo/beta_margin_grad_std": 0.18744249641895294, "beta_dpo/beta_margin_mean": 0.4771144390106201, "beta_dpo/beta_margin_std": 1.1917914152145386, "beta_dpo/beta_used": 0.053619977086782455, "beta_dpo/beta_used_raw": -0.014215823262929916, "beta_dpo/gap_mean": 8.941102981567383, "beta_dpo/gap_std": 14.313918113708496, "beta_dpo/loss_margin_mean": 7.800354957580566, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.272108843537415, "grad_norm": 19.132511138916016, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.21985140442848206, "logits/rejected": 0.20140361785888672, "loss": 1.1503, "step": 180 }, { "beta_dpo/beta": 0.10039281845092773, "beta_dpo/beta_margin_grad_mean": -0.30794665217399597, "beta_dpo/beta_margin_grad_std": 0.21378229558467865, "beta_dpo/beta_margin_mean": 1.109846830368042, "beta_dpo/beta_margin_std": 1.3198515176773071, "beta_dpo/beta_used": 0.10039281845092773, "beta_dpo/beta_used_raw": 0.10039281845092773, "beta_dpo/gap_mean": 9.251337051391602, "beta_dpo/gap_std": 14.210281372070312, "beta_dpo/loss_margin_mean": 11.050002098083496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.273620559334845, "grad_norm": 36.42921447753906, "learning_rate": 4.566660392614228e-07, "logits/chosen": 0.41873055696487427, "logits/rejected": 0.37909412384033203, "loss": 0.8726, "step": 181 }, { "beta_dpo/beta": 0.3119150400161743, "beta_dpo/beta_margin_grad_mean": -0.242029070854187, "beta_dpo/beta_margin_grad_std": 0.33485910296440125, "beta_dpo/beta_margin_mean": 3.7719202041625977, "beta_dpo/beta_margin_std": 5.102834224700928, "beta_dpo/beta_used": 0.3119150400161743, "beta_dpo/beta_used_raw": 0.3119150400161743, "beta_dpo/gap_mean": 9.723394393920898, "beta_dpo/gap_std": 14.290912628173828, "beta_dpo/loss_margin_mean": 12.058207511901855, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2751322751322751, "grad_norm": 89.14917755126953, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 0.39306944608688354, "logits/rejected": 0.3127225339412689, "loss": 0.62, "step": 182 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4987727999687195, "beta_dpo/beta_margin_grad_std": 0.0033296155743300915, "beta_dpo/beta_margin_mean": 0.004908976145088673, "beta_dpo/beta_margin_std": 0.013319132849574089, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06012484058737755, "beta_dpo/gap_mean": 9.176264762878418, "beta_dpo/gap_std": 14.319192886352539, "beta_dpo/loss_margin_mean": 4.908976078033447, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2766439909297052, "grad_norm": 0.4539560079574585, "learning_rate": 4.551664914523433e-07, "logits/chosen": 0.35062384605407715, "logits/rejected": 0.3301568031311035, "loss": 1.3798, "step": 183 }, { "beta_dpo/beta": 0.061808113008737564, "beta_dpo/beta_margin_grad_mean": -0.3990829586982727, "beta_dpo/beta_margin_grad_std": 0.19997593760490417, "beta_dpo/beta_margin_mean": 0.6703276634216309, "beta_dpo/beta_margin_std": 1.3407752513885498, "beta_dpo/beta_used": 0.061808113008737564, "beta_dpo/beta_used_raw": 0.024503856897354126, "beta_dpo/gap_mean": 8.988155364990234, "beta_dpo/gap_std": 14.048759460449219, "beta_dpo/loss_margin_mean": 8.803841590881348, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2781557067271353, "grad_norm": 26.691272735595703, "learning_rate": 4.544080985994258e-07, "logits/chosen": 0.45184823870658875, "logits/rejected": 0.3895701766014099, "loss": 1.1332, "step": 184 }, { "beta_dpo/beta": 0.12395273894071579, "beta_dpo/beta_margin_grad_mean": -0.37329500913619995, "beta_dpo/beta_margin_grad_std": 0.2816545069217682, "beta_dpo/beta_margin_mean": 1.4491159915924072, "beta_dpo/beta_margin_std": 3.2485547065734863, "beta_dpo/beta_used": 0.12395273894071579, "beta_dpo/beta_used_raw": 0.09359581768512726, "beta_dpo/gap_mean": 9.074477195739746, "beta_dpo/gap_std": 14.342059135437012, "beta_dpo/loss_margin_mean": 9.644372940063477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2796674225245654, "grad_norm": 58.39167785644531, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.41287195682525635, "logits/rejected": 0.36629611253738403, "loss": 1.1856, "step": 185 }, { "beta_dpo/beta": 0.08933384716510773, "beta_dpo/beta_margin_grad_mean": -0.3376484215259552, "beta_dpo/beta_margin_grad_std": 0.23757222294807434, "beta_dpo/beta_margin_mean": 0.938936710357666, "beta_dpo/beta_margin_std": 1.409091830253601, "beta_dpo/beta_used": 0.08933384716510773, "beta_dpo/beta_used_raw": 0.08933384716510773, "beta_dpo/gap_mean": 9.258342742919922, "beta_dpo/gap_std": 14.744714736938477, "beta_dpo/loss_margin_mean": 10.580622673034668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2811791383219955, "grad_norm": 32.79323959350586, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 0.319606214761734, "logits/rejected": 0.2793455719947815, "loss": 0.9711, "step": 186 }, { "beta_dpo/beta": 0.16774369776248932, "beta_dpo/beta_margin_grad_mean": -0.3941460847854614, "beta_dpo/beta_margin_grad_std": 0.2943778336048126, "beta_dpo/beta_margin_mean": 1.7276290655136108, "beta_dpo/beta_margin_std": 4.444072723388672, "beta_dpo/beta_used": 0.16774369776248932, "beta_dpo/beta_used_raw": 0.15828540921211243, "beta_dpo/gap_mean": 9.301834106445312, "beta_dpo/gap_std": 15.03553295135498, "beta_dpo/loss_margin_mean": 8.952919006347656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28269085411942557, "grad_norm": 77.14328002929688, "learning_rate": 4.520986992917297e-07, "logits/chosen": 0.34573984146118164, "logits/rejected": 0.29307812452316284, "loss": 1.1231, "step": 187 }, { "beta_dpo/beta": 0.04338323697447777, "beta_dpo/beta_margin_grad_mean": -0.4201662838459015, "beta_dpo/beta_margin_grad_std": 0.1753028780221939, "beta_dpo/beta_margin_mean": 0.4367694556713104, "beta_dpo/beta_margin_std": 1.02838933467865, "beta_dpo/beta_used": 0.04338323697447777, "beta_dpo/beta_used_raw": 0.007783316075801849, "beta_dpo/gap_mean": 9.230745315551758, "beta_dpo/gap_std": 14.944974899291992, "beta_dpo/loss_margin_mean": 9.515567779541016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2842025699168556, "grad_norm": 19.2230281829834, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 0.3678765296936035, "logits/rejected": 0.32579484581947327, "loss": 1.1893, "step": 188 }, { "beta_dpo/beta": 0.13889284431934357, "beta_dpo/beta_margin_grad_mean": -0.3419681489467621, "beta_dpo/beta_margin_grad_std": 0.23903344571590424, "beta_dpo/beta_margin_mean": 1.627152442932129, "beta_dpo/beta_margin_std": 2.859347343444824, "beta_dpo/beta_used": 0.13889284431934357, "beta_dpo/beta_used_raw": 0.07579651474952698, "beta_dpo/gap_mean": 9.295833587646484, "beta_dpo/gap_std": 14.690962791442871, "beta_dpo/loss_margin_mean": 8.370240211486816, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2857142857142857, "grad_norm": 41.23509979248047, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 0.3950865566730499, "logits/rejected": 0.4080202877521515, "loss": 0.8801, "step": 189 }, { "beta_dpo/beta": 0.09667577594518661, "beta_dpo/beta_margin_grad_mean": -0.3044895529747009, "beta_dpo/beta_margin_grad_std": 0.21711069345474243, "beta_dpo/beta_margin_mean": 1.1539376974105835, "beta_dpo/beta_margin_std": 1.3543658256530762, "beta_dpo/beta_used": 0.09667577594518661, "beta_dpo/beta_used_raw": 0.09667577594518661, "beta_dpo/gap_mean": 9.486055374145508, "beta_dpo/gap_std": 14.589920997619629, "beta_dpo/loss_margin_mean": 11.935791015625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2872260015117158, "grad_norm": 41.752498626708984, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.45457923412323, "logits/rejected": 0.31400829553604126, "loss": 0.9074, "step": 190 }, { "beta_dpo/beta": 0.09922586381435394, "beta_dpo/beta_margin_grad_mean": -0.34414324164390564, "beta_dpo/beta_margin_grad_std": 0.24596984684467316, "beta_dpo/beta_margin_mean": 0.8614938259124756, "beta_dpo/beta_margin_std": 1.517560601234436, "beta_dpo/beta_used": 0.09922586381435394, "beta_dpo/beta_used_raw": 0.09922586381435394, "beta_dpo/gap_mean": 9.516761779785156, "beta_dpo/gap_std": 14.810906410217285, "beta_dpo/loss_margin_mean": 8.773695945739746, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2887377173091459, "grad_norm": 35.169071197509766, "learning_rate": 4.48940460132708e-07, "logits/chosen": 0.4336588382720947, "logits/rejected": 0.4057428538799286, "loss": 0.8965, "step": 191 }, { "beta_dpo/beta": 0.018604885786771774, "beta_dpo/beta_margin_grad_mean": -0.4708011746406555, "beta_dpo/beta_margin_grad_std": 0.07631269097328186, "beta_dpo/beta_margin_mean": 0.12362504750490189, "beta_dpo/beta_margin_std": 0.3264995813369751, "beta_dpo/beta_used": 0.018604885786771774, "beta_dpo/beta_used_raw": -0.016573412343859673, "beta_dpo/gap_mean": 8.850980758666992, "beta_dpo/gap_std": 14.339606285095215, "beta_dpo/loss_margin_mean": 5.650508403778076, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29024943310657597, "grad_norm": 9.701313018798828, "learning_rate": 4.481369327558329e-07, "logits/chosen": 0.43346714973449707, "logits/rejected": 0.4073938727378845, "loss": 1.2685, "step": 192 }, { "beta_dpo/beta": 0.13145090639591217, "beta_dpo/beta_margin_grad_mean": -0.35873836278915405, "beta_dpo/beta_margin_grad_std": 0.231857031583786, "beta_dpo/beta_margin_mean": 1.6050541400909424, "beta_dpo/beta_margin_std": 2.9745678901672363, "beta_dpo/beta_used": 0.13145090639591217, "beta_dpo/beta_used_raw": 0.12950916588306427, "beta_dpo/gap_mean": 9.072196960449219, "beta_dpo/gap_std": 14.12684154510498, "beta_dpo/loss_margin_mean": 10.512983322143555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29176114890400606, "grad_norm": 35.8497200012207, "learning_rate": 4.47327863063023e-07, "logits/chosen": 0.3378722667694092, "logits/rejected": 0.3161700367927551, "loss": 0.9555, "step": 193 }, { "beta_dpo/beta": 0.05448709428310394, "beta_dpo/beta_margin_grad_mean": -0.4038701355457306, "beta_dpo/beta_margin_grad_std": 0.22828541696071625, "beta_dpo/beta_margin_mean": 0.6580454111099243, "beta_dpo/beta_margin_std": 1.4924321174621582, "beta_dpo/beta_used": 0.05448709428310394, "beta_dpo/beta_used_raw": -0.0076342858374118805, "beta_dpo/gap_mean": 9.176326751708984, "beta_dpo/gap_std": 14.445403099060059, "beta_dpo/loss_margin_mean": 8.855490684509277, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29327286470143615, "grad_norm": 29.742713928222656, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 0.39154088497161865, "logits/rejected": 0.36346444487571716, "loss": 1.2431, "step": 194 }, { "beta_dpo/beta": 0.16575054824352264, "beta_dpo/beta_margin_grad_mean": -0.2906278073787689, "beta_dpo/beta_margin_grad_std": 0.28971052169799805, "beta_dpo/beta_margin_mean": 1.8691999912261963, "beta_dpo/beta_margin_std": 2.65287184715271, "beta_dpo/beta_used": 0.16575054824352264, "beta_dpo/beta_used_raw": 0.16575054824352264, "beta_dpo/gap_mean": 9.309657096862793, "beta_dpo/gap_std": 14.455615997314453, "beta_dpo/loss_margin_mean": 10.742607116699219, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2947845804988662, "grad_norm": 59.959014892578125, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.30429327487945557, "logits/rejected": 0.30695855617523193, "loss": 0.9126, "step": 195 }, { "beta_dpo/beta": 0.0779055505990982, "beta_dpo/beta_margin_grad_mean": -0.3585551679134369, "beta_dpo/beta_margin_grad_std": 0.1866200864315033, "beta_dpo/beta_margin_mean": 0.8284059166908264, "beta_dpo/beta_margin_std": 1.2458693981170654, "beta_dpo/beta_used": 0.0779055505990982, "beta_dpo/beta_used_raw": 0.0779055505990982, "beta_dpo/gap_mean": 9.129471778869629, "beta_dpo/gap_std": 14.351459503173828, "beta_dpo/loss_margin_mean": 8.850098609924316, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2962962962962963, "grad_norm": 30.147045135498047, "learning_rate": 4.448676271745197e-07, "logits/chosen": 0.41115450859069824, "logits/rejected": 0.37019413709640503, "loss": 0.9658, "step": 196 }, { "beta_dpo/beta": 0.17337322235107422, "beta_dpo/beta_margin_grad_mean": -0.3025214970111847, "beta_dpo/beta_margin_grad_std": 0.3217240571975708, "beta_dpo/beta_margin_mean": 1.8553613424301147, "beta_dpo/beta_margin_std": 3.1213772296905518, "beta_dpo/beta_used": 0.17337322235107422, "beta_dpo/beta_used_raw": 0.17337322235107422, "beta_dpo/gap_mean": 9.257464408874512, "beta_dpo/gap_std": 14.713043212890625, "beta_dpo/loss_margin_mean": 10.203736305236816, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29780801209372637, "grad_norm": 74.21235656738281, "learning_rate": 4.440366160729392e-07, "logits/chosen": 0.46007847785949707, "logits/rejected": 0.41251420974731445, "loss": 0.9402, "step": 197 }, { "beta_dpo/beta": 0.14420902729034424, "beta_dpo/beta_margin_grad_mean": -0.33038392663002014, "beta_dpo/beta_margin_grad_std": 0.25793397426605225, "beta_dpo/beta_margin_mean": 1.8629796504974365, "beta_dpo/beta_margin_std": 3.182447910308838, "beta_dpo/beta_used": 0.14420902729034424, "beta_dpo/beta_used_raw": 0.14420902729034424, "beta_dpo/gap_mean": 9.538639068603516, "beta_dpo/gap_std": 14.523576736450195, "beta_dpo/loss_margin_mean": 10.75778579711914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29931972789115646, "grad_norm": 32.6105842590332, "learning_rate": 4.432001773500957e-07, "logits/chosen": 0.4632105827331543, "logits/rejected": 0.423373818397522, "loss": 0.8424, "step": 198 }, { "beta_dpo/beta": 0.2410755455493927, "beta_dpo/beta_margin_grad_mean": -0.3472752869129181, "beta_dpo/beta_margin_grad_std": 0.28805285692214966, "beta_dpo/beta_margin_mean": 2.90079665184021, "beta_dpo/beta_margin_std": 5.994080066680908, "beta_dpo/beta_used": 0.2410755455493927, "beta_dpo/beta_used_raw": 0.19233468174934387, "beta_dpo/gap_mean": 9.373411178588867, "beta_dpo/gap_std": 14.620790481567383, "beta_dpo/loss_margin_mean": 8.427955627441406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30083144368858655, "grad_norm": 90.61835479736328, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 0.4073561728000641, "logits/rejected": 0.32114654779434204, "loss": 0.8905, "step": 199 }, { "beta_dpo/beta": 0.14784272015094757, "beta_dpo/beta_margin_grad_mean": -0.3243432939052582, "beta_dpo/beta_margin_grad_std": 0.24798478186130524, "beta_dpo/beta_margin_mean": 2.166560173034668, "beta_dpo/beta_margin_std": 3.9794764518737793, "beta_dpo/beta_used": 0.14784272015094757, "beta_dpo/beta_used_raw": 0.14784272015094757, "beta_dpo/gap_mean": 10.113958358764648, "beta_dpo/gap_std": 15.15192985534668, "beta_dpo/loss_margin_mean": 13.069280624389648, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30234315948601664, "grad_norm": 64.76429748535156, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.43764257431030273, "logits/rejected": 0.3673984110355377, "loss": 0.9318, "step": 200 }, { "epoch": 0.30234315948601664, "eval_beta_dpo/beta": 0.0751558393239975, "eval_beta_dpo/beta_margin_grad_mean": -0.39745259284973145, "eval_beta_dpo/beta_margin_grad_std": 0.13326896727085114, "eval_beta_dpo/beta_margin_mean": 0.866983950138092, "eval_beta_dpo/beta_margin_std": 1.1565665006637573, "eval_beta_dpo/beta_used": 0.0751558393239975, "eval_beta_dpo/beta_used_raw": 0.03455571457743645, "eval_beta_dpo/gap_mean": 10.170903205871582, "eval_beta_dpo/gap_std": 15.263747215270996, "eval_beta_dpo/loss_margin_mean": 9.080164909362793, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 0.42500150203704834, "eval_logits/rejected": 0.37864118814468384, "eval_loss": 0.5938660502433777, "eval_runtime": 38.8927, "eval_samples_per_second": 59.214, "eval_steps_per_second": 1.851, "step": 200 }, { "beta_dpo/beta": 0.10797238349914551, "beta_dpo/beta_margin_grad_mean": -0.34527766704559326, "beta_dpo/beta_margin_grad_std": 0.2624824643135071, "beta_dpo/beta_margin_mean": 0.9581461548805237, "beta_dpo/beta_margin_std": 1.753610372543335, "beta_dpo/beta_used": 0.10797238349914551, "beta_dpo/beta_used_raw": 0.10797238349914551, "beta_dpo/gap_mean": 10.185378074645996, "beta_dpo/gap_std": 15.321168899536133, "beta_dpo/loss_margin_mean": 9.576545715332031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30385487528344673, "grad_norm": 41.83230972290039, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 0.4699372947216034, "logits/rejected": 0.4243282973766327, "loss": 0.8809, "step": 201 }, { "beta_dpo/beta": 0.09276492148637772, "beta_dpo/beta_margin_grad_mean": -0.3580490052700043, "beta_dpo/beta_margin_grad_std": 0.2220529168844223, "beta_dpo/beta_margin_mean": 1.1636439561843872, "beta_dpo/beta_margin_std": 2.07939076423645, "beta_dpo/beta_used": 0.09276492148637772, "beta_dpo/beta_used_raw": 0.036593932658433914, "beta_dpo/gap_mean": 9.951011657714844, "beta_dpo/gap_std": 15.453784942626953, "beta_dpo/loss_margin_mean": 10.190545082092285, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30536659108087677, "grad_norm": 27.87430191040039, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 0.362282931804657, "logits/rejected": 0.292508989572525, "loss": 0.9849, "step": 202 }, { "beta_dpo/beta": 0.009305215440690517, "beta_dpo/beta_margin_grad_mean": -0.48150211572647095, "beta_dpo/beta_margin_grad_std": 0.04638690501451492, "beta_dpo/beta_margin_mean": 0.0751611739397049, "beta_dpo/beta_margin_std": 0.18903209269046783, "beta_dpo/beta_used": 0.009305215440690517, "beta_dpo/beta_used_raw": -0.0015188101679086685, "beta_dpo/gap_mean": 9.854846000671387, "beta_dpo/gap_std": 15.199752807617188, "beta_dpo/loss_margin_mean": 8.687176704406738, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30687830687830686, "grad_norm": 7.202058792114258, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 0.42288321256637573, "logits/rejected": 0.3511812090873718, "loss": 1.3163, "step": 203 }, { "beta_dpo/beta": 0.12975779175758362, "beta_dpo/beta_margin_grad_mean": -0.3484925925731659, "beta_dpo/beta_margin_grad_std": 0.2547266483306885, "beta_dpo/beta_margin_mean": 1.9150761365890503, "beta_dpo/beta_margin_std": 3.3663294315338135, "beta_dpo/beta_used": 0.12975779175758362, "beta_dpo/beta_used_raw": 0.09229342639446259, "beta_dpo/gap_mean": 10.125568389892578, "beta_dpo/gap_std": 15.098621368408203, "beta_dpo/loss_margin_mean": 10.558574676513672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30839002267573695, "grad_norm": 46.554901123046875, "learning_rate": 4.380688857426449e-07, "logits/chosen": 0.36739447712898254, "logits/rejected": 0.3011772036552429, "loss": 1.0569, "step": 204 }, { "beta_dpo/beta": 0.056526899337768555, "beta_dpo/beta_margin_grad_mean": -0.3798976540565491, "beta_dpo/beta_margin_grad_std": 0.176364466547966, "beta_dpo/beta_margin_mean": 0.6287395358085632, "beta_dpo/beta_margin_std": 1.0148792266845703, "beta_dpo/beta_used": 0.056526899337768555, "beta_dpo/beta_used_raw": 0.056526899337768555, "beta_dpo/gap_mean": 10.036584854125977, "beta_dpo/gap_std": 15.158401489257812, "beta_dpo/loss_margin_mean": 10.209553718566895, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30990173847316704, "grad_norm": 26.912302017211914, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.4105345606803894, "logits/rejected": 0.349745512008667, "loss": 1.0787, "step": 205 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4979967772960663, "beta_dpo/beta_margin_grad_std": 0.004184938967227936, "beta_dpo/beta_margin_mean": 0.008013593032956123, "beta_dpo/beta_margin_std": 0.016741132363677025, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09804756939411163, "beta_dpo/gap_mean": 9.589317321777344, "beta_dpo/gap_std": 15.620407104492188, "beta_dpo/loss_margin_mean": 8.013592720031738, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31141345427059713, "grad_norm": 0.5026677846908569, "learning_rate": 4.363161124189387e-07, "logits/chosen": 0.43905892968177795, "logits/rejected": 0.42348912358283997, "loss": 1.3801, "step": 206 }, { "beta_dpo/beta": 0.14289897680282593, "beta_dpo/beta_margin_grad_mean": -0.29581886529922485, "beta_dpo/beta_margin_grad_std": 0.2654317021369934, "beta_dpo/beta_margin_mean": 1.6500314474105835, "beta_dpo/beta_margin_std": 2.359097480773926, "beta_dpo/beta_used": 0.14289897680282593, "beta_dpo/beta_used_raw": 0.14289897680282593, "beta_dpo/gap_mean": 9.915831565856934, "beta_dpo/gap_std": 15.770639419555664, "beta_dpo/loss_margin_mean": 11.355326652526855, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3129251700680272, "grad_norm": 43.67290115356445, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 0.3800593316555023, "logits/rejected": 0.31542912125587463, "loss": 0.7969, "step": 207 }, { "beta_dpo/beta": 0.02445879578590393, "beta_dpo/beta_margin_grad_mean": -0.4628857374191284, "beta_dpo/beta_margin_grad_std": 0.12461158633232117, "beta_dpo/beta_margin_mean": 0.1690797209739685, "beta_dpo/beta_margin_std": 0.5674142241477966, "beta_dpo/beta_used": 0.02445879578590393, "beta_dpo/beta_used_raw": -0.005164986476302147, "beta_dpo/gap_mean": 9.330613136291504, "beta_dpo/gap_std": 15.561910629272461, "beta_dpo/loss_margin_mean": 6.052608013153076, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3144368858654573, "grad_norm": 14.582758903503418, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 0.3913588225841522, "logits/rejected": 0.365945041179657, "loss": 1.2406, "step": 208 }, { "beta_dpo/beta": 0.26314157247543335, "beta_dpo/beta_margin_grad_mean": -0.26778027415275574, "beta_dpo/beta_margin_grad_std": 0.3371621072292328, "beta_dpo/beta_margin_mean": 3.1177279949188232, "beta_dpo/beta_margin_std": 4.769398212432861, "beta_dpo/beta_used": 0.26314157247543335, "beta_dpo/beta_used_raw": 0.26314157247543335, "beta_dpo/gap_mean": 9.51311206817627, "beta_dpo/gap_std": 15.985190391540527, "beta_dpo/loss_margin_mean": 11.842841148376465, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31594860166288735, "grad_norm": 107.09064483642578, "learning_rate": 4.336479271643833e-07, "logits/chosen": 0.3666651248931885, "logits/rejected": 0.3131159543991089, "loss": 1.0, "step": 209 }, { "beta_dpo/beta": 0.1870449036359787, "beta_dpo/beta_margin_grad_mean": -0.26491236686706543, "beta_dpo/beta_margin_grad_std": 0.30507174134254456, "beta_dpo/beta_margin_mean": 2.4757566452026367, "beta_dpo/beta_margin_std": 3.589545488357544, "beta_dpo/beta_used": 0.1870449036359787, "beta_dpo/beta_used_raw": 0.1870449036359787, "beta_dpo/gap_mean": 10.071860313415527, "beta_dpo/gap_std": 16.347694396972656, "beta_dpo/loss_margin_mean": 12.725071907043457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31746031746031744, "grad_norm": 56.52892303466797, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.45084625482559204, "logits/rejected": 0.3572552800178528, "loss": 0.8648, "step": 210 }, { "beta_dpo/beta": 0.0382014662027359, "beta_dpo/beta_margin_grad_mean": -0.43724095821380615, "beta_dpo/beta_margin_grad_std": 0.17051011323928833, "beta_dpo/beta_margin_mean": 0.3267289698123932, "beta_dpo/beta_margin_std": 0.8928489685058594, "beta_dpo/beta_used": 0.0382014662027359, "beta_dpo/beta_used_raw": 0.012627029791474342, "beta_dpo/gap_mean": 10.269024848937988, "beta_dpo/gap_std": 16.395240783691406, "beta_dpo/loss_margin_mean": 10.694591522216797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31897203325774753, "grad_norm": 24.648975372314453, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 0.46954113245010376, "logits/rejected": 0.3781422972679138, "loss": 1.1807, "step": 211 }, { "beta_dpo/beta": 0.3232237696647644, "beta_dpo/beta_margin_grad_mean": -0.2582166790962219, "beta_dpo/beta_margin_grad_std": 0.3422297537326813, "beta_dpo/beta_margin_mean": 4.010345935821533, "beta_dpo/beta_margin_std": 6.0359578132629395, "beta_dpo/beta_used": 0.3232237696647644, "beta_dpo/beta_used_raw": 0.3232237696647644, "beta_dpo/gap_mean": 10.692178726196289, "beta_dpo/gap_std": 16.397342681884766, "beta_dpo/loss_margin_mean": 12.392468452453613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3204837490551776, "grad_norm": 106.18115234375, "learning_rate": 4.309335095262675e-07, "logits/chosen": 0.44062310457229614, "logits/rejected": 0.37093472480773926, "loss": 0.8162, "step": 212 }, { "beta_dpo/beta": 0.11237598955631256, "beta_dpo/beta_margin_grad_mean": -0.3015235662460327, "beta_dpo/beta_margin_grad_std": 0.26578059792518616, "beta_dpo/beta_margin_mean": 1.4179470539093018, "beta_dpo/beta_margin_std": 2.069363594055176, "beta_dpo/beta_used": 0.11237598955631256, "beta_dpo/beta_used_raw": 0.11237598955631256, "beta_dpo/gap_mean": 11.056885719299316, "beta_dpo/gap_std": 16.766132354736328, "beta_dpo/loss_margin_mean": 12.505507469177246, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3219954648526077, "grad_norm": 40.509246826171875, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 0.43783697485923767, "logits/rejected": 0.41364121437072754, "loss": 0.9027, "step": 213 }, { "beta_dpo/beta": 0.0608704648911953, "beta_dpo/beta_margin_grad_mean": -0.3818773627281189, "beta_dpo/beta_margin_grad_std": 0.23320011794567108, "beta_dpo/beta_margin_mean": 0.9389346837997437, "beta_dpo/beta_margin_std": 1.8776214122772217, "beta_dpo/beta_used": 0.0608704648911953, "beta_dpo/beta_used_raw": 0.01937410980463028, "beta_dpo/gap_mean": 11.113325119018555, "beta_dpo/gap_std": 17.1014404296875, "beta_dpo/loss_margin_mean": 12.176145553588867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3235071806500378, "grad_norm": 35.15140151977539, "learning_rate": 4.290985500881143e-07, "logits/chosen": 0.3065475821495056, "logits/rejected": 0.2855311930179596, "loss": 1.1407, "step": 214 }, { "beta_dpo/beta": 0.21405133605003357, "beta_dpo/beta_margin_grad_mean": -0.27612632513046265, "beta_dpo/beta_margin_grad_std": 0.3283703327178955, "beta_dpo/beta_margin_mean": 2.8124191761016846, "beta_dpo/beta_margin_std": 3.740720748901367, "beta_dpo/beta_used": 0.21405133605003357, "beta_dpo/beta_used_raw": 0.21405133605003357, "beta_dpo/gap_mean": 11.513839721679688, "beta_dpo/gap_std": 17.267501831054688, "beta_dpo/loss_margin_mean": 13.468052864074707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3250188964474679, "grad_norm": 83.70057678222656, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.33656060695648193, "logits/rejected": 0.23366260528564453, "loss": 0.8325, "step": 215 }, { "beta_dpo/beta": 0.063032366335392, "beta_dpo/beta_margin_grad_mean": -0.3830406665802002, "beta_dpo/beta_margin_grad_std": 0.21951407194137573, "beta_dpo/beta_margin_mean": 0.8744828701019287, "beta_dpo/beta_margin_std": 1.6632479429244995, "beta_dpo/beta_used": 0.063032366335392, "beta_dpo/beta_used_raw": 0.04040754958987236, "beta_dpo/gap_mean": 12.008759498596191, "beta_dpo/gap_std": 17.251224517822266, "beta_dpo/loss_margin_mean": 13.142531394958496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.32653061224489793, "grad_norm": 26.42040252685547, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 0.45972394943237305, "logits/rejected": 0.3764275014400482, "loss": 1.0786, "step": 216 }, { "beta_dpo/beta": 0.05261437967419624, "beta_dpo/beta_margin_grad_mean": -0.3919501006603241, "beta_dpo/beta_margin_grad_std": 0.20264987647533417, "beta_dpo/beta_margin_mean": 0.7517213821411133, "beta_dpo/beta_margin_std": 1.5161195993423462, "beta_dpo/beta_used": 0.05261437967419624, "beta_dpo/beta_used_raw": 0.01570678874850273, "beta_dpo/gap_mean": 12.255045890808105, "beta_dpo/gap_std": 17.351184844970703, "beta_dpo/loss_margin_mean": 13.261373519897461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.328042328042328, "grad_norm": 24.293655395507812, "learning_rate": 4.26308602680756e-07, "logits/chosen": 0.4059142768383026, "logits/rejected": 0.30515235662460327, "loss": 1.121, "step": 217 }, { "beta_dpo/beta": 0.14698970317840576, "beta_dpo/beta_margin_grad_mean": -0.38040444254875183, "beta_dpo/beta_margin_grad_std": 0.28120723366737366, "beta_dpo/beta_margin_mean": 1.6690462827682495, "beta_dpo/beta_margin_std": 4.68047571182251, "beta_dpo/beta_used": 0.14698970317840576, "beta_dpo/beta_used_raw": 0.14698970317840576, "beta_dpo/gap_mean": 11.52371597290039, "beta_dpo/gap_std": 17.293601989746094, "beta_dpo/loss_margin_mean": 8.26142406463623, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3295540438397581, "grad_norm": 56.007667541503906, "learning_rate": 4.253687219265803e-07, "logits/chosen": 0.2987746000289917, "logits/rejected": 0.2939714789390564, "loss": 1.1388, "step": 218 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4981619417667389, "beta_dpo/beta_margin_grad_std": 0.0033710116986185312, "beta_dpo/beta_margin_mean": 0.007352730259299278, "beta_dpo/beta_margin_std": 0.013484997674822807, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06018597632646561, "beta_dpo/gap_mean": 10.95039176940918, "beta_dpo/gap_std": 16.972551345825195, "beta_dpo/loss_margin_mean": 7.3527302742004395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3310657596371882, "grad_norm": 0.6384725570678711, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 0.3558601140975952, "logits/rejected": 0.3152506947517395, "loss": 1.3781, "step": 219 }, { "beta_dpo/beta": 0.12556403875350952, "beta_dpo/beta_margin_grad_mean": -0.36618298292160034, "beta_dpo/beta_margin_grad_std": 0.24885612726211548, "beta_dpo/beta_margin_mean": 1.5560061931610107, "beta_dpo/beta_margin_std": 2.9427649974823, "beta_dpo/beta_used": 0.12556403875350952, "beta_dpo/beta_used_raw": 0.055669739842414856, "beta_dpo/gap_mean": 10.764374732971191, "beta_dpo/gap_std": 16.656213760375977, "beta_dpo/loss_margin_mean": 10.330924987792969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3325774754346183, "grad_norm": 60.86797332763672, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.4525846242904663, "logits/rejected": 0.39114877581596375, "loss": 1.0217, "step": 220 }, { "beta_dpo/beta": 0.029629409313201904, "beta_dpo/beta_margin_grad_mean": -0.4422619044780731, "beta_dpo/beta_margin_grad_std": 0.1514252871274948, "beta_dpo/beta_margin_mean": 0.2931990623474121, "beta_dpo/beta_margin_std": 0.7789700031280518, "beta_dpo/beta_used": 0.029629409313201904, "beta_dpo/beta_used_raw": 0.015723902732133865, "beta_dpo/gap_mean": 10.685160636901855, "beta_dpo/gap_std": 16.907085418701172, "beta_dpo/loss_margin_mean": 11.518767356872559, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3340891912320484, "grad_norm": 17.142822265625, "learning_rate": 4.22519752870528e-07, "logits/chosen": 0.398847758769989, "logits/rejected": 0.3308085799217224, "loss": 1.1883, "step": 221 }, { "beta_dpo/beta": 0.21331076323986053, "beta_dpo/beta_margin_grad_mean": -0.23689118027687073, "beta_dpo/beta_margin_grad_std": 0.292352557182312, "beta_dpo/beta_margin_mean": 3.1503472328186035, "beta_dpo/beta_margin_std": 3.831242322921753, "beta_dpo/beta_used": 0.21331076323986053, "beta_dpo/beta_used_raw": 0.21331076323986053, "beta_dpo/gap_mean": 11.39797592163086, "beta_dpo/gap_std": 16.90200424194336, "beta_dpo/loss_margin_mean": 14.945018768310547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3356009070294785, "grad_norm": 59.19512939453125, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 0.43866443634033203, "logits/rejected": 0.35765448212623596, "loss": 0.7302, "step": 222 }, { "beta_dpo/beta": 0.052638933062553406, "beta_dpo/beta_margin_grad_mean": -0.40419548749923706, "beta_dpo/beta_margin_grad_std": 0.18599951267242432, "beta_dpo/beta_margin_mean": 0.5999060869216919, "beta_dpo/beta_margin_std": 1.2042607069015503, "beta_dpo/beta_used": 0.052638933062553406, "beta_dpo/beta_used_raw": 0.03331661969423294, "beta_dpo/gap_mean": 11.813493728637695, "beta_dpo/gap_std": 16.868061065673828, "beta_dpo/loss_margin_mean": 12.644187927246094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3371126228269085, "grad_norm": 23.424663543701172, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 0.43733513355255127, "logits/rejected": 0.3827732801437378, "loss": 1.0815, "step": 223 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49811801314353943, "beta_dpo/beta_margin_grad_std": 0.0036412279587239027, "beta_dpo/beta_margin_mean": 0.0075285304337739944, "beta_dpo/beta_margin_std": 0.014566164463758469, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14365257322788239, "beta_dpo/gap_mean": 11.2182035446167, "beta_dpo/gap_std": 16.46889877319336, "beta_dpo/loss_margin_mean": 7.528530120849609, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3386243386243386, "grad_norm": 0.497575044631958, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 0.45619451999664307, "logits/rejected": 0.43575888872146606, "loss": 1.3792, "step": 224 }, { "beta_dpo/beta": 0.0709293931722641, "beta_dpo/beta_margin_grad_mean": -0.3631777763366699, "beta_dpo/beta_margin_grad_std": 0.20055457949638367, "beta_dpo/beta_margin_mean": 1.091381311416626, "beta_dpo/beta_margin_std": 1.866531252861023, "beta_dpo/beta_used": 0.0709293931722641, "beta_dpo/beta_used_raw": 0.05940817669034004, "beta_dpo/gap_mean": 11.353107452392578, "beta_dpo/gap_std": 16.404705047607422, "beta_dpo/loss_margin_mean": 12.793769836425781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3401360544217687, "grad_norm": 28.656009674072266, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.4412558972835541, "logits/rejected": 0.3315754532814026, "loss": 1.033, "step": 225 }, { "beta_dpo/beta": 0.08705855906009674, "beta_dpo/beta_margin_grad_mean": -0.4210311770439148, "beta_dpo/beta_margin_grad_std": 0.25707387924194336, "beta_dpo/beta_margin_mean": 0.7551378011703491, "beta_dpo/beta_margin_std": 2.270979881286621, "beta_dpo/beta_used": 0.08705855906009674, "beta_dpo/beta_used_raw": 0.004039667546749115, "beta_dpo/gap_mean": 10.987207412719727, "beta_dpo/gap_std": 16.678972244262695, "beta_dpo/loss_margin_mean": 9.073394775390625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3416477702191988, "grad_norm": 35.07719039916992, "learning_rate": 4.176753170773052e-07, "logits/chosen": 0.4656580984592438, "logits/rejected": 0.41961991786956787, "loss": 1.1192, "step": 226 }, { "beta_dpo/beta": 0.17244452238082886, "beta_dpo/beta_margin_grad_mean": -0.3985823690891266, "beta_dpo/beta_margin_grad_std": 0.2724515199661255, "beta_dpo/beta_margin_mean": 1.867832064628601, "beta_dpo/beta_margin_std": 4.624873161315918, "beta_dpo/beta_used": 0.17244452238082886, "beta_dpo/beta_used_raw": 0.1496300846338272, "beta_dpo/gap_mean": 10.793601036071777, "beta_dpo/gap_std": 16.972103118896484, "beta_dpo/loss_margin_mean": 10.16253662109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3431594860166289, "grad_norm": 114.27677917480469, "learning_rate": 4.166922501290729e-07, "logits/chosen": 0.4805440604686737, "logits/rejected": 0.4414859414100647, "loss": 1.1064, "step": 227 }, { "beta_dpo/beta": 0.09896055608987808, "beta_dpo/beta_margin_grad_mean": -0.38118550181388855, "beta_dpo/beta_margin_grad_std": 0.26663917303085327, "beta_dpo/beta_margin_mean": 1.2820067405700684, "beta_dpo/beta_margin_std": 2.9501523971557617, "beta_dpo/beta_used": 0.09896055608987808, "beta_dpo/beta_used_raw": 0.09224580228328705, "beta_dpo/gap_mean": 10.714456558227539, "beta_dpo/gap_std": 17.144290924072266, "beta_dpo/loss_margin_mean": 11.201956748962402, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34467120181405897, "grad_norm": 55.93511962890625, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 0.4445973336696625, "logits/rejected": 0.3688391447067261, "loss": 1.0708, "step": 228 }, { "beta_dpo/beta": 0.07893217355012894, "beta_dpo/beta_margin_grad_mean": -0.4054025709629059, "beta_dpo/beta_margin_grad_std": 0.23098501563072205, "beta_dpo/beta_margin_mean": 0.7533602714538574, "beta_dpo/beta_margin_std": 1.986180067062378, "beta_dpo/beta_used": 0.07893217355012894, "beta_dpo/beta_used_raw": 0.07893217355012894, "beta_dpo/gap_mean": 10.609885215759277, "beta_dpo/gap_std": 17.362579345703125, "beta_dpo/loss_margin_mean": 9.359911918640137, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34618291761148906, "grad_norm": 29.154024124145508, "learning_rate": 4.147121556398312e-07, "logits/chosen": 0.5463353395462036, "logits/rejected": 0.4801861047744751, "loss": 1.0969, "step": 229 }, { "beta_dpo/beta": 0.025358129292726517, "beta_dpo/beta_margin_grad_mean": -0.42037683725357056, "beta_dpo/beta_margin_grad_std": 0.1302468180656433, "beta_dpo/beta_margin_mean": 0.37733587622642517, "beta_dpo/beta_margin_std": 0.6492464542388916, "beta_dpo/beta_used": 0.025358129292726517, "beta_dpo/beta_used_raw": 0.02071252465248108, "beta_dpo/gap_mean": 10.497218132019043, "beta_dpo/gap_std": 17.413190841674805, "beta_dpo/loss_margin_mean": 11.307621002197266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3476946334089191, "grad_norm": 12.894368171691895, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.434891939163208, "logits/rejected": 0.43139857053756714, "loss": 1.2022, "step": 230 }, { "beta_dpo/beta": 0.3272258937358856, "beta_dpo/beta_margin_grad_mean": -0.22254712879657745, "beta_dpo/beta_margin_grad_std": 0.31797823309898376, "beta_dpo/beta_margin_mean": 5.628799915313721, "beta_dpo/beta_margin_std": 6.833213806152344, "beta_dpo/beta_used": 0.3272258937358856, "beta_dpo/beta_used_raw": 0.3272258937358856, "beta_dpo/gap_mean": 11.607155799865723, "beta_dpo/gap_std": 17.173091888427734, "beta_dpo/loss_margin_mean": 16.309707641601562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3492063492063492, "grad_norm": 165.0520477294922, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 0.35244348645210266, "logits/rejected": 0.3390952944755554, "loss": 0.8224, "step": 231 }, { "beta_dpo/beta": 0.06888004392385483, "beta_dpo/beta_margin_grad_mean": -0.40324854850769043, "beta_dpo/beta_margin_grad_std": 0.20974577963352203, "beta_dpo/beta_margin_mean": 0.798814594745636, "beta_dpo/beta_margin_std": 1.7441960573196411, "beta_dpo/beta_used": 0.06888004392385483, "beta_dpo/beta_used_raw": -0.003230072557926178, "beta_dpo/gap_mean": 11.649392127990723, "beta_dpo/gap_std": 17.554821014404297, "beta_dpo/loss_margin_mean": 10.54377269744873, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3507180650037793, "grad_norm": 36.105350494384766, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 0.41157642006874084, "logits/rejected": 0.38223710656166077, "loss": 1.1257, "step": 232 }, { "beta_dpo/beta": 0.09651569277048111, "beta_dpo/beta_margin_grad_mean": -0.37120458483695984, "beta_dpo/beta_margin_grad_std": 0.2465994656085968, "beta_dpo/beta_margin_mean": 1.4080229997634888, "beta_dpo/beta_margin_std": 2.7828683853149414, "beta_dpo/beta_used": 0.09651569277048111, "beta_dpo/beta_used_raw": -0.13416676223278046, "beta_dpo/gap_mean": 11.465993881225586, "beta_dpo/gap_std": 17.789779663085938, "beta_dpo/loss_margin_mean": 9.44713020324707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35222978080120937, "grad_norm": 45.80852508544922, "learning_rate": 4.106969024216348e-07, "logits/chosen": 0.45560508966445923, "logits/rejected": 0.39847636222839355, "loss": 1.0658, "step": 233 }, { "beta_dpo/beta": 0.06583171337842941, "beta_dpo/beta_margin_grad_mean": -0.3836709260940552, "beta_dpo/beta_margin_grad_std": 0.22087500989437103, "beta_dpo/beta_margin_mean": 0.8249198198318481, "beta_dpo/beta_margin_std": 1.6976362466812134, "beta_dpo/beta_used": 0.06583171337842941, "beta_dpo/beta_used_raw": -0.054439254105091095, "beta_dpo/gap_mean": 10.425298690795898, "beta_dpo/gap_std": 17.674182891845703, "beta_dpo/loss_margin_mean": 7.9661054611206055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35374149659863946, "grad_norm": 30.462419509887695, "learning_rate": 4.09681781007452e-07, "logits/chosen": 0.37714219093322754, "logits/rejected": 0.3655141592025757, "loss": 1.1052, "step": 234 }, { "beta_dpo/beta": 0.21070542931556702, "beta_dpo/beta_margin_grad_mean": -0.20869186520576477, "beta_dpo/beta_margin_grad_std": 0.26472654938697815, "beta_dpo/beta_margin_mean": 3.020737886428833, "beta_dpo/beta_margin_std": 3.6254777908325195, "beta_dpo/beta_used": 0.21070542931556702, "beta_dpo/beta_used_raw": 0.21070542931556702, "beta_dpo/gap_mean": 11.103967666625977, "beta_dpo/gap_std": 17.23556137084961, "beta_dpo/loss_margin_mean": 14.643046379089355, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35525321239606955, "grad_norm": 96.36847686767578, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.4545882046222687, "logits/rejected": 0.43633896112442017, "loss": 0.8063, "step": 235 }, { "beta_dpo/beta": 0.2329292595386505, "beta_dpo/beta_margin_grad_mean": -0.2999168634414673, "beta_dpo/beta_margin_grad_std": 0.33858177065849304, "beta_dpo/beta_margin_mean": 2.892549514770508, "beta_dpo/beta_margin_std": 5.401739597320557, "beta_dpo/beta_used": 0.2329292595386505, "beta_dpo/beta_used_raw": 0.2329292595386505, "beta_dpo/gap_mean": 11.491912841796875, "beta_dpo/gap_std": 17.67310905456543, "beta_dpo/loss_margin_mean": 11.814598083496094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35676492819349964, "grad_norm": 98.90902709960938, "learning_rate": 4.076381667711306e-07, "logits/chosen": 0.39827001094818115, "logits/rejected": 0.38383930921554565, "loss": 0.8167, "step": 236 }, { "beta_dpo/beta": 0.3174073100090027, "beta_dpo/beta_margin_grad_mean": -0.21913442015647888, "beta_dpo/beta_margin_grad_std": 0.3325287401676178, "beta_dpo/beta_margin_mean": 3.888357639312744, "beta_dpo/beta_margin_std": 7.1055073738098145, "beta_dpo/beta_used": 0.3174073100090027, "beta_dpo/beta_used_raw": 0.3174073100090027, "beta_dpo/gap_mean": 11.519922256469727, "beta_dpo/gap_std": 17.956809997558594, "beta_dpo/loss_margin_mean": 12.32577133178711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35827664399092973, "grad_norm": 120.24292755126953, "learning_rate": 4.066097311132753e-07, "logits/chosen": 0.4687383472919464, "logits/rejected": 0.45551079511642456, "loss": 0.6911, "step": 237 }, { "beta_dpo/beta": 0.01583448052406311, "beta_dpo/beta_margin_grad_mean": -0.4526436924934387, "beta_dpo/beta_margin_grad_std": 0.07343795150518417, "beta_dpo/beta_margin_mean": 0.19448736310005188, "beta_dpo/beta_margin_std": 0.30200886726379395, "beta_dpo/beta_used": 0.01583448052406311, "beta_dpo/beta_used_raw": 0.01583448052406311, "beta_dpo/gap_mean": 11.880534172058105, "beta_dpo/gap_std": 17.80138397216797, "beta_dpo/loss_margin_mean": 12.886168479919434, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35978835978835977, "grad_norm": 8.267656326293945, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 0.3795435428619385, "logits/rejected": 0.3663020730018616, "loss": 1.2449, "step": 238 }, { "beta_dpo/beta": 0.101829394698143, "beta_dpo/beta_margin_grad_mean": -0.33910471200942993, "beta_dpo/beta_margin_grad_std": 0.2562665641307831, "beta_dpo/beta_margin_mean": 1.7569659948349, "beta_dpo/beta_margin_std": 3.0229148864746094, "beta_dpo/beta_used": 0.101829394698143, "beta_dpo/beta_used_raw": 0.101829394698143, "beta_dpo/gap_mean": 11.849081039428711, "beta_dpo/gap_std": 18.05896759033203, "beta_dpo/loss_margin_mean": 13.367530822753906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36130007558578986, "grad_norm": 40.64457321166992, "learning_rate": 4.045397465551513e-07, "logits/chosen": 0.5176012516021729, "logits/rejected": 0.395955353975296, "loss": 1.0144, "step": 239 }, { "beta_dpo/beta": 0.4317074418067932, "beta_dpo/beta_margin_grad_mean": -0.2166636437177658, "beta_dpo/beta_margin_grad_std": 0.3384714126586914, "beta_dpo/beta_margin_mean": 7.427967071533203, "beta_dpo/beta_margin_std": 9.432563781738281, "beta_dpo/beta_used": 0.4317074418067932, "beta_dpo/beta_used_raw": 0.4317074418067932, "beta_dpo/gap_mean": 12.70523452758789, "beta_dpo/gap_std": 18.133880615234375, "beta_dpo/loss_margin_mean": 16.595781326293945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36281179138321995, "grad_norm": 113.23979187011719, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.4384820759296417, "logits/rejected": 0.3469165563583374, "loss": 0.5406, "step": 240 }, { "beta_dpo/beta": 0.045368727296590805, "beta_dpo/beta_margin_grad_mean": -0.424957811832428, "beta_dpo/beta_margin_grad_std": 0.19752579927444458, "beta_dpo/beta_margin_mean": 0.4823284447193146, "beta_dpo/beta_margin_std": 1.2542048692703247, "beta_dpo/beta_used": 0.045368727296590805, "beta_dpo/beta_used_raw": -0.007718522101640701, "beta_dpo/gap_mean": 12.458725929260254, "beta_dpo/gap_std": 18.17209243774414, "beta_dpo/loss_margin_mean": 9.584371566772461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36432350718065004, "grad_norm": 23.7581729888916, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 0.41160500049591064, "logits/rejected": 0.37446707487106323, "loss": 1.1377, "step": 241 }, { "beta_dpo/beta": 0.06172889471054077, "beta_dpo/beta_margin_grad_mean": -0.35529595613479614, "beta_dpo/beta_margin_grad_std": 0.21127015352249146, "beta_dpo/beta_margin_mean": 1.0536497831344604, "beta_dpo/beta_margin_std": 1.8481637239456177, "beta_dpo/beta_used": 0.06172889471054077, "beta_dpo/beta_used_raw": 0.06172889471054077, "beta_dpo/gap_mean": 12.84834098815918, "beta_dpo/gap_std": 18.49911880493164, "beta_dpo/loss_margin_mean": 14.712586402893066, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36583522297808013, "grad_norm": 20.04178237915039, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 0.3890725374221802, "logits/rejected": 0.3670586049556732, "loss": 1.0399, "step": 242 }, { "beta_dpo/beta": 0.1559908539056778, "beta_dpo/beta_margin_grad_mean": -0.28708603978157043, "beta_dpo/beta_margin_grad_std": 0.3101195693016052, "beta_dpo/beta_margin_mean": 1.9018021821975708, "beta_dpo/beta_margin_std": 2.786595582962036, "beta_dpo/beta_used": 0.1559908539056778, "beta_dpo/beta_used_raw": 0.1559908539056778, "beta_dpo/gap_mean": 12.671987533569336, "beta_dpo/gap_std": 18.41918182373047, "beta_dpo/loss_margin_mean": 12.152036666870117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3673469387755102, "grad_norm": 55.34590530395508, "learning_rate": 4.003481376353596e-07, "logits/chosen": 0.407698392868042, "logits/rejected": 0.4090770483016968, "loss": 0.8206, "step": 243 }, { "beta_dpo/beta": 0.2864580452442169, "beta_dpo/beta_margin_grad_mean": -0.1638103872537613, "beta_dpo/beta_margin_grad_std": 0.24098889529705048, "beta_dpo/beta_margin_mean": 4.863481044769287, "beta_dpo/beta_margin_std": 5.131604194641113, "beta_dpo/beta_used": 0.2864580452442169, "beta_dpo/beta_used_raw": 0.2864580452442169, "beta_dpo/gap_mean": 13.105916976928711, "beta_dpo/gap_std": 18.159984588623047, "beta_dpo/loss_margin_mean": 16.353925704956055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3688586545729403, "grad_norm": 60.079708099365234, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 0.4592773914337158, "logits/rejected": 0.37424206733703613, "loss": 0.4978, "step": 244 }, { "beta_dpo/beta": 0.22604097425937653, "beta_dpo/beta_margin_grad_mean": -0.1703149378299713, "beta_dpo/beta_margin_grad_std": 0.2654411196708679, "beta_dpo/beta_margin_mean": 4.111567497253418, "beta_dpo/beta_margin_std": 4.208682060241699, "beta_dpo/beta_used": 0.22604097425937653, "beta_dpo/beta_used_raw": 0.22604097425937653, "beta_dpo/gap_mean": 14.10053539276123, "beta_dpo/gap_std": 18.00957489013672, "beta_dpo/loss_margin_mean": 18.244796752929688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37037037037037035, "grad_norm": 65.31201171875, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.47270846366882324, "logits/rejected": 0.4419899582862854, "loss": 0.4987, "step": 245 }, { "beta_dpo/beta": 0.004375386517494917, "beta_dpo/beta_margin_grad_mean": -0.4861513674259186, "beta_dpo/beta_margin_grad_std": 0.03189266845583916, "beta_dpo/beta_margin_mean": 0.05574238672852516, "beta_dpo/beta_margin_std": 0.12858904898166656, "beta_dpo/beta_used": 0.004375386517494917, "beta_dpo/beta_used_raw": -0.028331764042377472, "beta_dpo/gap_mean": 14.207629203796387, "beta_dpo/gap_std": 18.656829833984375, "beta_dpo/loss_margin_mean": 14.23241901397705, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37188208616780044, "grad_norm": 3.1414432525634766, "learning_rate": 3.971601703742932e-07, "logits/chosen": 0.46958476305007935, "logits/rejected": 0.4090319275856018, "loss": 1.3347, "step": 246 }, { "beta_dpo/beta": 0.03395707905292511, "beta_dpo/beta_margin_grad_mean": -0.41882583498954773, "beta_dpo/beta_margin_grad_std": 0.16061575710773468, "beta_dpo/beta_margin_mean": 0.4101926386356354, "beta_dpo/beta_margin_std": 0.8106998801231384, "beta_dpo/beta_used": 0.03395707905292511, "beta_dpo/beta_used_raw": -0.03948557376861572, "beta_dpo/gap_mean": 13.277643203735352, "beta_dpo/gap_std": 18.8796329498291, "beta_dpo/loss_margin_mean": 8.37158203125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37339380196523053, "grad_norm": 23.345308303833008, "learning_rate": 3.960892420986177e-07, "logits/chosen": 0.4679937958717346, "logits/rejected": 0.45675230026245117, "loss": 1.1301, "step": 247 }, { "beta_dpo/beta": 0.06966085731983185, "beta_dpo/beta_margin_grad_mean": -0.3950624465942383, "beta_dpo/beta_margin_grad_std": 0.24403807520866394, "beta_dpo/beta_margin_mean": 1.0330671072006226, "beta_dpo/beta_margin_std": 2.318099021911621, "beta_dpo/beta_used": 0.06966085731983185, "beta_dpo/beta_used_raw": 0.06966085731983185, "beta_dpo/gap_mean": 13.289308547973633, "beta_dpo/gap_std": 19.19415283203125, "beta_dpo/loss_margin_mean": 13.971484184265137, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3749055177626606, "grad_norm": 45.709678649902344, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 0.43965810537338257, "logits/rejected": 0.48121294379234314, "loss": 1.1869, "step": 248 }, { "beta_dpo/beta": 0.05159245431423187, "beta_dpo/beta_margin_grad_mean": -0.4309861660003662, "beta_dpo/beta_margin_grad_std": 0.24051769077777863, "beta_dpo/beta_margin_mean": 0.4380229711532593, "beta_dpo/beta_margin_std": 1.5990570783615112, "beta_dpo/beta_used": 0.05159245431423187, "beta_dpo/beta_used_raw": 0.009713437408208847, "beta_dpo/gap_mean": 12.065717697143555, "beta_dpo/gap_std": 19.33898162841797, "beta_dpo/loss_margin_mean": 5.354761123657227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3764172335600907, "grad_norm": 33.44173812866211, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 0.42937660217285156, "logits/rejected": 0.4625554084777832, "loss": 1.2139, "step": 249 }, { "beta_dpo/beta": 0.20074601471424103, "beta_dpo/beta_margin_grad_mean": -0.2688792049884796, "beta_dpo/beta_margin_grad_std": 0.29901885986328125, "beta_dpo/beta_margin_mean": 3.485891819000244, "beta_dpo/beta_margin_std": 5.231993675231934, "beta_dpo/beta_used": 0.20074601471424103, "beta_dpo/beta_used_raw": 0.20074601471424103, "beta_dpo/gap_mean": 12.264188766479492, "beta_dpo/gap_std": 19.683074951171875, "beta_dpo/loss_margin_mean": 15.519104957580566, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3779289493575208, "grad_norm": 116.9253921508789, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.5390890836715698, "logits/rejected": 0.4927697777748108, "loss": 1.1048, "step": 250 }, { "beta_dpo/beta": 0.10495083034038544, "beta_dpo/beta_margin_grad_mean": -0.3397623598575592, "beta_dpo/beta_margin_grad_std": 0.288044810295105, "beta_dpo/beta_margin_mean": 1.0604958534240723, "beta_dpo/beta_margin_std": 1.9399701356887817, "beta_dpo/beta_used": 0.10495083034038544, "beta_dpo/beta_used_raw": 0.10495083034038544, "beta_dpo/gap_mean": 12.307474136352539, "beta_dpo/gap_std": 19.697185516357422, "beta_dpo/loss_margin_mean": 10.240516662597656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3794406651549509, "grad_norm": 40.252906799316406, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 0.5220420360565186, "logits/rejected": 0.4805965721607208, "loss": 0.7686, "step": 251 }, { "beta_dpo/beta": 0.05897517874836922, "beta_dpo/beta_margin_grad_mean": -0.3872048556804657, "beta_dpo/beta_margin_grad_std": 0.2150556445121765, "beta_dpo/beta_margin_mean": 0.8916309475898743, "beta_dpo/beta_margin_std": 1.7850096225738525, "beta_dpo/beta_used": 0.05897517874836922, "beta_dpo/beta_used_raw": 0.009286485612392426, "beta_dpo/gap_mean": 11.468460083007812, "beta_dpo/gap_std": 19.796926498413086, "beta_dpo/loss_margin_mean": 9.313298225402832, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.38095238095238093, "grad_norm": 26.423254013061523, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 0.3940281867980957, "logits/rejected": 0.3418418765068054, "loss": 1.0894, "step": 252 }, { "beta_dpo/beta": 0.10484018921852112, "beta_dpo/beta_margin_grad_mean": -0.3770482540130615, "beta_dpo/beta_margin_grad_std": 0.2779637575149536, "beta_dpo/beta_margin_mean": 1.4820187091827393, "beta_dpo/beta_margin_std": 3.32963228225708, "beta_dpo/beta_used": 0.10484018921852112, "beta_dpo/beta_used_raw": 0.08652335405349731, "beta_dpo/gap_mean": 11.674590110778809, "beta_dpo/gap_std": 19.611839294433594, "beta_dpo/loss_margin_mean": 10.626477241516113, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.382464096749811, "grad_norm": 59.38056182861328, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 0.48830246925354004, "logits/rejected": 0.4461994767189026, "loss": 1.3494, "step": 253 }, { "beta_dpo/beta": 0.058685217052698135, "beta_dpo/beta_margin_grad_mean": -0.426761269569397, "beta_dpo/beta_margin_grad_std": 0.21793650090694427, "beta_dpo/beta_margin_mean": 0.5492849349975586, "beta_dpo/beta_margin_std": 1.6022003889083862, "beta_dpo/beta_used": 0.058685217052698135, "beta_dpo/beta_used_raw": 0.053593721240758896, "beta_dpo/gap_mean": 11.511938095092773, "beta_dpo/gap_std": 19.289674758911133, "beta_dpo/loss_margin_mean": 11.349576950073242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3839758125472411, "grad_norm": 36.390403747558594, "learning_rate": 3.884800159665276e-07, "logits/chosen": 0.4373748302459717, "logits/rejected": 0.38516467809677124, "loss": 1.1464, "step": 254 }, { "beta_dpo/beta": 0.11067987978458405, "beta_dpo/beta_margin_grad_mean": -0.3657572865486145, "beta_dpo/beta_margin_grad_std": 0.27411824464797974, "beta_dpo/beta_margin_mean": 1.7994710206985474, "beta_dpo/beta_margin_std": 3.8073818683624268, "beta_dpo/beta_used": 0.11067987978458405, "beta_dpo/beta_used_raw": 0.007904693484306335, "beta_dpo/gap_mean": 11.95395565032959, "beta_dpo/gap_std": 19.61341667175293, "beta_dpo/loss_margin_mean": 14.774104118347168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3854875283446712, "grad_norm": 89.89997100830078, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.42883598804473877, "logits/rejected": 0.39614689350128174, "loss": 1.2533, "step": 255 }, { "beta_dpo/beta": 0.117228664457798, "beta_dpo/beta_margin_grad_mean": -0.40933340787887573, "beta_dpo/beta_margin_grad_std": 0.2813352644443512, "beta_dpo/beta_margin_mean": 1.6359100341796875, "beta_dpo/beta_margin_std": 4.08366060256958, "beta_dpo/beta_used": 0.117228664457798, "beta_dpo/beta_used_raw": 0.037854887545108795, "beta_dpo/gap_mean": 12.13801383972168, "beta_dpo/gap_std": 20.043987274169922, "beta_dpo/loss_margin_mean": 12.292617797851562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3869992441421013, "grad_norm": 99.5350341796875, "learning_rate": 3.862706303320329e-07, "logits/chosen": 0.4363447427749634, "logits/rejected": 0.374740332365036, "loss": 1.2428, "step": 256 }, { "beta_dpo/beta": 0.038477640599012375, "beta_dpo/beta_margin_grad_mean": -0.4051564931869507, "beta_dpo/beta_margin_grad_std": 0.1941753476858139, "beta_dpo/beta_margin_mean": 0.611764669418335, "beta_dpo/beta_margin_std": 1.3536244630813599, "beta_dpo/beta_used": 0.038477640599012375, "beta_dpo/beta_used_raw": 0.01731099747121334, "beta_dpo/gap_mean": 12.789517402648926, "beta_dpo/gap_std": 20.492897033691406, "beta_dpo/loss_margin_mean": 16.812488555908203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3885109599395314, "grad_norm": 24.06647491455078, "learning_rate": 3.851602043638994e-07, "logits/chosen": 0.43763476610183716, "logits/rejected": 0.374079167842865, "loss": 1.1426, "step": 257 }, { "beta_dpo/beta": 0.03051311895251274, "beta_dpo/beta_margin_grad_mean": -0.40624645352363586, "beta_dpo/beta_margin_grad_std": 0.14975614845752716, "beta_dpo/beta_margin_mean": 0.45043420791625977, "beta_dpo/beta_margin_std": 0.7583449482917786, "beta_dpo/beta_used": 0.03051311895251274, "beta_dpo/beta_used_raw": -0.1679498702287674, "beta_dpo/gap_mean": 12.876970291137695, "beta_dpo/gap_std": 20.080921173095703, "beta_dpo/loss_margin_mean": 13.000361442565918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3900226757369615, "grad_norm": 21.060880661010742, "learning_rate": 3.840459976743023e-07, "logits/chosen": 0.47881019115448, "logits/rejected": 0.42693912982940674, "loss": 1.1519, "step": 258 }, { "beta_dpo/beta": 0.2693525552749634, "beta_dpo/beta_margin_grad_mean": -0.19658398628234863, "beta_dpo/beta_margin_grad_std": 0.3391178548336029, "beta_dpo/beta_margin_mean": 5.206562519073486, "beta_dpo/beta_margin_std": 5.524745464324951, "beta_dpo/beta_used": 0.2693525552749634, "beta_dpo/beta_used_raw": 0.2693525552749634, "beta_dpo/gap_mean": 14.02800178527832, "beta_dpo/gap_std": 20.028091430664062, "beta_dpo/loss_margin_mean": 19.475568771362305, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3915343915343915, "grad_norm": 133.27005004882812, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 0.36390572786331177, "logits/rejected": 0.2654324173927307, "loss": 1.1567, "step": 259 }, { "beta_dpo/beta": 0.04879666119813919, "beta_dpo/beta_margin_grad_mean": -0.3880171477794647, "beta_dpo/beta_margin_grad_std": 0.2359832376241684, "beta_dpo/beta_margin_mean": 0.7953795790672302, "beta_dpo/beta_margin_std": 1.7049293518066406, "beta_dpo/beta_used": 0.04879666119813919, "beta_dpo/beta_used_raw": 0.046260517090559006, "beta_dpo/gap_mean": 14.264688491821289, "beta_dpo/gap_std": 20.290300369262695, "beta_dpo/loss_margin_mean": 13.966394424438477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3930461073318216, "grad_norm": 27.301448822021484, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.4362449049949646, "logits/rejected": 0.349369078874588, "loss": 1.1159, "step": 260 }, { "beta_dpo/beta": 0.06346012651920319, "beta_dpo/beta_margin_grad_mean": -0.3791761100292206, "beta_dpo/beta_margin_grad_std": 0.21919672191143036, "beta_dpo/beta_margin_mean": 0.7327264547348022, "beta_dpo/beta_margin_std": 1.4131560325622559, "beta_dpo/beta_used": 0.06346012651920319, "beta_dpo/beta_used_raw": 0.06346012651920319, "beta_dpo/gap_mean": 13.676494598388672, "beta_dpo/gap_std": 20.232555389404297, "beta_dpo/loss_margin_mean": 11.213432312011719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3945578231292517, "grad_norm": 32.0900993347168, "learning_rate": 3.806810054678331e-07, "logits/chosen": 0.32648757100105286, "logits/rejected": 0.3490898013114929, "loss": 0.9706, "step": 261 }, { "beta_dpo/beta": 0.0835457369685173, "beta_dpo/beta_margin_grad_mean": -0.38975584506988525, "beta_dpo/beta_margin_grad_std": 0.26027020812034607, "beta_dpo/beta_margin_mean": 1.2454676628112793, "beta_dpo/beta_margin_std": 2.6653995513916016, "beta_dpo/beta_used": 0.0835457369685173, "beta_dpo/beta_used_raw": 0.0018124952912330627, "beta_dpo/gap_mean": 13.374794960021973, "beta_dpo/gap_std": 19.955730438232422, "beta_dpo/loss_margin_mean": 12.837187767028809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3960695389266818, "grad_norm": 51.011417388916016, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 0.46894973516464233, "logits/rejected": 0.40835195779800415, "loss": 1.2075, "step": 262 }, { "beta_dpo/beta": 0.0994168296456337, "beta_dpo/beta_margin_grad_mean": -0.3875061571598053, "beta_dpo/beta_margin_grad_std": 0.27189844846725464, "beta_dpo/beta_margin_mean": 1.2055269479751587, "beta_dpo/beta_margin_std": 2.890883207321167, "beta_dpo/beta_used": 0.0994168296456337, "beta_dpo/beta_used_raw": 0.015077054500579834, "beta_dpo/gap_mean": 13.045722961425781, "beta_dpo/gap_std": 19.6978759765625, "beta_dpo/loss_margin_mean": 10.980850219726562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3975812547241119, "grad_norm": 48.162906646728516, "learning_rate": 3.784193478933516e-07, "logits/chosen": 0.42282721400260925, "logits/rejected": 0.31909075379371643, "loss": 1.003, "step": 263 }, { "beta_dpo/beta": 0.038988277316093445, "beta_dpo/beta_margin_grad_mean": -0.4159775674343109, "beta_dpo/beta_margin_grad_std": 0.21583546698093414, "beta_dpo/beta_margin_mean": 0.5544974207878113, "beta_dpo/beta_margin_std": 1.3794368505477905, "beta_dpo/beta_used": 0.038988277316093445, "beta_dpo/beta_used_raw": -0.004001736640930176, "beta_dpo/gap_mean": 13.082950592041016, "beta_dpo/gap_std": 19.840484619140625, "beta_dpo/loss_margin_mean": 13.738378524780273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.39909297052154197, "grad_norm": 26.92840576171875, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 0.34600526094436646, "logits/rejected": 0.2996159791946411, "loss": 1.1644, "step": 264 }, { "beta_dpo/beta": 0.2847278118133545, "beta_dpo/beta_margin_grad_mean": -0.2701014578342438, "beta_dpo/beta_margin_grad_std": 0.35836902260780334, "beta_dpo/beta_margin_mean": 4.273728370666504, "beta_dpo/beta_margin_std": 7.470240592956543, "beta_dpo/beta_used": 0.2847278118133545, "beta_dpo/beta_used_raw": 0.2847278118133545, "beta_dpo/gap_mean": 13.191385269165039, "beta_dpo/gap_std": 20.41083526611328, "beta_dpo/loss_margin_mean": 14.18088436126709, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40060468631897206, "grad_norm": 130.78456115722656, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.5080592632293701, "logits/rejected": 0.440662145614624, "loss": 1.2185, "step": 265 }, { "beta_dpo/beta": 0.08126115798950195, "beta_dpo/beta_margin_grad_mean": -0.33244937658309937, "beta_dpo/beta_margin_grad_std": 0.2310456931591034, "beta_dpo/beta_margin_mean": 1.032869815826416, "beta_dpo/beta_margin_std": 1.767521858215332, "beta_dpo/beta_used": 0.08126115798950195, "beta_dpo/beta_used_raw": 0.08126115798950195, "beta_dpo/gap_mean": 13.363494873046875, "beta_dpo/gap_std": 20.626605987548828, "beta_dpo/loss_margin_mean": 13.082228660583496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4021164021164021, "grad_norm": 36.103389739990234, "learning_rate": 3.75e-07, "logits/chosen": 0.4579857587814331, "logits/rejected": 0.38330739736557007, "loss": 0.8273, "step": 266 }, { "beta_dpo/beta": 0.072122722864151, "beta_dpo/beta_margin_grad_mean": -0.37835559248924255, "beta_dpo/beta_margin_grad_std": 0.2315768450498581, "beta_dpo/beta_margin_mean": 1.1022335290908813, "beta_dpo/beta_margin_std": 2.124861717224121, "beta_dpo/beta_used": 0.072122722864151, "beta_dpo/beta_used_raw": -0.060242317616939545, "beta_dpo/gap_mean": 12.627429962158203, "beta_dpo/gap_std": 20.575902938842773, "beta_dpo/loss_margin_mean": 10.242201805114746, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4036281179138322, "grad_norm": 32.35624313354492, "learning_rate": 3.738531817228131e-07, "logits/chosen": 0.4831930100917816, "logits/rejected": 0.46250009536743164, "loss": 1.0788, "step": 267 }, { "beta_dpo/beta": 0.013414818793535233, "beta_dpo/beta_margin_grad_mean": -0.45541560649871826, "beta_dpo/beta_margin_grad_std": 0.07803189009428024, "beta_dpo/beta_margin_mean": 0.18836283683776855, "beta_dpo/beta_margin_std": 0.33558791875839233, "beta_dpo/beta_used": 0.013414818793535233, "beta_dpo/beta_used_raw": 0.004978269338607788, "beta_dpo/gap_mean": 12.683042526245117, "beta_dpo/gap_std": 19.99517059326172, "beta_dpo/loss_margin_mean": 12.636152267456055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4051398337112623, "grad_norm": 7.843941688537598, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 0.32715708017349243, "logits/rejected": 0.3075103759765625, "loss": 1.2591, "step": 268 }, { "beta_dpo/beta": 0.21129584312438965, "beta_dpo/beta_margin_grad_mean": -0.2817341685295105, "beta_dpo/beta_margin_grad_std": 0.2654723525047302, "beta_dpo/beta_margin_mean": 3.7788643836975098, "beta_dpo/beta_margin_std": 5.652310848236084, "beta_dpo/beta_used": 0.21129584312438965, "beta_dpo/beta_used_raw": 0.21129584312438965, "beta_dpo/gap_mean": 12.821964263916016, "beta_dpo/gap_std": 19.65093231201172, "beta_dpo/loss_margin_mean": 14.411201477050781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40665154950869237, "grad_norm": 82.85317993164062, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 0.5275648832321167, "logits/rejected": 0.47768309712409973, "loss": 0.85, "step": 269 }, { "beta_dpo/beta": 0.027644401416182518, "beta_dpo/beta_margin_grad_mean": -0.43374115228652954, "beta_dpo/beta_margin_grad_std": 0.14928385615348816, "beta_dpo/beta_margin_mean": 0.3241073489189148, "beta_dpo/beta_margin_std": 0.7700070142745972, "beta_dpo/beta_used": 0.027644401416182518, "beta_dpo/beta_used_raw": -0.021652307361364365, "beta_dpo/gap_mean": 12.841148376464844, "beta_dpo/gap_std": 19.287689208984375, "beta_dpo/loss_margin_mean": 11.001672744750977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40816326530612246, "grad_norm": 16.57350730895996, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.44649437069892883, "logits/rejected": 0.3714543879032135, "loss": 1.1677, "step": 270 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49744880199432373, "beta_dpo/beta_margin_grad_std": 0.005622472148388624, "beta_dpo/beta_margin_mean": 0.010206691920757294, "beta_dpo/beta_margin_std": 0.022493908181786537, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14274440705776215, "beta_dpo/gap_mean": 12.42354965209961, "beta_dpo/gap_std": 19.610389709472656, "beta_dpo/loss_margin_mean": 10.20669174194336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40967498110355255, "grad_norm": 0.6051694750785828, "learning_rate": 3.692315864546635e-07, "logits/chosen": 0.45940130949020386, "logits/rejected": 0.39874905347824097, "loss": 1.378, "step": 271 }, { "beta_dpo/beta": 0.18113896250724792, "beta_dpo/beta_margin_grad_mean": -0.2012496441602707, "beta_dpo/beta_margin_grad_std": 0.25230535864830017, "beta_dpo/beta_margin_mean": 3.179334878921509, "beta_dpo/beta_margin_std": 3.390836238861084, "beta_dpo/beta_used": 0.18113896250724792, "beta_dpo/beta_used_raw": 0.18113896250724792, "beta_dpo/gap_mean": 12.827465057373047, "beta_dpo/gap_std": 19.45254898071289, "beta_dpo/loss_margin_mean": 17.20043182373047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41118669690098264, "grad_norm": 79.047607421875, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 0.45520102977752686, "logits/rejected": 0.4643624424934387, "loss": 0.7339, "step": 272 }, { "beta_dpo/beta": 0.11468373984098434, "beta_dpo/beta_margin_grad_mean": -0.39956751465797424, "beta_dpo/beta_margin_grad_std": 0.2712042033672333, "beta_dpo/beta_margin_mean": 1.4363672733306885, "beta_dpo/beta_margin_std": 3.4752349853515625, "beta_dpo/beta_used": 0.11468373984098434, "beta_dpo/beta_used_raw": 0.10186155885457993, "beta_dpo/gap_mean": 13.144290924072266, "beta_dpo/gap_std": 19.811664581298828, "beta_dpo/loss_margin_mean": 13.085746765136719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4126984126984127, "grad_norm": 65.30044555664062, "learning_rate": 3.669006483223828e-07, "logits/chosen": 0.4667285084724426, "logits/rejected": 0.4003213047981262, "loss": 1.1074, "step": 273 }, { "beta_dpo/beta": 0.0694403201341629, "beta_dpo/beta_margin_grad_mean": -0.40486380457878113, "beta_dpo/beta_margin_grad_std": 0.24759718775749207, "beta_dpo/beta_margin_mean": 0.8668053150177002, "beta_dpo/beta_margin_std": 2.210718870162964, "beta_dpo/beta_used": 0.0694403201341629, "beta_dpo/beta_used_raw": 0.03696062043309212, "beta_dpo/gap_mean": 13.18702507019043, "beta_dpo/gap_std": 20.19510269165039, "beta_dpo/loss_margin_mean": 13.648605346679688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41421012849584277, "grad_norm": 39.4247932434082, "learning_rate": 3.657302579891656e-07, "logits/chosen": 0.33252406120300293, "logits/rejected": 0.30951541662216187, "loss": 1.1161, "step": 274 }, { "beta_dpo/beta": 0.08022014796733856, "beta_dpo/beta_margin_grad_mean": -0.32078537344932556, "beta_dpo/beta_margin_grad_std": 0.2081281989812851, "beta_dpo/beta_margin_mean": 1.29566490650177, "beta_dpo/beta_margin_std": 1.8696632385253906, "beta_dpo/beta_used": 0.08022014796733856, "beta_dpo/beta_used_raw": 0.08022014796733856, "beta_dpo/gap_mean": 13.405573844909668, "beta_dpo/gap_std": 20.12377166748047, "beta_dpo/loss_margin_mean": 14.838817596435547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41572184429327286, "grad_norm": 28.87332534790039, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.41394883394241333, "logits/rejected": 0.3336278200149536, "loss": 0.8851, "step": 275 }, { "beta_dpo/beta": 0.07089600712060928, "beta_dpo/beta_margin_grad_mean": -0.37287554144859314, "beta_dpo/beta_margin_grad_std": 0.22276493906974792, "beta_dpo/beta_margin_mean": 0.9248519539833069, "beta_dpo/beta_margin_std": 1.8152260780334473, "beta_dpo/beta_used": 0.07089600712060928, "beta_dpo/beta_used_raw": 0.03236527368426323, "beta_dpo/gap_mean": 13.642976760864258, "beta_dpo/gap_std": 19.716773986816406, "beta_dpo/loss_margin_mean": 13.728145599365234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41723356009070295, "grad_norm": 29.26936912536621, "learning_rate": 3.633797984793294e-07, "logits/chosen": 0.3696138560771942, "logits/rejected": 0.33492618799209595, "loss": 0.9796, "step": 276 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49800875782966614, "beta_dpo/beta_margin_grad_std": 0.004511854145675898, "beta_dpo/beta_margin_mean": 0.007965884171426296, "beta_dpo/beta_margin_std": 0.01804952323436737, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1568543016910553, "beta_dpo/gap_mean": 12.772453308105469, "beta_dpo/gap_std": 19.320093154907227, "beta_dpo/loss_margin_mean": 7.965883255004883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41874527588813304, "grad_norm": 0.6365845203399658, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 0.46259212493896484, "logits/rejected": 0.48363691568374634, "loss": 1.3779, "step": 277 }, { "beta_dpo/beta": 0.15876400470733643, "beta_dpo/beta_margin_grad_mean": -0.30526286363601685, "beta_dpo/beta_margin_grad_std": 0.2431895136833191, "beta_dpo/beta_margin_mean": 2.8034675121307373, "beta_dpo/beta_margin_std": 5.201408863067627, "beta_dpo/beta_used": 0.15876400470733643, "beta_dpo/beta_used_raw": 0.15876400470733643, "beta_dpo/gap_mean": 12.495584487915039, "beta_dpo/gap_std": 19.255725860595703, "beta_dpo/loss_margin_mean": 13.553324699401855, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42025699168556313, "grad_norm": 33.650630950927734, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 0.3732357621192932, "logits/rejected": 0.3219851851463318, "loss": 0.7781, "step": 278 }, { "beta_dpo/beta": 0.23024505376815796, "beta_dpo/beta_margin_grad_mean": -0.19230079650878906, "beta_dpo/beta_margin_grad_std": 0.28550806641578674, "beta_dpo/beta_margin_mean": 3.9565861225128174, "beta_dpo/beta_margin_std": 4.337207317352295, "beta_dpo/beta_used": 0.23024505376815796, "beta_dpo/beta_used_raw": 0.23024505376815796, "beta_dpo/gap_mean": 13.335415840148926, "beta_dpo/gap_std": 19.329914093017578, "beta_dpo/loss_margin_mean": 17.311302185058594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4217687074829932, "grad_norm": 63.41184616088867, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 0.3997264504432678, "logits/rejected": 0.35758689045906067, "loss": 0.5583, "step": 279 }, { "beta_dpo/beta": 0.13080888986587524, "beta_dpo/beta_margin_grad_mean": -0.22343085706233978, "beta_dpo/beta_margin_grad_std": 0.21184687316417694, "beta_dpo/beta_margin_mean": 2.2733867168426514, "beta_dpo/beta_margin_std": 2.2885751724243164, "beta_dpo/beta_used": 0.13080888986587524, "beta_dpo/beta_used_raw": 0.13080888986587524, "beta_dpo/gap_mean": 14.12271499633789, "beta_dpo/gap_std": 18.93375015258789, "beta_dpo/loss_margin_mean": 17.238388061523438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42328042328042326, "grad_norm": 38.145751953125, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.4410172700881958, "logits/rejected": 0.39698508381843567, "loss": 0.5901, "step": 280 }, { "beta_dpo/beta": 0.030089804902672768, "beta_dpo/beta_margin_grad_mean": -0.4115253686904907, "beta_dpo/beta_margin_grad_std": 0.16938680410385132, "beta_dpo/beta_margin_mean": 0.4769037663936615, "beta_dpo/beta_margin_std": 0.9234161376953125, "beta_dpo/beta_used": 0.030089804902672768, "beta_dpo/beta_used_raw": 0.015273205004632473, "beta_dpo/gap_mean": 14.474631309509277, "beta_dpo/gap_std": 18.93929672241211, "beta_dpo/loss_margin_mean": 14.873807907104492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42479213907785335, "grad_norm": 15.947169303894043, "learning_rate": 3.574487280222929e-07, "logits/chosen": 0.4040282964706421, "logits/rejected": 0.41597017645835876, "loss": 1.158, "step": 281 }, { "beta_dpo/beta": 0.10513893514871597, "beta_dpo/beta_margin_grad_mean": -0.3416615426540375, "beta_dpo/beta_margin_grad_std": 0.2618146240711212, "beta_dpo/beta_margin_mean": 2.1443495750427246, "beta_dpo/beta_margin_std": 3.675140857696533, "beta_dpo/beta_used": 0.10513893514871597, "beta_dpo/beta_used_raw": -0.015448860824108124, "beta_dpo/gap_mean": 14.205320358276367, "beta_dpo/gap_std": 19.232637405395508, "beta_dpo/loss_margin_mean": 14.617086410522461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42630385487528344, "grad_norm": 67.41303253173828, "learning_rate": 3.562533640600075e-07, "logits/chosen": 0.3522970676422119, "logits/rejected": 0.3013116419315338, "loss": 1.1754, "step": 282 }, { "beta_dpo/beta": 0.0794285237789154, "beta_dpo/beta_margin_grad_mean": -0.3860606551170349, "beta_dpo/beta_margin_grad_std": 0.23796863853931427, "beta_dpo/beta_margin_mean": 1.2510942220687866, "beta_dpo/beta_margin_std": 2.778822422027588, "beta_dpo/beta_used": 0.0794285237789154, "beta_dpo/beta_used_raw": 0.030920717865228653, "beta_dpo/gap_mean": 14.144121170043945, "beta_dpo/gap_std": 19.416378021240234, "beta_dpo/loss_margin_mean": 12.779112815856934, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42781557067271353, "grad_norm": 49.29255294799805, "learning_rate": 3.550550279627215e-07, "logits/chosen": 0.40588125586509705, "logits/rejected": 0.31223565340042114, "loss": 1.1026, "step": 283 }, { "beta_dpo/beta": 0.04316014051437378, "beta_dpo/beta_margin_grad_mean": -0.39386722445487976, "beta_dpo/beta_margin_grad_std": 0.16511482000350952, "beta_dpo/beta_margin_mean": 0.6583900451660156, "beta_dpo/beta_margin_std": 1.2365429401397705, "beta_dpo/beta_used": 0.04316014051437378, "beta_dpo/beta_used_raw": 0.04316014051437378, "beta_dpo/gap_mean": 14.177618026733398, "beta_dpo/gap_std": 19.436182022094727, "beta_dpo/loss_margin_mean": 13.194892883300781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4293272864701436, "grad_norm": 23.569568634033203, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 0.4499743580818176, "logits/rejected": 0.38757190108299255, "loss": 1.0673, "step": 284 }, { "beta_dpo/beta": 0.0996825098991394, "beta_dpo/beta_margin_grad_mean": -0.35815978050231934, "beta_dpo/beta_margin_grad_std": 0.2535557746887207, "beta_dpo/beta_margin_mean": 1.3248549699783325, "beta_dpo/beta_margin_std": 2.5650172233581543, "beta_dpo/beta_used": 0.0996825098991394, "beta_dpo/beta_used_raw": 0.0996825098991394, "beta_dpo/gap_mean": 13.592554092407227, "beta_dpo/gap_std": 19.435794830322266, "beta_dpo/loss_margin_mean": 11.820451736450195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4308390022675737, "grad_norm": 50.08279037475586, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.46355849504470825, "logits/rejected": 0.43509894609451294, "loss": 0.8775, "step": 285 }, { "beta_dpo/beta": 0.19798368215560913, "beta_dpo/beta_margin_grad_mean": -0.3241359293460846, "beta_dpo/beta_margin_grad_std": 0.265657901763916, "beta_dpo/beta_margin_mean": 3.4995052814483643, "beta_dpo/beta_margin_std": 7.216194152832031, "beta_dpo/beta_used": 0.19798368215560913, "beta_dpo/beta_used_raw": 0.19475844502449036, "beta_dpo/gap_mean": 14.27553653717041, "beta_dpo/gap_std": 19.55514144897461, "beta_dpo/loss_margin_mean": 18.03702735900879, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4323507180650038, "grad_norm": 50.606544494628906, "learning_rate": 3.514425224712835e-07, "logits/chosen": 0.38016998767852783, "logits/rejected": 0.2861751317977905, "loss": 0.7735, "step": 286 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49570053815841675, "beta_dpo/beta_margin_grad_std": 0.005138974636793137, "beta_dpo/beta_margin_mean": 0.017200568690896034, "beta_dpo/beta_margin_std": 0.020560333505272865, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12309788912534714, "beta_dpo/gap_mean": 14.72789192199707, "beta_dpo/gap_std": 19.78351593017578, "beta_dpo/loss_margin_mean": 17.20056915283203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43386243386243384, "grad_norm": 0.901676595211029, "learning_rate": 3.502326338516534e-07, "logits/chosen": 0.4146936237812042, "logits/rejected": 0.373552143573761, "loss": 1.3754, "step": 287 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4973767399787903, "beta_dpo/beta_margin_grad_std": 0.00516990851610899, "beta_dpo/beta_margin_mean": 0.010494444519281387, "beta_dpo/beta_margin_std": 0.02068295143544674, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20959964394569397, "beta_dpo/gap_mean": 14.297950744628906, "beta_dpo/gap_std": 20.072662353515625, "beta_dpo/loss_margin_mean": 10.494444847106934, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43537414965986393, "grad_norm": 0.7291444540023804, "learning_rate": 3.490199415097892e-07, "logits/chosen": 0.3069779574871063, "logits/rejected": 0.25205767154693604, "loss": 1.3772, "step": 288 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49683305621147156, "beta_dpo/beta_margin_grad_std": 0.00543037336319685, "beta_dpo/beta_margin_mean": 0.012669811025261879, "beta_dpo/beta_margin_std": 0.02172556333243847, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.048285968601703644, "beta_dpo/gap_mean": 13.913497924804688, "beta_dpo/gap_std": 20.553274154663086, "beta_dpo/loss_margin_mean": 12.66981029510498, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.436885865457294, "grad_norm": 0.7524048089981079, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 0.513412356376648, "logits/rejected": 0.47326600551605225, "loss": 1.375, "step": 289 }, { "beta_dpo/beta": 0.09844214469194412, "beta_dpo/beta_margin_grad_mean": -0.38945475220680237, "beta_dpo/beta_margin_grad_std": 0.2678634226322174, "beta_dpo/beta_margin_mean": 1.5054290294647217, "beta_dpo/beta_margin_std": 3.634999990463257, "beta_dpo/beta_used": 0.09844214469194412, "beta_dpo/beta_used_raw": 0.09844214469194412, "beta_dpo/gap_mean": 14.4169282913208, "beta_dpo/gap_std": 20.567150115966797, "beta_dpo/loss_margin_mean": 16.994667053222656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4383975812547241, "grad_norm": 59.73664474487305, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.49665823578834534, "logits/rejected": 0.42759448289871216, "loss": 1.1353, "step": 290 }, { "beta_dpo/beta": 0.25055256485939026, "beta_dpo/beta_margin_grad_mean": -0.33395248651504517, "beta_dpo/beta_margin_grad_std": 0.2863628566265106, "beta_dpo/beta_margin_mean": 5.20906400680542, "beta_dpo/beta_margin_std": 10.68582534790039, "beta_dpo/beta_used": 0.25055256485939026, "beta_dpo/beta_used_raw": 0.2299947887659073, "beta_dpo/gap_mean": 14.44134521484375, "beta_dpo/gap_std": 21.393653869628906, "beta_dpo/loss_margin_mean": 15.887751579284668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4399092970521542, "grad_norm": 58.57472229003906, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 0.5883669853210449, "logits/rejected": 0.5197364091873169, "loss": 0.8488, "step": 291 }, { "beta_dpo/beta": 0.007076106034219265, "beta_dpo/beta_margin_grad_mean": -0.47282010316848755, "beta_dpo/beta_margin_grad_std": 0.050678376108407974, "beta_dpo/beta_margin_mean": 0.11135596036911011, "beta_dpo/beta_margin_std": 0.2094070464372635, "beta_dpo/beta_used": 0.007076106034219265, "beta_dpo/beta_used_raw": -0.014458229765295982, "beta_dpo/gap_mean": 14.619291305541992, "beta_dpo/gap_std": 21.718494415283203, "beta_dpo/loss_margin_mean": 14.234935760498047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4414210128495843, "grad_norm": 5.891993999481201, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 0.45743709802627563, "logits/rejected": 0.40714502334594727, "loss": 1.3021, "step": 292 }, { "beta_dpo/beta": 0.23176653683185577, "beta_dpo/beta_margin_grad_mean": -0.2546376585960388, "beta_dpo/beta_margin_grad_std": 0.36235809326171875, "beta_dpo/beta_margin_mean": 4.366588592529297, "beta_dpo/beta_margin_std": 5.932827472686768, "beta_dpo/beta_used": 0.23176653683185577, "beta_dpo/beta_used_raw": 0.23176653683185577, "beta_dpo/gap_mean": 15.188974380493164, "beta_dpo/gap_std": 22.373016357421875, "beta_dpo/loss_margin_mean": 18.706132888793945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4429327286470144, "grad_norm": 113.21065521240234, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 0.5480353832244873, "logits/rejected": 0.45960021018981934, "loss": 1.0772, "step": 293 }, { "beta_dpo/beta": 0.1309327632188797, "beta_dpo/beta_margin_grad_mean": -0.3639461398124695, "beta_dpo/beta_margin_grad_std": 0.24621707201004028, "beta_dpo/beta_margin_mean": 2.632067918777466, "beta_dpo/beta_margin_std": 4.834066867828369, "beta_dpo/beta_used": 0.1309327632188797, "beta_dpo/beta_used_raw": -0.024740561842918396, "beta_dpo/gap_mean": 15.85311222076416, "beta_dpo/gap_std": 22.474395751953125, "beta_dpo/loss_margin_mean": 17.183238983154297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4444444444444444, "grad_norm": 131.88706970214844, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 0.45878100395202637, "logits/rejected": 0.4147722125053406, "loss": 1.0422, "step": 294 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49661287665367126, "beta_dpo/beta_margin_grad_std": 0.005212538409978151, "beta_dpo/beta_margin_mean": 0.013550843112170696, "beta_dpo/beta_margin_std": 0.0208545234054327, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.119144506752491, "beta_dpo/gap_mean": 15.471101760864258, "beta_dpo/gap_std": 22.285518646240234, "beta_dpo/loss_margin_mean": 13.55084228515625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4459561602418745, "grad_norm": 0.7092771530151367, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.444843053817749, "logits/rejected": 0.35121992230415344, "loss": 1.3746, "step": 295 }, { "beta_dpo/beta": 0.05136363208293915, "beta_dpo/beta_margin_grad_mean": -0.3628765046596527, "beta_dpo/beta_margin_grad_std": 0.21744120121002197, "beta_dpo/beta_margin_mean": 1.1004339456558228, "beta_dpo/beta_margin_std": 1.9954913854599, "beta_dpo/beta_used": 0.05136363208293915, "beta_dpo/beta_used_raw": 0.04004165530204773, "beta_dpo/gap_mean": 15.906424522399902, "beta_dpo/gap_std": 22.500926971435547, "beta_dpo/loss_margin_mean": 18.224267959594727, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4474678760393046, "grad_norm": 27.477766036987305, "learning_rate": 3.392215553979679e-07, "logits/chosen": 0.4178934097290039, "logits/rejected": 0.3756554126739502, "loss": 1.0375, "step": 296 }, { "beta_dpo/beta": 0.2311479151248932, "beta_dpo/beta_margin_grad_mean": -0.24369271099567413, "beta_dpo/beta_margin_grad_std": 0.2907634675502777, "beta_dpo/beta_margin_mean": 4.739898681640625, "beta_dpo/beta_margin_std": 6.735095977783203, "beta_dpo/beta_used": 0.2311479151248932, "beta_dpo/beta_used_raw": 0.2311479151248932, "beta_dpo/gap_mean": 16.063180923461914, "beta_dpo/gap_std": 22.32459259033203, "beta_dpo/loss_margin_mean": 18.45130729675293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4489795918367347, "grad_norm": 151.92831420898438, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 0.4511827230453491, "logits/rejected": 0.39176806807518005, "loss": 0.8429, "step": 297 }, { "beta_dpo/beta": 0.0763932392001152, "beta_dpo/beta_margin_grad_mean": -0.3957996666431427, "beta_dpo/beta_margin_grad_std": 0.25355854630470276, "beta_dpo/beta_margin_mean": 1.2725530862808228, "beta_dpo/beta_margin_std": 2.942795753479004, "beta_dpo/beta_used": 0.0763932392001152, "beta_dpo/beta_used_raw": 0.057013507932424545, "beta_dpo/gap_mean": 16.267505645751953, "beta_dpo/gap_std": 22.456554412841797, "beta_dpo/loss_margin_mean": 15.823343276977539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4504913076341648, "grad_norm": 51.522674560546875, "learning_rate": 3.367463137189156e-07, "logits/chosen": 0.5388308763504028, "logits/rejected": 0.4800010323524475, "loss": 1.0912, "step": 298 }, { "beta_dpo/beta": 0.0421302504837513, "beta_dpo/beta_margin_grad_mean": -0.39610981941223145, "beta_dpo/beta_margin_grad_std": 0.21745111048221588, "beta_dpo/beta_margin_mean": 0.6996102929115295, "beta_dpo/beta_margin_std": 1.5129352807998657, "beta_dpo/beta_used": 0.0421302504837513, "beta_dpo/beta_used_raw": -0.040616486221551895, "beta_dpo/gap_mean": 16.089981079101562, "beta_dpo/gap_std": 22.930587768554688, "beta_dpo/loss_margin_mean": 14.132258415222168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4520030234315949, "grad_norm": 25.89242172241211, "learning_rate": 3.355050358314172e-07, "logits/chosen": 0.3590264618396759, "logits/rejected": 0.3310147523880005, "loss": 1.0826, "step": 299 }, { "beta_dpo/beta": 0.05767098441720009, "beta_dpo/beta_margin_grad_mean": -0.38832151889801025, "beta_dpo/beta_margin_grad_std": 0.2553543150424957, "beta_dpo/beta_margin_mean": 0.9388990998268127, "beta_dpo/beta_margin_std": 2.173591375350952, "beta_dpo/beta_used": 0.05767098441720009, "beta_dpo/beta_used_raw": 0.04955215007066727, "beta_dpo/gap_mean": 15.694482803344727, "beta_dpo/gap_std": 23.181346893310547, "beta_dpo/loss_margin_mean": 15.122093200683594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.45351473922902497, "grad_norm": 46.98778533935547, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.46122419834136963, "logits/rejected": 0.39728420972824097, "loss": 1.1289, "step": 300 }, { "epoch": 0.45351473922902497, "eval_beta_dpo/beta": 0.11505560576915741, "eval_beta_dpo/beta_margin_grad_mean": -0.3684498369693756, "eval_beta_dpo/beta_margin_grad_std": 0.17674221098423004, "eval_beta_dpo/beta_margin_mean": 2.143476963043213, "eval_beta_dpo/beta_margin_std": 2.818143129348755, "eval_beta_dpo/beta_used": 0.11505560576915741, "eval_beta_dpo/beta_used_raw": 0.03927391394972801, "eval_beta_dpo/gap_mean": 15.726388931274414, "eval_beta_dpo/gap_std": 23.26214599609375, "eval_beta_dpo/loss_margin_mean": 14.714288711547852, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 0.5036243200302124, "eval_logits/rejected": 0.45220693945884705, "eval_loss": 0.6938130855560303, "eval_runtime": 38.89, "eval_samples_per_second": 59.218, "eval_steps_per_second": 1.851, "step": 300 }, { "beta_dpo/beta": 0.13246102631092072, "beta_dpo/beta_margin_grad_mean": -0.3911619782447815, "beta_dpo/beta_margin_grad_std": 0.27769890427589417, "beta_dpo/beta_margin_mean": 1.5554008483886719, "beta_dpo/beta_margin_std": 4.746208667755127, "beta_dpo/beta_used": 0.13246102631092072, "beta_dpo/beta_used_raw": 0.07967150211334229, "beta_dpo/gap_mean": 15.842938423156738, "beta_dpo/gap_std": 23.74286460876465, "beta_dpo/loss_margin_mean": 15.3896484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.455026455026455, "grad_norm": 85.52210998535156, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 0.5230924487113953, "logits/rejected": 0.4926201105117798, "loss": 0.9855, "step": 301 }, { "beta_dpo/beta": 0.046855803579092026, "beta_dpo/beta_margin_grad_mean": -0.42220836877822876, "beta_dpo/beta_margin_grad_std": 0.20177949965000153, "beta_dpo/beta_margin_mean": 0.46218007802963257, "beta_dpo/beta_margin_std": 1.4732623100280762, "beta_dpo/beta_used": 0.046855803579092026, "beta_dpo/beta_used_raw": -0.1274799406528473, "beta_dpo/gap_mean": 14.743999481201172, "beta_dpo/gap_std": 23.574613571166992, "beta_dpo/loss_margin_mean": 9.621975898742676, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4565381708238851, "grad_norm": 27.584754943847656, "learning_rate": 3.317669908293554e-07, "logits/chosen": 0.34268122911453247, "logits/rejected": 0.28931474685668945, "loss": 1.0507, "step": 302 }, { "beta_dpo/beta": 0.21900054812431335, "beta_dpo/beta_margin_grad_mean": -0.2753453850746155, "beta_dpo/beta_margin_grad_std": 0.3416510820388794, "beta_dpo/beta_margin_mean": 4.497269630432129, "beta_dpo/beta_margin_std": 7.897620677947998, "beta_dpo/beta_used": 0.21900054812431335, "beta_dpo/beta_used_raw": 0.21900054812431335, "beta_dpo/gap_mean": 15.209549903869629, "beta_dpo/gap_std": 23.96515464782715, "beta_dpo/loss_margin_mean": 19.80791473388672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4580498866213152, "grad_norm": 149.34068298339844, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 0.46690768003463745, "logits/rejected": 0.40056318044662476, "loss": 1.5522, "step": 303 }, { "beta_dpo/beta": 0.1770317256450653, "beta_dpo/beta_margin_grad_mean": -0.2056574821472168, "beta_dpo/beta_margin_grad_std": 0.27400633692741394, "beta_dpo/beta_margin_mean": 3.444209575653076, "beta_dpo/beta_margin_std": 3.6619412899017334, "beta_dpo/beta_used": 0.1770317256450653, "beta_dpo/beta_used_raw": 0.1770317256450653, "beta_dpo/gap_mean": 16.013572692871094, "beta_dpo/gap_std": 23.73634147644043, "beta_dpo/loss_margin_mean": 19.397008895874023, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4595616024187453, "grad_norm": 76.03279113769531, "learning_rate": 3.292634667444117e-07, "logits/chosen": 0.43441325426101685, "logits/rejected": 0.380365252494812, "loss": 0.6161, "step": 304 }, { "beta_dpo/beta": 0.25595974922180176, "beta_dpo/beta_margin_grad_mean": -0.2995953857898712, "beta_dpo/beta_margin_grad_std": 0.3861447870731354, "beta_dpo/beta_margin_mean": 3.4279513359069824, "beta_dpo/beta_margin_std": 6.85020637512207, "beta_dpo/beta_used": 0.25595974922180176, "beta_dpo/beta_used_raw": 0.25595974922180176, "beta_dpo/gap_mean": 15.928811073303223, "beta_dpo/gap_std": 23.983856201171875, "beta_dpo/loss_margin_mean": 13.063897132873535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46107331821617537, "grad_norm": 132.59515380859375, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.4133915603160858, "logits/rejected": 0.4339390695095062, "loss": 1.0019, "step": 305 }, { "beta_dpo/beta": 0.17636467516422272, "beta_dpo/beta_margin_grad_mean": -0.3082793056964874, "beta_dpo/beta_margin_grad_std": 0.2324202060699463, "beta_dpo/beta_margin_mean": 3.2350707054138184, "beta_dpo/beta_margin_std": 5.149374485015869, "beta_dpo/beta_used": 0.17636467516422272, "beta_dpo/beta_used_raw": -0.025320664048194885, "beta_dpo/gap_mean": 15.603142738342285, "beta_dpo/gap_std": 23.355175018310547, "beta_dpo/loss_margin_mean": 14.176777839660645, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46258503401360546, "grad_norm": 81.64033508300781, "learning_rate": 3.267510740432719e-07, "logits/chosen": 0.4885938763618469, "logits/rejected": 0.37870633602142334, "loss": 0.8295, "step": 306 }, { "beta_dpo/beta": 0.1279929280281067, "beta_dpo/beta_margin_grad_mean": -0.43924155831336975, "beta_dpo/beta_margin_grad_std": 0.29262182116508484, "beta_dpo/beta_margin_mean": 1.2105649709701538, "beta_dpo/beta_margin_std": 4.71697998046875, "beta_dpo/beta_used": 0.1279929280281067, "beta_dpo/beta_used_raw": 0.09390576928853989, "beta_dpo/gap_mean": 14.28852653503418, "beta_dpo/gap_std": 23.57619285583496, "beta_dpo/loss_margin_mean": 8.560576438903809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46409674981103555, "grad_norm": 115.50189208984375, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 0.49088621139526367, "logits/rejected": 0.4447266459465027, "loss": 1.4104, "step": 307 }, { "beta_dpo/beta": 0.10693937540054321, "beta_dpo/beta_margin_grad_mean": -0.3734705448150635, "beta_dpo/beta_margin_grad_std": 0.2782239615917206, "beta_dpo/beta_margin_mean": 1.970252275466919, "beta_dpo/beta_margin_std": 4.069371223449707, "beta_dpo/beta_used": 0.10693937540054321, "beta_dpo/beta_used_raw": 0.09306029975414276, "beta_dpo/gap_mean": 14.391298294067383, "beta_dpo/gap_std": 23.804813385009766, "beta_dpo/loss_margin_mean": 15.613865852355957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4656084656084656, "grad_norm": 86.94285583496094, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 0.4649581015110016, "logits/rejected": 0.4495544135570526, "loss": 1.5807, "step": 308 }, { "beta_dpo/beta": 0.00609197374433279, "beta_dpo/beta_margin_grad_mean": -0.4813478887081146, "beta_dpo/beta_margin_grad_std": 0.04934345558285713, "beta_dpo/beta_margin_mean": 0.07604541629552841, "beta_dpo/beta_margin_std": 0.20145538449287415, "beta_dpo/beta_used": 0.00609197374433279, "beta_dpo/beta_used_raw": 0.006033188197761774, "beta_dpo/gap_mean": 14.475622177124023, "beta_dpo/gap_std": 23.469074249267578, "beta_dpo/loss_margin_mean": 14.663956642150879, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4671201814058957, "grad_norm": 5.302502155303955, "learning_rate": 3.229664715194511e-07, "logits/chosen": 0.5176172256469727, "logits/rejected": 0.45871448516845703, "loss": 1.3167, "step": 309 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4975871741771698, "beta_dpo/beta_margin_grad_std": 0.005759851075708866, "beta_dpo/beta_margin_mean": 0.009653035551309586, "beta_dpo/beta_margin_std": 0.023043153807520866, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15084339678287506, "beta_dpo/gap_mean": 13.659213066101074, "beta_dpo/gap_std": 23.398651123046875, "beta_dpo/loss_margin_mean": 9.653035163879395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46863189720332576, "grad_norm": 0.7138440012931824, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.48263025283813477, "logits/rejected": 0.46910643577575684, "loss": 1.3769, "step": 310 }, { "beta_dpo/beta": 0.028796648606657982, "beta_dpo/beta_margin_grad_mean": -0.4123019874095917, "beta_dpo/beta_margin_grad_std": 0.16005714237689972, "beta_dpo/beta_margin_mean": 0.462990939617157, "beta_dpo/beta_margin_std": 0.9138634204864502, "beta_dpo/beta_used": 0.028796648606657982, "beta_dpo/beta_used_raw": -0.09092193841934204, "beta_dpo/gap_mean": 13.479878425598145, "beta_dpo/gap_std": 23.28680419921875, "beta_dpo/loss_margin_mean": 12.439473152160645, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47014361300075586, "grad_norm": 16.826684951782227, "learning_rate": 3.204331392103574e-07, "logits/chosen": 0.3971518874168396, "logits/rejected": 0.2650872468948364, "loss": 1.1307, "step": 311 }, { "beta_dpo/beta": 0.163363978266716, "beta_dpo/beta_margin_grad_mean": -0.3334113359451294, "beta_dpo/beta_margin_grad_std": 0.25571146607398987, "beta_dpo/beta_margin_mean": 2.983691692352295, "beta_dpo/beta_margin_std": 5.67026424407959, "beta_dpo/beta_used": 0.163363978266716, "beta_dpo/beta_used_raw": 0.06641173362731934, "beta_dpo/gap_mean": 13.949535369873047, "beta_dpo/gap_std": 23.26689910888672, "beta_dpo/loss_margin_mean": 18.187755584716797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47165532879818595, "grad_norm": 41.266841888427734, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 0.4562729001045227, "logits/rejected": 0.3602554500102997, "loss": 0.8424, "step": 312 }, { "beta_dpo/beta": 0.04959503188729286, "beta_dpo/beta_margin_grad_mean": -0.42779460549354553, "beta_dpo/beta_margin_grad_std": 0.23276080191135406, "beta_dpo/beta_margin_mean": 0.5780282616615295, "beta_dpo/beta_margin_std": 1.984850287437439, "beta_dpo/beta_used": 0.04959503188729286, "beta_dpo/beta_used_raw": 0.011893421411514282, "beta_dpo/gap_mean": 13.817769050598145, "beta_dpo/gap_std": 23.424896240234375, "beta_dpo/loss_margin_mean": 11.726629257202148, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47316704459561604, "grad_norm": 39.96240234375, "learning_rate": 3.178919262911314e-07, "logits/chosen": 0.5323947668075562, "logits/rejected": 0.510050356388092, "loss": 1.1167, "step": 313 }, { "beta_dpo/beta": 0.4231081008911133, "beta_dpo/beta_margin_grad_mean": -0.1942075788974762, "beta_dpo/beta_margin_grad_std": 0.3291747570037842, "beta_dpo/beta_margin_mean": 8.420723915100098, "beta_dpo/beta_margin_std": 9.913713455200195, "beta_dpo/beta_used": 0.4231081008911133, "beta_dpo/beta_used_raw": 0.4231081008911133, "beta_dpo/gap_mean": 14.603485107421875, "beta_dpo/gap_std": 23.527881622314453, "beta_dpo/loss_margin_mean": 19.856849670410156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47467876039304613, "grad_norm": 202.71646118164062, "learning_rate": 3.166184534225087e-07, "logits/chosen": 0.45087888836860657, "logits/rejected": 0.47414296865463257, "loss": 1.258, "step": 314 }, { "beta_dpo/beta": 0.1378045380115509, "beta_dpo/beta_margin_grad_mean": -0.3461618721485138, "beta_dpo/beta_margin_grad_std": 0.282050758600235, "beta_dpo/beta_margin_mean": 2.926037549972534, "beta_dpo/beta_margin_std": 5.34313440322876, "beta_dpo/beta_used": 0.1378045380115509, "beta_dpo/beta_used_raw": -0.007274851202964783, "beta_dpo/gap_mean": 14.610097885131836, "beta_dpo/gap_std": 23.17371940612793, "beta_dpo/loss_margin_mean": 15.014217376708984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47619047619047616, "grad_norm": 92.25020599365234, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.4022720456123352, "logits/rejected": 0.361319899559021, "loss": 1.3093, "step": 315 }, { "beta_dpo/beta": 0.2009076625108719, "beta_dpo/beta_margin_grad_mean": -0.30123385787010193, "beta_dpo/beta_margin_grad_std": 0.2647503912448883, "beta_dpo/beta_margin_mean": 5.084220886230469, "beta_dpo/beta_margin_std": 7.214354038238525, "beta_dpo/beta_used": 0.2009076625108719, "beta_dpo/beta_used_raw": 0.18065881729125977, "beta_dpo/gap_mean": 15.16646957397461, "beta_dpo/gap_std": 22.370153427124023, "beta_dpo/loss_margin_mean": 18.50032615661621, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47770219198790626, "grad_norm": 95.88639068603516, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 0.3557189106941223, "logits/rejected": 0.23355801403522491, "loss": 1.0698, "step": 316 }, { "beta_dpo/beta": 0.1612454503774643, "beta_dpo/beta_margin_grad_mean": -0.3476071357727051, "beta_dpo/beta_margin_grad_std": 0.286365807056427, "beta_dpo/beta_margin_mean": 3.270941734313965, "beta_dpo/beta_margin_std": 6.323551654815674, "beta_dpo/beta_used": 0.1612454503774643, "beta_dpo/beta_used_raw": 0.07801032066345215, "beta_dpo/gap_mean": 16.22469711303711, "beta_dpo/gap_std": 22.18575668334961, "beta_dpo/loss_margin_mean": 18.86298179626465, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47921390778533635, "grad_norm": 82.9071273803711, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 0.5267002582550049, "logits/rejected": 0.4774794578552246, "loss": 1.2068, "step": 317 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4968032240867615, "beta_dpo/beta_margin_grad_std": 0.0065883188508450985, "beta_dpo/beta_margin_mean": 0.01278975885361433, "beta_dpo/beta_margin_std": 0.02635836973786354, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18052691221237183, "beta_dpo/gap_mean": 15.857492446899414, "beta_dpo/gap_std": 22.781883239746094, "beta_dpo/loss_margin_mean": 12.789758682250977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48072562358276644, "grad_norm": 0.6836572885513306, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 0.4887760281562805, "logits/rejected": 0.42186325788497925, "loss": 1.3752, "step": 318 }, { "beta_dpo/beta": 0.1261560469865799, "beta_dpo/beta_margin_grad_mean": -0.3443688154220581, "beta_dpo/beta_margin_grad_std": 0.261486679315567, "beta_dpo/beta_margin_mean": 2.205345869064331, "beta_dpo/beta_margin_std": 4.338085174560547, "beta_dpo/beta_used": 0.1261560469865799, "beta_dpo/beta_used_raw": 0.1261560469865799, "beta_dpo/gap_mean": 15.758548736572266, "beta_dpo/gap_std": 22.95236587524414, "beta_dpo/loss_margin_mean": 16.208959579467773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48223733938019653, "grad_norm": 70.87828826904297, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 0.4163881838321686, "logits/rejected": 0.3962825834751129, "loss": 1.0191, "step": 319 }, { "beta_dpo/beta": 0.08117100596427917, "beta_dpo/beta_margin_grad_mean": -0.3801567256450653, "beta_dpo/beta_margin_grad_std": 0.25498995184898376, "beta_dpo/beta_margin_mean": 1.6039687395095825, "beta_dpo/beta_margin_std": 3.3591623306274414, "beta_dpo/beta_used": 0.08117100596427917, "beta_dpo/beta_used_raw": -0.06516657024621964, "beta_dpo/gap_mean": 14.905145645141602, "beta_dpo/gap_std": 22.96976089477539, "beta_dpo/loss_margin_mean": 12.470100402832031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4837490551776266, "grad_norm": 35.630069732666016, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.3363953232765198, "logits/rejected": 0.2939741611480713, "loss": 1.1093, "step": 320 }, { "beta_dpo/beta": 0.07048396021127701, "beta_dpo/beta_margin_grad_mean": -0.3663146197795868, "beta_dpo/beta_margin_grad_std": 0.22248594462871552, "beta_dpo/beta_margin_mean": 1.050384521484375, "beta_dpo/beta_margin_std": 2.262019157409668, "beta_dpo/beta_used": 0.07048396021127701, "beta_dpo/beta_used_raw": -0.09373751282691956, "beta_dpo/gap_mean": 14.79505729675293, "beta_dpo/gap_std": 22.987442016601562, "beta_dpo/loss_margin_mean": 11.55805492401123, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4852607709750567, "grad_norm": 28.83603286743164, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 0.42309537529945374, "logits/rejected": 0.40167927742004395, "loss": 0.9358, "step": 321 }, { "beta_dpo/beta": 0.3060827851295471, "beta_dpo/beta_margin_grad_mean": -0.1633433699607849, "beta_dpo/beta_margin_grad_std": 0.28638920187950134, "beta_dpo/beta_margin_mean": 5.767949104309082, "beta_dpo/beta_margin_std": 6.40022087097168, "beta_dpo/beta_used": 0.3060827851295471, "beta_dpo/beta_used_raw": 0.3060827851295471, "beta_dpo/gap_mean": 14.88734245300293, "beta_dpo/gap_std": 22.760725021362305, "beta_dpo/loss_margin_mean": 18.466712951660156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48677248677248675, "grad_norm": 120.95641326904297, "learning_rate": 3.063665887884511e-07, "logits/chosen": 0.47183048725128174, "logits/rejected": 0.39079803228378296, "loss": 0.8343, "step": 322 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4965234100818634, "beta_dpo/beta_margin_grad_std": 0.00671707559376955, "beta_dpo/beta_margin_mean": 0.01391004491597414, "beta_dpo/beta_margin_std": 0.026876913383603096, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13449180126190186, "beta_dpo/gap_mean": 14.877632141113281, "beta_dpo/gap_std": 23.045122146606445, "beta_dpo/loss_margin_mean": 13.910043716430664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48828420256991684, "grad_norm": 0.7591775059700012, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 0.3593803644180298, "logits/rejected": 0.28365999460220337, "loss": 1.3754, "step": 323 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4968474507331848, "beta_dpo/beta_margin_grad_std": 0.004732394125312567, "beta_dpo/beta_margin_mean": 0.012611806392669678, "beta_dpo/beta_margin_std": 0.018933270126581192, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12538644671440125, "beta_dpo/gap_mean": 14.813908576965332, "beta_dpo/gap_std": 22.670183181762695, "beta_dpo/loss_margin_mean": 12.61180591583252, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4897959183673469, "grad_norm": 0.7829446196556091, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 0.47413748502731323, "logits/rejected": 0.41598939895629883, "loss": 1.3753, "step": 324 }, { "beta_dpo/beta": 0.22583173215389252, "beta_dpo/beta_margin_grad_mean": -0.3542848229408264, "beta_dpo/beta_margin_grad_std": 0.301741361618042, "beta_dpo/beta_margin_mean": 4.75588846206665, "beta_dpo/beta_margin_std": 9.164177894592285, "beta_dpo/beta_used": 0.22583173215389252, "beta_dpo/beta_used_raw": 0.10879484564065933, "beta_dpo/gap_mean": 14.971842765808105, "beta_dpo/gap_std": 22.737186431884766, "beta_dpo/loss_margin_mean": 15.938600540161133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.491307634164777, "grad_norm": 86.0190658569336, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.42567065358161926, "logits/rejected": 0.36200234293937683, "loss": 1.1197, "step": 325 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49821144342422485, "beta_dpo/beta_margin_grad_std": 0.0051818969659507275, "beta_dpo/beta_margin_mean": 0.00715494342148304, "beta_dpo/beta_margin_std": 0.02072981186211109, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12947367131710052, "beta_dpo/gap_mean": 13.62185001373291, "beta_dpo/gap_std": 22.390117645263672, "beta_dpo/loss_margin_mean": 7.154942989349365, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4928193499622071, "grad_norm": 0.8826065063476562, "learning_rate": 3.012016670162977e-07, "logits/chosen": 0.3907411992549896, "logits/rejected": 0.39254793524742126, "loss": 1.3766, "step": 326 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4969516396522522, "beta_dpo/beta_margin_grad_std": 0.006606688257306814, "beta_dpo/beta_margin_mean": 0.012196130119264126, "beta_dpo/beta_margin_std": 0.026434360072016716, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.05859054625034332, "beta_dpo/gap_mean": 13.037698745727539, "beta_dpo/gap_std": 22.96277618408203, "beta_dpo/loss_margin_mean": 12.19612979888916, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4943310657596372, "grad_norm": 0.7743061184883118, "learning_rate": 2.99906765620341e-07, "logits/chosen": 0.35184246301651, "logits/rejected": 0.3126526474952698, "loss": 1.376, "step": 327 }, { "beta_dpo/beta": 0.09753447771072388, "beta_dpo/beta_margin_grad_mean": -0.38534635305404663, "beta_dpo/beta_margin_grad_std": 0.25296443700790405, "beta_dpo/beta_margin_mean": 1.4550480842590332, "beta_dpo/beta_margin_std": 3.0800600051879883, "beta_dpo/beta_used": 0.09753447771072388, "beta_dpo/beta_used_raw": 0.0254751518368721, "beta_dpo/gap_mean": 13.520404815673828, "beta_dpo/gap_std": 22.76117515563965, "beta_dpo/loss_margin_mean": 15.616045951843262, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4958427815570673, "grad_norm": 72.21566009521484, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 0.3937995433807373, "logits/rejected": 0.36060845851898193, "loss": 1.1729, "step": 328 }, { "beta_dpo/beta": 0.002249703276902437, "beta_dpo/beta_margin_grad_mean": -0.49035826325416565, "beta_dpo/beta_margin_grad_std": 0.016022533178329468, "beta_dpo/beta_margin_mean": 0.03864653781056404, "beta_dpo/beta_margin_std": 0.06428585946559906, "beta_dpo/beta_used": 0.002249703276902437, "beta_dpo/beta_used_raw": -0.004807803314179182, "beta_dpo/gap_mean": 14.01812744140625, "beta_dpo/gap_std": 22.638988494873047, "beta_dpo/loss_margin_mean": 15.689536094665527, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4973544973544973, "grad_norm": 2.2578487396240234, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 0.4657662510871887, "logits/rejected": 0.41007131338119507, "loss": 1.3591, "step": 329 }, { "beta_dpo/beta": 0.10219182074069977, "beta_dpo/beta_margin_grad_mean": -0.3714367747306824, "beta_dpo/beta_margin_grad_std": 0.2565270662307739, "beta_dpo/beta_margin_mean": 1.8236949443817139, "beta_dpo/beta_margin_std": 3.910278081893921, "beta_dpo/beta_used": 0.10219182074069977, "beta_dpo/beta_used_raw": 0.04957320913672447, "beta_dpo/gap_mean": 14.02891731262207, "beta_dpo/gap_std": 23.100826263427734, "beta_dpo/loss_margin_mean": 15.226579666137695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4988662131519274, "grad_norm": 45.77909851074219, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.43188655376434326, "logits/rejected": 0.3610241115093231, "loss": 0.9899, "step": 330 }, { "beta_dpo/beta": 0.10813254117965698, "beta_dpo/beta_margin_grad_mean": -0.3179372251033783, "beta_dpo/beta_margin_grad_std": 0.2648589611053467, "beta_dpo/beta_margin_mean": 2.2522261142730713, "beta_dpo/beta_margin_std": 4.484652519226074, "beta_dpo/beta_used": 0.10813254117965698, "beta_dpo/beta_used_raw": 0.10813254117965698, "beta_dpo/gap_mean": 15.494121551513672, "beta_dpo/gap_std": 23.874298095703125, "beta_dpo/loss_margin_mean": 22.335155487060547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5003779289493575, "grad_norm": 85.85868835449219, "learning_rate": 2.947135628327544e-07, "logits/chosen": 0.5212812423706055, "logits/rejected": 0.49004465341567993, "loss": 1.0558, "step": 331 }, { "beta_dpo/beta": 0.055088166147470474, "beta_dpo/beta_margin_grad_mean": -0.3655672073364258, "beta_dpo/beta_margin_grad_std": 0.22148992121219635, "beta_dpo/beta_margin_mean": 1.311160922050476, "beta_dpo/beta_margin_std": 2.3807129859924316, "beta_dpo/beta_used": 0.055088166147470474, "beta_dpo/beta_used_raw": -0.038188386708498, "beta_dpo/gap_mean": 15.760552406311035, "beta_dpo/gap_std": 24.142658233642578, "beta_dpo/loss_margin_mean": 17.73267364501953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5018896447467877, "grad_norm": 47.309410095214844, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.4019997715950012, "logits/rejected": 0.35436803102493286, "loss": 1.1193, "step": 332 }, { "beta_dpo/beta": 0.3347168564796448, "beta_dpo/beta_margin_grad_mean": -0.2177586406469345, "beta_dpo/beta_margin_grad_std": 0.3292071223258972, "beta_dpo/beta_margin_mean": 7.595628261566162, "beta_dpo/beta_margin_std": 8.555011749267578, "beta_dpo/beta_used": 0.3347168564796448, "beta_dpo/beta_used_raw": 0.3347168564796448, "beta_dpo/gap_mean": 17.009662628173828, "beta_dpo/gap_std": 24.363880157470703, "beta_dpo/loss_margin_mean": 22.31917953491211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5034013605442177, "grad_norm": 173.7876434326172, "learning_rate": 2.921093116725076e-07, "logits/chosen": 0.4317076802253723, "logits/rejected": 0.3626272678375244, "loss": 0.8614, "step": 333 }, { "beta_dpo/beta": 0.0760747492313385, "beta_dpo/beta_margin_grad_mean": -0.40545621514320374, "beta_dpo/beta_margin_grad_std": 0.2751104235649109, "beta_dpo/beta_margin_mean": 1.072293758392334, "beta_dpo/beta_margin_std": 3.0955679416656494, "beta_dpo/beta_used": 0.0760747492313385, "beta_dpo/beta_used_raw": 0.06470038741827011, "beta_dpo/gap_mean": 16.923879623413086, "beta_dpo/gap_std": 24.878725051879883, "beta_dpo/loss_margin_mean": 14.388141632080078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5049130763416477, "grad_norm": 81.30701446533203, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 0.46789878606796265, "logits/rejected": 0.4239296317100525, "loss": 1.3485, "step": 334 }, { "beta_dpo/beta": 0.14732955396175385, "beta_dpo/beta_margin_grad_mean": -0.35019081830978394, "beta_dpo/beta_margin_grad_std": 0.27779415249824524, "beta_dpo/beta_margin_mean": 3.1584525108337402, "beta_dpo/beta_margin_std": 5.779693603515625, "beta_dpo/beta_used": 0.14732955396175385, "beta_dpo/beta_used_raw": 0.06579115241765976, "beta_dpo/gap_mean": 17.025259017944336, "beta_dpo/gap_std": 25.54525375366211, "beta_dpo/loss_margin_mean": 19.060718536376953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5064247921390779, "grad_norm": 115.02545928955078, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.4890259802341461, "logits/rejected": 0.45069044828414917, "loss": 1.3095, "step": 335 }, { "beta_dpo/beta": 0.006434235721826553, "beta_dpo/beta_margin_grad_mean": -0.4622502028942108, "beta_dpo/beta_margin_grad_std": 0.07217463850975037, "beta_dpo/beta_margin_mean": 0.1582019180059433, "beta_dpo/beta_margin_std": 0.307355672121048, "beta_dpo/beta_used": 0.006434235721826553, "beta_dpo/beta_used_raw": -0.16895954310894012, "beta_dpo/gap_mean": 17.870014190673828, "beta_dpo/gap_std": 26.2718505859375, "beta_dpo/loss_margin_mean": 19.669475555419922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5079365079365079, "grad_norm": 6.488018989562988, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 0.391056627035141, "logits/rejected": 0.3693464994430542, "loss": 1.2986, "step": 336 }, { "beta_dpo/beta": 0.0338905043900013, "beta_dpo/beta_margin_grad_mean": -0.4215489327907562, "beta_dpo/beta_margin_grad_std": 0.18948403000831604, "beta_dpo/beta_margin_mean": 0.4833620488643646, "beta_dpo/beta_margin_std": 1.2267965078353882, "beta_dpo/beta_used": 0.0338905043900013, "beta_dpo/beta_used_raw": -0.11094224452972412, "beta_dpo/gap_mean": 17.074710845947266, "beta_dpo/gap_std": 26.033262252807617, "beta_dpo/loss_margin_mean": 13.338370323181152, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.509448223733938, "grad_norm": 26.08516502380371, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 0.3497691750526428, "logits/rejected": 0.316182017326355, "loss": 1.1495, "step": 337 }, { "beta_dpo/beta": 0.046072110533714294, "beta_dpo/beta_margin_grad_mean": -0.41082024574279785, "beta_dpo/beta_margin_grad_std": 0.20698098838329315, "beta_dpo/beta_margin_mean": 0.5744975805282593, "beta_dpo/beta_margin_std": 1.5593328475952148, "beta_dpo/beta_used": 0.046072110533714294, "beta_dpo/beta_used_raw": -0.08032860606908798, "beta_dpo/gap_mean": 16.35885238647461, "beta_dpo/gap_std": 25.77899932861328, "beta_dpo/loss_margin_mean": 14.114729881286621, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5109599395313681, "grad_norm": 40.451805114746094, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 0.4158708155155182, "logits/rejected": 0.38753098249435425, "loss": 1.0496, "step": 338 }, { "beta_dpo/beta": 0.08386623114347458, "beta_dpo/beta_margin_grad_mean": -0.38753587007522583, "beta_dpo/beta_margin_grad_std": 0.25693479180336, "beta_dpo/beta_margin_mean": 1.5702204704284668, "beta_dpo/beta_margin_std": 3.792813539505005, "beta_dpo/beta_used": 0.08386623114347458, "beta_dpo/beta_used_raw": -0.0034884288907051086, "beta_dpo/gap_mean": 16.51103973388672, "beta_dpo/gap_std": 26.04473304748535, "beta_dpo/loss_margin_mean": 17.060956954956055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5124716553287982, "grad_norm": 49.29536819458008, "learning_rate": 2.842694572172736e-07, "logits/chosen": 0.5705593824386597, "logits/rejected": 0.48631036281585693, "loss": 1.158, "step": 339 }, { "beta_dpo/beta": 0.16217856109142303, "beta_dpo/beta_margin_grad_mean": -0.3174978196620941, "beta_dpo/beta_margin_grad_std": 0.37058526277542114, "beta_dpo/beta_margin_mean": 3.037602186203003, "beta_dpo/beta_margin_std": 4.942996978759766, "beta_dpo/beta_used": 0.16217856109142303, "beta_dpo/beta_used_raw": 0.16217856109142303, "beta_dpo/gap_mean": 16.645965576171875, "beta_dpo/gap_std": 26.448713302612305, "beta_dpo/loss_margin_mean": 19.0429744720459, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5139833711262283, "grad_norm": 103.46623229980469, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.46611109375953674, "logits/rejected": 0.4412732720375061, "loss": 1.4731, "step": 340 }, { "beta_dpo/beta": 0.42890098690986633, "beta_dpo/beta_margin_grad_mean": -0.3295966386795044, "beta_dpo/beta_margin_grad_std": 0.29606911540031433, "beta_dpo/beta_margin_mean": 12.761946678161621, "beta_dpo/beta_margin_std": 20.45010757446289, "beta_dpo/beta_used": 0.42890098690986633, "beta_dpo/beta_used_raw": 0.3651895523071289, "beta_dpo/gap_mean": 17.889223098754883, "beta_dpo/gap_std": 26.904251098632812, "beta_dpo/loss_margin_mean": 20.278278350830078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5154950869236583, "grad_norm": 190.08685302734375, "learning_rate": 2.816481133934373e-07, "logits/chosen": 0.4816865622997284, "logits/rejected": 0.43140339851379395, "loss": 1.0551, "step": 341 }, { "beta_dpo/beta": 0.10002905875444412, "beta_dpo/beta_margin_grad_mean": -0.39862194657325745, "beta_dpo/beta_margin_grad_std": 0.2727389633655548, "beta_dpo/beta_margin_mean": 2.2917656898498535, "beta_dpo/beta_margin_std": 5.209559917449951, "beta_dpo/beta_used": 0.10002905875444412, "beta_dpo/beta_used_raw": 0.04651253670454025, "beta_dpo/gap_mean": 18.16006851196289, "beta_dpo/gap_std": 27.659713745117188, "beta_dpo/loss_margin_mean": 21.531021118164062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5170068027210885, "grad_norm": 89.060546875, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 0.5078241229057312, "logits/rejected": 0.46389999985694885, "loss": 1.4452, "step": 342 }, { "beta_dpo/beta": 0.020842978730797768, "beta_dpo/beta_margin_grad_mean": -0.41583141684532166, "beta_dpo/beta_margin_grad_std": 0.1556914746761322, "beta_dpo/beta_margin_mean": 0.4192487895488739, "beta_dpo/beta_margin_std": 0.8365549445152283, "beta_dpo/beta_used": 0.020842978730797768, "beta_dpo/beta_used_raw": -0.29891690611839294, "beta_dpo/gap_mean": 17.80866813659668, "beta_dpo/gap_std": 27.227333068847656, "beta_dpo/loss_margin_mean": 12.708889961242676, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5185185185185185, "grad_norm": 11.837695121765137, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 0.3714722990989685, "logits/rejected": 0.3623930811882019, "loss": 1.1458, "step": 343 }, { "beta_dpo/beta": 0.017244327813386917, "beta_dpo/beta_margin_grad_mean": -0.4226335287094116, "beta_dpo/beta_margin_grad_std": 0.14330071210861206, "beta_dpo/beta_margin_mean": 0.3823748230934143, "beta_dpo/beta_margin_std": 0.7459104657173157, "beta_dpo/beta_used": 0.017244327813386917, "beta_dpo/beta_used_raw": 0.0008646678179502487, "beta_dpo/gap_mean": 17.61762237548828, "beta_dpo/gap_std": 27.113140106201172, "beta_dpo/loss_margin_mean": 19.65960121154785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5200302343159486, "grad_norm": 13.51938533782959, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 0.49157804250717163, "logits/rejected": 0.42625850439071655, "loss": 1.1898, "step": 344 }, { "beta_dpo/beta": 0.2315424680709839, "beta_dpo/beta_margin_grad_mean": -0.28378885984420776, "beta_dpo/beta_margin_grad_std": 0.36535513401031494, "beta_dpo/beta_margin_mean": 3.849392890930176, "beta_dpo/beta_margin_std": 6.788583278656006, "beta_dpo/beta_used": 0.2315424680709839, "beta_dpo/beta_used_raw": 0.2315424680709839, "beta_dpo/gap_mean": 17.418254852294922, "beta_dpo/gap_std": 27.31433868408203, "beta_dpo/loss_margin_mean": 16.526798248291016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5215419501133787, "grad_norm": 133.7179412841797, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.5586827397346497, "logits/rejected": 0.5119316577911377, "loss": 1.0315, "step": 345 }, { "beta_dpo/beta": 0.19482511281967163, "beta_dpo/beta_margin_grad_mean": -0.2792256474494934, "beta_dpo/beta_margin_grad_std": 0.3250068128108978, "beta_dpo/beta_margin_mean": 4.4295735359191895, "beta_dpo/beta_margin_std": 8.381417274475098, "beta_dpo/beta_used": 0.19482511281967163, "beta_dpo/beta_used_raw": 0.19482511281967163, "beta_dpo/gap_mean": 17.792781829833984, "beta_dpo/gap_std": 27.38411521911621, "beta_dpo/loss_margin_mean": 21.0446720123291, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5230536659108088, "grad_norm": 143.16773986816406, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 0.5135695934295654, "logits/rejected": 0.43501418828964233, "loss": 1.4406, "step": 346 }, { "beta_dpo/beta": 0.02533472701907158, "beta_dpo/beta_margin_grad_mean": -0.4315335750579834, "beta_dpo/beta_margin_grad_std": 0.17104710638523102, "beta_dpo/beta_margin_mean": 0.3843132555484772, "beta_dpo/beta_margin_std": 0.9899463653564453, "beta_dpo/beta_used": 0.02533472701907158, "beta_dpo/beta_used_raw": -0.21560555696487427, "beta_dpo/gap_mean": 17.808048248291016, "beta_dpo/gap_std": 27.780567169189453, "beta_dpo/loss_margin_mean": 16.444530487060547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5245653817082389, "grad_norm": 18.07720375061035, "learning_rate": 2.737640108260456e-07, "logits/chosen": 0.5739601850509644, "logits/rejected": 0.5248545408248901, "loss": 1.1428, "step": 347 }, { "beta_dpo/beta": 0.001988435396924615, "beta_dpo/beta_margin_grad_mean": -0.4891984760761261, "beta_dpo/beta_margin_grad_std": 0.01943393610417843, "beta_dpo/beta_margin_mean": 0.04333081468939781, "beta_dpo/beta_margin_std": 0.07802307605743408, "beta_dpo/beta_used": 0.001988435396924615, "beta_dpo/beta_used_raw": -0.11356782168149948, "beta_dpo/gap_mean": 18.29491424560547, "beta_dpo/gap_std": 28.60049057006836, "beta_dpo/loss_margin_mean": 20.155725479125977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5260770975056689, "grad_norm": 1.6436684131622314, "learning_rate": 2.724474525774229e-07, "logits/chosen": 0.6003262400627136, "logits/rejected": 0.5684754252433777, "loss": 1.3561, "step": 348 }, { "beta_dpo/beta": 0.3341373801231384, "beta_dpo/beta_margin_grad_mean": -0.2516961991786957, "beta_dpo/beta_margin_grad_std": 0.36008405685424805, "beta_dpo/beta_margin_mean": 7.000345706939697, "beta_dpo/beta_margin_std": 9.868422508239746, "beta_dpo/beta_used": 0.3341373801231384, "beta_dpo/beta_used_raw": 0.3341373801231384, "beta_dpo/gap_mean": 18.376079559326172, "beta_dpo/gap_std": 29.04025650024414, "beta_dpo/loss_margin_mean": 20.50440216064453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.527588813303099, "grad_norm": 210.76205444335938, "learning_rate": 2.711302664252973e-07, "logits/chosen": 0.49958592653274536, "logits/rejected": 0.4125804305076599, "loss": 1.4889, "step": 349 }, { "beta_dpo/beta": 0.20781663060188293, "beta_dpo/beta_margin_grad_mean": -0.23984435200691223, "beta_dpo/beta_margin_grad_std": 0.3351220190525055, "beta_dpo/beta_margin_mean": 5.478157043457031, "beta_dpo/beta_margin_std": 7.281736850738525, "beta_dpo/beta_used": 0.20781663060188293, "beta_dpo/beta_used_raw": 0.20781663060188293, "beta_dpo/gap_mean": 19.763946533203125, "beta_dpo/gap_std": 29.05038070678711, "beta_dpo/loss_margin_mean": 25.382022857666016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5291005291005291, "grad_norm": 110.43148040771484, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.48420655727386475, "logits/rejected": 0.40342360734939575, "loss": 1.1176, "step": 350 }, { "beta_dpo/beta": 0.3125341534614563, "beta_dpo/beta_margin_grad_mean": -0.24661174416542053, "beta_dpo/beta_margin_grad_std": 0.33939671516418457, "beta_dpo/beta_margin_mean": 5.621974468231201, "beta_dpo/beta_margin_std": 14.418107032775879, "beta_dpo/beta_used": 0.3125341534614563, "beta_dpo/beta_used_raw": 0.3125341534614563, "beta_dpo/gap_mean": 19.951894760131836, "beta_dpo/gap_std": 29.225894927978516, "beta_dpo/loss_margin_mean": 19.42828941345215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5306122448979592, "grad_norm": 142.16448974609375, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 0.4121101200580597, "logits/rejected": 0.33884552121162415, "loss": 0.777, "step": 351 }, { "beta_dpo/beta": 0.09537062793970108, "beta_dpo/beta_margin_grad_mean": -0.35886648297309875, "beta_dpo/beta_margin_grad_std": 0.26381397247314453, "beta_dpo/beta_margin_mean": 2.0912771224975586, "beta_dpo/beta_margin_std": 4.213487148284912, "beta_dpo/beta_used": 0.09537062793970108, "beta_dpo/beta_used_raw": 0.08282928168773651, "beta_dpo/gap_mean": 19.89529800415039, "beta_dpo/gap_std": 29.30970001220703, "beta_dpo/loss_margin_mean": 19.572755813598633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5321239606953893, "grad_norm": 67.9634780883789, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 0.5165045261383057, "logits/rejected": 0.4606800675392151, "loss": 1.2334, "step": 352 }, { "beta_dpo/beta": 0.15907520055770874, "beta_dpo/beta_margin_grad_mean": -0.38286593556404114, "beta_dpo/beta_margin_grad_std": 0.29630035161972046, "beta_dpo/beta_margin_mean": 3.0601959228515625, "beta_dpo/beta_margin_std": 7.680595397949219, "beta_dpo/beta_used": 0.15907520055770874, "beta_dpo/beta_used_raw": 0.15692223608493805, "beta_dpo/gap_mean": 19.871681213378906, "beta_dpo/gap_std": 29.349777221679688, "beta_dpo/loss_margin_mean": 21.176862716674805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5336356764928194, "grad_norm": 143.2347412109375, "learning_rate": 2.658559799141411e-07, "logits/chosen": 0.46999743580818176, "logits/rejected": 0.4698483347892761, "loss": 1.4572, "step": 353 }, { "beta_dpo/beta": 0.20546159148216248, "beta_dpo/beta_margin_grad_mean": -0.32804247736930847, "beta_dpo/beta_margin_grad_std": 0.2996892035007477, "beta_dpo/beta_margin_mean": 5.386460304260254, "beta_dpo/beta_margin_std": 9.564033508300781, "beta_dpo/beta_used": 0.20546159148216248, "beta_dpo/beta_used_raw": 0.20546159148216248, "beta_dpo/gap_mean": 20.4854793548584, "beta_dpo/gap_std": 29.17739486694336, "beta_dpo/loss_margin_mean": 21.667699813842773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5351473922902494, "grad_norm": 75.140869140625, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 0.5394304394721985, "logits/rejected": 0.4111761450767517, "loss": 0.992, "step": 354 }, { "beta_dpo/beta": 0.321403831243515, "beta_dpo/beta_margin_grad_mean": -0.2626912295818329, "beta_dpo/beta_margin_grad_std": 0.38087186217308044, "beta_dpo/beta_margin_mean": 6.422491073608398, "beta_dpo/beta_margin_std": 12.237265586853027, "beta_dpo/beta_used": 0.321403831243515, "beta_dpo/beta_used_raw": 0.321403831243515, "beta_dpo/gap_mean": 20.343292236328125, "beta_dpo/gap_std": 29.79207420349121, "beta_dpo/loss_margin_mean": 21.592735290527344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5366591080876795, "grad_norm": 187.55264282226562, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.5159712433815002, "logits/rejected": 0.3886120319366455, "loss": 1.3602, "step": 355 }, { "beta_dpo/beta": 0.1345498114824295, "beta_dpo/beta_margin_grad_mean": -0.3875563144683838, "beta_dpo/beta_margin_grad_std": 0.28731241822242737, "beta_dpo/beta_margin_mean": 2.935427188873291, "beta_dpo/beta_margin_std": 7.447274208068848, "beta_dpo/beta_used": 0.1345498114824295, "beta_dpo/beta_used_raw": 0.059556834399700165, "beta_dpo/gap_mean": 20.36905288696289, "beta_dpo/gap_std": 30.672847747802734, "beta_dpo/loss_margin_mean": 17.87114715576172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5381708238851096, "grad_norm": 79.05123901367188, "learning_rate": 2.618954789559356e-07, "logits/chosen": 0.4842956066131592, "logits/rejected": 0.4091559946537018, "loss": 1.4938, "step": 356 }, { "beta_dpo/beta": 0.2215581089258194, "beta_dpo/beta_margin_grad_mean": -0.3200496733188629, "beta_dpo/beta_margin_grad_std": 0.29043084383010864, "beta_dpo/beta_margin_mean": 5.6213555335998535, "beta_dpo/beta_margin_std": 10.097216606140137, "beta_dpo/beta_used": 0.2215581089258194, "beta_dpo/beta_used_raw": -0.05693148076534271, "beta_dpo/gap_mean": 19.813404083251953, "beta_dpo/gap_std": 30.41214370727539, "beta_dpo/loss_margin_mean": 15.453861236572266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5396825396825397, "grad_norm": 83.3572998046875, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 0.4960511028766632, "logits/rejected": 0.46816837787628174, "loss": 1.069, "step": 357 }, { "beta_dpo/beta": 0.2554855942726135, "beta_dpo/beta_margin_grad_mean": -0.2960377633571625, "beta_dpo/beta_margin_grad_std": 0.3241449296474457, "beta_dpo/beta_margin_mean": 5.539848327636719, "beta_dpo/beta_margin_std": 10.479439735412598, "beta_dpo/beta_used": 0.2554855942726135, "beta_dpo/beta_used_raw": 0.2554855942726135, "beta_dpo/gap_mean": 19.572742462158203, "beta_dpo/gap_std": 30.08466148376465, "beta_dpo/loss_margin_mean": 23.14980125427246, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5411942554799698, "grad_norm": 239.4638671875, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 0.42851513624191284, "logits/rejected": 0.43600502610206604, "loss": 1.5906, "step": 358 }, { "beta_dpo/beta": 0.03553559258580208, "beta_dpo/beta_margin_grad_mean": -0.416434109210968, "beta_dpo/beta_margin_grad_std": 0.20974458754062653, "beta_dpo/beta_margin_mean": 0.5857033729553223, "beta_dpo/beta_margin_std": 1.500328540802002, "beta_dpo/beta_used": 0.03553559258580208, "beta_dpo/beta_used_raw": -0.23965869843959808, "beta_dpo/gap_mean": 18.273534774780273, "beta_dpo/gap_std": 29.011173248291016, "beta_dpo/loss_margin_mean": 10.77413272857666, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5427059712773998, "grad_norm": 25.886608123779297, "learning_rate": 2.579319833745169e-07, "logits/chosen": 0.44077521562576294, "logits/rejected": 0.4130966365337372, "loss": 1.0985, "step": 359 }, { "beta_dpo/beta": 0.033735036849975586, "beta_dpo/beta_margin_grad_mean": -0.41033679246902466, "beta_dpo/beta_margin_grad_std": 0.18321087956428528, "beta_dpo/beta_margin_mean": 0.5736103653907776, "beta_dpo/beta_margin_std": 1.292583703994751, "beta_dpo/beta_used": 0.033735036849975586, "beta_dpo/beta_used_raw": -0.03152921795845032, "beta_dpo/gap_mean": 18.03290367126465, "beta_dpo/gap_std": 28.54608917236328, "beta_dpo/loss_margin_mean": 17.090740203857422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54421768707483, "grad_norm": 20.45010757446289, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.39448633790016174, "logits/rejected": 0.30303341150283813, "loss": 1.095, "step": 360 }, { "beta_dpo/beta": 0.11376432329416275, "beta_dpo/beta_margin_grad_mean": -0.268765389919281, "beta_dpo/beta_margin_grad_std": 0.27982643246650696, "beta_dpo/beta_margin_mean": 2.057039737701416, "beta_dpo/beta_margin_std": 3.291944980621338, "beta_dpo/beta_used": 0.11376432329416275, "beta_dpo/beta_used_raw": 0.11376432329416275, "beta_dpo/gap_mean": 18.154455184936523, "beta_dpo/gap_std": 28.302597045898438, "beta_dpo/loss_margin_mean": 18.740699768066406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54572940287226, "grad_norm": 60.24184799194336, "learning_rate": 2.552884820191154e-07, "logits/chosen": 0.5338229537010193, "logits/rejected": 0.48165255784988403, "loss": 0.7756, "step": 361 }, { "beta_dpo/beta": 0.21435359120368958, "beta_dpo/beta_margin_grad_mean": -0.3343871533870697, "beta_dpo/beta_margin_grad_std": 0.2788733243942261, "beta_dpo/beta_margin_mean": 6.487202167510986, "beta_dpo/beta_margin_std": 11.84697437286377, "beta_dpo/beta_used": 0.21435359120368958, "beta_dpo/beta_used_raw": 0.10896323621273041, "beta_dpo/gap_mean": 18.30129623413086, "beta_dpo/gap_std": 27.87055206298828, "beta_dpo/loss_margin_mean": 22.009746551513672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54724111866969, "grad_norm": 104.80538940429688, "learning_rate": 2.53966490958702e-07, "logits/chosen": 0.5371885299682617, "logits/rejected": 0.4334333539009094, "loss": 1.3946, "step": 362 }, { "beta_dpo/beta": 0.06475654244422913, "beta_dpo/beta_margin_grad_mean": -0.35036513209342957, "beta_dpo/beta_margin_grad_std": 0.2489209771156311, "beta_dpo/beta_margin_mean": 1.5037075281143188, "beta_dpo/beta_margin_std": 2.7491438388824463, "beta_dpo/beta_used": 0.06475654244422913, "beta_dpo/beta_used_raw": -0.055414482951164246, "beta_dpo/gap_mean": 19.423065185546875, "beta_dpo/gap_std": 27.947891235351562, "beta_dpo/loss_margin_mean": 21.895166397094727, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5487528344671202, "grad_norm": 30.40268325805664, "learning_rate": 2.526443889470099e-07, "logits/chosen": 0.5154651403427124, "logits/rejected": 0.3899514079093933, "loss": 1.0825, "step": 363 }, { "beta_dpo/beta": 0.17884472012519836, "beta_dpo/beta_margin_grad_mean": -0.2793542444705963, "beta_dpo/beta_margin_grad_std": 0.3477945923805237, "beta_dpo/beta_margin_mean": 3.8306987285614014, "beta_dpo/beta_margin_std": 5.927258491516113, "beta_dpo/beta_used": 0.17884472012519836, "beta_dpo/beta_used_raw": 0.17884472012519836, "beta_dpo/gap_mean": 19.666500091552734, "beta_dpo/gap_std": 28.525062561035156, "beta_dpo/loss_margin_mean": 21.15984535217285, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5502645502645502, "grad_norm": 92.59161376953125, "learning_rate": 2.513222129660744e-07, "logits/chosen": 0.35895150899887085, "logits/rejected": 0.26696082949638367, "loss": 1.1274, "step": 364 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4951821565628052, "beta_dpo/beta_margin_grad_std": 0.005717262625694275, "beta_dpo/beta_margin_mean": 0.01927482709288597, "beta_dpo/beta_margin_std": 0.022875025868415833, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03625158965587616, "beta_dpo/gap_mean": 19.75531578063965, "beta_dpo/gap_std": 27.833091735839844, "beta_dpo/loss_margin_mean": 19.27482795715332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5517762660619804, "grad_norm": 0.978028416633606, "learning_rate": 2.5e-07, "logits/chosen": 0.505312442779541, "logits/rejected": 0.4941166639328003, "loss": 1.369, "step": 365 }, { "beta_dpo/beta": 0.034532975405454636, "beta_dpo/beta_margin_grad_mean": -0.3938365876674652, "beta_dpo/beta_margin_grad_std": 0.20211590826511383, "beta_dpo/beta_margin_mean": 0.7358689308166504, "beta_dpo/beta_margin_std": 1.4430540800094604, "beta_dpo/beta_used": 0.034532975405454636, "beta_dpo/beta_used_raw": 0.0041874852031469345, "beta_dpo/gap_mean": 19.195980072021484, "beta_dpo/gap_std": 27.799442291259766, "beta_dpo/loss_margin_mean": 17.422622680664062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5532879818594104, "grad_norm": 19.836706161499023, "learning_rate": 2.486777870339255e-07, "logits/chosen": 0.4261232316493988, "logits/rejected": 0.4047813415527344, "loss": 1.1212, "step": 366 }, { "beta_dpo/beta": 0.06913218647241592, "beta_dpo/beta_margin_grad_mean": -0.38716188073158264, "beta_dpo/beta_margin_grad_std": 0.24359238147735596, "beta_dpo/beta_margin_mean": 0.9459067583084106, "beta_dpo/beta_margin_std": 2.751627206802368, "beta_dpo/beta_used": 0.06913218647241592, "beta_dpo/beta_used_raw": 0.01255800575017929, "beta_dpo/gap_mean": 18.8731689453125, "beta_dpo/gap_std": 27.13546371459961, "beta_dpo/loss_margin_mean": 15.656370162963867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5547996976568406, "grad_norm": 35.99217987060547, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 0.46010124683380127, "logits/rejected": 0.3569197952747345, "loss": 0.9947, "step": 367 }, { "beta_dpo/beta": 0.06575921177864075, "beta_dpo/beta_margin_grad_mean": -0.3926805555820465, "beta_dpo/beta_margin_grad_std": 0.25895747542381287, "beta_dpo/beta_margin_mean": 1.0753775835037231, "beta_dpo/beta_margin_std": 2.560882091522217, "beta_dpo/beta_used": 0.06575921177864075, "beta_dpo/beta_used_raw": 0.008796077221632004, "beta_dpo/gap_mean": 18.227333068847656, "beta_dpo/gap_std": 26.96753692626953, "beta_dpo/loss_margin_mean": 15.894394874572754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5563114134542706, "grad_norm": 40.85283279418945, "learning_rate": 2.46033509041298e-07, "logits/chosen": 0.31978198885917664, "logits/rejected": 0.31229010224342346, "loss": 1.1472, "step": 368 }, { "beta_dpo/beta": 0.04814423620700836, "beta_dpo/beta_margin_grad_mean": -0.39616596698760986, "beta_dpo/beta_margin_grad_std": 0.2142905592918396, "beta_dpo/beta_margin_mean": 0.8517483472824097, "beta_dpo/beta_margin_std": 1.803131103515625, "beta_dpo/beta_used": 0.04814423620700836, "beta_dpo/beta_used_raw": -0.09424681216478348, "beta_dpo/gap_mean": 17.42707061767578, "beta_dpo/gap_std": 26.661537170410156, "beta_dpo/loss_margin_mean": 14.567131996154785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5578231292517006, "grad_norm": 40.79011917114258, "learning_rate": 2.447115179808846e-07, "logits/chosen": 0.4484362304210663, "logits/rejected": 0.3975934386253357, "loss": 1.1383, "step": 369 }, { "beta_dpo/beta": 0.19350400567054749, "beta_dpo/beta_margin_grad_mean": -0.2489045411348343, "beta_dpo/beta_margin_grad_std": 0.32901516556739807, "beta_dpo/beta_margin_mean": 3.838660478591919, "beta_dpo/beta_margin_std": 5.751165866851807, "beta_dpo/beta_used": 0.19350400567054749, "beta_dpo/beta_used_raw": 0.19350400567054749, "beta_dpo/gap_mean": 17.788787841796875, "beta_dpo/gap_std": 27.057907104492188, "beta_dpo/loss_margin_mean": 19.834497451782227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5593348450491308, "grad_norm": 87.48202514648438, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.5482546091079712, "logits/rejected": 0.47974199056625366, "loss": 0.9597, "step": 370 }, { "beta_dpo/beta": 0.07270210981369019, "beta_dpo/beta_margin_grad_mean": -0.3535962700843811, "beta_dpo/beta_margin_grad_std": 0.263553649187088, "beta_dpo/beta_margin_mean": 1.738492727279663, "beta_dpo/beta_margin_std": 3.044748306274414, "beta_dpo/beta_used": 0.07270210981369019, "beta_dpo/beta_used_raw": 0.0545848049223423, "beta_dpo/gap_mean": 17.900829315185547, "beta_dpo/gap_std": 27.141389846801758, "beta_dpo/loss_margin_mean": 19.347623825073242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5608465608465608, "grad_norm": 53.165611267089844, "learning_rate": 2.420680166254831e-07, "logits/chosen": 0.5655019283294678, "logits/rejected": 0.5300487279891968, "loss": 1.2018, "step": 371 }, { "beta_dpo/beta": 0.053923849016427994, "beta_dpo/beta_margin_grad_mean": -0.4136897623538971, "beta_dpo/beta_margin_grad_std": 0.2468746155500412, "beta_dpo/beta_margin_mean": 0.8222647309303284, "beta_dpo/beta_margin_std": 2.5193469524383545, "beta_dpo/beta_used": 0.053923849016427994, "beta_dpo/beta_used_raw": -0.09299886226654053, "beta_dpo/gap_mean": 16.81853485107422, "beta_dpo/gap_std": 27.437904357910156, "beta_dpo/loss_margin_mean": 10.160256385803223, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.562358276643991, "grad_norm": 40.79886245727539, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 0.4797401428222656, "logits/rejected": 0.39019930362701416, "loss": 1.1626, "step": 372 }, { "beta_dpo/beta": 0.019156690686941147, "beta_dpo/beta_margin_grad_mean": -0.4477452337741852, "beta_dpo/beta_margin_grad_std": 0.15734830498695374, "beta_dpo/beta_margin_mean": 0.2678249180316925, "beta_dpo/beta_margin_std": 0.8076358437538147, "beta_dpo/beta_used": 0.019156690686941147, "beta_dpo/beta_used_raw": -0.09059305489063263, "beta_dpo/gap_mean": 16.431793212890625, "beta_dpo/gap_std": 27.95029640197754, "beta_dpo/loss_margin_mean": 14.914463996887207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.563869992441421, "grad_norm": 18.376066207885742, "learning_rate": 2.394254027623792e-07, "logits/chosen": 0.47552430629730225, "logits/rejected": 0.40688779950141907, "loss": 1.2204, "step": 373 }, { "beta_dpo/beta": 0.30811965465545654, "beta_dpo/beta_margin_grad_mean": -0.1607130467891693, "beta_dpo/beta_margin_grad_std": 0.29871711134910583, "beta_dpo/beta_margin_mean": 7.594576835632324, "beta_dpo/beta_margin_std": 9.025672912597656, "beta_dpo/beta_used": 0.30811965465545654, "beta_dpo/beta_used_raw": 0.30811965465545654, "beta_dpo/gap_mean": 17.549823760986328, "beta_dpo/gap_std": 27.397924423217773, "beta_dpo/loss_margin_mean": 23.819591522216797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5653817082388511, "grad_norm": 150.54312133789062, "learning_rate": 2.381045210440644e-07, "logits/chosen": 0.38389527797698975, "logits/rejected": 0.37589797377586365, "loss": 0.9835, "step": 374 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49679815769195557, "beta_dpo/beta_margin_grad_std": 0.006316898390650749, "beta_dpo/beta_margin_mean": 0.012809698469936848, "beta_dpo/beta_margin_std": 0.0252725537866354, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.31367141008377075, "beta_dpo/gap_mean": 17.115184783935547, "beta_dpo/gap_std": 26.947261810302734, "beta_dpo/loss_margin_mean": 12.809698104858398, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5668934240362812, "grad_norm": 0.853339433670044, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.495971143245697, "logits/rejected": 0.43806982040405273, "loss": 1.3762, "step": 375 }, { "beta_dpo/beta": 0.1979072391986847, "beta_dpo/beta_margin_grad_mean": -0.24381671845912933, "beta_dpo/beta_margin_grad_std": 0.3681492805480957, "beta_dpo/beta_margin_mean": 4.5390448570251465, "beta_dpo/beta_margin_std": 6.06004524230957, "beta_dpo/beta_used": 0.1979072391986847, "beta_dpo/beta_used_raw": 0.1979072391986847, "beta_dpo/gap_mean": 17.752330780029297, "beta_dpo/gap_std": 27.663818359375, "beta_dpo/loss_margin_mean": 22.940746307373047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5684051398337112, "grad_norm": 116.034912109375, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 0.531902551651001, "logits/rejected": 0.46265655755996704, "loss": 1.6455, "step": 376 }, { "beta_dpo/beta": 0.12244483828544617, "beta_dpo/beta_margin_grad_mean": -0.3793109059333801, "beta_dpo/beta_margin_grad_std": 0.2850496768951416, "beta_dpo/beta_margin_mean": 2.0889053344726562, "beta_dpo/beta_margin_std": 4.836654186248779, "beta_dpo/beta_used": 0.12244483828544617, "beta_dpo/beta_used_raw": 0.09535021334886551, "beta_dpo/gap_mean": 17.380741119384766, "beta_dpo/gap_std": 27.402048110961914, "beta_dpo/loss_margin_mean": 14.553292274475098, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5699168556311414, "grad_norm": 68.49671936035156, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 0.44863414764404297, "logits/rejected": 0.42397451400756836, "loss": 1.0434, "step": 377 }, { "beta_dpo/beta": 0.10824133455753326, "beta_dpo/beta_margin_grad_mean": -0.3810761570930481, "beta_dpo/beta_margin_grad_std": 0.2576172351837158, "beta_dpo/beta_margin_mean": 1.9034552574157715, "beta_dpo/beta_margin_std": 4.89661979675293, "beta_dpo/beta_used": 0.10824133455753326, "beta_dpo/beta_used_raw": 0.014959946274757385, "beta_dpo/gap_mean": 16.712974548339844, "beta_dpo/gap_std": 26.61426544189453, "beta_dpo/loss_margin_mean": 13.993646621704102, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5714285714285714, "grad_norm": 92.63347625732422, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 0.519462525844574, "logits/rejected": 0.46947789192199707, "loss": 1.3392, "step": 378 }, { "beta_dpo/beta": 0.3307998776435852, "beta_dpo/beta_margin_grad_mean": -0.21893391013145447, "beta_dpo/beta_margin_grad_std": 0.35074496269226074, "beta_dpo/beta_margin_mean": 6.119668006896973, "beta_dpo/beta_margin_std": 8.400911331176758, "beta_dpo/beta_used": 0.3307998776435852, "beta_dpo/beta_used_raw": 0.3307998776435852, "beta_dpo/gap_mean": 17.011268615722656, "beta_dpo/gap_std": 26.57973861694336, "beta_dpo/loss_margin_mean": 18.500185012817383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5729402872260015, "grad_norm": 151.25961303710938, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 0.4980725049972534, "logits/rejected": 0.4293912947177887, "loss": 0.7841, "step": 379 }, { "beta_dpo/beta": 0.29439330101013184, "beta_dpo/beta_margin_grad_mean": -0.22058521211147308, "beta_dpo/beta_margin_grad_std": 0.3584253489971161, "beta_dpo/beta_margin_mean": 6.252655982971191, "beta_dpo/beta_margin_std": 8.074928283691406, "beta_dpo/beta_used": 0.29439330101013184, "beta_dpo/beta_used_raw": 0.29439330101013184, "beta_dpo/gap_mean": 17.592838287353516, "beta_dpo/gap_std": 26.798145294189453, "beta_dpo/loss_margin_mean": 20.89601707458496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5744520030234316, "grad_norm": 136.94004821777344, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.4958919882774353, "logits/rejected": 0.45453202724456787, "loss": 1.1291, "step": 380 }, { "beta_dpo/beta": 0.10175088793039322, "beta_dpo/beta_margin_grad_mean": -0.42357251048088074, "beta_dpo/beta_margin_grad_std": 0.2839849889278412, "beta_dpo/beta_margin_mean": 1.3963819742202759, "beta_dpo/beta_margin_std": 4.324319839477539, "beta_dpo/beta_used": 0.10175088793039322, "beta_dpo/beta_used_raw": -0.13142962753772736, "beta_dpo/gap_mean": 16.102523803710938, "beta_dpo/gap_std": 26.190248489379883, "beta_dpo/loss_margin_mean": 7.799356460571289, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5759637188208617, "grad_norm": 76.48500061035156, "learning_rate": 2.288697335747027e-07, "logits/chosen": 0.4466308355331421, "logits/rejected": 0.4188675880432129, "loss": 1.1505, "step": 381 }, { "beta_dpo/beta": 0.11290884017944336, "beta_dpo/beta_margin_grad_mean": -0.33594417572021484, "beta_dpo/beta_margin_grad_std": 0.2575390040874481, "beta_dpo/beta_margin_mean": 2.236764907836914, "beta_dpo/beta_margin_std": 4.038408279418945, "beta_dpo/beta_used": 0.11290884017944336, "beta_dpo/beta_used_raw": -0.006284750998020172, "beta_dpo/gap_mean": 15.629575729370117, "beta_dpo/gap_std": 25.694416046142578, "beta_dpo/loss_margin_mean": 15.05109691619873, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5774754346182918, "grad_norm": 31.48938751220703, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 0.4852759838104248, "logits/rejected": 0.43432578444480896, "loss": 0.8542, "step": 382 }, { "beta_dpo/beta": 0.0896899476647377, "beta_dpo/beta_margin_grad_mean": -0.38577207922935486, "beta_dpo/beta_margin_grad_std": 0.25467216968536377, "beta_dpo/beta_margin_mean": 1.8165364265441895, "beta_dpo/beta_margin_std": 3.8192477226257324, "beta_dpo/beta_used": 0.0896899476647377, "beta_dpo/beta_used_raw": 0.011013053357601166, "beta_dpo/gap_mean": 16.150833129882812, "beta_dpo/gap_std": 25.653362274169922, "beta_dpo/loss_margin_mean": 16.843584060668945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5789871504157218, "grad_norm": 63.09295654296875, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 0.34624814987182617, "logits/rejected": 0.3639281094074249, "loss": 1.1991, "step": 383 }, { "beta_dpo/beta": 0.07606925070285797, "beta_dpo/beta_margin_grad_mean": -0.37067925930023193, "beta_dpo/beta_margin_grad_std": 0.26238465309143066, "beta_dpo/beta_margin_mean": 1.464687466621399, "beta_dpo/beta_margin_std": 2.8831002712249756, "beta_dpo/beta_used": 0.07606925070285797, "beta_dpo/beta_used_raw": 0.03083164617419243, "beta_dpo/gap_mean": 16.12920379638672, "beta_dpo/gap_std": 25.395118713378906, "beta_dpo/loss_margin_mean": 17.43052101135254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5804988662131519, "grad_norm": 55.40347671508789, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 0.48950695991516113, "logits/rejected": 0.44139331579208374, "loss": 1.2499, "step": 384 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49544164538383484, "beta_dpo/beta_margin_grad_std": 0.005564127117395401, "beta_dpo/beta_margin_mean": 0.018236415460705757, "beta_dpo/beta_margin_std": 0.022261304780840874, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07840212434530258, "beta_dpo/gap_mean": 16.50790786743164, "beta_dpo/gap_std": 24.757612228393555, "beta_dpo/loss_margin_mean": 18.23641586303711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.582010582010582, "grad_norm": 0.7727059721946716, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.40697604417800903, "logits/rejected": 0.36528295278549194, "loss": 1.3729, "step": 385 }, { "beta_dpo/beta": 0.10942035913467407, "beta_dpo/beta_margin_grad_mean": -0.36618587374687195, "beta_dpo/beta_margin_grad_std": 0.2598530054092407, "beta_dpo/beta_margin_mean": 1.4770482778549194, "beta_dpo/beta_margin_std": 3.5450892448425293, "beta_dpo/beta_used": 0.10942035913467407, "beta_dpo/beta_used_raw": 0.09165161103010178, "beta_dpo/gap_mean": 16.45754623413086, "beta_dpo/gap_std": 24.44674301147461, "beta_dpo/loss_margin_mean": 14.757094383239746, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5835222978080121, "grad_norm": 58.915008544921875, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 0.3973620533943176, "logits/rejected": 0.3377264738082886, "loss": 0.9296, "step": 386 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49545782804489136, "beta_dpo/beta_margin_grad_std": 0.005244361702352762, "beta_dpo/beta_margin_mean": 0.01817174255847931, "beta_dpo/beta_margin_std": 0.020982708781957626, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.047689471393823624, "beta_dpo/gap_mean": 16.546337127685547, "beta_dpo/gap_std": 23.793880462646484, "beta_dpo/loss_margin_mean": 18.171741485595703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5850340136054422, "grad_norm": 0.7535995841026306, "learning_rate": 2.209767714686924e-07, "logits/chosen": 0.4398246109485626, "logits/rejected": 0.344159871339798, "loss": 1.3723, "step": 387 }, { "beta_dpo/beta": 0.07745764404535294, "beta_dpo/beta_margin_grad_mean": -0.4048055410385132, "beta_dpo/beta_margin_grad_std": 0.2543579638004303, "beta_dpo/beta_margin_mean": 1.2141345739364624, "beta_dpo/beta_margin_std": 2.8941874504089355, "beta_dpo/beta_used": 0.07745764404535294, "beta_dpo/beta_used_raw": -0.088335320353508, "beta_dpo/gap_mean": 16.209470748901367, "beta_dpo/gap_std": 23.847801208496094, "beta_dpo/loss_margin_mean": 12.559057235717773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5865457294028723, "grad_norm": 61.19523620605469, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 0.45595115423202515, "logits/rejected": 0.4419313073158264, "loss": 1.1601, "step": 388 }, { "beta_dpo/beta": 0.12304967641830444, "beta_dpo/beta_margin_grad_mean": -0.38289767503738403, "beta_dpo/beta_margin_grad_std": 0.2779510021209717, "beta_dpo/beta_margin_mean": 2.046694755554199, "beta_dpo/beta_margin_std": 4.120178699493408, "beta_dpo/beta_used": 0.12304967641830444, "beta_dpo/beta_used_raw": 0.048085957765579224, "beta_dpo/gap_mean": 15.835220336914062, "beta_dpo/gap_std": 24.056732177734375, "beta_dpo/loss_margin_mean": 15.953550338745117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5880574452003023, "grad_norm": 83.82422637939453, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 0.468747079372406, "logits/rejected": 0.43019360303878784, "loss": 1.186, "step": 389 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49623188376426697, "beta_dpo/beta_margin_grad_std": 0.0054491744376719, "beta_dpo/beta_margin_mean": 0.015074868686497211, "beta_dpo/beta_margin_std": 0.021800970658659935, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18968456983566284, "beta_dpo/gap_mean": 15.827681541442871, "beta_dpo/gap_std": 23.630695343017578, "beta_dpo/loss_margin_mean": 15.074868202209473, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5895691609977324, "grad_norm": 0.6730204224586487, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.5367964506149292, "logits/rejected": 0.4671306014060974, "loss": 1.3754, "step": 390 }, { "beta_dpo/beta": 0.3387356102466583, "beta_dpo/beta_margin_grad_mean": -0.3062863051891327, "beta_dpo/beta_margin_grad_std": 0.3581731617450714, "beta_dpo/beta_margin_mean": 5.966565132141113, "beta_dpo/beta_margin_std": 12.230363845825195, "beta_dpo/beta_used": 0.3387356102466583, "beta_dpo/beta_used_raw": 0.3387356102466583, "beta_dpo/gap_mean": 15.984790802001953, "beta_dpo/gap_std": 24.179485321044922, "beta_dpo/loss_margin_mean": 17.596973419189453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5910808767951625, "grad_norm": 227.28407287597656, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 0.47585949301719666, "logits/rejected": 0.4066176414489746, "loss": 1.151, "step": 391 }, { "beta_dpo/beta": 0.07082900404930115, "beta_dpo/beta_margin_grad_mean": -0.36467647552490234, "beta_dpo/beta_margin_grad_std": 0.2492174655199051, "beta_dpo/beta_margin_mean": 1.7737425565719604, "beta_dpo/beta_margin_std": 3.359530448913574, "beta_dpo/beta_used": 0.07082900404930115, "beta_dpo/beta_used_raw": 0.06858093291521072, "beta_dpo/gap_mean": 16.43636131286621, "beta_dpo/gap_std": 25.151107788085938, "beta_dpo/loss_margin_mean": 20.14677619934082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5925925925925926, "grad_norm": 48.511722564697266, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 0.5203168392181396, "logits/rejected": 0.46064090728759766, "loss": 1.152, "step": 392 }, { "beta_dpo/beta": 0.11964704096317291, "beta_dpo/beta_margin_grad_mean": -0.34089747071266174, "beta_dpo/beta_margin_grad_std": 0.254536509513855, "beta_dpo/beta_margin_mean": 2.8861536979675293, "beta_dpo/beta_margin_std": 5.0302629470825195, "beta_dpo/beta_used": 0.11964704096317291, "beta_dpo/beta_used_raw": 0.06222714111208916, "beta_dpo/gap_mean": 17.573963165283203, "beta_dpo/gap_std": 25.1307373046875, "beta_dpo/loss_margin_mean": 20.435420989990234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5941043083900227, "grad_norm": 52.03828430175781, "learning_rate": 2.131130332936195e-07, "logits/chosen": 0.4791126251220703, "logits/rejected": 0.4386283755302429, "loss": 1.1132, "step": 393 }, { "beta_dpo/beta": 0.06266598403453827, "beta_dpo/beta_margin_grad_mean": -0.37125974893569946, "beta_dpo/beta_margin_grad_std": 0.2566405236721039, "beta_dpo/beta_margin_mean": 1.087797999382019, "beta_dpo/beta_margin_std": 2.204742908477783, "beta_dpo/beta_used": 0.06266598403453827, "beta_dpo/beta_used_raw": -0.004001658409833908, "beta_dpo/gap_mean": 17.02603530883789, "beta_dpo/gap_std": 24.27169418334961, "beta_dpo/loss_margin_mean": 14.85161018371582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5956160241874527, "grad_norm": 37.43368148803711, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 0.5228984355926514, "logits/rejected": 0.48896557092666626, "loss": 1.1447, "step": 394 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49653303623199463, "beta_dpo/beta_margin_grad_std": 0.0054858047515153885, "beta_dpo/beta_margin_mean": 0.013870153576135635, "beta_dpo/beta_margin_std": 0.021947424858808517, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.23153652250766754, "beta_dpo/gap_mean": 16.453784942626953, "beta_dpo/gap_std": 23.770017623901367, "beta_dpo/loss_margin_mean": 13.870152473449707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5971277399848829, "grad_norm": 0.7639675736427307, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.4781748056411743, "logits/rejected": 0.38682687282562256, "loss": 1.3755, "step": 395 }, { "beta_dpo/beta": 0.0550997294485569, "beta_dpo/beta_margin_grad_mean": -0.3593597114086151, "beta_dpo/beta_margin_grad_std": 0.2185526043176651, "beta_dpo/beta_margin_mean": 1.2726523876190186, "beta_dpo/beta_margin_std": 2.2179882526397705, "beta_dpo/beta_used": 0.0550997294485569, "beta_dpo/beta_used_raw": -0.1203291267156601, "beta_dpo/gap_mean": 16.714210510253906, "beta_dpo/gap_std": 23.554920196533203, "beta_dpo/loss_margin_mean": 15.856443405151367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5986394557823129, "grad_norm": 24.56127166748047, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 0.4733230471611023, "logits/rejected": 0.3763098418712616, "loss": 1.0211, "step": 396 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49783027172088623, "beta_dpo/beta_margin_grad_std": 0.005984327290207148, "beta_dpo/beta_margin_mean": 0.008679854683578014, "beta_dpo/beta_margin_std": 0.023941440507769585, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13786454498767853, "beta_dpo/gap_mean": 15.07978630065918, "beta_dpo/gap_std": 23.18680191040039, "beta_dpo/loss_margin_mean": 8.679854393005371, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.600151171579743, "grad_norm": 0.7217801809310913, "learning_rate": 2.078906883274924e-07, "logits/chosen": 0.4001700282096863, "logits/rejected": 0.35347041487693787, "loss": 1.3753, "step": 397 }, { "beta_dpo/beta": 0.1426246017217636, "beta_dpo/beta_margin_grad_mean": -0.25918036699295044, "beta_dpo/beta_margin_grad_std": 0.27966228127479553, "beta_dpo/beta_margin_mean": 2.813566207885742, "beta_dpo/beta_margin_std": 3.6704766750335693, "beta_dpo/beta_used": 0.1426246017217636, "beta_dpo/beta_used_raw": 0.1426246017217636, "beta_dpo/gap_mean": 15.561747550964355, "beta_dpo/gap_std": 23.492074966430664, "beta_dpo/loss_margin_mean": 19.25560760498047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6016628873771731, "grad_norm": 48.140342712402344, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.4392428994178772, "logits/rejected": 0.3709562420845032, "loss": 0.7647, "step": 398 }, { "beta_dpo/beta": 0.058677032589912415, "beta_dpo/beta_margin_grad_mean": -0.36431384086608887, "beta_dpo/beta_margin_grad_std": 0.22912275791168213, "beta_dpo/beta_margin_mean": 1.1983758211135864, "beta_dpo/beta_margin_std": 2.138871908187866, "beta_dpo/beta_used": 0.058677032589912415, "beta_dpo/beta_used_raw": -0.1429281234741211, "beta_dpo/gap_mean": 16.37626838684082, "beta_dpo/gap_std": 23.693418502807617, "beta_dpo/loss_margin_mean": 20.51258659362793, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6031746031746031, "grad_norm": 35.35142517089844, "learning_rate": 2.052864371672457e-07, "logits/chosen": 0.37037575244903564, "logits/rejected": 0.23645275831222534, "loss": 1.0667, "step": 399 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4972189664840698, "beta_dpo/beta_margin_grad_std": 0.005795391276478767, "beta_dpo/beta_margin_mean": 0.011126162484288216, "beta_dpo/beta_margin_std": 0.023186147212982178, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.33826786279678345, "beta_dpo/gap_mean": 16.00902557373047, "beta_dpo/gap_std": 23.945209503173828, "beta_dpo/loss_margin_mean": 11.1261625289917, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6046863189720333, "grad_norm": 0.7984305024147034, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.47489333152770996, "logits/rejected": 0.4180806279182434, "loss": 1.3777, "step": 400 }, { "epoch": 0.6046863189720333, "eval_beta_dpo/beta": 0.06983860582113266, "eval_beta_dpo/beta_margin_grad_mean": -0.40659934282302856, "eval_beta_dpo/beta_margin_grad_std": 0.12279289215803146, "eval_beta_dpo/beta_margin_mean": 1.232630729675293, "eval_beta_dpo/beta_margin_std": 1.670194387435913, "eval_beta_dpo/beta_used": 0.06983860582113266, "eval_beta_dpo/beta_used_raw": -0.03454193100333214, "eval_beta_dpo/gap_mean": 15.513669967651367, "eval_beta_dpo/gap_std": 23.837387084960938, "eval_beta_dpo/loss_margin_mean": 13.271306037902832, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 0.43915027379989624, "eval_logits/rejected": 0.387622207403183, "eval_loss": 0.6486362814903259, "eval_runtime": 38.9939, "eval_samples_per_second": 59.061, "eval_steps_per_second": 1.846, "step": 400 }, { "beta_dpo/beta": 0.10424748808145523, "beta_dpo/beta_margin_grad_mean": -0.3355239927768707, "beta_dpo/beta_margin_grad_std": 0.2324715554714203, "beta_dpo/beta_margin_mean": 2.10429310798645, "beta_dpo/beta_margin_std": 3.528271436691284, "beta_dpo/beta_used": 0.10424748808145523, "beta_dpo/beta_used_raw": 0.08866387605667114, "beta_dpo/gap_mean": 16.252836227416992, "beta_dpo/gap_std": 23.609046936035156, "beta_dpo/loss_margin_mean": 20.805694580078125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6061980347694633, "grad_norm": 37.258792877197266, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 0.45804572105407715, "logits/rejected": 0.3601095676422119, "loss": 0.9848, "step": 401 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49628403782844543, "beta_dpo/beta_margin_grad_std": 0.006008894182741642, "beta_dpo/beta_margin_mean": 0.014866580255329609, "beta_dpo/beta_margin_std": 0.024040691554546356, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08317705988883972, "beta_dpo/gap_mean": 16.39299201965332, "beta_dpo/gap_std": 23.771438598632812, "beta_dpo/loss_margin_mean": 14.86658000946045, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6077097505668935, "grad_norm": 0.9669122099876404, "learning_rate": 2.013895317751323e-07, "logits/chosen": 0.47899359464645386, "logits/rejected": 0.4380500316619873, "loss": 1.3731, "step": 402 }, { "beta_dpo/beta": 0.03571583703160286, "beta_dpo/beta_margin_grad_mean": -0.3757665455341339, "beta_dpo/beta_margin_grad_std": 0.1899324208498001, "beta_dpo/beta_margin_mean": 0.8030893802642822, "beta_dpo/beta_margin_std": 1.3427276611328125, "beta_dpo/beta_used": 0.03571583703160286, "beta_dpo/beta_used_raw": -0.032127734273672104, "beta_dpo/gap_mean": 16.863525390625, "beta_dpo/gap_std": 24.415699005126953, "beta_dpo/loss_margin_mean": 21.25139808654785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6092214663643235, "grad_norm": 18.598398208618164, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 0.5733494758605957, "logits/rejected": 0.4917227625846863, "loss": 1.1002, "step": 403 }, { "beta_dpo/beta": 0.25357943773269653, "beta_dpo/beta_margin_grad_mean": -0.25755763053894043, "beta_dpo/beta_margin_grad_std": 0.35744139552116394, "beta_dpo/beta_margin_mean": 4.395455837249756, "beta_dpo/beta_margin_std": 6.034870624542236, "beta_dpo/beta_used": 0.25357943773269653, "beta_dpo/beta_used_raw": 0.25357943773269653, "beta_dpo/gap_mean": 17.377187728881836, "beta_dpo/gap_std": 24.109294891357422, "beta_dpo/loss_margin_mean": 16.861284255981445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6107331821617535, "grad_norm": 109.06430053710938, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 0.4435925781726837, "logits/rejected": 0.3511613607406616, "loss": 0.7406, "step": 404 }, { "beta_dpo/beta": 0.1809747815132141, "beta_dpo/beta_margin_grad_mean": -0.3554416596889496, "beta_dpo/beta_margin_grad_std": 0.29358693957328796, "beta_dpo/beta_margin_mean": 4.457952499389648, "beta_dpo/beta_margin_std": 7.733155250549316, "beta_dpo/beta_used": 0.1809747815132141, "beta_dpo/beta_used_raw": -0.05717755854129791, "beta_dpo/gap_mean": 17.331336975097656, "beta_dpo/gap_std": 23.831462860107422, "beta_dpo/loss_margin_mean": 16.45929527282715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6122448979591837, "grad_norm": 89.80956268310547, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.49917006492614746, "logits/rejected": 0.45142000913619995, "loss": 1.1742, "step": 405 }, { "beta_dpo/beta": 0.21984750032424927, "beta_dpo/beta_margin_grad_mean": -0.37012386322021484, "beta_dpo/beta_margin_grad_std": 0.30180418491363525, "beta_dpo/beta_margin_mean": 4.523665904998779, "beta_dpo/beta_margin_std": 10.00094223022461, "beta_dpo/beta_used": 0.21984750032424927, "beta_dpo/beta_used_raw": 0.13835027813911438, "beta_dpo/gap_mean": 17.194957733154297, "beta_dpo/gap_std": 23.892921447753906, "beta_dpo/loss_margin_mean": 18.594226837158203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6137566137566137, "grad_norm": 122.70568084716797, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 0.4927158057689667, "logits/rejected": 0.339374303817749, "loss": 1.1442, "step": 406 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49648579955101013, "beta_dpo/beta_margin_grad_std": 0.005789801478385925, "beta_dpo/beta_margin_mean": 0.014059151522815228, "beta_dpo/beta_margin_std": 0.02316402457654476, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06442619860172272, "beta_dpo/gap_mean": 16.722686767578125, "beta_dpo/gap_std": 23.689287185668945, "beta_dpo/loss_margin_mean": 14.059150695800781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6152683295540439, "grad_norm": 0.9176772832870483, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 0.37719011306762695, "logits/rejected": 0.29988420009613037, "loss": 1.3724, "step": 407 }, { "beta_dpo/beta": 0.059294041246175766, "beta_dpo/beta_margin_grad_mean": -0.3586428463459015, "beta_dpo/beta_margin_grad_std": 0.2213546484708786, "beta_dpo/beta_margin_mean": 1.363776683807373, "beta_dpo/beta_margin_std": 2.3988959789276123, "beta_dpo/beta_used": 0.059294041246175766, "beta_dpo/beta_used_raw": 0.0582197904586792, "beta_dpo/gap_mean": 16.845844268798828, "beta_dpo/gap_std": 23.545011520385742, "beta_dpo/loss_margin_mean": 19.550216674804688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6167800453514739, "grad_norm": 34.37969207763672, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 0.46227675676345825, "logits/rejected": 0.37937504053115845, "loss": 1.0517, "step": 408 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.496993750333786, "beta_dpo/beta_margin_grad_std": 0.005834223236888647, "beta_dpo/beta_margin_mean": 0.01202741451561451, "beta_dpo/beta_margin_std": 0.023341767489910126, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.300281822681427, "beta_dpo/gap_mean": 16.437042236328125, "beta_dpo/gap_std": 23.63758659362793, "beta_dpo/loss_margin_mean": 12.027414321899414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.618291761148904, "grad_norm": 0.6476607918739319, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 0.4304888844490051, "logits/rejected": 0.4324306547641754, "loss": 1.3767, "step": 409 }, { "beta_dpo/beta": 0.07325400412082672, "beta_dpo/beta_margin_grad_mean": -0.3431945741176605, "beta_dpo/beta_margin_grad_std": 0.22117945551872253, "beta_dpo/beta_margin_mean": 1.494828701019287, "beta_dpo/beta_margin_std": 2.5302703380584717, "beta_dpo/beta_used": 0.07325400412082672, "beta_dpo/beta_used_raw": -0.053586445748806, "beta_dpo/gap_mean": 16.29637336730957, "beta_dpo/gap_std": 22.9282283782959, "beta_dpo/loss_margin_mean": 15.299408912658691, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6198034769463341, "grad_norm": 41.123111724853516, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.47349417209625244, "logits/rejected": 0.4450224041938782, "loss": 0.971, "step": 410 }, { "beta_dpo/beta": 0.12284256517887115, "beta_dpo/beta_margin_grad_mean": -0.3937680721282959, "beta_dpo/beta_margin_grad_std": 0.28876185417175293, "beta_dpo/beta_margin_mean": 2.466118812561035, "beta_dpo/beta_margin_std": 5.714894771575928, "beta_dpo/beta_used": 0.12284256517887115, "beta_dpo/beta_used_raw": 0.04404173046350479, "beta_dpo/gap_mean": 16.310779571533203, "beta_dpo/gap_std": 23.467342376708984, "beta_dpo/loss_margin_mean": 17.508848190307617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6213151927437641, "grad_norm": 110.05622100830078, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 0.43857282400131226, "logits/rejected": 0.37754371762275696, "loss": 1.4855, "step": 411 }, { "beta_dpo/beta": 0.06893683224916458, "beta_dpo/beta_margin_grad_mean": -0.4188035726547241, "beta_dpo/beta_margin_grad_std": 0.247576504945755, "beta_dpo/beta_margin_mean": 0.9057201743125916, "beta_dpo/beta_margin_std": 2.4244892597198486, "beta_dpo/beta_used": 0.06893683224916458, "beta_dpo/beta_used_raw": -0.06521739065647125, "beta_dpo/gap_mean": 16.284269332885742, "beta_dpo/gap_std": 23.78243637084961, "beta_dpo/loss_margin_mean": 15.739941596984863, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6228269085411943, "grad_norm": 51.01114273071289, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 0.4643993377685547, "logits/rejected": 0.39351195096969604, "loss": 1.1684, "step": 412 }, { "beta_dpo/beta": 0.12662924826145172, "beta_dpo/beta_margin_grad_mean": -0.3909075856208801, "beta_dpo/beta_margin_grad_std": 0.3073519468307495, "beta_dpo/beta_margin_mean": 2.064873456954956, "beta_dpo/beta_margin_std": 5.546405792236328, "beta_dpo/beta_used": 0.12662924826145172, "beta_dpo/beta_used_raw": -0.011943697929382324, "beta_dpo/gap_mean": 16.021413803100586, "beta_dpo/gap_std": 24.061824798583984, "beta_dpo/loss_margin_mean": 15.6614351272583, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6243386243386243, "grad_norm": 105.40924072265625, "learning_rate": 1.872130032047302e-07, "logits/chosen": 0.3003043532371521, "logits/rejected": 0.26333707571029663, "loss": 1.159, "step": 413 }, { "beta_dpo/beta": 0.1279238611459732, "beta_dpo/beta_margin_grad_mean": -0.36717891693115234, "beta_dpo/beta_margin_grad_std": 0.2865369915962219, "beta_dpo/beta_margin_mean": 2.5722670555114746, "beta_dpo/beta_margin_std": 5.616209983825684, "beta_dpo/beta_used": 0.1279238611459732, "beta_dpo/beta_used_raw": 0.05033845454454422, "beta_dpo/gap_mean": 16.612018585205078, "beta_dpo/gap_std": 24.592979431152344, "beta_dpo/loss_margin_mean": 19.95748519897461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6258503401360545, "grad_norm": 103.1255111694336, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 0.44454312324523926, "logits/rejected": 0.38254380226135254, "loss": 1.0349, "step": 414 }, { "beta_dpo/beta": 0.17535589635372162, "beta_dpo/beta_margin_grad_mean": -0.40026840567588806, "beta_dpo/beta_margin_grad_std": 0.3133196234703064, "beta_dpo/beta_margin_mean": 2.7742631435394287, "beta_dpo/beta_margin_std": 7.570990562438965, "beta_dpo/beta_used": 0.17535589635372162, "beta_dpo/beta_used_raw": 0.1638440191745758, "beta_dpo/gap_mean": 17.03055191040039, "beta_dpo/gap_std": 24.755905151367188, "beta_dpo/loss_margin_mean": 17.868186950683594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6273620559334845, "grad_norm": 211.03265380859375, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.46807390451431274, "logits/rejected": 0.4464624226093292, "loss": 1.6837, "step": 415 }, { "beta_dpo/beta": 0.1974441409111023, "beta_dpo/beta_margin_grad_mean": -0.3462623953819275, "beta_dpo/beta_margin_grad_std": 0.2804645299911499, "beta_dpo/beta_margin_mean": 4.031702518463135, "beta_dpo/beta_margin_std": 8.079400062561035, "beta_dpo/beta_used": 0.1974441409111023, "beta_dpo/beta_used_raw": 0.06509672105312347, "beta_dpo/gap_mean": 15.797903060913086, "beta_dpo/gap_std": 24.798934936523438, "beta_dpo/loss_margin_mean": 11.64545726776123, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6288737717309146, "grad_norm": 102.91788482666016, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 0.4295591711997986, "logits/rejected": 0.3772777318954468, "loss": 0.9866, "step": 416 }, { "beta_dpo/beta": 0.1781310886144638, "beta_dpo/beta_margin_grad_mean": -0.25753018260002136, "beta_dpo/beta_margin_grad_std": 0.32876917719841003, "beta_dpo/beta_margin_mean": 3.546206474304199, "beta_dpo/beta_margin_std": 5.584624290466309, "beta_dpo/beta_used": 0.1781310886144638, "beta_dpo/beta_used_raw": 0.1781310886144638, "beta_dpo/gap_mean": 15.99776840209961, "beta_dpo/gap_std": 25.12869644165039, "beta_dpo/loss_margin_mean": 17.373567581176758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6303854875283447, "grad_norm": 86.60340118408203, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 0.559472918510437, "logits/rejected": 0.48729389905929565, "loss": 0.7575, "step": 417 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4969225227832794, "beta_dpo/beta_margin_grad_std": 0.006542589515447617, "beta_dpo/beta_margin_mean": 0.012312895618379116, "beta_dpo/beta_margin_std": 0.026178967207670212, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11385427415370941, "beta_dpo/gap_mean": 16.031269073486328, "beta_dpo/gap_std": 25.14609146118164, "beta_dpo/loss_margin_mean": 12.312894821166992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6318972033257747, "grad_norm": 0.7612284421920776, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 0.46164387464523315, "logits/rejected": 0.46708381175994873, "loss": 1.374, "step": 418 }, { "beta_dpo/beta": 0.14389443397521973, "beta_dpo/beta_margin_grad_mean": -0.34402981400489807, "beta_dpo/beta_margin_grad_std": 0.27868032455444336, "beta_dpo/beta_margin_mean": 3.101715326309204, "beta_dpo/beta_margin_std": 5.610728740692139, "beta_dpo/beta_used": 0.14389443397521973, "beta_dpo/beta_used_raw": 0.10019838809967041, "beta_dpo/gap_mean": 16.164939880371094, "beta_dpo/gap_std": 25.0128173828125, "beta_dpo/loss_margin_mean": 19.221092224121094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6334089191232048, "grad_norm": 75.70429992675781, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 0.37886548042297363, "logits/rejected": 0.3200737535953522, "loss": 1.1798, "step": 419 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4973110556602478, "beta_dpo/beta_margin_grad_std": 0.007076014298945665, "beta_dpo/beta_margin_mean": 0.010757850483059883, "beta_dpo/beta_margin_std": 0.028311539441347122, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1291695386171341, "beta_dpo/gap_mean": 15.535102844238281, "beta_dpo/gap_std": 25.324172973632812, "beta_dpo/loss_margin_mean": 10.75784969329834, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6349206349206349, "grad_norm": 0.7967346906661987, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.4360436797142029, "logits/rejected": 0.38624030351638794, "loss": 1.3747, "step": 420 }, { "beta_dpo/beta": 0.16308826208114624, "beta_dpo/beta_margin_grad_mean": -0.3662463426589966, "beta_dpo/beta_margin_grad_std": 0.26859521865844727, "beta_dpo/beta_margin_mean": 2.6131467819213867, "beta_dpo/beta_margin_std": 6.545647621154785, "beta_dpo/beta_used": 0.16308826208114624, "beta_dpo/beta_used_raw": 0.11229175329208374, "beta_dpo/gap_mean": 15.035537719726562, "beta_dpo/gap_std": 26.121028900146484, "beta_dpo/loss_margin_mean": 13.687716484069824, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.636432350718065, "grad_norm": 84.8555679321289, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 0.436802476644516, "logits/rejected": 0.37465423345565796, "loss": 1.4652, "step": 421 }, { "beta_dpo/beta": 0.01910341903567314, "beta_dpo/beta_margin_grad_mean": -0.43050575256347656, "beta_dpo/beta_margin_grad_std": 0.13256989419460297, "beta_dpo/beta_margin_mean": 0.33536016941070557, "beta_dpo/beta_margin_std": 0.6859686374664307, "beta_dpo/beta_used": 0.01910341903567314, "beta_dpo/beta_used_raw": -0.1404467225074768, "beta_dpo/gap_mean": 15.0646390914917, "beta_dpo/gap_std": 26.3007869720459, "beta_dpo/loss_margin_mean": 16.67266273498535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6379440665154951, "grad_norm": 17.200712203979492, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 0.4313371777534485, "logits/rejected": 0.40216585993766785, "loss": 1.191, "step": 422 }, { "beta_dpo/beta": 0.2093159258365631, "beta_dpo/beta_margin_grad_mean": -0.2503194510936737, "beta_dpo/beta_margin_grad_std": 0.3161967694759369, "beta_dpo/beta_margin_mean": 3.7586827278137207, "beta_dpo/beta_margin_std": 5.329720973968506, "beta_dpo/beta_used": 0.2093159258365631, "beta_dpo/beta_used_raw": 0.2093159258365631, "beta_dpo/gap_mean": 15.665881156921387, "beta_dpo/gap_std": 26.160938262939453, "beta_dpo/loss_margin_mean": 18.111860275268555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6394557823129252, "grad_norm": 83.79721069335938, "learning_rate": 1.745083602306071e-07, "logits/chosen": 0.46611279249191284, "logits/rejected": 0.39604848623275757, "loss": 0.8254, "step": 423 }, { "beta_dpo/beta": 0.14506866037845612, "beta_dpo/beta_margin_grad_mean": -0.34038662910461426, "beta_dpo/beta_margin_grad_std": 0.2595653831958771, "beta_dpo/beta_margin_mean": 3.0149664878845215, "beta_dpo/beta_margin_std": 5.455316066741943, "beta_dpo/beta_used": 0.14506866037845612, "beta_dpo/beta_used_raw": 0.07982266694307327, "beta_dpo/gap_mean": 16.47106170654297, "beta_dpo/gap_std": 25.810760498046875, "beta_dpo/loss_margin_mean": 20.76560401916504, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6409674981103552, "grad_norm": 75.43428039550781, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 0.3916613757610321, "logits/rejected": 0.3512648940086365, "loss": 0.9367, "step": 424 }, { "beta_dpo/beta": 0.07886487990617752, "beta_dpo/beta_margin_grad_mean": -0.3156943917274475, "beta_dpo/beta_margin_grad_std": 0.2569364905357361, "beta_dpo/beta_margin_mean": 1.328211784362793, "beta_dpo/beta_margin_std": 1.9083045721054077, "beta_dpo/beta_used": 0.07886487990617752, "beta_dpo/beta_used_raw": 0.07886487990617752, "beta_dpo/gap_mean": 16.746292114257812, "beta_dpo/gap_std": 25.627155303955078, "beta_dpo/loss_margin_mean": 16.717668533325195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6424792139077853, "grad_norm": 33.6092414855957, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.5233234763145447, "logits/rejected": 0.48915839195251465, "loss": 0.8456, "step": 425 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4974079728126526, "beta_dpo/beta_margin_grad_std": 0.006462402641773224, "beta_dpo/beta_margin_mean": 0.010370003059506416, "beta_dpo/beta_margin_std": 0.02585585229098797, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2091233730316162, "beta_dpo/gap_mean": 15.816381454467773, "beta_dpo/gap_std": 25.586580276489258, "beta_dpo/loss_margin_mean": 10.370002746582031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6439909297052154, "grad_norm": 0.863946795463562, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 0.4199245572090149, "logits/rejected": 0.4237271547317505, "loss": 1.3757, "step": 426 }, { "beta_dpo/beta": 0.16894888877868652, "beta_dpo/beta_margin_grad_mean": -0.3107214570045471, "beta_dpo/beta_margin_grad_std": 0.29482501745224, "beta_dpo/beta_margin_mean": 2.7154009342193604, "beta_dpo/beta_margin_std": 6.2772908210754395, "beta_dpo/beta_used": 0.16894888877868652, "beta_dpo/beta_used_raw": 0.16894888877868652, "beta_dpo/gap_mean": 15.633672714233398, "beta_dpo/gap_std": 26.182838439941406, "beta_dpo/loss_margin_mean": 16.839767456054688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6455026455026455, "grad_norm": 59.07684326171875, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 0.490261435508728, "logits/rejected": 0.4341810345649719, "loss": 0.7906, "step": 427 }, { "beta_dpo/beta": 0.206699937582016, "beta_dpo/beta_margin_grad_mean": -0.29449787735939026, "beta_dpo/beta_margin_grad_std": 0.3339853286743164, "beta_dpo/beta_margin_mean": 4.745669364929199, "beta_dpo/beta_margin_std": 8.069055557250977, "beta_dpo/beta_used": 0.206699937582016, "beta_dpo/beta_used_raw": 0.206699937582016, "beta_dpo/gap_mean": 16.280973434448242, "beta_dpo/gap_std": 26.821441650390625, "beta_dpo/loss_margin_mean": 20.525794982910156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6470143613000756, "grad_norm": 159.63389587402344, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 0.4094158709049225, "logits/rejected": 0.35882726311683655, "loss": 1.4005, "step": 428 }, { "beta_dpo/beta": 0.2941809594631195, "beta_dpo/beta_margin_grad_mean": -0.2386247217655182, "beta_dpo/beta_margin_grad_std": 0.3809661567211151, "beta_dpo/beta_margin_mean": 5.172230243682861, "beta_dpo/beta_margin_std": 7.734333515167236, "beta_dpo/beta_used": 0.2941809594631195, "beta_dpo/beta_used_raw": 0.2941809594631195, "beta_dpo/gap_mean": 16.665008544921875, "beta_dpo/gap_std": 26.66605567932129, "beta_dpo/loss_margin_mean": 17.31098175048828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6485260770975056, "grad_norm": 149.40992736816406, "learning_rate": 1.669846604344412e-07, "logits/chosen": 0.38707661628723145, "logits/rejected": 0.3977625370025635, "loss": 1.1137, "step": 429 }, { "beta_dpo/beta": 0.271389901638031, "beta_dpo/beta_margin_grad_mean": -0.23841898143291473, "beta_dpo/beta_margin_grad_std": 0.2879250943660736, "beta_dpo/beta_margin_mean": 5.876552581787109, "beta_dpo/beta_margin_std": 9.04500961303711, "beta_dpo/beta_used": 0.271389901638031, "beta_dpo/beta_used_raw": 0.271389901638031, "beta_dpo/gap_mean": 17.295337677001953, "beta_dpo/gap_std": 26.757667541503906, "beta_dpo/loss_margin_mean": 20.482933044433594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6500377928949358, "grad_norm": 124.40864562988281, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.3641434907913208, "logits/rejected": 0.3554537892341614, "loss": 0.6021, "step": 430 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49573636054992676, "beta_dpo/beta_margin_grad_std": 0.00596994673833251, "beta_dpo/beta_margin_mean": 0.017058147117495537, "beta_dpo/beta_margin_std": 0.023886609822511673, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14669229090213776, "beta_dpo/gap_mean": 17.474349975585938, "beta_dpo/gap_std": 26.309669494628906, "beta_dpo/loss_margin_mean": 17.058147430419922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6515495086923658, "grad_norm": 0.8200677633285522, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 0.43957391381263733, "logits/rejected": 0.3921339511871338, "loss": 1.373, "step": 431 }, { "beta_dpo/beta": 0.20359541475772858, "beta_dpo/beta_margin_grad_mean": -0.34393805265426636, "beta_dpo/beta_margin_grad_std": 0.2853422164916992, "beta_dpo/beta_margin_mean": 4.172194480895996, "beta_dpo/beta_margin_std": 8.311675071716309, "beta_dpo/beta_used": 0.20359541475772858, "beta_dpo/beta_used_raw": 0.20359541475772858, "beta_dpo/gap_mean": 17.662752151489258, "beta_dpo/gap_std": 26.11996841430664, "beta_dpo/loss_margin_mean": 18.106279373168945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6530612244897959, "grad_norm": 113.8149185180664, "learning_rate": 1.632536862810844e-07, "logits/chosen": 0.47949665784835815, "logits/rejected": 0.4305593967437744, "loss": 1.1397, "step": 432 }, { "beta_dpo/beta": 0.3344748020172119, "beta_dpo/beta_margin_grad_mean": -0.22546495497226715, "beta_dpo/beta_margin_grad_std": 0.3282316327095032, "beta_dpo/beta_margin_mean": 7.9909210205078125, "beta_dpo/beta_margin_std": 14.14069652557373, "beta_dpo/beta_used": 0.3344748020172119, "beta_dpo/beta_used_raw": 0.3344748020172119, "beta_dpo/gap_mean": 18.123287200927734, "beta_dpo/gap_std": 26.272716522216797, "beta_dpo/loss_margin_mean": 22.30093765258789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.654572940287226, "grad_norm": 175.8500518798828, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 0.5873138308525085, "logits/rejected": 0.575851321220398, "loss": 0.7079, "step": 433 }, { "beta_dpo/beta": 0.4044048488140106, "beta_dpo/beta_margin_grad_mean": -0.24625371396541595, "beta_dpo/beta_margin_grad_std": 0.3679701089859009, "beta_dpo/beta_margin_mean": 9.312926292419434, "beta_dpo/beta_margin_std": 15.837896347045898, "beta_dpo/beta_used": 0.4044048488140106, "beta_dpo/beta_used_raw": 0.4044048488140106, "beta_dpo/gap_mean": 18.955612182617188, "beta_dpo/gap_std": 27.102890014648438, "beta_dpo/loss_margin_mean": 20.431182861328125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.656084656084656, "grad_norm": 217.51641845703125, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 0.5753809213638306, "logits/rejected": 0.5080810785293579, "loss": 1.6548, "step": 434 }, { "beta_dpo/beta": 0.2745690941810608, "beta_dpo/beta_margin_grad_mean": -0.36417117714881897, "beta_dpo/beta_margin_grad_std": 0.29817262291908264, "beta_dpo/beta_margin_mean": 5.747401714324951, "beta_dpo/beta_margin_std": 12.350319862365723, "beta_dpo/beta_used": 0.2745690941810608, "beta_dpo/beta_used_raw": 0.194306880235672, "beta_dpo/gap_mean": 18.432392120361328, "beta_dpo/gap_std": 27.030935287475586, "beta_dpo/loss_margin_mean": 17.350605010986328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6575963718820862, "grad_norm": 161.15036010742188, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.5192146301269531, "logits/rejected": 0.4788922667503357, "loss": 1.4735, "step": 435 }, { "beta_dpo/beta": 0.13801611959934235, "beta_dpo/beta_margin_grad_mean": -0.3944951295852661, "beta_dpo/beta_margin_grad_std": 0.28630712628364563, "beta_dpo/beta_margin_mean": 2.079245090484619, "beta_dpo/beta_margin_std": 5.407124042510986, "beta_dpo/beta_used": 0.13801611959934235, "beta_dpo/beta_used_raw": 0.012127086520195007, "beta_dpo/gap_mean": 18.145030975341797, "beta_dpo/gap_std": 27.07366180419922, "beta_dpo/loss_margin_mean": 15.945972442626953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6591080876795162, "grad_norm": 95.06700134277344, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 0.4287049174308777, "logits/rejected": 0.37227755784988403, "loss": 1.0533, "step": 436 }, { "beta_dpo/beta": 0.08843779563903809, "beta_dpo/beta_margin_grad_mean": -0.355763703584671, "beta_dpo/beta_margin_grad_std": 0.27760180830955505, "beta_dpo/beta_margin_mean": 2.1557815074920654, "beta_dpo/beta_margin_std": 4.090998649597168, "beta_dpo/beta_used": 0.08843779563903809, "beta_dpo/beta_used_raw": 0.004511319100856781, "beta_dpo/gap_mean": 18.151508331298828, "beta_dpo/gap_std": 27.469696044921875, "beta_dpo/loss_margin_mean": 15.952765464782715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6606198034769464, "grad_norm": 67.06380462646484, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 0.4623869061470032, "logits/rejected": 0.3321772813796997, "loss": 1.1109, "step": 437 }, { "beta_dpo/beta": 0.16248461604118347, "beta_dpo/beta_margin_grad_mean": -0.33106884360313416, "beta_dpo/beta_margin_grad_std": 0.2685889005661011, "beta_dpo/beta_margin_mean": 4.133353233337402, "beta_dpo/beta_margin_std": 7.090116024017334, "beta_dpo/beta_used": 0.16248461604118347, "beta_dpo/beta_used_raw": 0.15679943561553955, "beta_dpo/gap_mean": 18.146766662597656, "beta_dpo/gap_std": 28.101390838623047, "beta_dpo/loss_margin_mean": 22.395479202270508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6621315192743764, "grad_norm": 134.7775115966797, "learning_rate": 1.558581854913253e-07, "logits/chosen": 0.5341899394989014, "logits/rejected": 0.4623284339904785, "loss": 1.0142, "step": 438 }, { "beta_dpo/beta": 0.09724593162536621, "beta_dpo/beta_margin_grad_mean": -0.313736230134964, "beta_dpo/beta_margin_grad_std": 0.24954845011234283, "beta_dpo/beta_margin_mean": 2.1920928955078125, "beta_dpo/beta_margin_std": 3.4765686988830566, "beta_dpo/beta_used": 0.09724593162536621, "beta_dpo/beta_used_raw": 0.09724593162536621, "beta_dpo/gap_mean": 18.264135360717773, "beta_dpo/gap_std": 27.18978500366211, "beta_dpo/loss_margin_mean": 18.61200714111328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6636432350718064, "grad_norm": 61.768672943115234, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 0.39725732803344727, "logits/rejected": 0.35775747895240784, "loss": 0.8909, "step": 439 }, { "beta_dpo/beta": 0.22115948796272278, "beta_dpo/beta_margin_grad_mean": -0.3153398931026459, "beta_dpo/beta_margin_grad_std": 0.27347540855407715, "beta_dpo/beta_margin_mean": 5.172101020812988, "beta_dpo/beta_margin_std": 9.553386688232422, "beta_dpo/beta_used": 0.22115948796272278, "beta_dpo/beta_used_raw": 0.11873233318328857, "beta_dpo/gap_mean": 18.741073608398438, "beta_dpo/gap_std": 27.152206420898438, "beta_dpo/loss_margin_mean": 20.439512252807617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6651549508692366, "grad_norm": 132.9888916015625, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.4396424889564514, "logits/rejected": 0.3410155773162842, "loss": 0.9659, "step": 440 }, { "beta_dpo/beta": 0.056231264024972916, "beta_dpo/beta_margin_grad_mean": -0.3631954789161682, "beta_dpo/beta_margin_grad_std": 0.2372567057609558, "beta_dpo/beta_margin_mean": 1.306308627128601, "beta_dpo/beta_margin_std": 2.314157009124756, "beta_dpo/beta_used": 0.056231264024972916, "beta_dpo/beta_used_raw": 0.014366436749696732, "beta_dpo/gap_mean": 19.39804458618164, "beta_dpo/gap_std": 26.627029418945312, "beta_dpo/loss_margin_mean": 21.177230834960938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6666666666666666, "grad_norm": 28.244613647460938, "learning_rate": 1.521955206326976e-07, "logits/chosen": 0.43280029296875, "logits/rejected": 0.3387737274169922, "loss": 1.0871, "step": 441 }, { "beta_dpo/beta": 0.13525980710983276, "beta_dpo/beta_margin_grad_mean": -0.3400418162345886, "beta_dpo/beta_margin_grad_std": 0.2894745469093323, "beta_dpo/beta_margin_mean": 2.674807071685791, "beta_dpo/beta_margin_std": 5.317812442779541, "beta_dpo/beta_used": 0.13525980710983276, "beta_dpo/beta_used_raw": -0.08266353607177734, "beta_dpo/gap_mean": 19.399829864501953, "beta_dpo/gap_std": 26.35002899169922, "beta_dpo/loss_margin_mean": 19.57100486755371, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6681783824640968, "grad_norm": 101.40801239013672, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 0.47117850184440613, "logits/rejected": 0.4187871515750885, "loss": 0.8993, "step": 442 }, { "beta_dpo/beta": 0.3095214366912842, "beta_dpo/beta_margin_grad_mean": -0.1766759157180786, "beta_dpo/beta_margin_grad_std": 0.3004312217235565, "beta_dpo/beta_margin_mean": 7.633600234985352, "beta_dpo/beta_margin_std": 9.593216896057129, "beta_dpo/beta_used": 0.3095214366912842, "beta_dpo/beta_used_raw": 0.3095214366912842, "beta_dpo/gap_mean": 20.205463409423828, "beta_dpo/gap_std": 26.816097259521484, "beta_dpo/loss_margin_mean": 24.74801254272461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6696900982615268, "grad_norm": 115.0042953491211, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 0.49063044786453247, "logits/rejected": 0.41436687111854553, "loss": 0.8238, "step": 443 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49786376953125, "beta_dpo/beta_margin_grad_std": 0.006280460394918919, "beta_dpo/beta_margin_mean": 0.008546494878828526, "beta_dpo/beta_margin_std": 0.025125892832875252, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3049757182598114, "beta_dpo/gap_mean": 18.85560417175293, "beta_dpo/gap_std": 26.632326126098633, "beta_dpo/loss_margin_mean": 8.546494483947754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.671201814058957, "grad_norm": 0.8523843884468079, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 0.4893801212310791, "logits/rejected": 0.40110495686531067, "loss": 1.3743, "step": 444 }, { "beta_dpo/beta": 0.15167614817619324, "beta_dpo/beta_margin_grad_mean": -0.24699601531028748, "beta_dpo/beta_margin_grad_std": 0.29544878005981445, "beta_dpo/beta_margin_mean": 3.1027376651763916, "beta_dpo/beta_margin_std": 4.867432594299316, "beta_dpo/beta_used": 0.15167614817619324, "beta_dpo/beta_used_raw": 0.15167614817619324, "beta_dpo/gap_mean": 18.524452209472656, "beta_dpo/gap_std": 26.606185913085938, "beta_dpo/loss_margin_mean": 21.157268524169922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.672713529856387, "grad_norm": 58.71397018432617, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.453709214925766, "logits/rejected": 0.43405085802078247, "loss": 0.7044, "step": 445 }, { "beta_dpo/beta": 0.2611894905567169, "beta_dpo/beta_margin_grad_mean": -0.2053774893283844, "beta_dpo/beta_margin_grad_std": 0.25838059186935425, "beta_dpo/beta_margin_mean": 7.894767761230469, "beta_dpo/beta_margin_std": 10.342389106750488, "beta_dpo/beta_used": 0.2611894905567169, "beta_dpo/beta_used_raw": 0.2611894905567169, "beta_dpo/gap_mean": 19.257843017578125, "beta_dpo/gap_std": 26.065818786621094, "beta_dpo/loss_margin_mean": 24.705129623413086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.674225245653817, "grad_norm": 105.36664581298828, "learning_rate": 1.461462467495284e-07, "logits/chosen": 0.5040090084075928, "logits/rejected": 0.42126449942588806, "loss": 0.6766, "step": 446 }, { "beta_dpo/beta": 0.23961615562438965, "beta_dpo/beta_margin_grad_mean": -0.19545207917690277, "beta_dpo/beta_margin_grad_std": 0.33320990204811096, "beta_dpo/beta_margin_mean": 5.5971269607543945, "beta_dpo/beta_margin_std": 6.4180378913879395, "beta_dpo/beta_used": 0.23961615562438965, "beta_dpo/beta_used_raw": 0.23961615562438965, "beta_dpo/gap_mean": 20.464557647705078, "beta_dpo/gap_std": 26.064769744873047, "beta_dpo/loss_margin_mean": 23.23582649230957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6757369614512472, "grad_norm": 118.35011291503906, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 0.4212689995765686, "logits/rejected": 0.3177017867565155, "loss": 1.0214, "step": 447 }, { "beta_dpo/beta": 0.006910950411111116, "beta_dpo/beta_margin_grad_mean": -0.4693349301815033, "beta_dpo/beta_margin_grad_std": 0.048872094601392746, "beta_dpo/beta_margin_mean": 0.12464610487222672, "beta_dpo/beta_margin_std": 0.20036308467388153, "beta_dpo/beta_used": 0.006910950411111116, "beta_dpo/beta_used_raw": 0.006910950411111116, "beta_dpo/gap_mean": 20.188140869140625, "beta_dpo/gap_std": 26.024574279785156, "beta_dpo/loss_margin_mean": 18.08507537841797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6772486772486772, "grad_norm": 5.197481632232666, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 0.4766013026237488, "logits/rejected": 0.4210081100463867, "loss": 1.2689, "step": 448 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49735549092292786, "beta_dpo/beta_margin_grad_std": 0.005208996124565601, "beta_dpo/beta_margin_mean": 0.010579775087535381, "beta_dpo/beta_margin_std": 0.020839868113398552, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.35340794920921326, "beta_dpo/gap_mean": 18.90911865234375, "beta_dpo/gap_std": 25.33905029296875, "beta_dpo/loss_margin_mean": 10.579774856567383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6787603930461074, "grad_norm": 0.742425262928009, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 0.3229493200778961, "logits/rejected": 0.3151504397392273, "loss": 1.3751, "step": 449 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49666455388069153, "beta_dpo/beta_margin_grad_std": 0.0058851479552686214, "beta_dpo/beta_margin_mean": 0.013344475999474525, "beta_dpo/beta_margin_std": 0.023545950651168823, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10140938311815262, "beta_dpo/gap_mean": 17.542179107666016, "beta_dpo/gap_std": 24.803512573242188, "beta_dpo/loss_margin_mean": 13.344475746154785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6802721088435374, "grad_norm": 0.8854034543037415, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.5489107370376587, "logits/rejected": 0.43710023164749146, "loss": 1.3723, "step": 450 }, { "beta_dpo/beta": 0.0172061026096344, "beta_dpo/beta_margin_grad_mean": -0.4475163519382477, "beta_dpo/beta_margin_grad_std": 0.12823042273521423, "beta_dpo/beta_margin_mean": 0.24266105890274048, "beta_dpo/beta_margin_std": 0.6130807399749756, "beta_dpo/beta_used": 0.0172061026096344, "beta_dpo/beta_used_raw": -0.08448895812034607, "beta_dpo/gap_mean": 16.902774810791016, "beta_dpo/gap_std": 24.68649673461914, "beta_dpo/loss_margin_mean": 14.798531532287598, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6817838246409675, "grad_norm": 13.051300048828125, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 0.42658311128616333, "logits/rejected": 0.37267670035362244, "loss": 1.1849, "step": 451 }, { "beta_dpo/beta": 0.13525496423244476, "beta_dpo/beta_margin_grad_mean": -0.29088878631591797, "beta_dpo/beta_margin_grad_std": 0.3417208194732666, "beta_dpo/beta_margin_mean": 2.171989917755127, "beta_dpo/beta_margin_std": 3.486285924911499, "beta_dpo/beta_used": 0.13525496423244476, "beta_dpo/beta_used_raw": 0.13525496423244476, "beta_dpo/gap_mean": 16.710304260253906, "beta_dpo/gap_std": 24.817352294921875, "beta_dpo/loss_margin_mean": 16.379568099975586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6832955404383976, "grad_norm": 70.75874328613281, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 0.4217544198036194, "logits/rejected": 0.34103256464004517, "loss": 1.1461, "step": 452 }, { "beta_dpo/beta": 0.04890910163521767, "beta_dpo/beta_margin_grad_mean": -0.4136413037776947, "beta_dpo/beta_margin_grad_std": 0.24708783626556396, "beta_dpo/beta_margin_mean": 0.8333736658096313, "beta_dpo/beta_margin_std": 2.1311116218566895, "beta_dpo/beta_used": 0.04890910163521767, "beta_dpo/beta_used_raw": 0.04254155978560448, "beta_dpo/gap_mean": 16.893192291259766, "beta_dpo/gap_std": 25.19158172607422, "beta_dpo/loss_margin_mean": 17.536466598510742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6848072562358276, "grad_norm": 37.44161605834961, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 0.41655856370925903, "logits/rejected": 0.381106436252594, "loss": 1.2468, "step": 453 }, { "beta_dpo/beta": 0.16885149478912354, "beta_dpo/beta_margin_grad_mean": -0.22665803134441376, "beta_dpo/beta_margin_grad_std": 0.304115355014801, "beta_dpo/beta_margin_mean": 3.1320598125457764, "beta_dpo/beta_margin_std": 4.471872806549072, "beta_dpo/beta_used": 0.16885149478912354, "beta_dpo/beta_used_raw": 0.16885149478912354, "beta_dpo/gap_mean": 17.158390045166016, "beta_dpo/gap_std": 25.75613021850586, "beta_dpo/loss_margin_mean": 18.559307098388672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6863189720332578, "grad_norm": 48.60162353515625, "learning_rate": 1.366202015206706e-07, "logits/chosen": 0.4805641770362854, "logits/rejected": 0.43357783555984497, "loss": 0.6251, "step": 454 }, { "beta_dpo/beta": 0.15867355465888977, "beta_dpo/beta_margin_grad_mean": -0.3603483736515045, "beta_dpo/beta_margin_grad_std": 0.27238547801971436, "beta_dpo/beta_margin_mean": 3.664536952972412, "beta_dpo/beta_margin_std": 6.963903427124023, "beta_dpo/beta_used": 0.15867355465888977, "beta_dpo/beta_used_raw": 0.13864237070083618, "beta_dpo/gap_mean": 17.709564208984375, "beta_dpo/gap_std": 25.734886169433594, "beta_dpo/loss_margin_mean": 19.53325080871582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6878306878306878, "grad_norm": 107.5218276977539, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.33997493982315063, "logits/rejected": 0.29935604333877563, "loss": 1.3453, "step": 455 }, { "beta_dpo/beta": 0.11797763407230377, "beta_dpo/beta_margin_grad_mean": -0.2854386270046234, "beta_dpo/beta_margin_grad_std": 0.28851282596588135, "beta_dpo/beta_margin_mean": 2.0227293968200684, "beta_dpo/beta_margin_std": 2.8564655780792236, "beta_dpo/beta_used": 0.11797763407230377, "beta_dpo/beta_used_raw": 0.11797763407230377, "beta_dpo/gap_mean": 17.563323974609375, "beta_dpo/gap_std": 25.402185440063477, "beta_dpo/loss_margin_mean": 16.928739547729492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6893424036281179, "grad_norm": 52.88853454589844, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 0.39014971256256104, "logits/rejected": 0.3266359865665436, "loss": 0.7483, "step": 456 }, { "beta_dpo/beta": 0.1249324381351471, "beta_dpo/beta_margin_grad_mean": -0.3382973372936249, "beta_dpo/beta_margin_grad_std": 0.2664583921432495, "beta_dpo/beta_margin_mean": 2.556375503540039, "beta_dpo/beta_margin_std": 4.548144817352295, "beta_dpo/beta_used": 0.1249324381351471, "beta_dpo/beta_used_raw": -0.01606186479330063, "beta_dpo/gap_mean": 17.55565643310547, "beta_dpo/gap_std": 24.93761444091797, "beta_dpo/loss_margin_mean": 16.916906356811523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.690854119425548, "grad_norm": 56.52458190917969, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 0.5392422676086426, "logits/rejected": 0.4641982913017273, "loss": 0.9844, "step": 457 }, { "beta_dpo/beta": 0.24690914154052734, "beta_dpo/beta_margin_grad_mean": -0.22146117687225342, "beta_dpo/beta_margin_grad_std": 0.33166489005088806, "beta_dpo/beta_margin_mean": 4.624577522277832, "beta_dpo/beta_margin_std": 6.362880229949951, "beta_dpo/beta_used": 0.24690914154052734, "beta_dpo/beta_used_raw": 0.24690914154052734, "beta_dpo/gap_mean": 17.572355270385742, "beta_dpo/gap_std": 24.927356719970703, "beta_dpo/loss_margin_mean": 18.480012893676758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6923658352229781, "grad_norm": 78.89071655273438, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 0.45676666498184204, "logits/rejected": 0.34059104323387146, "loss": 0.6349, "step": 458 }, { "beta_dpo/beta": 0.2179662585258484, "beta_dpo/beta_margin_grad_mean": -0.3185714781284332, "beta_dpo/beta_margin_grad_std": 0.27158471941947937, "beta_dpo/beta_margin_mean": 6.563882827758789, "beta_dpo/beta_margin_std": 9.94521713256836, "beta_dpo/beta_used": 0.2179662585258484, "beta_dpo/beta_used_raw": 0.18530282378196716, "beta_dpo/gap_mean": 17.721694946289062, "beta_dpo/gap_std": 24.523391723632812, "beta_dpo/loss_margin_mean": 21.503368377685547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6938775510204082, "grad_norm": 122.18168640136719, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 0.4888218343257904, "logits/rejected": 0.44728928804397583, "loss": 1.1406, "step": 459 }, { "beta_dpo/beta": 0.08090288937091827, "beta_dpo/beta_margin_grad_mean": -0.37582793831825256, "beta_dpo/beta_margin_grad_std": 0.2519203722476959, "beta_dpo/beta_margin_mean": 1.7335608005523682, "beta_dpo/beta_margin_std": 3.414593458175659, "beta_dpo/beta_used": 0.08090288937091827, "beta_dpo/beta_used_raw": -0.02788601815700531, "beta_dpo/gap_mean": 18.970165252685547, "beta_dpo/gap_std": 24.861597061157227, "beta_dpo/loss_margin_mean": 22.223512649536133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6953892668178382, "grad_norm": 63.34912109375, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.5079508423805237, "logits/rejected": 0.37769049406051636, "loss": 1.1034, "step": 460 }, { "beta_dpo/beta": 0.34863337874412537, "beta_dpo/beta_margin_grad_mean": -0.29620790481567383, "beta_dpo/beta_margin_grad_std": 0.2750532329082489, "beta_dpo/beta_margin_mean": 9.606303215026855, "beta_dpo/beta_margin_std": 15.002069473266602, "beta_dpo/beta_used": 0.34863337874412537, "beta_dpo/beta_used_raw": 0.053985416889190674, "beta_dpo/gap_mean": 19.691003799438477, "beta_dpo/gap_std": 24.841266632080078, "beta_dpo/loss_margin_mean": 21.206148147583008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6969009826152683, "grad_norm": 78.56777954101562, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 0.608617901802063, "logits/rejected": 0.5119181275367737, "loss": 0.7492, "step": 461 }, { "beta_dpo/beta": 0.3502820134162903, "beta_dpo/beta_margin_grad_mean": -0.17331989109516144, "beta_dpo/beta_margin_grad_std": 0.2825244069099426, "beta_dpo/beta_margin_mean": 8.671442985534668, "beta_dpo/beta_margin_std": 10.874760627746582, "beta_dpo/beta_used": 0.3502820134162903, "beta_dpo/beta_used_raw": 0.3502820134162903, "beta_dpo/gap_mean": 20.119089126586914, "beta_dpo/gap_std": 24.742603302001953, "beta_dpo/loss_margin_mean": 23.444116592407227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6984126984126984, "grad_norm": 105.43913269042969, "learning_rate": 1.27297100994108e-07, "logits/chosen": 0.4638398289680481, "logits/rejected": 0.39704328775405884, "loss": 0.8603, "step": 462 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4957989454269409, "beta_dpo/beta_margin_grad_std": 0.005161019042134285, "beta_dpo/beta_margin_mean": 0.016806745901703835, "beta_dpo/beta_margin_std": 0.02064814232289791, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09972745180130005, "beta_dpo/gap_mean": 19.76999282836914, "beta_dpo/gap_std": 24.28656578063965, "beta_dpo/loss_margin_mean": 16.806743621826172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6999244142101285, "grad_norm": 0.8558281064033508, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 0.44439205527305603, "logits/rejected": 0.4367867410182953, "loss": 1.37, "step": 463 }, { "beta_dpo/beta": 0.0761955976486206, "beta_dpo/beta_margin_grad_mean": -0.3430159091949463, "beta_dpo/beta_margin_grad_std": 0.2450447976589203, "beta_dpo/beta_margin_mean": 1.6685881614685059, "beta_dpo/beta_margin_std": 2.8676531314849854, "beta_dpo/beta_used": 0.0761955976486206, "beta_dpo/beta_used_raw": -0.042108893394470215, "beta_dpo/gap_mean": 19.153217315673828, "beta_dpo/gap_std": 24.542236328125, "beta_dpo/loss_margin_mean": 18.18233871459961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7014361300075586, "grad_norm": 31.882461547851562, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.417372465133667, "logits/rejected": 0.38535577058792114, "loss": 0.9379, "step": 464 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4959162771701813, "beta_dpo/beta_margin_grad_std": 0.006333097815513611, "beta_dpo/beta_margin_mean": 0.016338225454092026, "beta_dpo/beta_margin_std": 0.02533765509724617, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11223104596138, "beta_dpo/gap_mean": 18.943988800048828, "beta_dpo/gap_std": 24.53543472290039, "beta_dpo/loss_margin_mean": 16.338224411010742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7029478458049887, "grad_norm": 0.9496918320655823, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.5403270125389099, "logits/rejected": 0.47340455651283264, "loss": 1.3711, "step": 465 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4974312484264374, "beta_dpo/beta_margin_grad_std": 0.005646795034408569, "beta_dpo/beta_margin_mean": 0.010276666842401028, "beta_dpo/beta_margin_std": 0.02259155735373497, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3733450174331665, "beta_dpo/gap_mean": 17.61815071105957, "beta_dpo/gap_std": 24.498844146728516, "beta_dpo/loss_margin_mean": 10.276666641235352, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7044595616024187, "grad_norm": 0.8166243433952332, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 0.47566479444503784, "logits/rejected": 0.47687482833862305, "loss": 1.3767, "step": 466 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4960554540157318, "beta_dpo/beta_margin_grad_std": 0.005727602168917656, "beta_dpo/beta_margin_mean": 0.015781516209244728, "beta_dpo/beta_margin_std": 0.022918064147233963, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18097154796123505, "beta_dpo/gap_mean": 16.883132934570312, "beta_dpo/gap_std": 24.102888107299805, "beta_dpo/loss_margin_mean": 15.781516075134277, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7059712773998488, "grad_norm": 0.7626246213912964, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 0.4934718608856201, "logits/rejected": 0.3415928781032562, "loss": 1.3742, "step": 467 }, { "beta_dpo/beta": 0.22503986954689026, "beta_dpo/beta_margin_grad_mean": -0.2004920393228531, "beta_dpo/beta_margin_grad_std": 0.31278786063194275, "beta_dpo/beta_margin_mean": 5.361608505249023, "beta_dpo/beta_margin_std": 6.801639080047607, "beta_dpo/beta_used": 0.22503986954689026, "beta_dpo/beta_used_raw": 0.22503986954689026, "beta_dpo/gap_mean": 17.813079833984375, "beta_dpo/gap_std": 24.297039031982422, "beta_dpo/loss_margin_mean": 23.649864196777344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7074829931972789, "grad_norm": 142.0051727294922, "learning_rate": 1.204480113956011e-07, "logits/chosen": 0.42348283529281616, "logits/rejected": 0.4103195369243622, "loss": 0.7272, "step": 468 }, { "beta_dpo/beta": 0.3144519627094269, "beta_dpo/beta_margin_grad_mean": -0.3136805295944214, "beta_dpo/beta_margin_grad_std": 0.2636690139770508, "beta_dpo/beta_margin_mean": 8.632575988769531, "beta_dpo/beta_margin_std": 14.937712669372559, "beta_dpo/beta_used": 0.3144519627094269, "beta_dpo/beta_used_raw": 0.18009693920612335, "beta_dpo/gap_mean": 18.668554306030273, "beta_dpo/gap_std": 24.453214645385742, "beta_dpo/loss_margin_mean": 19.205217361450195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.708994708994709, "grad_norm": 179.1083221435547, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 0.5388230085372925, "logits/rejected": 0.5226658582687378, "loss": 0.8659, "step": 469 }, { "beta_dpo/beta": 0.18248410522937775, "beta_dpo/beta_margin_grad_mean": -0.2651711404323578, "beta_dpo/beta_margin_grad_std": 0.3218035399913788, "beta_dpo/beta_margin_mean": 3.4488892555236816, "beta_dpo/beta_margin_std": 5.133399486541748, "beta_dpo/beta_used": 0.18248410522937775, "beta_dpo/beta_used_raw": 0.18248410522937775, "beta_dpo/gap_mean": 18.15283966064453, "beta_dpo/gap_std": 24.360286712646484, "beta_dpo/loss_margin_mean": 18.03568458557129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7105064247921391, "grad_norm": 71.76032257080078, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.4892360270023346, "logits/rejected": 0.4123338460922241, "loss": 0.7933, "step": 470 }, { "beta_dpo/beta": 0.2866833209991455, "beta_dpo/beta_margin_grad_mean": -0.19581058621406555, "beta_dpo/beta_margin_grad_std": 0.3241235613822937, "beta_dpo/beta_margin_mean": 6.425544261932373, "beta_dpo/beta_margin_std": 7.508059501647949, "beta_dpo/beta_used": 0.2866833209991455, "beta_dpo/beta_used_raw": 0.2866833209991455, "beta_dpo/gap_mean": 18.825267791748047, "beta_dpo/gap_std": 24.537582397460938, "beta_dpo/loss_margin_mean": 22.44715118408203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7120181405895691, "grad_norm": 131.3478240966797, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 0.46292543411254883, "logits/rejected": 0.3984526991844177, "loss": 0.7009, "step": 471 }, { "beta_dpo/beta": 0.07434147596359253, "beta_dpo/beta_margin_grad_mean": -0.38497617840766907, "beta_dpo/beta_margin_grad_std": 0.2753157615661621, "beta_dpo/beta_margin_mean": 1.7048362493515015, "beta_dpo/beta_margin_std": 3.7578043937683105, "beta_dpo/beta_used": 0.07434147596359253, "beta_dpo/beta_used_raw": -0.11290460079908371, "beta_dpo/gap_mean": 18.724979400634766, "beta_dpo/gap_std": 25.190521240234375, "beta_dpo/loss_margin_mean": 18.318096160888672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7135298563869993, "grad_norm": 63.933082580566406, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 0.48472386598587036, "logits/rejected": 0.43443506956100464, "loss": 1.227, "step": 472 }, { "beta_dpo/beta": 0.00586696295067668, "beta_dpo/beta_margin_grad_mean": -0.4737027585506439, "beta_dpo/beta_margin_grad_std": 0.05226682126522064, "beta_dpo/beta_margin_mean": 0.10760366171598434, "beta_dpo/beta_margin_std": 0.21496045589447021, "beta_dpo/beta_used": 0.00586696295067668, "beta_dpo/beta_used_raw": 0.00586696295067668, "beta_dpo/gap_mean": 18.903789520263672, "beta_dpo/gap_std": 26.43364906311035, "beta_dpo/loss_margin_mean": 18.546789169311523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7150415721844293, "grad_norm": 5.026509761810303, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 0.5756895542144775, "logits/rejected": 0.4601627588272095, "loss": 1.2965, "step": 473 }, { "beta_dpo/beta": 0.06509552150964737, "beta_dpo/beta_margin_grad_mean": -0.33907976746559143, "beta_dpo/beta_margin_grad_std": 0.2729524075984955, "beta_dpo/beta_margin_mean": 1.1306818723678589, "beta_dpo/beta_margin_std": 1.8167601823806763, "beta_dpo/beta_used": 0.06509552150964737, "beta_dpo/beta_used_raw": 0.06509552150964737, "beta_dpo/gap_mean": 18.619977951049805, "beta_dpo/gap_std": 26.78903579711914, "beta_dpo/loss_margin_mean": 17.39017105102539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7165532879818595, "grad_norm": 44.54093551635742, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 0.516582727432251, "logits/rejected": 0.4398994445800781, "loss": 0.9809, "step": 474 }, { "beta_dpo/beta": 0.26542985439300537, "beta_dpo/beta_margin_grad_mean": -0.17872604727745056, "beta_dpo/beta_margin_grad_std": 0.28388726711273193, "beta_dpo/beta_margin_mean": 7.227434158325195, "beta_dpo/beta_margin_std": 8.638694763183594, "beta_dpo/beta_used": 0.26542985439300537, "beta_dpo/beta_used_raw": 0.26542985439300537, "beta_dpo/gap_mean": 19.758541107177734, "beta_dpo/gap_std": 26.653968811035156, "beta_dpo/loss_margin_mean": 25.69193458557129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7180650037792895, "grad_norm": 104.1346435546875, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.45388850569725037, "logits/rejected": 0.3974682092666626, "loss": 0.5859, "step": 475 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4959273040294647, "beta_dpo/beta_margin_grad_std": 0.006980855017900467, "beta_dpo/beta_margin_mean": 0.016294946894049644, "beta_dpo/beta_margin_std": 0.027931276708841324, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11508934199810028, "beta_dpo/gap_mean": 19.323762893676758, "beta_dpo/gap_std": 26.89731216430664, "beta_dpo/loss_margin_mean": 16.294946670532227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7195767195767195, "grad_norm": 0.8413127660751343, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 0.4007229208946228, "logits/rejected": 0.3899179995059967, "loss": 1.3708, "step": 476 }, { "beta_dpo/beta": 0.05996621027588844, "beta_dpo/beta_margin_grad_mean": -0.4041517376899719, "beta_dpo/beta_margin_grad_std": 0.2603454887866974, "beta_dpo/beta_margin_mean": 0.9991706609725952, "beta_dpo/beta_margin_std": 2.668644905090332, "beta_dpo/beta_used": 0.05996621027588844, "beta_dpo/beta_used_raw": -0.10893569886684418, "beta_dpo/gap_mean": 18.810359954833984, "beta_dpo/gap_std": 27.02542495727539, "beta_dpo/loss_margin_mean": 16.357866287231445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7210884353741497, "grad_norm": 59.0564079284668, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 0.4921807050704956, "logits/rejected": 0.44167813658714294, "loss": 1.2437, "step": 477 }, { "beta_dpo/beta": 0.058396078646183014, "beta_dpo/beta_margin_grad_mean": -0.4280129373073578, "beta_dpo/beta_margin_grad_std": 0.24085818231105804, "beta_dpo/beta_margin_mean": 0.7688745856285095, "beta_dpo/beta_margin_std": 2.358423948287964, "beta_dpo/beta_used": 0.058396078646183014, "beta_dpo/beta_used_raw": -0.015529513359069824, "beta_dpo/gap_mean": 18.31366729736328, "beta_dpo/gap_std": 27.302425384521484, "beta_dpo/loss_margin_mean": 15.358002662658691, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7226001511715797, "grad_norm": 29.725767135620117, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 0.3961327075958252, "logits/rejected": 0.39685729146003723, "loss": 1.128, "step": 478 }, { "beta_dpo/beta": 0.0391259491443634, "beta_dpo/beta_margin_grad_mean": -0.41009682416915894, "beta_dpo/beta_margin_grad_std": 0.1961776167154312, "beta_dpo/beta_margin_mean": 0.6494525074958801, "beta_dpo/beta_margin_std": 1.492968201637268, "beta_dpo/beta_used": 0.0391259491443634, "beta_dpo/beta_used_raw": -0.12356161326169968, "beta_dpo/gap_mean": 17.511415481567383, "beta_dpo/gap_std": 27.055574417114258, "beta_dpo/loss_margin_mean": 13.357870101928711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7241118669690099, "grad_norm": 30.614665985107422, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 0.37263572216033936, "logits/rejected": 0.31845974922180176, "loss": 1.1149, "step": 479 }, { "beta_dpo/beta": 0.29351285099983215, "beta_dpo/beta_margin_grad_mean": -0.24419361352920532, "beta_dpo/beta_margin_grad_std": 0.38002249598503113, "beta_dpo/beta_margin_mean": 6.007750511169434, "beta_dpo/beta_margin_std": 9.08305549621582, "beta_dpo/beta_used": 0.29351285099983215, "beta_dpo/beta_used_raw": 0.29351285099983215, "beta_dpo/gap_mean": 17.11019515991211, "beta_dpo/gap_std": 27.254867553710938, "beta_dpo/loss_margin_mean": 18.857349395751953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7256235827664399, "grad_norm": 171.7132110595703, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.3939332962036133, "logits/rejected": 0.37893810868263245, "loss": 1.2783, "step": 480 }, { "beta_dpo/beta": 0.19773970544338226, "beta_dpo/beta_margin_grad_mean": -0.24991479516029358, "beta_dpo/beta_margin_grad_std": 0.3319016098976135, "beta_dpo/beta_margin_mean": 4.109105587005615, "beta_dpo/beta_margin_std": 5.891486167907715, "beta_dpo/beta_used": 0.19773970544338226, "beta_dpo/beta_used_raw": 0.19773970544338226, "beta_dpo/gap_mean": 17.974807739257812, "beta_dpo/gap_std": 27.388599395751953, "beta_dpo/loss_margin_mean": 20.404041290283203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.72713529856387, "grad_norm": 70.31513977050781, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 0.37382426857948303, "logits/rejected": 0.36571288108825684, "loss": 0.7294, "step": 481 }, { "beta_dpo/beta": 0.0590001605451107, "beta_dpo/beta_margin_grad_mean": -0.42749252915382385, "beta_dpo/beta_margin_grad_std": 0.26465827226638794, "beta_dpo/beta_margin_mean": 0.7799001336097717, "beta_dpo/beta_margin_std": 3.059889078140259, "beta_dpo/beta_used": 0.0590001605451107, "beta_dpo/beta_used_raw": -0.014395486563444138, "beta_dpo/gap_mean": 17.67986297607422, "beta_dpo/gap_std": 28.380046844482422, "beta_dpo/loss_margin_mean": 16.58453941345215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7286470143613001, "grad_norm": 52.24790954589844, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 0.4137299656867981, "logits/rejected": 0.26786112785339355, "loss": 1.1551, "step": 482 }, { "beta_dpo/beta": 0.09157513827085495, "beta_dpo/beta_margin_grad_mean": -0.3851158022880554, "beta_dpo/beta_margin_grad_std": 0.2592366337776184, "beta_dpo/beta_margin_mean": 1.9007309675216675, "beta_dpo/beta_margin_std": 3.915107250213623, "beta_dpo/beta_used": 0.09157513827085495, "beta_dpo/beta_used_raw": 0.06725367903709412, "beta_dpo/gap_mean": 18.100704193115234, "beta_dpo/gap_std": 28.369171142578125, "beta_dpo/loss_margin_mean": 20.073625564575195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7301587301587301, "grad_norm": 72.34899139404297, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 0.5550791025161743, "logits/rejected": 0.44513243436813354, "loss": 1.1065, "step": 483 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49605247378349304, "beta_dpo/beta_margin_grad_std": 0.005534137133508921, "beta_dpo/beta_margin_mean": 0.015792880207300186, "beta_dpo/beta_margin_std": 0.022141609340906143, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11549272388219833, "beta_dpo/gap_mean": 17.736948013305664, "beta_dpo/gap_std": 27.42501449584961, "beta_dpo/loss_margin_mean": 15.792879104614258, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7316704459561603, "grad_norm": 0.9392448663711548, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 0.5212189555168152, "logits/rejected": 0.47851696610450745, "loss": 1.3723, "step": 484 }, { "beta_dpo/beta": 0.25393855571746826, "beta_dpo/beta_margin_grad_mean": -0.3070473074913025, "beta_dpo/beta_margin_grad_std": 0.2632943391799927, "beta_dpo/beta_margin_mean": 6.5717573165893555, "beta_dpo/beta_margin_std": 11.246562004089355, "beta_dpo/beta_used": 0.25393855571746826, "beta_dpo/beta_used_raw": 0.18657520413398743, "beta_dpo/gap_mean": 18.274065017700195, "beta_dpo/gap_std": 26.938018798828125, "beta_dpo/loss_margin_mean": 18.81686019897461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7331821617535903, "grad_norm": 198.32473754882812, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.4765240252017975, "logits/rejected": 0.4477100670337677, "loss": 1.1238, "step": 485 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4957917332649231, "beta_dpo/beta_margin_grad_std": 0.007428720127791166, "beta_dpo/beta_margin_mean": 0.016838233917951584, "beta_dpo/beta_margin_std": 0.029725831001996994, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1067897230386734, "beta_dpo/gap_mean": 17.730863571166992, "beta_dpo/gap_std": 27.222911834716797, "beta_dpo/loss_margin_mean": 16.838232040405273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7346938775510204, "grad_norm": 0.756230354309082, "learning_rate": 1.007103520743035e-07, "logits/chosen": 0.5120325088500977, "logits/rejected": 0.3889530897140503, "loss": 1.3722, "step": 486 }, { "beta_dpo/beta": 0.026900505647063255, "beta_dpo/beta_margin_grad_mean": -0.4126649498939514, "beta_dpo/beta_margin_grad_std": 0.15782445669174194, "beta_dpo/beta_margin_mean": 0.46885383129119873, "beta_dpo/beta_margin_std": 0.9157174825668335, "beta_dpo/beta_used": 0.026900505647063255, "beta_dpo/beta_used_raw": -0.053003422915935516, "beta_dpo/gap_mean": 17.832361221313477, "beta_dpo/gap_std": 27.04258155822754, "beta_dpo/loss_margin_mean": 18.905290603637695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7362055933484505, "grad_norm": 22.816574096679688, "learning_rate": 9.965186236464046e-08, "logits/chosen": 0.5597760677337646, "logits/rejected": 0.49492278695106506, "loss": 1.0969, "step": 487 }, { "beta_dpo/beta": 0.2829245328903198, "beta_dpo/beta_margin_grad_mean": -0.2355988472700119, "beta_dpo/beta_margin_grad_std": 0.29733532667160034, "beta_dpo/beta_margin_mean": 7.418085098266602, "beta_dpo/beta_margin_std": 11.626276969909668, "beta_dpo/beta_used": 0.2829245328903198, "beta_dpo/beta_used_raw": 0.2829245328903198, "beta_dpo/gap_mean": 18.307497024536133, "beta_dpo/gap_std": 27.187271118164062, "beta_dpo/loss_margin_mean": 22.07598114013672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7377173091458806, "grad_norm": 168.8614959716797, "learning_rate": 9.859757821558337e-08, "logits/chosen": 0.4678114354610443, "logits/rejected": 0.392938494682312, "loss": 1.3132, "step": 488 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4974829852581024, "beta_dpo/beta_margin_grad_std": 0.006341965869069099, "beta_dpo/beta_margin_mean": 0.010070238262414932, "beta_dpo/beta_margin_std": 0.025372974574565887, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.29972267150878906, "beta_dpo/gap_mean": 17.63011932373047, "beta_dpo/gap_std": 27.038352966308594, "beta_dpo/loss_margin_mean": 10.070237159729004, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7392290249433107, "grad_norm": 0.797245442867279, "learning_rate": 9.754752911772615e-08, "logits/chosen": 0.47051212191581726, "logits/rejected": 0.42137840390205383, "loss": 1.3755, "step": 489 }, { "beta_dpo/beta": 0.33493655920028687, "beta_dpo/beta_margin_grad_mean": -0.34081602096557617, "beta_dpo/beta_margin_grad_std": 0.3390964865684509, "beta_dpo/beta_margin_mean": 5.45980167388916, "beta_dpo/beta_margin_std": 16.640560150146484, "beta_dpo/beta_used": 0.33493655920028687, "beta_dpo/beta_used_raw": 0.33493655920028687, "beta_dpo/gap_mean": 16.79808807373047, "beta_dpo/gap_std": 27.53335189819336, "beta_dpo/loss_margin_mean": 15.238967895507812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7407407407407407, "grad_norm": 157.81983947753906, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.5644317865371704, "logits/rejected": 0.5341103672981262, "loss": 1.108, "step": 490 }, { "beta_dpo/beta": 0.153602734208107, "beta_dpo/beta_margin_grad_mean": -0.3420948386192322, "beta_dpo/beta_margin_grad_std": 0.27578026056289673, "beta_dpo/beta_margin_mean": 3.6059417724609375, "beta_dpo/beta_margin_std": 6.100773811340332, "beta_dpo/beta_used": 0.153602734208107, "beta_dpo/beta_used_raw": 0.06139887124300003, "beta_dpo/gap_mean": 17.124771118164062, "beta_dpo/gap_std": 27.31386375427246, "beta_dpo/loss_margin_mean": 17.191701889038086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7422524565381708, "grad_norm": 97.61920928955078, "learning_rate": 9.546025344484868e-08, "logits/chosen": 0.4223695993423462, "logits/rejected": 0.36396944522857666, "loss": 1.275, "step": 491 }, { "beta_dpo/beta": 0.11434933543205261, "beta_dpo/beta_margin_grad_mean": -0.37879207730293274, "beta_dpo/beta_margin_grad_std": 0.2923412024974823, "beta_dpo/beta_margin_mean": 2.2623884677886963, "beta_dpo/beta_margin_std": 4.472981929779053, "beta_dpo/beta_used": 0.11434933543205261, "beta_dpo/beta_used_raw": -0.050521112978458405, "beta_dpo/gap_mean": 16.184886932373047, "beta_dpo/gap_std": 27.43224334716797, "beta_dpo/loss_margin_mean": 14.301737785339355, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7437641723356009, "grad_norm": 91.39317321777344, "learning_rate": 9.442308525541589e-08, "logits/chosen": 0.42874184250831604, "logits/rejected": 0.3520268201828003, "loss": 1.368, "step": 492 }, { "beta_dpo/beta": 0.19871431589126587, "beta_dpo/beta_margin_grad_mean": -0.23222456872463226, "beta_dpo/beta_margin_grad_std": 0.2830079197883606, "beta_dpo/beta_margin_mean": 4.435938358306885, "beta_dpo/beta_margin_std": 7.169334888458252, "beta_dpo/beta_used": 0.19871431589126587, "beta_dpo/beta_used_raw": 0.19871431589126587, "beta_dpo/gap_mean": 16.834693908691406, "beta_dpo/gap_std": 27.216405868530273, "beta_dpo/loss_margin_mean": 20.395715713500977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.745275888133031, "grad_norm": 120.1291732788086, "learning_rate": 9.339026888672468e-08, "logits/chosen": 0.42790913581848145, "logits/rejected": 0.34174275398254395, "loss": 0.74, "step": 493 }, { "beta_dpo/beta": 0.04424438625574112, "beta_dpo/beta_margin_grad_mean": -0.3808707594871521, "beta_dpo/beta_margin_grad_std": 0.22228094935417175, "beta_dpo/beta_margin_mean": 0.6535773277282715, "beta_dpo/beta_margin_std": 1.5111050605773926, "beta_dpo/beta_used": 0.04424438625574112, "beta_dpo/beta_used_raw": 0.04424438625574112, "beta_dpo/gap_mean": 17.077911376953125, "beta_dpo/gap_std": 27.674633026123047, "beta_dpo/loss_margin_mean": 15.817744255065918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7467876039304611, "grad_norm": 32.58065414428711, "learning_rate": 9.236183322886945e-08, "logits/chosen": 0.3619958758354187, "logits/rejected": 0.3028886318206787, "loss": 0.9887, "step": 494 }, { "beta_dpo/beta": 0.12880435585975647, "beta_dpo/beta_margin_grad_mean": -0.3734891712665558, "beta_dpo/beta_margin_grad_std": 0.2588602602481842, "beta_dpo/beta_margin_mean": 2.2595906257629395, "beta_dpo/beta_margin_std": 5.230336666107178, "beta_dpo/beta_used": 0.12880435585975647, "beta_dpo/beta_used_raw": -0.13839080929756165, "beta_dpo/gap_mean": 16.387245178222656, "beta_dpo/gap_std": 27.691650390625, "beta_dpo/loss_margin_mean": 14.412410736083984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7482993197278912, "grad_norm": 99.71451568603516, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.4950551986694336, "logits/rejected": 0.4249255061149597, "loss": 1.0656, "step": 495 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49576571583747864, "beta_dpo/beta_margin_grad_std": 0.007818019017577171, "beta_dpo/beta_margin_mean": 0.01694556325674057, "beta_dpo/beta_margin_std": 0.0312989242374897, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1241304874420166, "beta_dpo/gap_mean": 16.68515968322754, "beta_dpo/gap_std": 28.40119171142578, "beta_dpo/loss_margin_mean": 16.9455623626709, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7498110355253212, "grad_norm": 0.8398783802986145, "learning_rate": 9.031821899254797e-08, "logits/chosen": 0.4990454316139221, "logits/rejected": 0.3839772939682007, "loss": 1.3735, "step": 496 }, { "beta_dpo/beta": 0.19726786017417908, "beta_dpo/beta_margin_grad_mean": -0.34216219186782837, "beta_dpo/beta_margin_grad_std": 0.28134962916374207, "beta_dpo/beta_margin_mean": 4.7668657302856445, "beta_dpo/beta_margin_std": 9.81608772277832, "beta_dpo/beta_used": 0.19726786017417908, "beta_dpo/beta_used_raw": 0.19726786017417908, "beta_dpo/gap_mean": 17.335676193237305, "beta_dpo/gap_std": 28.46596908569336, "beta_dpo/loss_margin_mean": 21.315322875976562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7513227513227513, "grad_norm": 126.32473754882812, "learning_rate": 8.930309757836516e-08, "logits/chosen": 0.4910498857498169, "logits/rejected": 0.45661675930023193, "loss": 1.6257, "step": 497 }, { "beta_dpo/beta": 0.1522623747587204, "beta_dpo/beta_margin_grad_mean": -0.2758595049381256, "beta_dpo/beta_margin_grad_std": 0.29086124897003174, "beta_dpo/beta_margin_mean": 3.130350112915039, "beta_dpo/beta_margin_std": 5.023598670959473, "beta_dpo/beta_used": 0.1522623747587204, "beta_dpo/beta_used_raw": 0.1522623747587204, "beta_dpo/gap_mean": 17.873306274414062, "beta_dpo/gap_std": 28.321250915527344, "beta_dpo/loss_margin_mean": 20.654321670532227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7528344671201814, "grad_norm": 66.43721771240234, "learning_rate": 8.829247120198563e-08, "logits/chosen": 0.47693729400634766, "logits/rejected": 0.43756186962127686, "loss": 0.7606, "step": 498 }, { "beta_dpo/beta": 0.15835049748420715, "beta_dpo/beta_margin_grad_mean": -0.39443886280059814, "beta_dpo/beta_margin_grad_std": 0.31629934906959534, "beta_dpo/beta_margin_mean": 3.0014355182647705, "beta_dpo/beta_margin_std": 8.597198486328125, "beta_dpo/beta_used": 0.15835049748420715, "beta_dpo/beta_used_raw": 0.027399331331253052, "beta_dpo/gap_mean": 17.992496490478516, "beta_dpo/gap_std": 29.04220199584961, "beta_dpo/loss_margin_mean": 17.409122467041016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7543461829176115, "grad_norm": 124.17149353027344, "learning_rate": 8.728636813280163e-08, "logits/chosen": 0.47000688314437866, "logits/rejected": 0.398383766412735, "loss": 1.6238, "step": 499 }, { "beta_dpo/beta": 0.24701504409313202, "beta_dpo/beta_margin_grad_mean": -0.2595021426677704, "beta_dpo/beta_margin_grad_std": 0.348397821187973, "beta_dpo/beta_margin_mean": 4.565580368041992, "beta_dpo/beta_margin_std": 8.080002784729004, "beta_dpo/beta_used": 0.24701504409313202, "beta_dpo/beta_used_raw": 0.24701504409313202, "beta_dpo/gap_mean": 17.846397399902344, "beta_dpo/gap_std": 28.710865020751953, "beta_dpo/loss_margin_mean": 17.975915908813477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7558578987150416, "grad_norm": 142.9105224609375, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.384491503238678, "logits/rejected": 0.3731864094734192, "loss": 1.1911, "step": 500 }, { "epoch": 0.7558578987150416, "eval_beta_dpo/beta": 0.09361638128757477, "eval_beta_dpo/beta_margin_grad_mean": -0.38664332032203674, "eval_beta_dpo/beta_margin_grad_std": 0.14713595807552338, "eval_beta_dpo/beta_margin_mean": 1.9619940519332886, "eval_beta_dpo/beta_margin_std": 2.572732925415039, "eval_beta_dpo/beta_used": 0.09361638128757477, "eval_beta_dpo/beta_used_raw": -0.011094304732978344, "eval_beta_dpo/gap_mean": 17.90873146057129, "eval_beta_dpo/gap_std": 28.716110229492188, "eval_beta_dpo/loss_margin_mean": 16.057159423828125, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 0.502650797367096, "eval_logits/rejected": 0.44902658462524414, "eval_loss": 0.6888377070426941, "eval_runtime": 38.9533, "eval_samples_per_second": 59.122, "eval_steps_per_second": 1.848, "step": 500 }, { "beta_dpo/beta": 0.06464043259620667, "beta_dpo/beta_margin_grad_mean": -0.37159132957458496, "beta_dpo/beta_margin_grad_std": 0.2065133899450302, "beta_dpo/beta_margin_mean": 1.2337942123413086, "beta_dpo/beta_margin_std": 2.555989980697632, "beta_dpo/beta_used": 0.06464043259620667, "beta_dpo/beta_used_raw": 0.046276845037937164, "beta_dpo/gap_mean": 17.642791748046875, "beta_dpo/gap_std": 27.331443786621094, "beta_dpo/loss_margin_mean": 16.796661376953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7573696145124716, "grad_norm": 34.865447998046875, "learning_rate": 8.528784436016878e-08, "logits/chosen": 0.462974488735199, "logits/rejected": 0.45432478189468384, "loss": 1.0036, "step": 501 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4961468577384949, "beta_dpo/beta_margin_grad_std": 0.005573183763772249, "beta_dpo/beta_margin_mean": 0.015415522269904613, "beta_dpo/beta_margin_std": 0.02229905314743519, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06323258578777313, "beta_dpo/gap_mean": 17.295621871948242, "beta_dpo/gap_std": 26.546051025390625, "beta_dpo/loss_margin_mean": 15.415521621704102, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7588813303099018, "grad_norm": 1.0474236011505127, "learning_rate": 8.4295479559726e-08, "logits/chosen": 0.4636223614215851, "logits/rejected": 0.41382068395614624, "loss": 1.3718, "step": 502 }, { "beta_dpo/beta": 0.22760546207427979, "beta_dpo/beta_margin_grad_mean": -0.30316370725631714, "beta_dpo/beta_margin_grad_std": 0.31399786472320557, "beta_dpo/beta_margin_mean": 4.0226945877075195, "beta_dpo/beta_margin_std": 9.152859687805176, "beta_dpo/beta_used": 0.22760546207427979, "beta_dpo/beta_used_raw": 0.22760546207427979, "beta_dpo/gap_mean": 17.311302185058594, "beta_dpo/gap_std": 26.70954132080078, "beta_dpo/loss_margin_mean": 17.106420516967773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7603930461073318, "grad_norm": 170.26931762695312, "learning_rate": 8.330774987092712e-08, "logits/chosen": 0.474370539188385, "logits/rejected": 0.4603527784347534, "loss": 1.6266, "step": 503 }, { "beta_dpo/beta": 0.14955301582813263, "beta_dpo/beta_margin_grad_mean": -0.2983316481113434, "beta_dpo/beta_margin_grad_std": 0.24636363983154297, "beta_dpo/beta_margin_mean": 4.377803325653076, "beta_dpo/beta_margin_std": 6.885531425476074, "beta_dpo/beta_used": 0.14955301582813263, "beta_dpo/beta_used_raw": 0.1396861970424652, "beta_dpo/gap_mean": 17.946958541870117, "beta_dpo/gap_std": 26.08230209350586, "beta_dpo/loss_margin_mean": 23.741315841674805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7619047619047619, "grad_norm": 36.053138732910156, "learning_rate": 8.232468292269479e-08, "logits/chosen": 0.42812907695770264, "logits/rejected": 0.39763376116752625, "loss": 0.8169, "step": 504 }, { "beta_dpo/beta": 0.1439221203327179, "beta_dpo/beta_margin_grad_mean": -0.3790625035762787, "beta_dpo/beta_margin_grad_std": 0.2830526828765869, "beta_dpo/beta_margin_mean": 3.2106552124023438, "beta_dpo/beta_margin_std": 7.093870639801025, "beta_dpo/beta_used": 0.1439221203327179, "beta_dpo/beta_used_raw": 0.010216429829597473, "beta_dpo/gap_mean": 18.527366638183594, "beta_dpo/gap_std": 26.727075576782227, "beta_dpo/loss_margin_mean": 16.65737533569336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.763416477702192, "grad_norm": 104.89347839355469, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.46603691577911377, "logits/rejected": 0.42604219913482666, "loss": 1.4932, "step": 505 }, { "beta_dpo/beta": 0.14408646523952484, "beta_dpo/beta_margin_grad_mean": -0.38642263412475586, "beta_dpo/beta_margin_grad_std": 0.2999417781829834, "beta_dpo/beta_margin_mean": 2.622800588607788, "beta_dpo/beta_margin_std": 6.82088565826416, "beta_dpo/beta_used": 0.14408646523952484, "beta_dpo/beta_used_raw": 0.141511932015419, "beta_dpo/gap_mean": 17.85563850402832, "beta_dpo/gap_std": 27.261791229248047, "beta_dpo/loss_margin_mean": 15.349295616149902, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.764928193499622, "grad_norm": 146.69357299804688, "learning_rate": 8.037264711071698e-08, "logits/chosen": 0.5133321285247803, "logits/rejected": 0.4881846308708191, "loss": 2.0045, "step": 506 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4952058494091034, "beta_dpo/beta_margin_grad_std": 0.006824954878538847, "beta_dpo/beta_margin_mean": 0.019181732088327408, "beta_dpo/beta_margin_std": 0.027308695018291473, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14111196994781494, "beta_dpo/gap_mean": 17.88521957397461, "beta_dpo/gap_std": 27.419790267944336, "beta_dpo/loss_margin_mean": 19.181730270385742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7664399092970522, "grad_norm": 0.7970109581947327, "learning_rate": 7.940373284960933e-08, "logits/chosen": 0.4557326138019562, "logits/rejected": 0.40037935972213745, "loss": 1.3726, "step": 507 }, { "beta_dpo/beta": 0.27150821685791016, "beta_dpo/beta_margin_grad_mean": -0.3548518717288971, "beta_dpo/beta_margin_grad_std": 0.2952079772949219, "beta_dpo/beta_margin_mean": 6.855192184448242, "beta_dpo/beta_margin_std": 12.318726539611816, "beta_dpo/beta_used": 0.27150821685791016, "beta_dpo/beta_used_raw": 0.22873634099960327, "beta_dpo/gap_mean": 18.09201431274414, "beta_dpo/gap_std": 27.218570709228516, "beta_dpo/loss_margin_mean": 20.832242965698242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7679516250944822, "grad_norm": 206.41529846191406, "learning_rate": 7.843959053281663e-08, "logits/chosen": 0.43897074460983276, "logits/rejected": 0.3105481266975403, "loss": 1.5294, "step": 508 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49585264921188354, "beta_dpo/beta_margin_grad_std": 0.006084589287638664, "beta_dpo/beta_margin_mean": 0.01659206487238407, "beta_dpo/beta_margin_std": 0.02434265986084938, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.05793162062764168, "beta_dpo/gap_mean": 18.304162979125977, "beta_dpo/gap_std": 26.860179901123047, "beta_dpo/loss_margin_mean": 16.592063903808594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7694633408919124, "grad_norm": 0.9970650672912598, "learning_rate": 7.748024712947204e-08, "logits/chosen": 0.3953160345554352, "logits/rejected": 0.368834912776947, "loss": 1.3708, "step": 509 }, { "beta_dpo/beta": 0.02194332703948021, "beta_dpo/beta_margin_grad_mean": -0.41290098428726196, "beta_dpo/beta_margin_grad_std": 0.17370301485061646, "beta_dpo/beta_margin_mean": 0.48397937417030334, "beta_dpo/beta_margin_std": 1.0054277181625366, "beta_dpo/beta_used": 0.02194332703948021, "beta_dpo/beta_used_raw": -0.08349817991256714, "beta_dpo/gap_mean": 18.485971450805664, "beta_dpo/gap_std": 27.07589340209961, "beta_dpo/loss_margin_mean": 20.233915328979492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7709750566893424, "grad_norm": 20.405038833618164, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.5540110468864441, "logits/rejected": 0.4546312093734741, "loss": 1.1753, "step": 510 }, { "beta_dpo/beta": 0.0254156943410635, "beta_dpo/beta_margin_grad_mean": -0.4023872911930084, "beta_dpo/beta_margin_grad_std": 0.19661438465118408, "beta_dpo/beta_margin_mean": 0.6091973185539246, "beta_dpo/beta_margin_std": 1.233200192451477, "beta_dpo/beta_used": 0.0254156943410635, "beta_dpo/beta_used_raw": 0.0013822559267282486, "beta_dpo/gap_mean": 19.057842254638672, "beta_dpo/gap_std": 27.40789794921875, "beta_dpo/loss_margin_mean": 22.84357261657715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7724867724867724, "grad_norm": 21.664785385131836, "learning_rate": 7.557606426772961e-08, "logits/chosen": 0.47340700030326843, "logits/rejected": 0.4142376780509949, "loss": 1.1929, "step": 511 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4957908093929291, "beta_dpo/beta_margin_grad_std": 0.00741164106875658, "beta_dpo/beta_margin_mean": 0.0168411023914814, "beta_dpo/beta_margin_std": 0.02965443953871727, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09359487146139145, "beta_dpo/gap_mean": 18.986900329589844, "beta_dpo/gap_std": 27.84600257873535, "beta_dpo/loss_margin_mean": 16.841102600097656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7739984882842026, "grad_norm": 2.918966770172119, "learning_rate": 7.463127807341966e-08, "logits/chosen": 0.31771016120910645, "logits/rejected": 0.3009248971939087, "loss": 1.3707, "step": 512 }, { "beta_dpo/beta": 0.08458594977855682, "beta_dpo/beta_margin_grad_mean": -0.3096770644187927, "beta_dpo/beta_margin_grad_std": 0.22452546656131744, "beta_dpo/beta_margin_mean": 1.4769704341888428, "beta_dpo/beta_margin_std": 2.3516552448272705, "beta_dpo/beta_used": 0.08458594977855682, "beta_dpo/beta_used_raw": 0.08458594977855682, "beta_dpo/gap_mean": 19.021957397460938, "beta_dpo/gap_std": 27.71468734741211, "beta_dpo/loss_margin_mean": 19.09242057800293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7755102040816326, "grad_norm": 34.026527404785156, "learning_rate": 7.369139731924401e-08, "logits/chosen": 0.5480648279190063, "logits/rejected": 0.4947201609611511, "loss": 0.7569, "step": 513 }, { "beta_dpo/beta": 0.41926461458206177, "beta_dpo/beta_margin_grad_mean": -0.21048571169376373, "beta_dpo/beta_margin_grad_std": 0.3752746284008026, "beta_dpo/beta_margin_mean": 9.245790481567383, "beta_dpo/beta_margin_std": 11.778825759887695, "beta_dpo/beta_used": 0.41926461458206177, "beta_dpo/beta_used_raw": 0.41926461458206177, "beta_dpo/gap_mean": 19.16301727294922, "beta_dpo/gap_std": 27.396949768066406, "beta_dpo/loss_margin_mean": 21.85741424560547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7770219198790628, "grad_norm": 178.23617553710938, "learning_rate": 7.275644829568747e-08, "logits/chosen": 0.5184712409973145, "logits/rejected": 0.4733770787715912, "loss": 1.0529, "step": 514 }, { "beta_dpo/beta": 0.03268057852983475, "beta_dpo/beta_margin_grad_mean": -0.3948572278022766, "beta_dpo/beta_margin_grad_std": 0.1987946331501007, "beta_dpo/beta_margin_mean": 0.6988779902458191, "beta_dpo/beta_margin_std": 1.3777862787246704, "beta_dpo/beta_used": 0.03268057852983475, "beta_dpo/beta_used_raw": 0.0016238931566476822, "beta_dpo/gap_mean": 19.57135772705078, "beta_dpo/gap_std": 27.504430770874023, "beta_dpo/loss_margin_mean": 19.71489143371582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7785336356764928, "grad_norm": 30.562400817871094, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.46997085213661194, "logits/rejected": 0.3870126008987427, "loss": 1.1548, "step": 515 }, { "beta_dpo/beta": 0.07201197743415833, "beta_dpo/beta_margin_grad_mean": -0.38167789578437805, "beta_dpo/beta_margin_grad_std": 0.2583937644958496, "beta_dpo/beta_margin_mean": 1.1529293060302734, "beta_dpo/beta_margin_std": 2.9729745388031006, "beta_dpo/beta_used": 0.07201197743415833, "beta_dpo/beta_used_raw": -0.1009015217423439, "beta_dpo/gap_mean": 18.779314041137695, "beta_dpo/gap_std": 27.408687591552734, "beta_dpo/loss_margin_mean": 14.936943054199219, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.780045351473923, "grad_norm": 26.425460815429688, "learning_rate": 7.090144991188568e-08, "logits/chosen": 0.41848045587539673, "logits/rejected": 0.3700318932533264, "loss": 0.9476, "step": 516 }, { "beta_dpo/beta": 0.015774274244904518, "beta_dpo/beta_margin_grad_mean": -0.44003695249557495, "beta_dpo/beta_margin_grad_std": 0.12989072501659393, "beta_dpo/beta_margin_mean": 0.2813131809234619, "beta_dpo/beta_margin_std": 0.6269446015357971, "beta_dpo/beta_used": 0.015774274244904518, "beta_dpo/beta_used_raw": -0.16760104894638062, "beta_dpo/gap_mean": 18.17236328125, "beta_dpo/gap_std": 27.5235652923584, "beta_dpo/loss_margin_mean": 16.041845321655273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.781557067271353, "grad_norm": 12.773390769958496, "learning_rate": 6.998145243993284e-08, "logits/chosen": 0.49850425124168396, "logits/rejected": 0.4892727732658386, "loss": 1.2018, "step": 517 }, { "beta_dpo/beta": 0.04643746092915535, "beta_dpo/beta_margin_grad_mean": -0.41171613335609436, "beta_dpo/beta_margin_grad_std": 0.23207998275756836, "beta_dpo/beta_margin_mean": 0.7455624341964722, "beta_dpo/beta_margin_std": 1.841551423072815, "beta_dpo/beta_used": 0.04643746092915535, "beta_dpo/beta_used_raw": -0.065179243683815, "beta_dpo/gap_mean": 17.777130126953125, "beta_dpo/gap_std": 27.24643325805664, "beta_dpo/loss_margin_mean": 15.644393920898438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.783068783068783, "grad_norm": 27.985763549804688, "learning_rate": 6.906649047373245e-08, "logits/chosen": 0.47469550371170044, "logits/rejected": 0.4198811650276184, "loss": 1.1144, "step": 518 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4969859719276428, "beta_dpo/beta_margin_grad_std": 0.007910608313977718, "beta_dpo/beta_margin_mean": 0.012059678323566914, "beta_dpo/beta_margin_std": 0.03165047988295555, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.29590263962745667, "beta_dpo/gap_mean": 16.789592742919922, "beta_dpo/gap_std": 28.03281593322754, "beta_dpo/loss_margin_mean": 12.059678077697754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7845804988662132, "grad_norm": 0.7758278250694275, "learning_rate": 6.815658960673781e-08, "logits/chosen": 0.5089113116264343, "logits/rejected": 0.4466610848903656, "loss": 1.3763, "step": 519 }, { "beta_dpo/beta": 0.0470229834318161, "beta_dpo/beta_margin_grad_mean": -0.3845081925392151, "beta_dpo/beta_margin_grad_std": 0.20655560493469238, "beta_dpo/beta_margin_mean": 0.8753683567047119, "beta_dpo/beta_margin_std": 1.7259058952331543, "beta_dpo/beta_used": 0.0470229834318161, "beta_dpo/beta_used_raw": 0.0470229834318161, "beta_dpo/gap_mean": 16.70362091064453, "beta_dpo/gap_std": 27.71854019165039, "beta_dpo/loss_margin_mean": 16.06646156311035, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7860922146636432, "grad_norm": 33.82258987426758, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.537433385848999, "logits/rejected": 0.47597891092300415, "loss": 1.0647, "step": 520 }, { "beta_dpo/beta": 0.1929291933774948, "beta_dpo/beta_margin_grad_mean": -0.24177253246307373, "beta_dpo/beta_margin_grad_std": 0.31293728947639465, "beta_dpo/beta_margin_mean": 3.521935224533081, "beta_dpo/beta_margin_std": 5.269829273223877, "beta_dpo/beta_used": 0.1929291933774948, "beta_dpo/beta_used_raw": 0.1929291933774948, "beta_dpo/gap_mean": 16.720731735229492, "beta_dpo/gap_std": 27.38351058959961, "beta_dpo/loss_margin_mean": 18.661502838134766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7876039304610734, "grad_norm": 139.29025268554688, "learning_rate": 6.63520728356167e-08, "logits/chosen": 0.3622036278247833, "logits/rejected": 0.27943283319473267, "loss": 0.8118, "step": 521 }, { "beta_dpo/beta": 0.12439863383769989, "beta_dpo/beta_margin_grad_mean": -0.31525975465774536, "beta_dpo/beta_margin_grad_std": 0.33538639545440674, "beta_dpo/beta_margin_mean": 1.9442089796066284, "beta_dpo/beta_margin_std": 4.323269844055176, "beta_dpo/beta_used": 0.12439863383769989, "beta_dpo/beta_used_raw": 0.12439863383769989, "beta_dpo/gap_mean": 16.79776382446289, "beta_dpo/gap_std": 28.118989944458008, "beta_dpo/loss_margin_mean": 15.861137390136719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7891156462585034, "grad_norm": 103.43621826171875, "learning_rate": 6.545750740770336e-08, "logits/chosen": 0.4541332721710205, "logits/rejected": 0.44197672605514526, "loss": 1.3581, "step": 522 }, { "beta_dpo/beta": 0.36119723320007324, "beta_dpo/beta_margin_grad_mean": -0.2371506243944168, "beta_dpo/beta_margin_grad_std": 0.37638023495674133, "beta_dpo/beta_margin_mean": 7.449059009552002, "beta_dpo/beta_margin_std": 13.321476936340332, "beta_dpo/beta_used": 0.36119723320007324, "beta_dpo/beta_used_raw": 0.36119723320007324, "beta_dpo/gap_mean": 17.18457794189453, "beta_dpo/gap_std": 28.612350463867188, "beta_dpo/loss_margin_mean": 18.937599182128906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7906273620559335, "grad_norm": 231.09762573242188, "learning_rate": 6.456810403001012e-08, "logits/chosen": 0.49882420897483826, "logits/rejected": 0.3736770749092102, "loss": 1.9876, "step": 523 }, { "beta_dpo/beta": 0.06380782276391983, "beta_dpo/beta_margin_grad_mean": -0.4031929075717926, "beta_dpo/beta_margin_grad_std": 0.22261260449886322, "beta_dpo/beta_margin_mean": 0.940835177898407, "beta_dpo/beta_margin_std": 2.2211990356445312, "beta_dpo/beta_used": 0.06380782276391983, "beta_dpo/beta_used_raw": -0.033944085240364075, "beta_dpo/gap_mean": 16.666719436645508, "beta_dpo/gap_std": 28.064117431640625, "beta_dpo/loss_margin_mean": 14.2322416305542, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7921390778533636, "grad_norm": 42.84857177734375, "learning_rate": 6.368388758106134e-08, "logits/chosen": 0.3853232264518738, "logits/rejected": 0.3610796332359314, "loss": 1.0716, "step": 524 }, { "beta_dpo/beta": 0.054753709584474564, "beta_dpo/beta_margin_grad_mean": -0.40546008944511414, "beta_dpo/beta_margin_grad_std": 0.2202645093202591, "beta_dpo/beta_margin_mean": 0.6783682107925415, "beta_dpo/beta_margin_std": 1.8368420600891113, "beta_dpo/beta_used": 0.054753709584474564, "beta_dpo/beta_used_raw": -0.0017726905643939972, "beta_dpo/gap_mean": 16.028484344482422, "beta_dpo/gap_std": 27.208412170410156, "beta_dpo/loss_margin_mean": 12.746237754821777, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7936507936507936, "grad_norm": 24.4576473236084, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.29569315910339355, "logits/rejected": 0.28484293818473816, "loss": 1.017, "step": 525 }, { "beta_dpo/beta": 0.1961859166622162, "beta_dpo/beta_margin_grad_mean": -0.3545135259628296, "beta_dpo/beta_margin_grad_std": 0.2908618450164795, "beta_dpo/beta_margin_mean": 4.208421230316162, "beta_dpo/beta_margin_std": 7.948729991912842, "beta_dpo/beta_used": 0.1961859166622162, "beta_dpo/beta_used_raw": 0.06960143148899078, "beta_dpo/gap_mean": 15.876822471618652, "beta_dpo/gap_std": 26.591964721679688, "beta_dpo/loss_margin_mean": 13.694323539733887, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7951625094482238, "grad_norm": 117.64985656738281, "learning_rate": 6.193111425735515e-08, "logits/chosen": 0.45245057344436646, "logits/rejected": 0.38413476943969727, "loss": 1.1558, "step": 526 }, { "beta_dpo/beta": 0.005362469702959061, "beta_dpo/beta_margin_grad_mean": -0.4843152165412903, "beta_dpo/beta_margin_grad_std": 0.0441654697060585, "beta_dpo/beta_margin_mean": 0.06408263742923737, "beta_dpo/beta_margin_std": 0.18112891912460327, "beta_dpo/beta_used": 0.005362469702959061, "beta_dpo/beta_used_raw": -0.03283126652240753, "beta_dpo/gap_mean": 14.933441162109375, "beta_dpo/gap_std": 26.443832397460938, "beta_dpo/loss_margin_mean": 12.786906242370605, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7966742252456538, "grad_norm": 5.342923641204834, "learning_rate": 6.106260641143546e-08, "logits/chosen": 0.5494492650032043, "logits/rejected": 0.4718747138977051, "loss": 1.324, "step": 527 }, { "beta_dpo/beta": 0.18812526762485504, "beta_dpo/beta_margin_grad_mean": -0.3760398030281067, "beta_dpo/beta_margin_grad_std": 0.2963537275791168, "beta_dpo/beta_margin_mean": 3.0523579120635986, "beta_dpo/beta_margin_std": 9.375741958618164, "beta_dpo/beta_used": 0.18812526762485504, "beta_dpo/beta_used_raw": 0.13063597679138184, "beta_dpo/gap_mean": 14.724921226501465, "beta_dpo/gap_std": 27.01572608947754, "beta_dpo/loss_margin_mean": 12.761770248413086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7981859410430839, "grad_norm": 103.13839721679688, "learning_rate": 6.019938355056422e-08, "logits/chosen": 0.38711076974868774, "logits/rejected": 0.3005455732345581, "loss": 1.0302, "step": 528 }, { "beta_dpo/beta": 0.6986787915229797, "beta_dpo/beta_margin_grad_mean": -0.17930427193641663, "beta_dpo/beta_margin_grad_std": 0.34411507844924927, "beta_dpo/beta_margin_mean": 21.36497688293457, "beta_dpo/beta_margin_std": 25.568307876586914, "beta_dpo/beta_used": 0.6986787915229797, "beta_dpo/beta_used_raw": 0.6986787915229797, "beta_dpo/gap_mean": 16.64554214477539, "beta_dpo/gap_std": 26.962486267089844, "beta_dpo/loss_margin_mean": 28.03153419494629, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.799697656840514, "grad_norm": 356.9806823730469, "learning_rate": 5.934146982094049e-08, "logits/chosen": 0.34986603260040283, "logits/rejected": 0.290559321641922, "loss": 1.4516, "step": 529 }, { "beta_dpo/beta": 0.050965264439582825, "beta_dpo/beta_margin_grad_mean": -0.3662997782230377, "beta_dpo/beta_margin_grad_std": 0.20529140532016754, "beta_dpo/beta_margin_mean": 0.9562637209892273, "beta_dpo/beta_margin_std": 1.5845041275024414, "beta_dpo/beta_used": 0.050965264439582825, "beta_dpo/beta_used_raw": -0.019955798983573914, "beta_dpo/gap_mean": 16.90506362915039, "beta_dpo/gap_std": 26.45660400390625, "beta_dpo/loss_margin_mean": 17.067411422729492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8012093726379441, "grad_norm": 23.774160385131836, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.4809131622314453, "logits/rejected": 0.4357951581478119, "loss": 1.0238, "step": 530 }, { "beta_dpo/beta": 0.08633482456207275, "beta_dpo/beta_margin_grad_mean": -0.36818552017211914, "beta_dpo/beta_margin_grad_std": 0.23734231293201447, "beta_dpo/beta_margin_mean": 1.7818925380706787, "beta_dpo/beta_margin_std": 3.7052230834960938, "beta_dpo/beta_used": 0.08633482456207275, "beta_dpo/beta_used_raw": -0.010635964572429657, "beta_dpo/gap_mean": 16.305274963378906, "beta_dpo/gap_std": 26.19503402709961, "beta_dpo/loss_margin_mean": 14.202505111694336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8027210884353742, "grad_norm": 57.3766975402832, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 0.45231950283050537, "logits/rejected": 0.3825005292892456, "loss": 1.2027, "step": 531 }, { "beta_dpo/beta": 0.1528293490409851, "beta_dpo/beta_margin_grad_mean": -0.37715035676956177, "beta_dpo/beta_margin_grad_std": 0.29981446266174316, "beta_dpo/beta_margin_mean": 2.9251790046691895, "beta_dpo/beta_margin_std": 6.6156134605407715, "beta_dpo/beta_used": 0.1528293490409851, "beta_dpo/beta_used_raw": 0.14536309242248535, "beta_dpo/gap_mean": 16.91998291015625, "beta_dpo/gap_std": 26.47125244140625, "beta_dpo/loss_margin_mean": 19.16761589050293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8042328042328042, "grad_norm": 119.86915588378906, "learning_rate": 5.679982264990424e-08, "logits/chosen": 0.38349199295043945, "logits/rejected": 0.327578067779541, "loss": 1.8351, "step": 532 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4957379102706909, "beta_dpo/beta_margin_grad_std": 0.006807847414165735, "beta_dpo/beta_margin_mean": 0.01705196313560009, "beta_dpo/beta_margin_std": 0.0272390004247427, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1611635684967041, "beta_dpo/gap_mean": 17.01636505126953, "beta_dpo/gap_std": 26.619853973388672, "beta_dpo/loss_margin_mean": 17.05196189880371, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8057445200302343, "grad_norm": 0.8953373432159424, "learning_rate": 5.596338392706076e-08, "logits/chosen": 0.5655586123466492, "logits/rejected": 0.4968782067298889, "loss": 1.3738, "step": 533 }, { "beta_dpo/beta": 0.07555638998746872, "beta_dpo/beta_margin_grad_mean": -0.3829325735569, "beta_dpo/beta_margin_grad_std": 0.25800174474716187, "beta_dpo/beta_margin_mean": 1.4311376810073853, "beta_dpo/beta_margin_std": 3.2261085510253906, "beta_dpo/beta_used": 0.07555638998746872, "beta_dpo/beta_used_raw": 0.03285207971930504, "beta_dpo/gap_mean": 17.048686981201172, "beta_dpo/gap_std": 26.980222702026367, "beta_dpo/loss_margin_mean": 16.214319229125977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8072562358276644, "grad_norm": 61.75785827636719, "learning_rate": 5.513237282548033e-08, "logits/chosen": 0.47408878803253174, "logits/rejected": 0.42821216583251953, "loss": 1.3134, "step": 534 }, { "beta_dpo/beta": 0.019090402871370316, "beta_dpo/beta_margin_grad_mean": -0.4496954381465912, "beta_dpo/beta_margin_grad_std": 0.14462697505950928, "beta_dpo/beta_margin_mean": 0.26040640473365784, "beta_dpo/beta_margin_std": 0.7699604034423828, "beta_dpo/beta_used": 0.019090402871370316, "beta_dpo/beta_used_raw": -0.026568515226244926, "beta_dpo/gap_mean": 16.582523345947266, "beta_dpo/gap_std": 27.452091217041016, "beta_dpo/loss_margin_mean": 15.58816146850586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8087679516250945, "grad_norm": 17.136873245239258, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.361327588558197, "logits/rejected": 0.29467612504959106, "loss": 1.1952, "step": 535 }, { "beta_dpo/beta": 0.20863793790340424, "beta_dpo/beta_margin_grad_mean": -0.3422904312610626, "beta_dpo/beta_margin_grad_std": 0.2740715444087982, "beta_dpo/beta_margin_mean": 4.775197505950928, "beta_dpo/beta_margin_std": 8.743983268737793, "beta_dpo/beta_used": 0.20863793790340424, "beta_dpo/beta_used_raw": 0.1986754983663559, "beta_dpo/gap_mean": 17.35881805419922, "beta_dpo/gap_std": 27.202693939208984, "beta_dpo/loss_margin_mean": 21.026884078979492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8102796674225246, "grad_norm": 102.63251495361328, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 0.48901498317718506, "logits/rejected": 0.39689117670059204, "loss": 1.0097, "step": 536 }, { "beta_dpo/beta": 0.10196889936923981, "beta_dpo/beta_margin_grad_mean": -0.3624553084373474, "beta_dpo/beta_margin_grad_std": 0.2689007520675659, "beta_dpo/beta_margin_mean": 2.305483102798462, "beta_dpo/beta_margin_std": 4.311905860900879, "beta_dpo/beta_used": 0.10196889936923981, "beta_dpo/beta_used_raw": 0.002260163426399231, "beta_dpo/gap_mean": 17.75217056274414, "beta_dpo/gap_std": 26.952072143554688, "beta_dpo/loss_margin_mean": 18.10002326965332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8117913832199547, "grad_norm": 75.69073486328125, "learning_rate": 5.267213693697695e-08, "logits/chosen": 0.5690730810165405, "logits/rejected": 0.47435319423675537, "loss": 1.1746, "step": 537 }, { "beta_dpo/beta": 0.18487028777599335, "beta_dpo/beta_margin_grad_mean": -0.3477964401245117, "beta_dpo/beta_margin_grad_std": 0.28940126299858093, "beta_dpo/beta_margin_mean": 4.511460781097412, "beta_dpo/beta_margin_std": 8.299779891967773, "beta_dpo/beta_used": 0.18487028777599335, "beta_dpo/beta_used_raw": 0.040582671761512756, "beta_dpo/gap_mean": 18.115917205810547, "beta_dpo/gap_std": 27.02817153930664, "beta_dpo/loss_margin_mean": 22.240970611572266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8133030990173847, "grad_norm": 138.01597595214844, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 0.42150217294692993, "logits/rejected": 0.38494616746902466, "loss": 1.141, "step": 538 }, { "beta_dpo/beta": 0.09701749682426453, "beta_dpo/beta_margin_grad_mean": -0.36717933416366577, "beta_dpo/beta_margin_grad_std": 0.25104501843452454, "beta_dpo/beta_margin_mean": 1.9268014430999756, "beta_dpo/beta_margin_std": 3.884998321533203, "beta_dpo/beta_used": 0.09701749682426453, "beta_dpo/beta_used_raw": 0.020983532071113586, "beta_dpo/gap_mean": 18.433753967285156, "beta_dpo/gap_std": 26.86570167541504, "beta_dpo/loss_margin_mean": 17.664384841918945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8148148148148148, "grad_norm": 73.92316436767578, "learning_rate": 5.105953986729195e-08, "logits/chosen": 0.4441359043121338, "logits/rejected": 0.3619890809059143, "loss": 1.0482, "step": 539 }, { "beta_dpo/beta": 0.23082008957862854, "beta_dpo/beta_margin_grad_mean": -0.32545050978660583, "beta_dpo/beta_margin_grad_std": 0.28637760877609253, "beta_dpo/beta_margin_mean": 6.588077068328857, "beta_dpo/beta_margin_std": 11.03348159790039, "beta_dpo/beta_used": 0.23082008957862854, "beta_dpo/beta_used_raw": 0.16205689311027527, "beta_dpo/gap_mean": 18.548377990722656, "beta_dpo/gap_std": 26.964176177978516, "beta_dpo/loss_margin_mean": 22.14543342590332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8163265306122449, "grad_norm": 177.70672607421875, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.4785692095756531, "logits/rejected": 0.37851160764694214, "loss": 1.3294, "step": 540 }, { "beta_dpo/beta": 0.1964220106601715, "beta_dpo/beta_margin_grad_mean": -0.35370177030563354, "beta_dpo/beta_margin_grad_std": 0.28223419189453125, "beta_dpo/beta_margin_mean": 4.8944549560546875, "beta_dpo/beta_margin_std": 9.397936820983887, "beta_dpo/beta_used": 0.1964220106601715, "beta_dpo/beta_used_raw": -0.016093730926513672, "beta_dpo/gap_mean": 19.498260498046875, "beta_dpo/gap_std": 27.14907455444336, "beta_dpo/loss_margin_mean": 20.661378860473633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.817838246409675, "grad_norm": 104.91517639160156, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 0.48476243019104004, "logits/rejected": 0.49259325861930847, "loss": 1.1139, "step": 541 }, { "beta_dpo/beta": 0.14371059834957123, "beta_dpo/beta_margin_grad_mean": -0.24666696786880493, "beta_dpo/beta_margin_grad_std": 0.3141605257987976, "beta_dpo/beta_margin_mean": 3.1314268112182617, "beta_dpo/beta_margin_std": 4.227056980133057, "beta_dpo/beta_used": 0.14371059834957123, "beta_dpo/beta_used_raw": 0.14371059834957123, "beta_dpo/gap_mean": 19.874874114990234, "beta_dpo/gap_std": 27.419788360595703, "beta_dpo/loss_margin_mean": 22.236177444458008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8193499622071051, "grad_norm": 64.40750885009766, "learning_rate": 4.868243561723534e-08, "logits/chosen": 0.5121496915817261, "logits/rejected": 0.45453765988349915, "loss": 0.8592, "step": 542 }, { "beta_dpo/beta": 0.16332434117794037, "beta_dpo/beta_margin_grad_mean": -0.23017369210720062, "beta_dpo/beta_margin_grad_std": 0.2802915573120117, "beta_dpo/beta_margin_mean": 3.163569211959839, "beta_dpo/beta_margin_std": 4.976937294006348, "beta_dpo/beta_used": 0.16332434117794037, "beta_dpo/beta_used_raw": 0.16332434117794037, "beta_dpo/gap_mean": 19.70780372619629, "beta_dpo/gap_std": 27.284408569335938, "beta_dpo/loss_margin_mean": 19.093935012817383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8208616780045351, "grad_norm": 49.96735382080078, "learning_rate": 4.790130070827028e-08, "logits/chosen": 0.47974371910095215, "logits/rejected": 0.38592517375946045, "loss": 0.6643, "step": 543 }, { "beta_dpo/beta": 0.252260684967041, "beta_dpo/beta_margin_grad_mean": -0.19919231534004211, "beta_dpo/beta_margin_grad_std": 0.34733232855796814, "beta_dpo/beta_margin_mean": 6.332991600036621, "beta_dpo/beta_margin_std": 7.6051411628723145, "beta_dpo/beta_used": 0.252260684967041, "beta_dpo/beta_used_raw": 0.252260684967041, "beta_dpo/gap_mean": 20.517066955566406, "beta_dpo/gap_std": 27.914066314697266, "beta_dpo/loss_margin_mean": 25.072317123413086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8223733938019653, "grad_norm": 98.99584197998047, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 0.4560331702232361, "logits/rejected": 0.3775930404663086, "loss": 1.0511, "step": 544 }, { "beta_dpo/beta": 0.07587642967700958, "beta_dpo/beta_margin_grad_mean": -0.32201042771339417, "beta_dpo/beta_margin_grad_std": 0.2806906998157501, "beta_dpo/beta_margin_mean": 1.2491117715835571, "beta_dpo/beta_margin_std": 2.0175347328186035, "beta_dpo/beta_used": 0.07587642967700958, "beta_dpo/beta_used_raw": 0.07587642967700958, "beta_dpo/gap_mean": 20.09872055053711, "beta_dpo/gap_std": 27.952808380126953, "beta_dpo/loss_margin_mean": 16.42570686340332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8238851095993953, "grad_norm": 30.904129028320312, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.4076148271560669, "logits/rejected": 0.3476223945617676, "loss": 0.7492, "step": 545 }, { "beta_dpo/beta": 0.15581192076206207, "beta_dpo/beta_margin_grad_mean": -0.31065890192985535, "beta_dpo/beta_margin_grad_std": 0.3145395517349243, "beta_dpo/beta_margin_mean": 2.969562292098999, "beta_dpo/beta_margin_std": 5.391257286071777, "beta_dpo/beta_used": 0.15581192076206207, "beta_dpo/beta_used_raw": 0.15581192076206207, "beta_dpo/gap_mean": 19.73141860961914, "beta_dpo/gap_std": 27.47371482849121, "beta_dpo/loss_margin_mean": 18.355119705200195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8253968253968254, "grad_norm": 147.27969360351562, "learning_rate": 4.559190140057428e-08, "logits/chosen": 0.5145200490951538, "logits/rejected": 0.4989354610443115, "loss": 1.3293, "step": 546 }, { "beta_dpo/beta": 0.07221545279026031, "beta_dpo/beta_margin_grad_mean": -0.35358917713165283, "beta_dpo/beta_margin_grad_std": 0.242741659283638, "beta_dpo/beta_margin_mean": 1.2553969621658325, "beta_dpo/beta_margin_std": 2.621389150619507, "beta_dpo/beta_used": 0.07221545279026031, "beta_dpo/beta_used_raw": 0.014809608459472656, "beta_dpo/gap_mean": 19.883861541748047, "beta_dpo/gap_std": 27.571674346923828, "beta_dpo/loss_margin_mean": 20.501134872436523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8269085411942555, "grad_norm": 31.414352416992188, "learning_rate": 4.483350854765672e-08, "logits/chosen": 0.40707188844680786, "logits/rejected": 0.3357613682746887, "loss": 0.9218, "step": 547 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49660223722457886, "beta_dpo/beta_margin_grad_std": 0.006567038130015135, "beta_dpo/beta_margin_mean": 0.013594682328402996, "beta_dpo/beta_margin_std": 0.02627561055123806, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22177554666996002, "beta_dpo/gap_mean": 18.659589767456055, "beta_dpo/gap_std": 27.11603546142578, "beta_dpo/loss_margin_mean": 13.594681739807129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8284202569916855, "grad_norm": 0.8114597201347351, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 0.3467254936695099, "logits/rejected": 0.2756657302379608, "loss": 1.3732, "step": 548 }, { "beta_dpo/beta": 0.09689143300056458, "beta_dpo/beta_margin_grad_mean": -0.4047882854938507, "beta_dpo/beta_margin_grad_std": 0.2726931869983673, "beta_dpo/beta_margin_mean": 1.5041784048080444, "beta_dpo/beta_margin_std": 4.353418350219727, "beta_dpo/beta_used": 0.09689143300056458, "beta_dpo/beta_used_raw": 0.035542529076337814, "beta_dpo/gap_mean": 18.34221649169922, "beta_dpo/gap_std": 27.050926208496094, "beta_dpo/loss_margin_mean": 16.402610778808594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8299319727891157, "grad_norm": 81.26973724365234, "learning_rate": 4.333396073857723e-08, "logits/chosen": 0.5225358009338379, "logits/rejected": 0.45289480686187744, "loss": 1.3529, "step": 549 }, { "beta_dpo/beta": 0.07474076747894287, "beta_dpo/beta_margin_grad_mean": -0.3902982175350189, "beta_dpo/beta_margin_grad_std": 0.2482263296842575, "beta_dpo/beta_margin_mean": 1.402299404144287, "beta_dpo/beta_margin_std": 3.1106622219085693, "beta_dpo/beta_used": 0.07474076747894287, "beta_dpo/beta_used_raw": -0.07162999361753464, "beta_dpo/gap_mean": 17.733795166015625, "beta_dpo/gap_std": 26.989667892456055, "beta_dpo/loss_margin_mean": 13.307918548583984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8314436885865457, "grad_norm": 45.28874206542969, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.4761781692504883, "logits/rejected": 0.4413219094276428, "loss": 1.0674, "step": 550 }, { "beta_dpo/beta": 0.03209269419312477, "beta_dpo/beta_margin_grad_mean": -0.4065311551094055, "beta_dpo/beta_margin_grad_std": 0.1826740801334381, "beta_dpo/beta_margin_mean": 0.57929527759552, "beta_dpo/beta_margin_std": 1.1693179607391357, "beta_dpo/beta_used": 0.03209269419312477, "beta_dpo/beta_used_raw": -0.03214040771126747, "beta_dpo/gap_mean": 16.9639892578125, "beta_dpo/gap_std": 26.479679107666016, "beta_dpo/loss_margin_mean": 16.129413604736328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8329554043839759, "grad_norm": 24.777706146240234, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 0.4830327033996582, "logits/rejected": 0.4463438093662262, "loss": 1.1575, "step": 551 }, { "beta_dpo/beta": 0.20424462854862213, "beta_dpo/beta_margin_grad_mean": -0.3118079602718353, "beta_dpo/beta_margin_grad_std": 0.28090307116508484, "beta_dpo/beta_margin_mean": 4.122735023498535, "beta_dpo/beta_margin_std": 7.375041484832764, "beta_dpo/beta_used": 0.20424462854862213, "beta_dpo/beta_used_raw": 0.15399131178855896, "beta_dpo/gap_mean": 16.902812957763672, "beta_dpo/gap_std": 26.333946228027344, "beta_dpo/loss_margin_mean": 17.136497497558594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8344671201814059, "grad_norm": 26.707483291625977, "learning_rate": 4.112804714676593e-08, "logits/chosen": 0.4524657726287842, "logits/rejected": 0.39445942640304565, "loss": 0.7323, "step": 552 }, { "beta_dpo/beta": 0.15994586050510406, "beta_dpo/beta_margin_grad_mean": -0.3966883718967438, "beta_dpo/beta_margin_grad_std": 0.30760183930397034, "beta_dpo/beta_margin_mean": 3.007977247238159, "beta_dpo/beta_margin_std": 8.034646987915039, "beta_dpo/beta_used": 0.15994586050510406, "beta_dpo/beta_used_raw": 0.15004082024097443, "beta_dpo/gap_mean": 17.17023468017578, "beta_dpo/gap_std": 27.138286590576172, "beta_dpo/loss_margin_mean": 17.232175827026367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8359788359788359, "grad_norm": 161.55067443847656, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 0.43592414259910583, "logits/rejected": 0.3231276869773865, "loss": 1.3322, "step": 553 }, { "beta_dpo/beta": 0.023860443383455276, "beta_dpo/beta_margin_grad_mean": -0.4225996136665344, "beta_dpo/beta_margin_grad_std": 0.1500791311264038, "beta_dpo/beta_margin_mean": 0.42666929960250854, "beta_dpo/beta_margin_std": 0.8989187479019165, "beta_dpo/beta_used": 0.023860443383455276, "beta_dpo/beta_used_raw": -0.12928947806358337, "beta_dpo/gap_mean": 17.246599197387695, "beta_dpo/gap_std": 27.404417037963867, "beta_dpo/loss_margin_mean": 18.306926727294922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8374905517762661, "grad_norm": 15.592290878295898, "learning_rate": 3.968661679220467e-08, "logits/chosen": 0.42959821224212646, "logits/rejected": 0.40046319365501404, "loss": 1.1491, "step": 554 }, { "beta_dpo/beta": 0.20455916225910187, "beta_dpo/beta_margin_grad_mean": -0.31857535243034363, "beta_dpo/beta_margin_grad_std": 0.287540078163147, "beta_dpo/beta_margin_mean": 4.8925042152404785, "beta_dpo/beta_margin_std": 8.230351448059082, "beta_dpo/beta_used": 0.20455916225910187, "beta_dpo/beta_used_raw": 0.15680401027202606, "beta_dpo/gap_mean": 17.05306625366211, "beta_dpo/gap_std": 27.7065372467041, "beta_dpo/loss_margin_mean": 17.45922088623047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8390022675736961, "grad_norm": 129.28990173339844, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.4454486072063446, "logits/rejected": 0.4100185036659241, "loss": 1.3243, "step": 555 }, { "beta_dpo/beta": 0.009554240852594376, "beta_dpo/beta_margin_grad_mean": -0.46103566884994507, "beta_dpo/beta_margin_grad_std": 0.08946454524993896, "beta_dpo/beta_margin_mean": 0.16767998039722443, "beta_dpo/beta_margin_std": 0.38722655177116394, "beta_dpo/beta_used": 0.009554240852594376, "beta_dpo/beta_used_raw": -0.12646989524364471, "beta_dpo/gap_mean": 17.357540130615234, "beta_dpo/gap_std": 27.795841217041016, "beta_dpo/loss_margin_mean": 16.72756576538086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8405139833711263, "grad_norm": 7.336466312408447, "learning_rate": 3.826871794280192e-08, "logits/chosen": 0.506888747215271, "logits/rejected": 0.4476608633995056, "loss": 1.2708, "step": 556 }, { "beta_dpo/beta": 0.21170280873775482, "beta_dpo/beta_margin_grad_mean": -0.33399519324302673, "beta_dpo/beta_margin_grad_std": 0.28276923298835754, "beta_dpo/beta_margin_mean": 5.678369522094727, "beta_dpo/beta_margin_std": 9.621817588806152, "beta_dpo/beta_used": 0.21170280873775482, "beta_dpo/beta_used_raw": 0.15560570359230042, "beta_dpo/gap_mean": 18.178462982177734, "beta_dpo/gap_std": 27.78179931640625, "beta_dpo/loss_margin_mean": 21.99428939819336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8420256991685563, "grad_norm": 101.20765686035156, "learning_rate": 3.756864251262143e-08, "logits/chosen": 0.571921706199646, "logits/rejected": 0.4873775243759155, "loss": 0.9721, "step": 557 }, { "beta_dpo/beta": 0.010930849239230156, "beta_dpo/beta_margin_grad_mean": -0.44395628571510315, "beta_dpo/beta_margin_grad_std": 0.10657542943954468, "beta_dpo/beta_margin_mean": 0.24835842847824097, "beta_dpo/beta_margin_std": 0.4842853546142578, "beta_dpo/beta_used": 0.010930849239230156, "beta_dpo/beta_used_raw": -0.05466505140066147, "beta_dpo/gap_mean": 18.70376968383789, "beta_dpo/gap_std": 27.77187728881836, "beta_dpo/loss_margin_mean": 21.812089920043945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8435374149659864, "grad_norm": 9.379203796386719, "learning_rate": 3.687450924416341e-08, "logits/chosen": 0.5313233137130737, "logits/rejected": 0.4695540964603424, "loss": 1.2377, "step": 558 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49510252475738525, "beta_dpo/beta_margin_grad_std": 0.007558883633464575, "beta_dpo/beta_margin_mean": 0.0195964053273201, "beta_dpo/beta_margin_std": 0.030248643830418587, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06615760177373886, "beta_dpo/gap_mean": 19.156770706176758, "beta_dpo/gap_std": 28.15302276611328, "beta_dpo/loss_margin_mean": 19.596405029296875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8450491307634165, "grad_norm": 0.8944501280784607, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 0.43583738803863525, "logits/rejected": 0.3620571792125702, "loss": 1.3701, "step": 559 }, { "beta_dpo/beta": 0.06273768097162247, "beta_dpo/beta_margin_grad_mean": -0.37549564242362976, "beta_dpo/beta_margin_grad_std": 0.24666382372379303, "beta_dpo/beta_margin_mean": 1.3902297019958496, "beta_dpo/beta_margin_std": 2.78605055809021, "beta_dpo/beta_used": 0.06273768097162247, "beta_dpo/beta_used_raw": -0.2646280825138092, "beta_dpo/gap_mean": 18.45889663696289, "beta_dpo/gap_std": 27.95433807373047, "beta_dpo/loss_margin_mean": 17.313091278076172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8465608465608465, "grad_norm": 48.35575866699219, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.46553072333335876, "logits/rejected": 0.4221952557563782, "loss": 1.1836, "step": 560 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4949081540107727, "beta_dpo/beta_margin_grad_std": 0.00684754503890872, "beta_dpo/beta_margin_mean": 0.02037295140326023, "beta_dpo/beta_margin_std": 0.02739965170621872, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14487004280090332, "beta_dpo/gap_mean": 18.880569458007812, "beta_dpo/gap_std": 27.770397186279297, "beta_dpo/loss_margin_mean": 20.372949600219727, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8480725623582767, "grad_norm": 0.9027082920074463, "learning_rate": 3.482795573879241e-08, "logits/chosen": 0.4389171004295349, "logits/rejected": 0.4021867513656616, "loss": 1.3717, "step": 561 }, { "beta_dpo/beta": 0.2258734554052353, "beta_dpo/beta_margin_grad_mean": -0.3564591705799103, "beta_dpo/beta_margin_grad_std": 0.28591641783714294, "beta_dpo/beta_margin_mean": 5.468478679656982, "beta_dpo/beta_margin_std": 9.84677791595459, "beta_dpo/beta_used": 0.2258734554052353, "beta_dpo/beta_used_raw": 0.10015764832496643, "beta_dpo/gap_mean": 19.541290283203125, "beta_dpo/gap_std": 27.708114624023438, "beta_dpo/loss_margin_mean": 22.15447235107422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8495842781557067, "grad_norm": 134.08180236816406, "learning_rate": 3.415778361095226e-08, "logits/chosen": 0.46755078434944153, "logits/rejected": 0.42174720764160156, "loss": 1.2897, "step": 562 }, { "beta_dpo/beta": 0.10262561589479446, "beta_dpo/beta_margin_grad_mean": -0.3006736636161804, "beta_dpo/beta_margin_grad_std": 0.31810298562049866, "beta_dpo/beta_margin_mean": 2.1566312313079834, "beta_dpo/beta_margin_std": 3.6026384830474854, "beta_dpo/beta_used": 0.10262561589479446, "beta_dpo/beta_used_raw": 0.10262561589479446, "beta_dpo/gap_mean": 19.626052856445312, "beta_dpo/gap_std": 28.178102493286133, "beta_dpo/loss_margin_mean": 20.39893913269043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8510959939531368, "grad_norm": 75.20328521728516, "learning_rate": 3.349364905389032e-08, "logits/chosen": 0.5157092809677124, "logits/rejected": 0.4610385298728943, "loss": 0.9912, "step": 563 }, { "beta_dpo/beta": 0.20660275220870972, "beta_dpo/beta_margin_grad_mean": -0.24291859567165375, "beta_dpo/beta_margin_grad_std": 0.32369184494018555, "beta_dpo/beta_margin_mean": 4.798521041870117, "beta_dpo/beta_margin_std": 6.981872081756592, "beta_dpo/beta_used": 0.20660275220870972, "beta_dpo/beta_used_raw": 0.20660275220870972, "beta_dpo/gap_mean": 20.09206771850586, "beta_dpo/gap_std": 28.188093185424805, "beta_dpo/loss_margin_mean": 22.53466033935547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8526077097505669, "grad_norm": 111.04369354248047, "learning_rate": 3.283557064487785e-08, "logits/chosen": 0.4254182279109955, "logits/rejected": 0.38883572816848755, "loss": 0.8814, "step": 564 }, { "beta_dpo/beta": 0.1317221075296402, "beta_dpo/beta_margin_grad_mean": -0.34109413623809814, "beta_dpo/beta_margin_grad_std": 0.2687946856021881, "beta_dpo/beta_margin_mean": 2.5971169471740723, "beta_dpo/beta_margin_std": 5.390993118286133, "beta_dpo/beta_used": 0.1317221075296402, "beta_dpo/beta_used_raw": -0.17019467055797577, "beta_dpo/gap_mean": 19.456600189208984, "beta_dpo/gap_std": 28.187774658203125, "beta_dpo/loss_margin_mean": 15.798896789550781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.854119425547997, "grad_norm": 95.097900390625, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.486441433429718, "logits/rejected": 0.42756447196006775, "loss": 1.1893, "step": 565 }, { "beta_dpo/beta": 0.09158875793218613, "beta_dpo/beta_margin_grad_mean": -0.4200616478919983, "beta_dpo/beta_margin_grad_std": 0.2922900915145874, "beta_dpo/beta_margin_mean": 1.3433125019073486, "beta_dpo/beta_margin_std": 3.9480140209198, "beta_dpo/beta_used": 0.09158875793218613, "beta_dpo/beta_used_raw": 0.06183715909719467, "beta_dpo/gap_mean": 19.037540435791016, "beta_dpo/gap_std": 28.31623077392578, "beta_dpo/loss_margin_mean": 17.304000854492188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8556311413454271, "grad_norm": 94.29534912109375, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 0.4620034694671631, "logits/rejected": 0.43806010484695435, "loss": 1.3457, "step": 566 }, { "beta_dpo/beta": 0.045104626566171646, "beta_dpo/beta_margin_grad_mean": -0.35616597533226013, "beta_dpo/beta_margin_grad_std": 0.21555054187774658, "beta_dpo/beta_margin_mean": 1.1588886976242065, "beta_dpo/beta_margin_std": 1.898647427558899, "beta_dpo/beta_used": 0.045104626566171646, "beta_dpo/beta_used_raw": -0.020848926156759262, "beta_dpo/gap_mean": 19.459049224853516, "beta_dpo/gap_std": 27.901168823242188, "beta_dpo/loss_margin_mean": 19.77743148803711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8571428571428571, "grad_norm": 18.36300277709961, "learning_rate": 3.089785553471233e-08, "logits/chosen": 0.49507731199264526, "logits/rejected": 0.3956683278083801, "loss": 1.0065, "step": 567 }, { "beta_dpo/beta": 0.20853257179260254, "beta_dpo/beta_margin_grad_mean": -0.21012690663337708, "beta_dpo/beta_margin_grad_std": 0.313422828912735, "beta_dpo/beta_margin_mean": 4.9643449783325195, "beta_dpo/beta_margin_std": 6.101047992706299, "beta_dpo/beta_used": 0.20853257179260254, "beta_dpo/beta_used_raw": 0.20853257179260254, "beta_dpo/gap_mean": 19.520610809326172, "beta_dpo/gap_std": 27.042015075683594, "beta_dpo/loss_margin_mean": 22.951581954956055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8586545729402872, "grad_norm": 99.84920501708984, "learning_rate": 3.026418409484513e-08, "logits/chosen": 0.5173642635345459, "logits/rejected": 0.42536449432373047, "loss": 0.8444, "step": 568 }, { "beta_dpo/beta": 0.008185813203454018, "beta_dpo/beta_margin_grad_mean": -0.4594218134880066, "beta_dpo/beta_margin_grad_std": 0.07736992090940475, "beta_dpo/beta_margin_mean": 0.17122690379619598, "beta_dpo/beta_margin_std": 0.33035361766815186, "beta_dpo/beta_used": 0.008185813203454018, "beta_dpo/beta_used_raw": -0.08097762614488602, "beta_dpo/gap_mean": 19.53293228149414, "beta_dpo/gap_std": 27.189006805419922, "beta_dpo/loss_margin_mean": 15.246856689453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8601662887377173, "grad_norm": 9.221585273742676, "learning_rate": 2.963665913810451e-08, "logits/chosen": 0.38372114300727844, "logits/rejected": 0.34946101903915405, "loss": 1.2617, "step": 569 }, { "beta_dpo/beta": 0.3106805086135864, "beta_dpo/beta_margin_grad_mean": -0.2486473172903061, "beta_dpo/beta_margin_grad_std": 0.23560231924057007, "beta_dpo/beta_margin_mean": 9.827374458312988, "beta_dpo/beta_margin_std": 15.061835289001465, "beta_dpo/beta_used": 0.3106805086135864, "beta_dpo/beta_used_raw": 0.3106805086135864, "beta_dpo/gap_mean": 19.72932243347168, "beta_dpo/gap_std": 26.982757568359375, "beta_dpo/loss_margin_mean": 26.050617218017578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8616780045351474, "grad_norm": 127.13440704345703, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.4091266095638275, "logits/rejected": 0.32067325711250305, "loss": 0.7522, "step": 570 }, { "beta_dpo/beta": 0.29264044761657715, "beta_dpo/beta_margin_grad_mean": -0.36735960841178894, "beta_dpo/beta_margin_grad_std": 0.31165120005607605, "beta_dpo/beta_margin_mean": 5.108920097351074, "beta_dpo/beta_margin_std": 12.917421340942383, "beta_dpo/beta_used": 0.29264044761657715, "beta_dpo/beta_used_raw": 0.1877349317073822, "beta_dpo/gap_mean": 19.32115936279297, "beta_dpo/gap_std": 26.78708267211914, "beta_dpo/loss_margin_mean": 14.153389930725098, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8631897203325775, "grad_norm": 37.17084884643555, "learning_rate": 2.840011871446962e-08, "logits/chosen": 0.4562925696372986, "logits/rejected": 0.4176176190376282, "loss": 0.8324, "step": 571 }, { "beta_dpo/beta": 0.10962247103452682, "beta_dpo/beta_margin_grad_mean": -0.3743179142475128, "beta_dpo/beta_margin_grad_std": 0.28637591004371643, "beta_dpo/beta_margin_mean": 1.9382827281951904, "beta_dpo/beta_margin_std": 4.297430992126465, "beta_dpo/beta_used": 0.10962247103452682, "beta_dpo/beta_used_raw": 0.01463637501001358, "beta_dpo/gap_mean": 18.996253967285156, "beta_dpo/gap_std": 26.56301498413086, "beta_dpo/loss_margin_mean": 17.643159866333008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8647014361300076, "grad_norm": 64.77835845947266, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 0.4863911271095276, "logits/rejected": 0.5237973928451538, "loss": 1.0658, "step": 572 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49567198753356934, "beta_dpo/beta_margin_grad_std": 0.00828883983194828, "beta_dpo/beta_margin_mean": 0.017318209633231163, "beta_dpo/beta_margin_std": 0.0331687331199646, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19534537196159363, "beta_dpo/gap_mean": 18.85485076904297, "beta_dpo/gap_std": 27.400794982910156, "beta_dpo/loss_margin_mean": 17.318208694458008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8662131519274376, "grad_norm": 0.8444698452949524, "learning_rate": 2.718837261761528e-08, "logits/chosen": 0.4736700654029846, "logits/rejected": 0.4235808849334717, "loss": 1.3725, "step": 573 }, { "beta_dpo/beta": 0.1897483617067337, "beta_dpo/beta_margin_grad_mean": -0.3185376226902008, "beta_dpo/beta_margin_grad_std": 0.25058743357658386, "beta_dpo/beta_margin_mean": 4.7107343673706055, "beta_dpo/beta_margin_std": 7.908545017242432, "beta_dpo/beta_used": 0.1897483617067337, "beta_dpo/beta_used_raw": 0.1897483617067337, "beta_dpo/gap_mean": 18.79144859313965, "beta_dpo/gap_std": 26.819414138793945, "beta_dpo/loss_margin_mean": 21.234031677246094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8677248677248677, "grad_norm": 90.03785705566406, "learning_rate": 2.659183991914696e-08, "logits/chosen": 0.5208003520965576, "logits/rejected": 0.44477763772010803, "loss": 0.9521, "step": 574 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4963507056236267, "beta_dpo/beta_margin_grad_std": 0.007400399073958397, "beta_dpo/beta_margin_mean": 0.014601343311369419, "beta_dpo/beta_margin_std": 0.029610570520162582, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.428772509098053, "beta_dpo/gap_mean": 18.696063995361328, "beta_dpo/gap_std": 27.33431625366211, "beta_dpo/loss_margin_mean": 14.601343154907227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8692365835222978, "grad_norm": 0.8435812592506409, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.531106173992157, "logits/rejected": 0.44675496220588684, "loss": 1.3766, "step": 575 }, { "beta_dpo/beta": 0.2750542461872101, "beta_dpo/beta_margin_grad_mean": -0.34378066658973694, "beta_dpo/beta_margin_grad_std": 0.2857505977153778, "beta_dpo/beta_margin_mean": 6.987796306610107, "beta_dpo/beta_margin_std": 12.458413124084473, "beta_dpo/beta_used": 0.2750542461872101, "beta_dpo/beta_used_raw": 0.14406917989253998, "beta_dpo/gap_mean": 18.92401695251465, "beta_dpo/gap_std": 27.16030502319336, "beta_dpo/loss_margin_mean": 22.06940460205078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8707482993197279, "grad_norm": 107.54598236083984, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 0.5154881477355957, "logits/rejected": 0.4023563265800476, "loss": 0.949, "step": 576 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49634456634521484, "beta_dpo/beta_margin_grad_std": 0.007100496906787157, "beta_dpo/beta_margin_mean": 0.0146254263818264, "beta_dpo/beta_margin_std": 0.02840968407690525, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20439589023590088, "beta_dpo/gap_mean": 18.303625106811523, "beta_dpo/gap_std": 27.299203872680664, "beta_dpo/loss_margin_mean": 14.625425338745117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.872260015117158, "grad_norm": 0.8384003639221191, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 0.46008801460266113, "logits/rejected": 0.4375826120376587, "loss": 1.3733, "step": 577 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4962397515773773, "beta_dpo/beta_margin_grad_std": 0.006909025367349386, "beta_dpo/beta_margin_mean": 0.015045084990561008, "beta_dpo/beta_margin_std": 0.027643948793411255, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14562451839447021, "beta_dpo/gap_mean": 17.768327713012695, "beta_dpo/gap_std": 27.450611114501953, "beta_dpo/loss_margin_mean": 15.045083999633789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.873771730914588, "grad_norm": 0.863936185836792, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 0.5061689019203186, "logits/rejected": 0.4770342707633972, "loss": 1.3728, "step": 578 }, { "beta_dpo/beta": 0.2994960844516754, "beta_dpo/beta_margin_grad_mean": -0.3630431890487671, "beta_dpo/beta_margin_grad_std": 0.30570077896118164, "beta_dpo/beta_margin_mean": 6.887415409088135, "beta_dpo/beta_margin_std": 13.675480842590332, "beta_dpo/beta_used": 0.2994960844516754, "beta_dpo/beta_used_raw": 0.27384522557258606, "beta_dpo/gap_mean": 18.528472900390625, "beta_dpo/gap_std": 27.44584083557129, "beta_dpo/loss_margin_mean": 24.255210876464844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8752834467120182, "grad_norm": 176.87245178222656, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 0.41431349515914917, "logits/rejected": 0.3788405656814575, "loss": 1.1633, "step": 579 }, { "beta_dpo/beta": 0.02022494003176689, "beta_dpo/beta_margin_grad_mean": -0.4239354431629181, "beta_dpo/beta_margin_grad_std": 0.16484029591083527, "beta_dpo/beta_margin_mean": 0.39223089814186096, "beta_dpo/beta_margin_std": 0.8803242444992065, "beta_dpo/beta_used": 0.02022494003176689, "beta_dpo/beta_used_raw": -0.11066954582929611, "beta_dpo/gap_mean": 18.55239486694336, "beta_dpo/gap_std": 27.90411376953125, "beta_dpo/loss_margin_mean": 17.821359634399414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8767951625094482, "grad_norm": 15.0440092086792, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.4520995020866394, "logits/rejected": 0.3926962614059448, "loss": 1.1646, "step": 580 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49613216519355774, "beta_dpo/beta_margin_grad_std": 0.006800650618970394, "beta_dpo/beta_margin_mean": 0.015474964864552021, "beta_dpo/beta_margin_std": 0.02721022255718708, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04371700435876846, "beta_dpo/gap_mean": 18.31734848022461, "beta_dpo/gap_std": 27.919649124145508, "beta_dpo/loss_margin_mean": 15.474964141845703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8783068783068783, "grad_norm": 0.9805479049682617, "learning_rate": 2.259200116137039e-08, "logits/chosen": 0.5199064016342163, "logits/rejected": 0.44630610942840576, "loss": 1.3706, "step": 581 }, { "beta_dpo/beta": 0.045170314610004425, "beta_dpo/beta_margin_grad_mean": -0.39904969930648804, "beta_dpo/beta_margin_grad_std": 0.2411165088415146, "beta_dpo/beta_margin_mean": 0.7901149392127991, "beta_dpo/beta_margin_std": 1.9573147296905518, "beta_dpo/beta_used": 0.045170314610004425, "beta_dpo/beta_used_raw": 0.04214519262313843, "beta_dpo/gap_mean": 17.89398765563965, "beta_dpo/gap_std": 27.529356002807617, "beta_dpo/loss_margin_mean": 17.459928512573242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8798185941043084, "grad_norm": 32.02427291870117, "learning_rate": 2.204591459016525e-08, "logits/chosen": 0.49430859088897705, "logits/rejected": 0.5163915157318115, "loss": 1.1756, "step": 582 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49534931778907776, "beta_dpo/beta_margin_grad_std": 0.007267021108418703, "beta_dpo/beta_margin_mean": 0.018607286736369133, "beta_dpo/beta_margin_std": 0.02907698228955269, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1370919942855835, "beta_dpo/gap_mean": 18.084922790527344, "beta_dpo/gap_std": 27.864459991455078, "beta_dpo/loss_margin_mean": 18.60728645324707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8813303099017384, "grad_norm": 1.099493145942688, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 0.583713173866272, "logits/rejected": 0.46615666151046753, "loss": 1.3723, "step": 583 }, { "beta_dpo/beta": 0.05212414637207985, "beta_dpo/beta_margin_grad_mean": -0.4082799553871155, "beta_dpo/beta_margin_grad_std": 0.2577343285083771, "beta_dpo/beta_margin_mean": 1.1146053075790405, "beta_dpo/beta_margin_std": 2.6749753952026367, "beta_dpo/beta_used": 0.05212414637207985, "beta_dpo/beta_used_raw": 0.048223551362752914, "beta_dpo/gap_mean": 18.14144515991211, "beta_dpo/gap_std": 28.521961212158203, "beta_dpo/loss_margin_mean": 17.956157684326172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8828420256991686, "grad_norm": 55.47587585449219, "learning_rate": 2.09728856419826e-08, "logits/chosen": 0.592127799987793, "logits/rejected": 0.482388436794281, "loss": 1.4501, "step": 584 }, { "beta_dpo/beta": 0.18798543512821198, "beta_dpo/beta_margin_grad_mean": -0.36346742510795593, "beta_dpo/beta_margin_grad_std": 0.27915558218955994, "beta_dpo/beta_margin_mean": 3.220351457595825, "beta_dpo/beta_margin_std": 8.585843086242676, "beta_dpo/beta_used": 0.18798543512821198, "beta_dpo/beta_used_raw": 0.10892662405967712, "beta_dpo/gap_mean": 17.433303833007812, "beta_dpo/gap_std": 27.836421966552734, "beta_dpo/loss_margin_mean": 15.049211502075195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8843537414965986, "grad_norm": 92.1081314086914, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.4187523424625397, "logits/rejected": 0.3752346932888031, "loss": 0.9159, "step": 585 }, { "beta_dpo/beta": 0.22254735231399536, "beta_dpo/beta_margin_grad_mean": -0.22272846102714539, "beta_dpo/beta_margin_grad_std": 0.298261821269989, "beta_dpo/beta_margin_mean": 4.026047706604004, "beta_dpo/beta_margin_std": 5.531918048858643, "beta_dpo/beta_used": 0.22254735231399536, "beta_dpo/beta_used_raw": 0.22254735231399536, "beta_dpo/gap_mean": 17.71161651611328, "beta_dpo/gap_std": 27.249780654907227, "beta_dpo/loss_margin_mean": 18.725635528564453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8858654572940288, "grad_norm": 119.44193267822266, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 0.5063425302505493, "logits/rejected": 0.44981545209884644, "loss": 1.0304, "step": 586 }, { "beta_dpo/beta": 0.1785348355770111, "beta_dpo/beta_margin_grad_mean": -0.3711192309856415, "beta_dpo/beta_margin_grad_std": 0.2906634509563446, "beta_dpo/beta_margin_mean": 3.2333409786224365, "beta_dpo/beta_margin_std": 8.240671157836914, "beta_dpo/beta_used": 0.1785348355770111, "beta_dpo/beta_used_raw": 0.0767713189125061, "beta_dpo/gap_mean": 17.747962951660156, "beta_dpo/gap_std": 27.803863525390625, "beta_dpo/loss_margin_mean": 18.480619430541992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8873771730914588, "grad_norm": 119.20809936523438, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 0.49700820446014404, "logits/rejected": 0.48014259338378906, "loss": 1.1634, "step": 587 }, { "beta_dpo/beta": 0.003681016620248556, "beta_dpo/beta_margin_grad_mean": -0.4817616045475006, "beta_dpo/beta_margin_grad_std": 0.033373910933732986, "beta_dpo/beta_margin_mean": 0.0735802948474884, "beta_dpo/beta_margin_std": 0.13508976995944977, "beta_dpo/beta_used": 0.003681016620248556, "beta_dpo/beta_used_raw": -0.06429736316204071, "beta_dpo/gap_mean": 17.947223663330078, "beta_dpo/gap_std": 28.0413875579834, "beta_dpo/loss_margin_mean": 17.880977630615234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8888888888888888, "grad_norm": 3.93040132522583, "learning_rate": 1.890382096832699e-08, "logits/chosen": 0.5224720239639282, "logits/rejected": 0.47010236978530884, "loss": 1.3306, "step": 588 }, { "beta_dpo/beta": 0.2928984761238098, "beta_dpo/beta_margin_grad_mean": -0.18171432614326477, "beta_dpo/beta_margin_grad_std": 0.3138827085494995, "beta_dpo/beta_margin_mean": 6.427730083465576, "beta_dpo/beta_margin_std": 8.318559646606445, "beta_dpo/beta_used": 0.2928984761238098, "beta_dpo/beta_used_raw": 0.2928984761238098, "beta_dpo/gap_mean": 18.34659194946289, "beta_dpo/gap_std": 27.974504470825195, "beta_dpo/loss_margin_mean": 21.777563095092773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.890400604686319, "grad_norm": 126.65879821777344, "learning_rate": 1.840267971970344e-08, "logits/chosen": 0.4884239137172699, "logits/rejected": 0.45349234342575073, "loss": 0.8287, "step": 589 }, { "beta_dpo/beta": 0.22133781015872955, "beta_dpo/beta_margin_grad_mean": -0.3547823429107666, "beta_dpo/beta_margin_grad_std": 0.28267359733581543, "beta_dpo/beta_margin_mean": 5.317359447479248, "beta_dpo/beta_margin_std": 10.699882507324219, "beta_dpo/beta_used": 0.22133781015872955, "beta_dpo/beta_used_raw": 0.1810942143201828, "beta_dpo/gap_mean": 18.91448211669922, "beta_dpo/gap_std": 28.036495208740234, "beta_dpo/loss_margin_mean": 21.749927520751953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.891912320483749, "grad_norm": 171.58700561523438, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.4372316896915436, "logits/rejected": 0.4048703610897064, "loss": 1.1663, "step": 590 }, { "beta_dpo/beta": 0.1789139211177826, "beta_dpo/beta_margin_grad_mean": -0.3391687273979187, "beta_dpo/beta_margin_grad_std": 0.28517794609069824, "beta_dpo/beta_margin_mean": 5.130792617797852, "beta_dpo/beta_margin_std": 8.753190994262695, "beta_dpo/beta_used": 0.1789139211177826, "beta_dpo/beta_used_raw": 0.0776260495185852, "beta_dpo/gap_mean": 19.851974487304688, "beta_dpo/gap_std": 28.40129280090332, "beta_dpo/loss_margin_mean": 21.574308395385742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8934240362811792, "grad_norm": 108.95458221435547, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 0.5855053663253784, "logits/rejected": 0.5133456587791443, "loss": 1.3158, "step": 591 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4951789975166321, "beta_dpo/beta_margin_grad_std": 0.007032149471342564, "beta_dpo/beta_margin_mean": 0.019290054216980934, "beta_dpo/beta_margin_std": 0.0281399916857481, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.47162604331970215, "beta_dpo/gap_mean": 19.769153594970703, "beta_dpo/gap_std": 28.69797134399414, "beta_dpo/loss_margin_mean": 19.290054321289062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8949357520786092, "grad_norm": 0.760765790939331, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 0.5299474000930786, "logits/rejected": 0.41111090779304504, "loss": 1.3762, "step": 592 }, { "beta_dpo/beta": 0.1399514377117157, "beta_dpo/beta_margin_grad_mean": -0.2983793616294861, "beta_dpo/beta_margin_grad_std": 0.32489582896232605, "beta_dpo/beta_margin_mean": 2.4921958446502686, "beta_dpo/beta_margin_std": 5.445460319519043, "beta_dpo/beta_used": 0.1399514377117157, "beta_dpo/beta_used_raw": 0.1399514377117157, "beta_dpo/gap_mean": 19.371078491210938, "beta_dpo/gap_std": 28.608142852783203, "beta_dpo/loss_margin_mean": 18.472137451171875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8964474678760394, "grad_norm": 72.96538543701172, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 0.4820663630962372, "logits/rejected": 0.38356471061706543, "loss": 0.9054, "step": 593 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49589961767196655, "beta_dpo/beta_margin_grad_std": 0.00622721528634429, "beta_dpo/beta_margin_mean": 0.01640484668314457, "beta_dpo/beta_margin_std": 0.024914657697081566, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08366122096776962, "beta_dpo/gap_mean": 18.921382904052734, "beta_dpo/gap_std": 28.415973663330078, "beta_dpo/loss_margin_mean": 16.40484619140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8979591836734694, "grad_norm": 0.9117282032966614, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 0.40028396248817444, "logits/rejected": 0.3776008188724518, "loss": 1.3706, "step": 594 }, { "beta_dpo/beta": 0.35656407475471497, "beta_dpo/beta_margin_grad_mean": -0.2003844678401947, "beta_dpo/beta_margin_grad_std": 0.36102327704429626, "beta_dpo/beta_margin_mean": 8.762179374694824, "beta_dpo/beta_margin_std": 11.819833755493164, "beta_dpo/beta_used": 0.35656407475471497, "beta_dpo/beta_used_raw": 0.35656407475471497, "beta_dpo/gap_mean": 19.40142822265625, "beta_dpo/gap_std": 28.8588809967041, "beta_dpo/loss_margin_mean": 24.271892547607422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8994708994708994, "grad_norm": 206.3365936279297, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.4917094111442566, "logits/rejected": 0.3987153470516205, "loss": 1.3807, "step": 595 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4972356855869293, "beta_dpo/beta_margin_grad_std": 0.0067308153957128525, "beta_dpo/beta_margin_mean": 0.01106041856110096, "beta_dpo/beta_margin_std": 0.026931054890155792, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.40151554346084595, "beta_dpo/gap_mean": 18.570661544799805, "beta_dpo/gap_std": 28.856903076171875, "beta_dpo/loss_margin_mean": 11.060418128967285, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9009826152683296, "grad_norm": 0.7493230104446411, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.3801739811897278, "logits/rejected": 0.3889135718345642, "loss": 1.3762, "step": 596 }, { "beta_dpo/beta": 0.12935660779476166, "beta_dpo/beta_margin_grad_mean": -0.38230597972869873, "beta_dpo/beta_margin_grad_std": 0.24667063355445862, "beta_dpo/beta_margin_mean": 2.488924741744995, "beta_dpo/beta_margin_std": 5.174806118011475, "beta_dpo/beta_used": 0.12935660779476166, "beta_dpo/beta_used_raw": -0.017168477177619934, "beta_dpo/gap_mean": 18.310619354248047, "beta_dpo/gap_std": 28.702266693115234, "beta_dpo/loss_margin_mean": 19.412736892700195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9024943310657596, "grad_norm": 54.9415283203125, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 0.4415905177593231, "logits/rejected": 0.41522032022476196, "loss": 1.0468, "step": 597 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49707233905792236, "beta_dpo/beta_margin_grad_std": 0.006856715772300959, "beta_dpo/beta_margin_mean": 0.011713477782905102, "beta_dpo/beta_margin_std": 0.027433032169938087, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15161052346229553, "beta_dpo/gap_mean": 17.37384033203125, "beta_dpo/gap_std": 28.340988159179688, "beta_dpo/loss_margin_mean": 11.71347713470459, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9040060468631897, "grad_norm": 0.8391776084899902, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 0.5034236311912537, "logits/rejected": 0.421264111995697, "loss": 1.3733, "step": 598 }, { "beta_dpo/beta": 0.024908404797315598, "beta_dpo/beta_margin_grad_mean": -0.43633511662483215, "beta_dpo/beta_margin_grad_std": 0.17187337577342987, "beta_dpo/beta_margin_mean": 0.352827787399292, "beta_dpo/beta_margin_std": 0.9965790510177612, "beta_dpo/beta_used": 0.024908404797315598, "beta_dpo/beta_used_raw": -0.013875270262360573, "beta_dpo/gap_mean": 16.874439239501953, "beta_dpo/gap_std": 28.45179557800293, "beta_dpo/loss_margin_mean": 16.18917465209961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9055177626606198, "grad_norm": 19.783573150634766, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 0.5565443634986877, "logits/rejected": 0.5078023076057434, "loss": 1.1524, "step": 599 }, { "beta_dpo/beta": 0.05185613036155701, "beta_dpo/beta_margin_grad_mean": -0.3738482892513275, "beta_dpo/beta_margin_grad_std": 0.23426569998264313, "beta_dpo/beta_margin_mean": 0.8810456991195679, "beta_dpo/beta_margin_std": 1.6753695011138916, "beta_dpo/beta_used": 0.05185613036155701, "beta_dpo/beta_used_raw": 0.05185613036155701, "beta_dpo/gap_mean": 16.18065071105957, "beta_dpo/gap_std": 28.42178726196289, "beta_dpo/loss_margin_mean": 14.054128646850586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9070294784580499, "grad_norm": 29.925748825073242, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.5630729794502258, "logits/rejected": 0.508116602897644, "loss": 1.0347, "step": 600 }, { "epoch": 0.9070294784580499, "eval_beta_dpo/beta": 0.1704542338848114, "eval_beta_dpo/beta_margin_grad_mean": -0.33918437361717224, "eval_beta_dpo/beta_margin_grad_std": 0.22294603288173676, "eval_beta_dpo/beta_margin_mean": 3.5151495933532715, "eval_beta_dpo/beta_margin_std": 4.756720542907715, "eval_beta_dpo/beta_used": 0.1704542338848114, "eval_beta_dpo/beta_used_raw": 0.10854478925466537, "eval_beta_dpo/gap_mean": 16.476804733276367, "eval_beta_dpo/gap_std": 28.413129806518555, "eval_beta_dpo/loss_margin_mean": 16.619216918945312, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 0.5020915269851685, "eval_logits/rejected": 0.4486764073371887, "eval_loss": 0.820326030254364, "eval_runtime": 38.9183, "eval_samples_per_second": 59.175, "eval_steps_per_second": 1.85, "step": 600 }, { "beta_dpo/beta": 0.18654750287532806, "beta_dpo/beta_margin_grad_mean": -0.2733524739742279, "beta_dpo/beta_margin_grad_std": 0.34814247488975525, "beta_dpo/beta_margin_mean": 3.743360757827759, "beta_dpo/beta_margin_std": 5.708962917327881, "beta_dpo/beta_used": 0.18654750287532806, "beta_dpo/beta_used_raw": 0.18654750287532806, "beta_dpo/gap_mean": 17.23125457763672, "beta_dpo/gap_std": 28.533397674560547, "beta_dpo/loss_margin_mean": 20.574644088745117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.90854119425548, "grad_norm": 103.95361328125, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 0.4921482801437378, "logits/rejected": 0.41257598996162415, "loss": 1.2026, "step": 601 }, { "beta_dpo/beta": 0.0725310891866684, "beta_dpo/beta_margin_grad_mean": -0.37306496500968933, "beta_dpo/beta_margin_grad_std": 0.26036691665649414, "beta_dpo/beta_margin_mean": 1.5764257907867432, "beta_dpo/beta_margin_std": 3.257750988006592, "beta_dpo/beta_used": 0.0725310891866684, "beta_dpo/beta_used_raw": 0.0725310891866684, "beta_dpo/gap_mean": 17.40424346923828, "beta_dpo/gap_std": 28.048587799072266, "beta_dpo/loss_margin_mean": 19.2706298828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.91005291005291, "grad_norm": 54.13090515136719, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 0.4925674796104431, "logits/rejected": 0.45530757308006287, "loss": 1.1811, "step": 602 }, { "beta_dpo/beta": 0.09298260509967804, "beta_dpo/beta_margin_grad_mean": -0.2825284004211426, "beta_dpo/beta_margin_grad_std": 0.25072601437568665, "beta_dpo/beta_margin_mean": 2.0486679077148438, "beta_dpo/beta_margin_std": 2.9732513427734375, "beta_dpo/beta_used": 0.09298260509967804, "beta_dpo/beta_used_raw": 0.09298260509967804, "beta_dpo/gap_mean": 18.233320236206055, "beta_dpo/gap_std": 27.81458282470703, "beta_dpo/loss_margin_mean": 21.40192413330078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9115646258503401, "grad_norm": 40.56179428100586, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 0.4438127875328064, "logits/rejected": 0.3784589469432831, "loss": 0.8606, "step": 603 }, { "beta_dpo/beta": 0.12427462637424469, "beta_dpo/beta_margin_grad_mean": -0.4008530080318451, "beta_dpo/beta_margin_grad_std": 0.28347763419151306, "beta_dpo/beta_margin_mean": 2.275778293609619, "beta_dpo/beta_margin_std": 6.116153240203857, "beta_dpo/beta_used": 0.12427462637424469, "beta_dpo/beta_used_raw": 0.12097650021314621, "beta_dpo/gap_mean": 18.284557342529297, "beta_dpo/gap_std": 28.388805389404297, "beta_dpo/loss_margin_mean": 18.04819107055664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9130763416477702, "grad_norm": 97.59881591796875, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 0.4511834383010864, "logits/rejected": 0.44945216178894043, "loss": 1.2998, "step": 604 }, { "beta_dpo/beta": 0.1606498509645462, "beta_dpo/beta_margin_grad_mean": -0.372048556804657, "beta_dpo/beta_margin_grad_std": 0.2992105782032013, "beta_dpo/beta_margin_mean": 3.194119691848755, "beta_dpo/beta_margin_std": 7.407918930053711, "beta_dpo/beta_used": 0.1606498509645462, "beta_dpo/beta_used_raw": 0.08958107978105545, "beta_dpo/gap_mean": 18.22399139404297, "beta_dpo/gap_std": 28.62871551513672, "beta_dpo/loss_margin_mean": 16.938005447387695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9145880574452003, "grad_norm": 166.026611328125, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.5186942219734192, "logits/rejected": 0.4566429853439331, "loss": 1.974, "step": 605 }, { "beta_dpo/beta": 0.048199739307165146, "beta_dpo/beta_margin_grad_mean": -0.3714365065097809, "beta_dpo/beta_margin_grad_std": 0.2303820103406906, "beta_dpo/beta_margin_mean": 1.0771068334579468, "beta_dpo/beta_margin_std": 2.071831226348877, "beta_dpo/beta_used": 0.048199739307165146, "beta_dpo/beta_used_raw": -0.15278953313827515, "beta_dpo/gap_mean": 17.971176147460938, "beta_dpo/gap_std": 28.285913467407227, "beta_dpo/loss_margin_mean": 18.89348793029785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9160997732426304, "grad_norm": 31.8127498626709, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 0.4706125557422638, "logits/rejected": 0.4000948667526245, "loss": 1.0877, "step": 606 }, { "beta_dpo/beta": 0.05985303223133087, "beta_dpo/beta_margin_grad_mean": -0.3560808300971985, "beta_dpo/beta_margin_grad_std": 0.24726535379886627, "beta_dpo/beta_margin_mean": 1.5488981008529663, "beta_dpo/beta_margin_std": 2.7780096530914307, "beta_dpo/beta_used": 0.05985303223133087, "beta_dpo/beta_used_raw": -0.05173212289810181, "beta_dpo/gap_mean": 18.670974731445312, "beta_dpo/gap_std": 28.158889770507812, "beta_dpo/loss_margin_mean": 19.359159469604492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9176114890400605, "grad_norm": 41.329795837402344, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 0.4254879951477051, "logits/rejected": 0.391355037689209, "loss": 1.0579, "step": 607 }, { "beta_dpo/beta": 0.007975354790687561, "beta_dpo/beta_margin_grad_mean": -0.46832314133644104, "beta_dpo/beta_margin_grad_std": 0.06795617938041687, "beta_dpo/beta_margin_mean": 0.1326778531074524, "beta_dpo/beta_margin_std": 0.29037222266197205, "beta_dpo/beta_used": 0.007975354790687561, "beta_dpo/beta_used_raw": -0.011102650314569473, "beta_dpo/gap_mean": 18.097671508789062, "beta_dpo/gap_std": 27.764236450195312, "beta_dpo/loss_margin_mean": 16.52004051208496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9191232048374905, "grad_norm": 9.120287895202637, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 0.4399696886539459, "logits/rejected": 0.3656252324581146, "loss": 1.2708, "step": 608 }, { "beta_dpo/beta": 0.23025476932525635, "beta_dpo/beta_margin_grad_mean": -0.19502045214176178, "beta_dpo/beta_margin_grad_std": 0.2594439685344696, "beta_dpo/beta_margin_mean": 5.668548583984375, "beta_dpo/beta_margin_std": 7.694107532501221, "beta_dpo/beta_used": 0.23025476932525635, "beta_dpo/beta_used_raw": 0.23025476932525635, "beta_dpo/gap_mean": 18.70809555053711, "beta_dpo/gap_std": 27.124021530151367, "beta_dpo/loss_margin_mean": 21.427459716796875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9206349206349206, "grad_norm": 77.6288070678711, "learning_rate": 9.757601041885694e-09, "logits/chosen": 0.5577750205993652, "logits/rejected": 0.5173044204711914, "loss": 0.7607, "step": 609 }, { "beta_dpo/beta": 0.060976896435022354, "beta_dpo/beta_margin_grad_mean": -0.37375470995903015, "beta_dpo/beta_margin_grad_std": 0.24247336387634277, "beta_dpo/beta_margin_mean": 1.3076086044311523, "beta_dpo/beta_margin_std": 2.8063716888427734, "beta_dpo/beta_used": 0.060976896435022354, "beta_dpo/beta_used_raw": -0.14492599666118622, "beta_dpo/gap_mean": 18.970539093017578, "beta_dpo/gap_std": 27.81329345703125, "beta_dpo/loss_margin_mean": 20.919218063354492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9221466364323507, "grad_norm": 58.45539093017578, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.4478539228439331, "logits/rejected": 0.44471752643585205, "loss": 1.0937, "step": 610 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49625667929649353, "beta_dpo/beta_margin_grad_std": 0.0058044674806296825, "beta_dpo/beta_margin_mean": 0.014975893311202526, "beta_dpo/beta_margin_std": 0.02322331629693508, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11901400983333588, "beta_dpo/gap_mean": 18.650028228759766, "beta_dpo/gap_std": 27.604820251464844, "beta_dpo/loss_margin_mean": 14.975893020629883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9236583522297808, "grad_norm": 0.8769718408584595, "learning_rate": 9.03946036001449e-09, "logits/chosen": 0.5406326055526733, "logits/rejected": 0.48837387561798096, "loss": 1.3715, "step": 611 }, { "beta_dpo/beta": 0.1870323270559311, "beta_dpo/beta_margin_grad_mean": -0.2257097363471985, "beta_dpo/beta_margin_grad_std": 0.30249518156051636, "beta_dpo/beta_margin_mean": 4.107241630554199, "beta_dpo/beta_margin_std": 6.412012100219727, "beta_dpo/beta_used": 0.1870323270559311, "beta_dpo/beta_used_raw": 0.1870323270559311, "beta_dpo/gap_mean": 18.657466888427734, "beta_dpo/gap_std": 27.669326782226562, "beta_dpo/loss_margin_mean": 21.523975372314453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9251700680272109, "grad_norm": 87.718017578125, "learning_rate": 8.690495320571839e-09, "logits/chosen": 0.40628379583358765, "logits/rejected": 0.3346533179283142, "loss": 0.5951, "step": 612 }, { "beta_dpo/beta": 0.2268044352531433, "beta_dpo/beta_margin_grad_mean": -0.31133928894996643, "beta_dpo/beta_margin_grad_std": 0.27967050671577454, "beta_dpo/beta_margin_mean": 6.159829616546631, "beta_dpo/beta_margin_std": 11.035490036010742, "beta_dpo/beta_used": 0.2268044352531433, "beta_dpo/beta_used_raw": 0.18576528131961823, "beta_dpo/gap_mean": 19.324243545532227, "beta_dpo/gap_std": 27.89671516418457, "beta_dpo/loss_margin_mean": 23.248201370239258, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.926681783824641, "grad_norm": 60.471290588378906, "learning_rate": 8.348280226706722e-09, "logits/chosen": 0.3724338114261627, "logits/rejected": 0.3626174032688141, "loss": 0.8819, "step": 613 }, { "beta_dpo/beta": 0.029754722490906715, "beta_dpo/beta_margin_grad_mean": -0.4166284203529358, "beta_dpo/beta_margin_grad_std": 0.18172357976436615, "beta_dpo/beta_margin_mean": 0.4524066746234894, "beta_dpo/beta_margin_std": 1.0519875288009644, "beta_dpo/beta_used": 0.029754722490906715, "beta_dpo/beta_used_raw": 0.029382742941379547, "beta_dpo/gap_mean": 19.42398452758789, "beta_dpo/gap_std": 27.316984176635742, "beta_dpo/loss_margin_mean": 18.69666290283203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9281934996220711, "grad_norm": 22.923320770263672, "learning_rate": 8.012824650910937e-09, "logits/chosen": 0.48424768447875977, "logits/rejected": 0.4690684676170349, "loss": 1.0765, "step": 614 }, { "beta_dpo/beta": 0.04531731456518173, "beta_dpo/beta_margin_grad_mean": -0.3658197224140167, "beta_dpo/beta_margin_grad_std": 0.2268880009651184, "beta_dpo/beta_margin_mean": 1.1839110851287842, "beta_dpo/beta_margin_std": 2.223015546798706, "beta_dpo/beta_used": 0.04531731456518173, "beta_dpo/beta_used_raw": -0.08215552568435669, "beta_dpo/gap_mean": 20.149887084960938, "beta_dpo/gap_std": 27.636917114257812, "beta_dpo/loss_margin_mean": 22.330238342285156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9297052154195011, "grad_norm": 34.31875228881836, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.439863383769989, "logits/rejected": 0.39079999923706055, "loss": 1.1883, "step": 615 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.495390921831131, "beta_dpo/beta_margin_grad_std": 0.006648669950664043, "beta_dpo/beta_margin_mean": 0.018440984189510345, "beta_dpo/beta_margin_std": 0.026603393256664276, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3312501907348633, "beta_dpo/gap_mean": 19.891841888427734, "beta_dpo/gap_std": 27.807634353637695, "beta_dpo/loss_margin_mean": 18.440982818603516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9312169312169312, "grad_norm": 0.8765391111373901, "learning_rate": 7.36222939784098e-09, "logits/chosen": 0.5462205410003662, "logits/rejected": 0.4555407762527466, "loss": 1.3737, "step": 616 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.495637446641922, "beta_dpo/beta_margin_grad_std": 0.006583395879715681, "beta_dpo/beta_margin_mean": 0.017454272136092186, "beta_dpo/beta_margin_std": 0.026341097429394722, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15900275111198425, "beta_dpo/gap_mean": 19.752906799316406, "beta_dpo/gap_std": 27.470291137695312, "beta_dpo/loss_margin_mean": 17.45427131652832, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9327286470143613, "grad_norm": 0.891712486743927, "learning_rate": 7.047107919114586e-09, "logits/chosen": 0.4766712188720703, "logits/rejected": 0.426116943359375, "loss": 1.371, "step": 617 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4955264925956726, "beta_dpo/beta_margin_grad_std": 0.006272831000387669, "beta_dpo/beta_margin_mean": 0.01789809949696064, "beta_dpo/beta_margin_std": 0.025099413469433784, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06541182100772858, "beta_dpo/gap_mean": 19.225358963012695, "beta_dpo/gap_std": 26.753971099853516, "beta_dpo/loss_margin_mean": 17.898099899291992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9342403628117913, "grad_norm": 0.9894082546234131, "learning_rate": 6.738782355044048e-09, "logits/chosen": 0.44365543127059937, "logits/rejected": 0.3375873565673828, "loss": 1.37, "step": 618 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4957164525985718, "beta_dpo/beta_margin_grad_std": 0.0061353943310678005, "beta_dpo/beta_margin_mean": 0.01713750697672367, "beta_dpo/beta_margin_std": 0.024547090753912926, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07072946429252625, "beta_dpo/gap_mean": 18.825641632080078, "beta_dpo/gap_std": 26.591014862060547, "beta_dpo/loss_margin_mean": 17.13750648498535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9357520786092215, "grad_norm": 0.9369555711746216, "learning_rate": 6.437261330158206e-09, "logits/chosen": 0.5680551528930664, "logits/rejected": 0.4911302328109741, "loss": 1.3705, "step": 619 }, { "beta_dpo/beta": 0.04867827519774437, "beta_dpo/beta_margin_grad_mean": -0.35377609729766846, "beta_dpo/beta_margin_grad_std": 0.22725717723369598, "beta_dpo/beta_margin_mean": 1.2456239461898804, "beta_dpo/beta_margin_std": 2.0975840091705322, "beta_dpo/beta_used": 0.04867827519774437, "beta_dpo/beta_used_raw": -0.3070789873600006, "beta_dpo/gap_mean": 17.939937591552734, "beta_dpo/gap_std": 26.433208465576172, "beta_dpo/loss_margin_mean": 17.016864776611328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9372637944066515, "grad_norm": 33.47443389892578, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.4851877987384796, "logits/rejected": 0.47673359513282776, "loss": 1.0516, "step": 620 }, { "beta_dpo/beta": 0.03595667704939842, "beta_dpo/beta_margin_grad_mean": -0.4299541711807251, "beta_dpo/beta_margin_grad_std": 0.20840981602668762, "beta_dpo/beta_margin_mean": 0.38952481746673584, "beta_dpo/beta_margin_std": 1.332067847251892, "beta_dpo/beta_used": 0.03595667704939842, "beta_dpo/beta_used_raw": -0.09256462752819061, "beta_dpo/gap_mean": 17.678569793701172, "beta_dpo/gap_std": 26.529159545898438, "beta_dpo/loss_margin_mean": 15.055705070495605, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9387755102040817, "grad_norm": 19.573747634887695, "learning_rate": 5.854666444131934e-09, "logits/chosen": 0.4968072772026062, "logits/rejected": 0.39098191261291504, "loss": 1.0654, "step": 621 }, { "beta_dpo/beta": 0.07677318155765533, "beta_dpo/beta_margin_grad_mean": -0.3336784541606903, "beta_dpo/beta_margin_grad_std": 0.21298815310001373, "beta_dpo/beta_margin_mean": 1.1385470628738403, "beta_dpo/beta_margin_std": 1.7350324392318726, "beta_dpo/beta_used": 0.07677318155765533, "beta_dpo/beta_used_raw": 0.07677318155765533, "beta_dpo/gap_mean": 17.470783233642578, "beta_dpo/gap_std": 25.936145782470703, "beta_dpo/loss_margin_mean": 16.462358474731445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9402872260015117, "grad_norm": 35.35215759277344, "learning_rate": 5.573608879422875e-09, "logits/chosen": 0.45926177501678467, "logits/rejected": 0.4160395860671997, "loss": 0.8269, "step": 622 }, { "beta_dpo/beta": 0.23275969922542572, "beta_dpo/beta_margin_grad_mean": -0.34063273668289185, "beta_dpo/beta_margin_grad_std": 0.2965709865093231, "beta_dpo/beta_margin_mean": 5.009967803955078, "beta_dpo/beta_margin_std": 8.860584259033203, "beta_dpo/beta_used": 0.23275969922542572, "beta_dpo/beta_used_raw": 0.2276362031698227, "beta_dpo/gap_mean": 18.017637252807617, "beta_dpo/gap_std": 25.823673248291016, "beta_dpo/loss_margin_mean": 20.478515625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9417989417989417, "grad_norm": 65.43958282470703, "learning_rate": 5.299388446305342e-09, "logits/chosen": 0.4594137668609619, "logits/rejected": 0.3915611505508423, "loss": 0.8451, "step": 623 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49523109197616577, "beta_dpo/beta_margin_grad_std": 0.006707730703055859, "beta_dpo/beta_margin_mean": 0.01908011920750141, "beta_dpo/beta_margin_std": 0.026838213205337524, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16606451570987701, "beta_dpo/gap_mean": 18.317127227783203, "beta_dpo/gap_std": 26.19288444519043, "beta_dpo/loss_margin_mean": 19.08011817932129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9433106575963719, "grad_norm": 0.9514113068580627, "learning_rate": 5.03201281531429e-09, "logits/chosen": 0.458259642124176, "logits/rejected": 0.3561304807662964, "loss": 1.3726, "step": 624 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49637553095817566, "beta_dpo/beta_margin_grad_std": 0.006895896513015032, "beta_dpo/beta_margin_mean": 0.014501559548079967, "beta_dpo/beta_margin_std": 0.027591099962592125, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17098666727542877, "beta_dpo/gap_mean": 17.662364959716797, "beta_dpo/gap_std": 26.34381103515625, "beta_dpo/loss_margin_mean": 14.501558303833008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9448223733938019, "grad_norm": 0.7157669067382812, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.5461119413375854, "logits/rejected": 0.4578895568847656, "loss": 1.3733, "step": 625 }, { "beta_dpo/beta": 0.23165586590766907, "beta_dpo/beta_margin_grad_mean": -0.3843648135662079, "beta_dpo/beta_margin_grad_std": 0.3157823085784912, "beta_dpo/beta_margin_mean": 5.67930793762207, "beta_dpo/beta_margin_std": 12.665205001831055, "beta_dpo/beta_used": 0.23165586590766907, "beta_dpo/beta_used_raw": 0.08626031875610352, "beta_dpo/gap_mean": 18.347545623779297, "beta_dpo/gap_std": 27.535655975341797, "beta_dpo/loss_margin_mean": 22.30915069580078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9463340891912321, "grad_norm": 216.2509002685547, "learning_rate": 4.517825684323323e-09, "logits/chosen": 0.5618187189102173, "logits/rejected": 0.42893868684768677, "loss": 2.5394, "step": 626 }, { "beta_dpo/beta": 0.22693291306495667, "beta_dpo/beta_margin_grad_mean": -0.20085382461547852, "beta_dpo/beta_margin_grad_std": 0.3276273012161255, "beta_dpo/beta_margin_mean": 5.549400806427002, "beta_dpo/beta_margin_std": 6.667171001434326, "beta_dpo/beta_used": 0.22693291306495667, "beta_dpo/beta_used_raw": 0.22693291306495667, "beta_dpo/gap_mean": 19.314483642578125, "beta_dpo/gap_std": 27.916873931884766, "beta_dpo/loss_margin_mean": 24.403059005737305, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9478458049886621, "grad_norm": 109.18547058105469, "learning_rate": 4.271028567242818e-09, "logits/chosen": 0.40109649300575256, "logits/rejected": 0.2826465666294098, "loss": 0.7044, "step": 627 }, { "beta_dpo/beta": 0.3724895119667053, "beta_dpo/beta_margin_grad_mean": -0.23820850253105164, "beta_dpo/beta_margin_grad_std": 0.38285335898399353, "beta_dpo/beta_margin_mean": 10.399557113647461, "beta_dpo/beta_margin_std": 13.7665376663208, "beta_dpo/beta_used": 0.3724895119667053, "beta_dpo/beta_used_raw": 0.3724895119667053, "beta_dpo/gap_mean": 19.812664031982422, "beta_dpo/gap_std": 29.181087493896484, "beta_dpo/loss_margin_mean": 23.914892196655273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9493575207860923, "grad_norm": 242.51388549804688, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 0.5061334371566772, "logits/rejected": 0.4650648236274719, "loss": 2.0624, "step": 628 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49544239044189453, "beta_dpo/beta_margin_grad_std": 0.005720792803913355, "beta_dpo/beta_margin_mean": 0.018234064802527428, "beta_dpo/beta_margin_std": 0.022889522835612297, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.031100261956453323, "beta_dpo/gap_mean": 20.11130142211914, "beta_dpo/gap_std": 28.020038604736328, "beta_dpo/loss_margin_mean": 18.23406410217285, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9508692365835223, "grad_norm": 1.0008560419082642, "learning_rate": 3.798061746947995e-09, "logits/chosen": 0.45952147245407104, "logits/rejected": 0.44886553287506104, "loss": 1.3686, "step": 629 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4949391186237335, "beta_dpo/beta_margin_grad_std": 0.006423095241189003, "beta_dpo/beta_margin_mean": 0.02024826407432556, "beta_dpo/beta_margin_std": 0.025700394064188004, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11937922239303589, "beta_dpo/gap_mean": 19.894287109375, "beta_dpo/gap_std": 27.414846420288086, "beta_dpo/loss_margin_mean": 20.248262405395508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9523809523809523, "grad_norm": 1.0278126001358032, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.4336046576499939, "logits/rejected": 0.38262441754341125, "loss": 1.3702, "step": 630 }, { "beta_dpo/beta": 0.2094002515077591, "beta_dpo/beta_margin_grad_mean": -0.3453434407711029, "beta_dpo/beta_margin_grad_std": 0.31047523021698, "beta_dpo/beta_margin_mean": 6.9932379722595215, "beta_dpo/beta_margin_std": 11.639176368713379, "beta_dpo/beta_used": 0.2094002515077591, "beta_dpo/beta_used_raw": 0.11465645581483841, "beta_dpo/gap_mean": 21.329425811767578, "beta_dpo/gap_std": 28.016944885253906, "beta_dpo/loss_margin_mean": 26.024810791015625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9538926681783825, "grad_norm": 270.0311279296875, "learning_rate": 3.352641923861144e-09, "logits/chosen": 0.5511718988418579, "logits/rejected": 0.44041550159454346, "loss": 1.8247, "step": 631 }, { "beta_dpo/beta": 0.32847917079925537, "beta_dpo/beta_margin_grad_mean": -0.3383893370628357, "beta_dpo/beta_margin_grad_std": 0.309362530708313, "beta_dpo/beta_margin_mean": 8.23188304901123, "beta_dpo/beta_margin_std": 15.608662605285645, "beta_dpo/beta_used": 0.32847917079925537, "beta_dpo/beta_used_raw": 0.31657373905181885, "beta_dpo/gap_mean": 21.614561080932617, "beta_dpo/gap_std": 28.078006744384766, "beta_dpo/loss_margin_mean": 23.72018814086914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9554043839758125, "grad_norm": 259.4488220214844, "learning_rate": 3.140277830901428e-09, "logits/chosen": 0.5116778612136841, "logits/rejected": 0.4842451214790344, "loss": 1.6392, "step": 632 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4954851567745209, "beta_dpo/beta_margin_grad_std": 0.007191209588199854, "beta_dpo/beta_margin_mean": 0.018064936622977257, "beta_dpo/beta_margin_std": 0.02877538837492466, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3825916647911072, "beta_dpo/gap_mean": 21.042991638183594, "beta_dpo/gap_std": 27.94135856628418, "beta_dpo/loss_margin_mean": 18.0649356842041, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9569160997732427, "grad_norm": 0.7798741459846497, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 0.46421927213668823, "logits/rejected": 0.39746445417404175, "loss": 1.3735, "step": 633 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4972086250782013, "beta_dpo/beta_margin_grad_std": 0.007200221065431833, "beta_dpo/beta_margin_mean": 0.01116915326565504, "beta_dpo/beta_margin_std": 0.02880818583071232, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4677852690219879, "beta_dpo/gap_mean": 19.498958587646484, "beta_dpo/gap_std": 28.379730224609375, "beta_dpo/loss_margin_mean": 11.169153213500977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9584278155706727, "grad_norm": 0.7306960225105286, "learning_rate": 2.736270983384276e-09, "logits/chosen": 0.5405286550521851, "logits/rejected": 0.5492910146713257, "loss": 1.3765, "step": 634 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4961506724357605, "beta_dpo/beta_margin_grad_std": 0.008018962107598782, "beta_dpo/beta_margin_mean": 0.015402358956634998, "beta_dpo/beta_margin_std": 0.03208545595407486, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15202215313911438, "beta_dpo/gap_mean": 18.54298973083496, "beta_dpo/gap_std": 29.06603240966797, "beta_dpo/loss_margin_mean": 15.402358055114746, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9599395313681028, "grad_norm": 0.9664364457130432, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.35498249530792236, "logits/rejected": 0.28940173983573914, "loss": 1.3722, "step": 635 }, { "beta_dpo/beta": 0.17797021567821503, "beta_dpo/beta_margin_grad_mean": -0.2381211668252945, "beta_dpo/beta_margin_grad_std": 0.2688322961330414, "beta_dpo/beta_margin_mean": 4.206960678100586, "beta_dpo/beta_margin_std": 6.49386739730835, "beta_dpo/beta_used": 0.17797021567821503, "beta_dpo/beta_used_raw": 0.17797021567821503, "beta_dpo/gap_mean": 19.014446258544922, "beta_dpo/gap_std": 28.835285186767578, "beta_dpo/loss_margin_mean": 21.473487854003906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9614512471655329, "grad_norm": 79.36841583251953, "learning_rate": 2.359929934524829e-09, "logits/chosen": 0.4399072527885437, "logits/rejected": 0.3462012708187103, "loss": 0.6422, "step": 636 }, { "beta_dpo/beta": 0.10971579700708389, "beta_dpo/beta_margin_grad_mean": -0.3816455602645874, "beta_dpo/beta_margin_grad_std": 0.274883896112442, "beta_dpo/beta_margin_mean": 2.120753526687622, "beta_dpo/beta_margin_std": 5.4004292488098145, "beta_dpo/beta_used": 0.10971579700708389, "beta_dpo/beta_used_raw": -0.21972408890724182, "beta_dpo/gap_mean": 18.41397476196289, "beta_dpo/gap_std": 29.193927764892578, "beta_dpo/loss_margin_mean": 16.04454803466797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9629629629629629, "grad_norm": 69.94818878173828, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 0.41486144065856934, "logits/rejected": 0.339840829372406, "loss": 1.0826, "step": 637 }, { "beta_dpo/beta": 0.18578550219535828, "beta_dpo/beta_margin_grad_mean": -0.28964492678642273, "beta_dpo/beta_margin_grad_std": 0.32899391651153564, "beta_dpo/beta_margin_mean": 3.6086082458496094, "beta_dpo/beta_margin_std": 5.766052722930908, "beta_dpo/beta_used": 0.18578550219535828, "beta_dpo/beta_used_raw": 0.18578550219535828, "beta_dpo/gap_mean": 18.60744857788086, "beta_dpo/gap_std": 29.17364501953125, "beta_dpo/loss_margin_mean": 19.51310920715332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9644746787603931, "grad_norm": 142.8914031982422, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 0.46569958329200745, "logits/rejected": 0.4127267599105835, "loss": 1.1797, "step": 638 }, { "beta_dpo/beta": 0.030427690595388412, "beta_dpo/beta_margin_grad_mean": -0.4014831483364105, "beta_dpo/beta_margin_grad_std": 0.18178702890872955, "beta_dpo/beta_margin_mean": 0.603205680847168, "beta_dpo/beta_margin_std": 1.2123973369598389, "beta_dpo/beta_used": 0.030427690595388412, "beta_dpo/beta_used_raw": -0.04119763523340225, "beta_dpo/gap_mean": 19.007099151611328, "beta_dpo/gap_std": 28.996734619140625, "beta_dpo/loss_margin_mean": 21.59126091003418, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9659863945578231, "grad_norm": 21.713918685913086, "learning_rate": 1.847382997337943e-09, "logits/chosen": 0.47108232975006104, "logits/rejected": 0.35434550046920776, "loss": 1.1042, "step": 639 }, { "beta_dpo/beta": 0.09601413458585739, "beta_dpo/beta_margin_grad_mean": -0.3168926239013672, "beta_dpo/beta_margin_grad_std": 0.2539225220680237, "beta_dpo/beta_margin_mean": 1.7108955383300781, "beta_dpo/beta_margin_std": 3.4565632343292236, "beta_dpo/beta_used": 0.09601413458585739, "beta_dpo/beta_used_raw": 0.09601413458585739, "beta_dpo/gap_mean": 18.945955276489258, "beta_dpo/gap_std": 28.959095001220703, "beta_dpo/loss_margin_mean": 16.798215866088867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9674981103552532, "grad_norm": 47.492164611816406, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.5608204007148743, "logits/rejected": 0.4967699646949768, "loss": 0.7935, "step": 640 }, { "beta_dpo/beta": 0.04548508673906326, "beta_dpo/beta_margin_grad_mean": -0.39172929525375366, "beta_dpo/beta_margin_grad_std": 0.21917176246643066, "beta_dpo/beta_margin_mean": 0.8428230881690979, "beta_dpo/beta_margin_std": 1.80697500705719, "beta_dpo/beta_used": 0.04548508673906326, "beta_dpo/beta_used_raw": -0.013075441122055054, "beta_dpo/gap_mean": 18.613567352294922, "beta_dpo/gap_std": 28.440582275390625, "beta_dpo/loss_margin_mean": 17.370569229125977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9690098261526833, "grad_norm": 32.292781829833984, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 0.46294909715652466, "logits/rejected": 0.43334758281707764, "loss": 1.0495, "step": 641 }, { "beta_dpo/beta": 0.03415125980973244, "beta_dpo/beta_margin_grad_mean": -0.4151100516319275, "beta_dpo/beta_margin_grad_std": 0.2164982706308365, "beta_dpo/beta_margin_mean": 0.6367584466934204, "beta_dpo/beta_margin_std": 1.5332374572753906, "beta_dpo/beta_used": 0.03415125980973244, "beta_dpo/beta_used_raw": -0.2329845428466797, "beta_dpo/gap_mean": 18.07403564453125, "beta_dpo/gap_std": 28.444255828857422, "beta_dpo/loss_margin_mean": 14.236799240112305, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9705215419501134, "grad_norm": 34.46243667602539, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 0.42237862944602966, "logits/rejected": 0.42818087339401245, "loss": 1.1772, "step": 642 }, { "beta_dpo/beta": 0.06243688613176346, "beta_dpo/beta_margin_grad_mean": -0.3356240689754486, "beta_dpo/beta_margin_grad_std": 0.26229581236839294, "beta_dpo/beta_margin_mean": 1.0187599658966064, "beta_dpo/beta_margin_std": 1.7895455360412598, "beta_dpo/beta_used": 0.06243688613176346, "beta_dpo/beta_used_raw": 0.06243688613176346, "beta_dpo/gap_mean": 17.609920501708984, "beta_dpo/gap_std": 28.21491813659668, "beta_dpo/loss_margin_mean": 16.685531616210938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9720332577475435, "grad_norm": 82.64678955078125, "learning_rate": 1.261184375888541e-09, "logits/chosen": 0.4097246825695038, "logits/rejected": 0.3160959482192993, "loss": 0.9238, "step": 643 }, { "beta_dpo/beta": 0.2932557463645935, "beta_dpo/beta_margin_grad_mean": -0.36465975642204285, "beta_dpo/beta_margin_grad_std": 0.3044781982898712, "beta_dpo/beta_margin_mean": 5.584909915924072, "beta_dpo/beta_margin_std": 14.013591766357422, "beta_dpo/beta_used": 0.2932557463645935, "beta_dpo/beta_used_raw": 0.2932557463645935, "beta_dpo/gap_mean": 16.896411895751953, "beta_dpo/gap_std": 28.862873077392578, "beta_dpo/loss_margin_mean": 15.0465669631958, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9735449735449735, "grad_norm": 197.562255859375, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 0.5534158945083618, "logits/rejected": 0.5172810554504395, "loss": 1.2636, "step": 644 }, { "beta_dpo/beta": 0.09316226840019226, "beta_dpo/beta_margin_grad_mean": -0.37199652194976807, "beta_dpo/beta_margin_grad_std": 0.24579140543937683, "beta_dpo/beta_margin_mean": 1.6856802701950073, "beta_dpo/beta_margin_std": 3.9560256004333496, "beta_dpo/beta_used": 0.09316226840019226, "beta_dpo/beta_used_raw": 0.059367213398218155, "beta_dpo/gap_mean": 17.337440490722656, "beta_dpo/gap_std": 28.79489517211914, "beta_dpo/loss_margin_mean": 18.958377838134766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9750566893424036, "grad_norm": 38.84883499145508, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.4112117886543274, "logits/rejected": 0.38647520542144775, "loss": 0.9213, "step": 645 }, { "beta_dpo/beta": 0.12865673005580902, "beta_dpo/beta_margin_grad_mean": -0.36234015226364136, "beta_dpo/beta_margin_grad_std": 0.2799786329269409, "beta_dpo/beta_margin_mean": 2.2738983631134033, "beta_dpo/beta_margin_std": 6.0258049964904785, "beta_dpo/beta_used": 0.12865673005580902, "beta_dpo/beta_used_raw": 0.07598783075809479, "beta_dpo/gap_mean": 17.541095733642578, "beta_dpo/gap_std": 28.898643493652344, "beta_dpo/loss_margin_mean": 18.890525817871094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9765684051398337, "grad_norm": 96.72042846679688, "learning_rate": 8.945768539031783e-10, "logits/chosen": 0.5024992227554321, "logits/rejected": 0.44613319635391235, "loss": 1.3655, "step": 646 }, { "beta_dpo/beta": 0.11004437506198883, "beta_dpo/beta_margin_grad_mean": -0.2987731397151947, "beta_dpo/beta_margin_grad_std": 0.2559739351272583, "beta_dpo/beta_margin_mean": 2.851088047027588, "beta_dpo/beta_margin_std": 4.678904056549072, "beta_dpo/beta_used": 0.11004437506198883, "beta_dpo/beta_used_raw": 0.11004437506198883, "beta_dpo/gap_mean": 18.652297973632812, "beta_dpo/gap_std": 28.77882194519043, "beta_dpo/loss_margin_mean": 24.79449462890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9780801209372638, "grad_norm": 83.78498840332031, "learning_rate": 7.863060120144316e-10, "logits/chosen": 0.5124300122261047, "logits/rejected": 0.4137851595878601, "loss": 1.0114, "step": 647 }, { "beta_dpo/beta": 0.3000466823577881, "beta_dpo/beta_margin_grad_mean": -0.3578697144985199, "beta_dpo/beta_margin_grad_std": 0.3171420395374298, "beta_dpo/beta_margin_mean": 7.148730754852295, "beta_dpo/beta_margin_std": 16.316646575927734, "beta_dpo/beta_used": 0.3000466823577881, "beta_dpo/beta_used_raw": 0.24156461656093597, "beta_dpo/gap_mean": 19.34526252746582, "beta_dpo/gap_std": 29.065902709960938, "beta_dpo/loss_margin_mean": 19.919885635375977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9795918367346939, "grad_norm": 224.33082580566406, "learning_rate": 6.850062128694045e-10, "logits/chosen": 0.42840665578842163, "logits/rejected": 0.35993558168411255, "loss": 1.7301, "step": 648 }, { "beta_dpo/beta": 0.13330677151679993, "beta_dpo/beta_margin_grad_mean": -0.30887559056282043, "beta_dpo/beta_margin_grad_std": 0.3698316514492035, "beta_dpo/beta_margin_mean": 2.7245736122131348, "beta_dpo/beta_margin_std": 4.273642063140869, "beta_dpo/beta_used": 0.13330677151679993, "beta_dpo/beta_used_raw": 0.13330677151679993, "beta_dpo/gap_mean": 19.349628448486328, "beta_dpo/gap_std": 28.986984252929688, "beta_dpo/loss_margin_mean": 19.400482177734375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.981103552532124, "grad_norm": 110.32051086425781, "learning_rate": 5.906802900412788e-10, "logits/chosen": 0.5004416704177856, "logits/rejected": 0.43793487548828125, "loss": 1.2039, "step": 649 }, { "beta_dpo/beta": 0.1441710889339447, "beta_dpo/beta_margin_grad_mean": -0.3954494297504425, "beta_dpo/beta_margin_grad_std": 0.2960076630115509, "beta_dpo/beta_margin_mean": 2.8221378326416016, "beta_dpo/beta_margin_std": 7.158821105957031, "beta_dpo/beta_used": 0.1441710889339447, "beta_dpo/beta_used_raw": 0.09482648968696594, "beta_dpo/gap_mean": 19.037673950195312, "beta_dpo/gap_std": 29.18463897705078, "beta_dpo/loss_margin_mean": 18.523643493652344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.982615268329554, "grad_norm": 119.91067504882812, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.5848698019981384, "logits/rejected": 0.5156843066215515, "loss": 1.3874, "step": 650 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49571534991264343, "beta_dpo/beta_margin_grad_std": 0.006656995974481106, "beta_dpo/beta_margin_mean": 0.017142919823527336, "beta_dpo/beta_margin_std": 0.026636159047484398, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2897125482559204, "beta_dpo/gap_mean": 18.57063865661621, "beta_dpo/gap_std": 28.645057678222656, "beta_dpo/loss_margin_mean": 17.142919540405273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9841269841269841, "grad_norm": 0.8137643337249756, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 0.5590111017227173, "logits/rejected": 0.4814707338809967, "loss": 1.3744, "step": 651 }, { "beta_dpo/beta": 0.01311043556779623, "beta_dpo/beta_margin_grad_mean": -0.44266369938850403, "beta_dpo/beta_margin_grad_std": 0.11703706532716751, "beta_dpo/beta_margin_mean": 0.26388391852378845, "beta_dpo/beta_margin_std": 0.5482731461524963, "beta_dpo/beta_used": 0.01311043556779623, "beta_dpo/beta_used_raw": -0.11602558195590973, "beta_dpo/gap_mean": 18.64056396484375, "beta_dpo/gap_std": 28.707130432128906, "beta_dpo/loss_margin_mean": 16.924619674682617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9856386999244142, "grad_norm": 9.936495780944824, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 0.4921461343765259, "logits/rejected": 0.43392133712768555, "loss": 1.2267, "step": 652 }, { "beta_dpo/beta": 0.010960490442812443, "beta_dpo/beta_margin_grad_mean": -0.45152518153190613, "beta_dpo/beta_margin_grad_std": 0.0951455682516098, "beta_dpo/beta_margin_mean": 0.21232452988624573, "beta_dpo/beta_margin_std": 0.42660677433013916, "beta_dpo/beta_used": 0.010960490442812443, "beta_dpo/beta_used_raw": -0.1325518786907196, "beta_dpo/gap_mean": 18.40667724609375, "beta_dpo/gap_std": 28.581680297851562, "beta_dpo/loss_margin_mean": 18.254802703857422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9871504157218443, "grad_norm": 10.04647445678711, "learning_rate": 2.831652042480093e-10, "logits/chosen": 0.4425300359725952, "logits/rejected": 0.394487589597702, "loss": 1.2431, "step": 653 }, { "beta_dpo/beta": 0.00953736063092947, "beta_dpo/beta_margin_grad_mean": -0.4530160427093506, "beta_dpo/beta_margin_grad_std": 0.09966482222080231, "beta_dpo/beta_margin_mean": 0.2068350464105606, "beta_dpo/beta_margin_std": 0.44756725430488586, "beta_dpo/beta_used": 0.00953736063092947, "beta_dpo/beta_used_raw": -0.20549921691417694, "beta_dpo/gap_mean": 18.37057113647461, "beta_dpo/gap_std": 28.73928451538086, "beta_dpo/loss_margin_mean": 16.940853118896484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9886621315192744, "grad_norm": 10.299638748168945, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 0.4537598490715027, "logits/rejected": 0.3531543016433716, "loss": 1.2612, "step": 654 }, { "beta_dpo/beta": 0.13284625113010406, "beta_dpo/beta_margin_grad_mean": -0.29767540097236633, "beta_dpo/beta_margin_grad_std": 0.24205495417118073, "beta_dpo/beta_margin_mean": 3.739741563796997, "beta_dpo/beta_margin_std": 5.718697547912598, "beta_dpo/beta_used": 0.13284625113010406, "beta_dpo/beta_used_raw": 0.11756931990385056, "beta_dpo/gap_mean": 18.73858642578125, "beta_dpo/gap_std": 27.660343170166016, "beta_dpo/loss_margin_mean": 20.653202056884766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9901738473167044, "grad_norm": 36.28606033325195, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.4345353841781616, "logits/rejected": 0.3393111824989319, "loss": 0.8738, "step": 655 }, { "beta_dpo/beta": 0.20375779271125793, "beta_dpo/beta_margin_grad_mean": -0.25210514664649963, "beta_dpo/beta_margin_grad_std": 0.3344373404979706, "beta_dpo/beta_margin_mean": 4.502808570861816, "beta_dpo/beta_margin_std": 6.9566426277160645, "beta_dpo/beta_used": 0.20375779271125793, "beta_dpo/beta_used_raw": 0.20375779271125793, "beta_dpo/gap_mean": 18.90631866455078, "beta_dpo/gap_std": 27.543325424194336, "beta_dpo/loss_margin_mean": 20.858234405517578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9916855631141346, "grad_norm": 89.89404296875, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 0.4053155183792114, "logits/rejected": 0.34848740696907043, "loss": 0.8027, "step": 656 }, { "beta_dpo/beta": 0.2964837849140167, "beta_dpo/beta_margin_grad_mean": -0.21630139648914337, "beta_dpo/beta_margin_grad_std": 0.3331965208053589, "beta_dpo/beta_margin_mean": 7.668827056884766, "beta_dpo/beta_margin_std": 11.465837478637695, "beta_dpo/beta_used": 0.2964837849140167, "beta_dpo/beta_used_raw": 0.2964837849140167, "beta_dpo/gap_mean": 19.829242706298828, "beta_dpo/gap_std": 28.236858367919922, "beta_dpo/loss_margin_mean": 24.64881134033203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9931972789115646, "grad_norm": 171.66510009765625, "learning_rate": 8.740807750345913e-11, "logits/chosen": 0.5963433384895325, "logits/rejected": 0.49983879923820496, "loss": 0.821, "step": 657 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49546483159065247, "beta_dpo/beta_margin_grad_std": 0.008204931393265724, "beta_dpo/beta_margin_mean": 0.018146011978387833, "beta_dpo/beta_margin_std": 0.03283081203699112, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2486192286014557, "beta_dpo/gap_mean": 19.505714416503906, "beta_dpo/gap_std": 28.95309066772461, "beta_dpo/loss_margin_mean": 18.146011352539062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9947089947089947, "grad_norm": 0.8370410799980164, "learning_rate": 5.594234322453539e-11, "logits/chosen": 0.5124013423919678, "logits/rejected": 0.4685628414154053, "loss": 1.3728, "step": 658 }, { "beta_dpo/beta": 0.08610343188047409, "beta_dpo/beta_margin_grad_mean": -0.372318297624588, "beta_dpo/beta_margin_grad_std": 0.28925395011901855, "beta_dpo/beta_margin_mean": 1.7324120998382568, "beta_dpo/beta_margin_std": 3.9688851833343506, "beta_dpo/beta_used": 0.08610343188047409, "beta_dpo/beta_used_raw": -0.0352298840880394, "beta_dpo/gap_mean": 19.216293334960938, "beta_dpo/gap_std": 29.245624542236328, "beta_dpo/loss_margin_mean": 15.123144149780273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9962207105064248, "grad_norm": 63.744686126708984, "learning_rate": 3.146808153123293e-11, "logits/chosen": 0.5592527389526367, "logits/rejected": 0.48994961380958557, "loss": 1.3127, "step": 659 }, { "beta_dpo/beta": 0.3657574951648712, "beta_dpo/beta_margin_grad_mean": -0.19617773592472076, "beta_dpo/beta_margin_grad_std": 0.3252088725566864, "beta_dpo/beta_margin_mean": 8.107396125793457, "beta_dpo/beta_margin_std": 11.35789680480957, "beta_dpo/beta_used": 0.3657574951648712, "beta_dpo/beta_used_raw": 0.3657574951648712, "beta_dpo/gap_mean": 19.213069915771484, "beta_dpo/gap_std": 28.73113250732422, "beta_dpo/loss_margin_mean": 22.08330726623535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9977324263038548, "grad_norm": 170.74034118652344, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.5866272449493408, "logits/rejected": 0.5091443061828613, "loss": 0.9655, "step": 660 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49589696526527405, "beta_dpo/beta_margin_grad_std": 0.007532664109021425, "beta_dpo/beta_margin_mean": 0.016415830701589584, "beta_dpo/beta_margin_std": 0.0301388967782259, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17030301690101624, "beta_dpo/gap_mean": 18.86831283569336, "beta_dpo/gap_std": 29.014659881591797, "beta_dpo/loss_margin_mean": 16.415828704833984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.999244142101285, "grad_norm": 0.792015016078949, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 0.4464152455329895, "logits/rejected": 0.3606187105178833, "loss": 1.3721, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.1692889305268401, "train_runtime": 3152.686, "train_samples_per_second": 13.429, "train_steps_per_second": 0.21 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }