{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 100, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta": 0.09949193149805069, "beta_dpo/beta_margin_grad_mean": -0.5002161860466003, "beta_dpo/beta_margin_grad_std": 0.00568619929254055, "beta_dpo/beta_margin_mean": -0.0008644365007057786, "beta_dpo/beta_margin_std": 0.02274876832962036, "beta_dpo/beta_used": 0.09949193149805069, "beta_dpo/beta_used_raw": 0.09949193149805069, "beta_dpo/gap_mean": -0.002860965905711055, "beta_dpo/gap_std": 0.027476027607917786, "beta_dpo/loss_margin_mean": -0.00900276005268097, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0015117157974300832, "grad_norm": 68.95387268066406, "learning_rate": 0.0, "logits/chosen": -3.487456798553467, "logits/rejected": -3.4948604106903076, "loss": 1.3875, "step": 1 }, { "beta_dpo/beta": 0.09910144656896591, "beta_dpo/beta_margin_grad_mean": -0.5004775524139404, "beta_dpo/beta_margin_grad_std": 0.004114280920475721, "beta_dpo/beta_margin_mean": -0.0019103928934782743, "beta_dpo/beta_margin_std": 0.01645863801240921, "beta_dpo/beta_used": 0.09910144656896591, "beta_dpo/beta_used_raw": 0.09910144656896591, "beta_dpo/gap_mean": -0.004164176527410746, "beta_dpo/gap_std": 0.05989988148212433, "beta_dpo/loss_margin_mean": -0.01922258734703064, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0030234315948601664, "grad_norm": 63.74178695678711, "learning_rate": 7.462686567164179e-09, "logits/chosen": -3.489974021911621, "logits/rejected": -3.4899895191192627, "loss": 1.3882, "step": 2 }, { "beta_dpo/beta": 0.1007503867149353, "beta_dpo/beta_margin_grad_mean": -0.5004459619522095, "beta_dpo/beta_margin_grad_std": 0.0060578202828764915, "beta_dpo/beta_margin_mean": -0.0017849474679678679, "beta_dpo/beta_margin_std": 0.024235889315605164, "beta_dpo/beta_used": 0.1007503867149353, "beta_dpo/beta_used_raw": 0.1007503867149353, "beta_dpo/gap_mean": -0.004537786357104778, "beta_dpo/gap_std": 0.09120701253414154, "beta_dpo/loss_margin_mean": -0.018549904227256775, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0045351473922902496, "grad_norm": 94.64966583251953, "learning_rate": 1.4925373134328357e-08, "logits/chosen": -3.492800235748291, "logits/rejected": -3.5052385330200195, "loss": 1.3855, "step": 3 }, { "beta_dpo/beta": 0.09986072778701782, "beta_dpo/beta_margin_grad_mean": -0.4987858533859253, "beta_dpo/beta_margin_grad_std": 0.006075920071452856, "beta_dpo/beta_margin_mean": 0.004857912659645081, "beta_dpo/beta_margin_std": 0.024309273809194565, "beta_dpo/beta_used": 0.09986072778701782, "beta_dpo/beta_used_raw": 0.09986072778701782, "beta_dpo/gap_mean": -0.0007102746749296784, "beta_dpo/gap_std": 0.12064293026924133, "beta_dpo/loss_margin_mean": 0.04852989315986633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.006046863189720333, "grad_norm": 78.82814025878906, "learning_rate": 2.2388059701492534e-08, "logits/chosen": -3.47836971282959, "logits/rejected": -3.4763317108154297, "loss": 1.3866, "step": 4 }, { "beta_dpo/beta": 0.10005674511194229, "beta_dpo/beta_margin_grad_mean": -0.4992569386959076, "beta_dpo/beta_margin_grad_std": 0.0056086876429617405, "beta_dpo/beta_margin_mean": 0.002972628688439727, "beta_dpo/beta_margin_std": 0.0224379301071167, "beta_dpo/beta_used": 0.10005674511194229, "beta_dpo/beta_used_raw": 0.10005674511194229, "beta_dpo/gap_mean": 0.006695480085909367, "beta_dpo/gap_std": 0.13884103298187256, "beta_dpo/loss_margin_mean": 0.029651284217834473, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.007558578987150416, "grad_norm": 77.07207489013672, "learning_rate": 2.9850746268656714e-08, "logits/chosen": -3.465094804763794, "logits/rejected": -3.4696974754333496, "loss": 1.3856, "step": 5 }, { "beta_dpo/beta": 0.10029098391532898, "beta_dpo/beta_margin_grad_mean": -0.4995466470718384, "beta_dpo/beta_margin_grad_std": 0.006639161147177219, "beta_dpo/beta_margin_mean": 0.001812646514736116, "beta_dpo/beta_margin_std": 0.026568656787276268, "beta_dpo/beta_used": 0.10029098391532898, "beta_dpo/beta_used_raw": 0.10029098391532898, "beta_dpo/gap_mean": 0.010851222090423107, "beta_dpo/gap_std": 0.15967890620231628, "beta_dpo/loss_margin_mean": 0.017943859100341797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.009070294784580499, "grad_norm": 74.04022979736328, "learning_rate": 3.731343283582089e-08, "logits/chosen": -3.4667019844055176, "logits/rejected": -3.4714226722717285, "loss": 1.3848, "step": 6 }, { "beta_dpo/beta": 0.10088849067687988, "beta_dpo/beta_margin_grad_mean": -0.4983506500720978, "beta_dpo/beta_margin_grad_std": 0.0047491928562521935, "beta_dpo/beta_margin_mean": 0.006597965024411678, "beta_dpo/beta_margin_std": 0.018998507410287857, "beta_dpo/beta_used": 0.10088849067687988, "beta_dpo/beta_used_raw": 0.10088849067687988, "beta_dpo/gap_mean": 0.019090309739112854, "beta_dpo/gap_std": 0.17145544290542603, "beta_dpo/loss_margin_mean": 0.06506466865539551, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.010582010582010581, "grad_norm": 69.01170349121094, "learning_rate": 4.477611940298507e-08, "logits/chosen": -3.471862316131592, "logits/rejected": -3.473724842071533, "loss": 1.3829, "step": 7 }, { "beta_dpo/beta": 0.10075643658638, "beta_dpo/beta_margin_grad_mean": -0.49915874004364014, "beta_dpo/beta_margin_grad_std": 0.00561918830499053, "beta_dpo/beta_margin_mean": 0.0033658454194664955, "beta_dpo/beta_margin_std": 0.022479888051748276, "beta_dpo/beta_used": 0.10075643658638, "beta_dpo/beta_used_raw": 0.10075643658638, "beta_dpo/gap_mean": 0.02159273251891136, "beta_dpo/gap_std": 0.17980128526687622, "beta_dpo/loss_margin_mean": 0.03358778357505798, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.012093726379440665, "grad_norm": 71.97833251953125, "learning_rate": 5.223880597014925e-08, "logits/chosen": -3.4647488594055176, "logits/rejected": -3.462214469909668, "loss": 1.3828, "step": 8 }, { "beta_dpo/beta": 0.09967806935310364, "beta_dpo/beta_margin_grad_mean": -0.4994949698448181, "beta_dpo/beta_margin_grad_std": 0.005284009501338005, "beta_dpo/beta_margin_mean": 0.002020241692662239, "beta_dpo/beta_margin_std": 0.021138343960046768, "beta_dpo/beta_used": 0.09967806935310364, "beta_dpo/beta_used_raw": 0.09967806935310364, "beta_dpo/gap_mean": 0.022652022540569305, "beta_dpo/gap_std": 0.18474653363227844, "beta_dpo/loss_margin_mean": 0.020203545689582825, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.013605442176870748, "grad_norm": 75.72856140136719, "learning_rate": 5.970149253731343e-08, "logits/chosen": -3.485848903656006, "logits/rejected": -3.501671314239502, "loss": 1.3846, "step": 9 }, { "beta_dpo/beta": 0.10177969187498093, "beta_dpo/beta_margin_grad_mean": -0.4997633695602417, "beta_dpo/beta_margin_grad_std": 0.006110870745033026, "beta_dpo/beta_margin_mean": 0.0009462524903938174, "beta_dpo/beta_margin_std": 0.024446699768304825, "beta_dpo/beta_used": 0.10177969187498093, "beta_dpo/beta_used_raw": 0.10177969187498093, "beta_dpo/gap_mean": 0.017854779958724976, "beta_dpo/gap_std": 0.19548800587654114, "beta_dpo/loss_margin_mean": 0.008596926927566528, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.015117157974300832, "grad_norm": 75.9504165649414, "learning_rate": 6.71641791044776e-08, "logits/chosen": -3.483827590942383, "logits/rejected": -3.4898672103881836, "loss": 1.3815, "step": 10 }, { "beta_dpo/beta": 0.10070285201072693, "beta_dpo/beta_margin_grad_mean": -0.4989999830722809, "beta_dpo/beta_margin_grad_std": 0.006797553040087223, "beta_dpo/beta_margin_mean": 0.0039999885484576225, "beta_dpo/beta_margin_std": 0.027194734662771225, "beta_dpo/beta_used": 0.10070285201072693, "beta_dpo/beta_used_raw": 0.10070285201072693, "beta_dpo/gap_mean": 0.023937324061989784, "beta_dpo/gap_std": 0.20705005526542664, "beta_dpo/loss_margin_mean": 0.03899078071117401, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.016628873771730914, "grad_norm": 65.5156478881836, "learning_rate": 7.462686567164178e-08, "logits/chosen": -3.488257884979248, "logits/rejected": -3.497443199157715, "loss": 1.3827, "step": 11 }, { "beta_dpo/beta": 0.10033339262008667, "beta_dpo/beta_margin_grad_mean": -0.500347375869751, "beta_dpo/beta_margin_grad_std": 0.004478678107261658, "beta_dpo/beta_margin_mean": -0.0013897416647523642, "beta_dpo/beta_margin_std": 0.01791626773774624, "beta_dpo/beta_used": 0.10033339262008667, "beta_dpo/beta_used_raw": 0.10033339262008667, "beta_dpo/gap_mean": 0.018025288358330727, "beta_dpo/gap_std": 0.2077764868736267, "beta_dpo/loss_margin_mean": -0.01385033130645752, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.018140589569160998, "grad_norm": 73.49842071533203, "learning_rate": 8.208955223880596e-08, "logits/chosen": -3.4729955196380615, "logits/rejected": -3.473330497741699, "loss": 1.384, "step": 12 }, { "beta_dpo/beta": 0.09898576885461807, "beta_dpo/beta_margin_grad_mean": -0.4998151957988739, "beta_dpo/beta_margin_grad_std": 0.005596262402832508, "beta_dpo/beta_margin_mean": 0.0007393779815174639, "beta_dpo/beta_margin_std": 0.022389404475688934, "beta_dpo/beta_used": 0.09898576885461807, "beta_dpo/beta_used_raw": 0.09898576885461807, "beta_dpo/gap_mean": 0.016271326690912247, "beta_dpo/gap_std": 0.2102234661579132, "beta_dpo/loss_margin_mean": 0.007069885730743408, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.019652305366591082, "grad_norm": 69.19833374023438, "learning_rate": 8.955223880597014e-08, "logits/chosen": -3.4913244247436523, "logits/rejected": -3.503750801086426, "loss": 1.3864, "step": 13 }, { "beta_dpo/beta": 0.09997418522834778, "beta_dpo/beta_margin_grad_mean": -0.4998185336589813, "beta_dpo/beta_margin_grad_std": 0.00562079856172204, "beta_dpo/beta_margin_mean": 0.0007253867224790156, "beta_dpo/beta_margin_std": 0.022488731890916824, "beta_dpo/beta_used": 0.09997418522834778, "beta_dpo/beta_used_raw": 0.09997418522834778, "beta_dpo/gap_mean": 0.01367080770432949, "beta_dpo/gap_std": 0.21450088918209076, "beta_dpo/loss_margin_mean": 0.00729447603225708, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.021164021164021163, "grad_norm": 78.86023712158203, "learning_rate": 9.701492537313432e-08, "logits/chosen": -3.468996286392212, "logits/rejected": -3.480945587158203, "loss": 1.385, "step": 14 }, { "beta_dpo/beta": 0.10166953504085541, "beta_dpo/beta_margin_grad_mean": -0.4993080496788025, "beta_dpo/beta_margin_grad_std": 0.005408703349530697, "beta_dpo/beta_margin_mean": 0.0027681647334247828, "beta_dpo/beta_margin_std": 0.021638209000229836, "beta_dpo/beta_used": 0.10166953504085541, "beta_dpo/beta_used_raw": 0.10166953504085541, "beta_dpo/gap_mean": 0.012517506256699562, "beta_dpo/gap_std": 0.21141119301319122, "beta_dpo/loss_margin_mean": 0.026329442858695984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.022675736961451247, "grad_norm": 77.17921447753906, "learning_rate": 1.044776119402985e-07, "logits/chosen": -3.487567663192749, "logits/rejected": -3.491109848022461, "loss": 1.3822, "step": 15 }, { "beta_dpo/beta": 0.10217370092868805, "beta_dpo/beta_margin_grad_mean": -0.49842318892478943, "beta_dpo/beta_margin_grad_std": 0.004959672223776579, "beta_dpo/beta_margin_mean": 0.006308517884463072, "beta_dpo/beta_margin_std": 0.01984489895403385, "beta_dpo/beta_used": 0.10217370092868805, "beta_dpo/beta_used_raw": 0.10217370092868805, "beta_dpo/gap_mean": 0.022726912051439285, "beta_dpo/gap_std": 0.20642614364624023, "beta_dpo/loss_margin_mean": 0.061732217669487, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02418745275888133, "grad_norm": 67.19996643066406, "learning_rate": 1.1194029850746268e-07, "logits/chosen": -3.4754326343536377, "logits/rejected": -3.489719867706299, "loss": 1.3803, "step": 16 }, { "beta_dpo/beta": 0.09984943270683289, "beta_dpo/beta_margin_grad_mean": -0.49886709451675415, "beta_dpo/beta_margin_grad_std": 0.005485767964273691, "beta_dpo/beta_margin_mean": 0.004532321821898222, "beta_dpo/beta_margin_std": 0.021946530789136887, "beta_dpo/beta_used": 0.09984943270683289, "beta_dpo/beta_used_raw": 0.09984943270683289, "beta_dpo/gap_mean": 0.02862522192299366, "beta_dpo/gap_std": 0.21039307117462158, "beta_dpo/loss_margin_mean": 0.04543180763721466, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.025699168556311415, "grad_norm": 72.48587799072266, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -3.4845752716064453, "logits/rejected": -3.4855475425720215, "loss": 1.3837, "step": 17 }, { "beta_dpo/beta": 0.09787815809249878, "beta_dpo/beta_margin_grad_mean": -0.49952730536460876, "beta_dpo/beta_margin_grad_std": 0.005441566463559866, "beta_dpo/beta_margin_mean": 0.0018912701634690166, "beta_dpo/beta_margin_std": 0.021770119667053223, "beta_dpo/beta_used": 0.09787815809249878, "beta_dpo/beta_used_raw": 0.09787815809249878, "beta_dpo/gap_mean": 0.02734116092324257, "beta_dpo/gap_std": 0.2113610804080963, "beta_dpo/loss_margin_mean": 0.019237250089645386, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.027210884353741496, "grad_norm": 64.46170043945312, "learning_rate": 1.2686567164179106e-07, "logits/chosen": -3.4654946327209473, "logits/rejected": -3.4752371311187744, "loss": 1.3871, "step": 18 }, { "beta_dpo/beta": 0.10058543086051941, "beta_dpo/beta_margin_grad_mean": -0.4984739124774933, "beta_dpo/beta_margin_grad_std": 0.005919734016060829, "beta_dpo/beta_margin_mean": 0.006105437409132719, "beta_dpo/beta_margin_std": 0.023681944236159325, "beta_dpo/beta_used": 0.10058543086051941, "beta_dpo/beta_used_raw": 0.10058543086051941, "beta_dpo/gap_mean": 0.03188147768378258, "beta_dpo/gap_std": 0.2180713713169098, "beta_dpo/loss_margin_mean": 0.060575321316719055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02872260015117158, "grad_norm": 74.11320495605469, "learning_rate": 1.343283582089552e-07, "logits/chosen": -3.49847412109375, "logits/rejected": -3.4848098754882812, "loss": 1.3822, "step": 19 }, { "beta_dpo/beta": 0.09968103468418121, "beta_dpo/beta_margin_grad_mean": -0.49871742725372314, "beta_dpo/beta_margin_grad_std": 0.00446416437625885, "beta_dpo/beta_margin_mean": 0.0051309531554579735, "beta_dpo/beta_margin_std": 0.017858445644378662, "beta_dpo/beta_used": 0.09968103468418121, "beta_dpo/beta_used_raw": 0.09968103468418121, "beta_dpo/gap_mean": 0.03486326336860657, "beta_dpo/gap_std": 0.21194185316562653, "beta_dpo/loss_margin_mean": 0.0515841543674469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.030234315948601664, "grad_norm": 74.15331268310547, "learning_rate": 1.4179104477611938e-07, "logits/chosen": -3.480916738510132, "logits/rejected": -3.487682342529297, "loss": 1.3834, "step": 20 }, { "beta_dpo/beta": 0.10118047147989273, "beta_dpo/beta_margin_grad_mean": -0.49813440442085266, "beta_dpo/beta_margin_grad_std": 0.006876260042190552, "beta_dpo/beta_margin_mean": 0.007462440058588982, "beta_dpo/beta_margin_std": 0.027516059577465057, "beta_dpo/beta_used": 0.10118047147989273, "beta_dpo/beta_used_raw": 0.10118047147989273, "beta_dpo/gap_mean": 0.04063236713409424, "beta_dpo/gap_std": 0.2174699306488037, "beta_dpo/loss_margin_mean": 0.0733160525560379, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.031746031746031744, "grad_norm": 76.7116928100586, "learning_rate": 1.4925373134328355e-07, "logits/chosen": -3.474630832672119, "logits/rejected": -3.4847922325134277, "loss": 1.3802, "step": 21 }, { "beta_dpo/beta": 0.1014518290758133, "beta_dpo/beta_margin_grad_mean": -0.4986741244792938, "beta_dpo/beta_margin_grad_std": 0.0053654685616493225, "beta_dpo/beta_margin_mean": 0.00530435424298048, "beta_dpo/beta_margin_std": 0.021464822813868523, "beta_dpo/beta_used": 0.1014518290758133, "beta_dpo/beta_used_raw": 0.1014518290758133, "beta_dpo/gap_mean": 0.045100364834070206, "beta_dpo/gap_std": 0.22138892114162445, "beta_dpo/loss_margin_mean": 0.05228887498378754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03325774754346183, "grad_norm": 70.56732177734375, "learning_rate": 1.5671641791044775e-07, "logits/chosen": -3.4839634895324707, "logits/rejected": -3.489530086517334, "loss": 1.3793, "step": 22 }, { "beta_dpo/beta": 0.10004732012748718, "beta_dpo/beta_margin_grad_mean": -0.4988941550254822, "beta_dpo/beta_margin_grad_std": 0.006940007209777832, "beta_dpo/beta_margin_mean": 0.004424452316015959, "beta_dpo/beta_margin_std": 0.027765844017267227, "beta_dpo/beta_used": 0.10004732012748718, "beta_dpo/beta_used_raw": 0.10004732012748718, "beta_dpo/gap_mean": 0.04534055292606354, "beta_dpo/gap_std": 0.22986072301864624, "beta_dpo/loss_margin_mean": 0.044214025139808655, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03476946334089191, "grad_norm": 81.26911926269531, "learning_rate": 1.6417910447761193e-07, "logits/chosen": -3.491093158721924, "logits/rejected": -3.4994139671325684, "loss": 1.3818, "step": 23 }, { "beta_dpo/beta": 0.10039770603179932, "beta_dpo/beta_margin_grad_mean": -0.49830013513565063, "beta_dpo/beta_margin_grad_std": 0.005815350916236639, "beta_dpo/beta_margin_mean": 0.006800240837037563, "beta_dpo/beta_margin_std": 0.0232648067176342, "beta_dpo/beta_used": 0.10039770603179932, "beta_dpo/beta_used_raw": 0.10039770603179932, "beta_dpo/gap_mean": 0.050212785601615906, "beta_dpo/gap_std": 0.23121167719364166, "beta_dpo/loss_margin_mean": 0.06687352061271667, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.036281179138321996, "grad_norm": 67.30644989013672, "learning_rate": 1.716417910447761e-07, "logits/chosen": -3.4758753776550293, "logits/rejected": -3.484215497970581, "loss": 1.3806, "step": 24 }, { "beta_dpo/beta": 0.10184156894683838, "beta_dpo/beta_margin_grad_mean": -0.496889591217041, "beta_dpo/beta_margin_grad_std": 0.0062943859957158566, "beta_dpo/beta_margin_mean": 0.012444637715816498, "beta_dpo/beta_margin_std": 0.025184577330946922, "beta_dpo/beta_used": 0.10184156894683838, "beta_dpo/beta_used_raw": 0.10184156894683838, "beta_dpo/gap_mean": 0.05874401330947876, "beta_dpo/gap_std": 0.2349245548248291, "beta_dpo/loss_margin_mean": 0.12228862941265106, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03779289493575208, "grad_norm": 71.70790100097656, "learning_rate": 1.7910447761194027e-07, "logits/chosen": -3.5098114013671875, "logits/rejected": -3.5169005393981934, "loss": 1.3773, "step": 25 }, { "beta_dpo/beta": 0.10080444812774658, "beta_dpo/beta_margin_grad_mean": -0.49682313203811646, "beta_dpo/beta_margin_grad_std": 0.006813944783061743, "beta_dpo/beta_margin_mean": 0.012710830196738243, "beta_dpo/beta_margin_std": 0.02726481482386589, "beta_dpo/beta_used": 0.10080444812774658, "beta_dpo/beta_used_raw": 0.10080444812774658, "beta_dpo/gap_mean": 0.07424643635749817, "beta_dpo/gap_std": 0.2401646077632904, "beta_dpo/loss_margin_mean": 0.1253078132867813, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.039304610733182165, "grad_norm": 72.3993148803711, "learning_rate": 1.8656716417910447e-07, "logits/chosen": -3.464113712310791, "logits/rejected": -3.465458393096924, "loss": 1.3775, "step": 26 }, { "beta_dpo/beta": 0.10233546793460846, "beta_dpo/beta_margin_grad_mean": -0.49768123030662537, "beta_dpo/beta_margin_grad_std": 0.00761442631483078, "beta_dpo/beta_margin_mean": 0.00927521288394928, "beta_dpo/beta_margin_std": 0.030466170981526375, "beta_dpo/beta_used": 0.10233546793460846, "beta_dpo/beta_used_raw": 0.10233546793460846, "beta_dpo/gap_mean": 0.07654713094234467, "beta_dpo/gap_std": 0.25189656019210815, "beta_dpo/loss_margin_mean": 0.09068039059638977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04081632653061224, "grad_norm": 75.62002563476562, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -3.477059841156006, "logits/rejected": -3.4793524742126465, "loss": 1.3746, "step": 27 }, { "beta_dpo/beta": 0.1009044274687767, "beta_dpo/beta_margin_grad_mean": -0.4972415268421173, "beta_dpo/beta_margin_grad_std": 0.008482665754854679, "beta_dpo/beta_margin_mean": 0.011036441661417484, "beta_dpo/beta_margin_std": 0.033949114382267, "beta_dpo/beta_used": 0.1009044274687767, "beta_dpo/beta_used_raw": 0.1009044274687767, "beta_dpo/gap_mean": 0.08251934498548508, "beta_dpo/gap_std": 0.26663610339164734, "beta_dpo/loss_margin_mean": 0.10953138768672943, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.042328042328042326, "grad_norm": 72.76057434082031, "learning_rate": 2.0149253731343282e-07, "logits/chosen": -3.4753365516662598, "logits/rejected": -3.479668617248535, "loss": 1.3765, "step": 28 }, { "beta_dpo/beta": 0.10132871568202972, "beta_dpo/beta_margin_grad_mean": -0.4966908395290375, "beta_dpo/beta_margin_grad_std": 0.00963142141699791, "beta_dpo/beta_margin_mean": 0.013242037035524845, "beta_dpo/beta_margin_std": 0.03854740783572197, "beta_dpo/beta_used": 0.10132871568202972, "beta_dpo/beta_used_raw": 0.10132871568202972, "beta_dpo/gap_mean": 0.08902300894260406, "beta_dpo/gap_std": 0.2860987186431885, "beta_dpo/loss_margin_mean": 0.12946908175945282, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04383975812547241, "grad_norm": 80.11695098876953, "learning_rate": 2.08955223880597e-07, "logits/chosen": -3.4917006492614746, "logits/rejected": -3.4941205978393555, "loss": 1.3749, "step": 29 }, { "beta_dpo/beta": 0.10037538409233093, "beta_dpo/beta_margin_grad_mean": -0.497522234916687, "beta_dpo/beta_margin_grad_std": 0.010261783376336098, "beta_dpo/beta_margin_mean": 0.009913492947816849, "beta_dpo/beta_margin_std": 0.041076745837926865, "beta_dpo/beta_used": 0.10037538409233093, "beta_dpo/beta_used_raw": 0.10037538409233093, "beta_dpo/gap_mean": 0.09355901181697845, "beta_dpo/gap_std": 0.309474915266037, "beta_dpo/loss_margin_mean": 0.0988030731678009, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.045351473922902494, "grad_norm": 75.7330551147461, "learning_rate": 2.1641791044776117e-07, "logits/chosen": -3.4795217514038086, "logits/rejected": -3.4918265342712402, "loss": 1.3764, "step": 30 }, { "beta_dpo/beta": 0.10266469419002533, "beta_dpo/beta_margin_grad_mean": -0.49661797285079956, "beta_dpo/beta_margin_grad_std": 0.012874443084001541, "beta_dpo/beta_margin_mean": 0.013542445376515388, "beta_dpo/beta_margin_std": 0.05157284811139107, "beta_dpo/beta_used": 0.10266469419002533, "beta_dpo/beta_used_raw": 0.10266469419002533, "beta_dpo/gap_mean": 0.09349072724580765, "beta_dpo/gap_std": 0.33484184741973877, "beta_dpo/loss_margin_mean": 0.12851548194885254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04686318972033258, "grad_norm": 84.76753997802734, "learning_rate": 2.2388059701492537e-07, "logits/chosen": -3.484382152557373, "logits/rejected": -3.4840521812438965, "loss": 1.3723, "step": 31 }, { "beta_dpo/beta": 0.10319270193576813, "beta_dpo/beta_margin_grad_mean": -0.4945366382598877, "beta_dpo/beta_margin_grad_std": 0.011012133210897446, "beta_dpo/beta_margin_mean": 0.021867286413908005, "beta_dpo/beta_margin_std": 0.04407740384340286, "beta_dpo/beta_used": 0.10319270193576813, "beta_dpo/beta_used_raw": 0.10319270193576813, "beta_dpo/gap_mean": 0.11525650322437286, "beta_dpo/gap_std": 0.35890108346939087, "beta_dpo/loss_margin_mean": 0.21101342141628265, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04837490551776266, "grad_norm": 76.85587310791016, "learning_rate": 2.3134328358208954e-07, "logits/chosen": -3.490706443786621, "logits/rejected": -3.491973876953125, "loss": 1.3691, "step": 32 }, { "beta_dpo/beta": 0.1052633598446846, "beta_dpo/beta_margin_grad_mean": -0.49329954385757446, "beta_dpo/beta_margin_grad_std": 0.014426704496145248, "beta_dpo/beta_margin_mean": 0.02681746333837509, "beta_dpo/beta_margin_std": 0.057773277163505554, "beta_dpo/beta_used": 0.1052633598446846, "beta_dpo/beta_used_raw": 0.1052633598446846, "beta_dpo/gap_mean": 0.1396564096212387, "beta_dpo/gap_std": 0.3812027871608734, "beta_dpo/loss_margin_mean": 0.2525438666343689, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.049886621315192746, "grad_norm": 84.2064208984375, "learning_rate": 2.388059701492537e-07, "logits/chosen": -3.480504035949707, "logits/rejected": -3.4880025386810303, "loss": 1.3624, "step": 33 }, { "beta_dpo/beta": 0.09583413600921631, "beta_dpo/beta_margin_grad_mean": -0.49526041746139526, "beta_dpo/beta_margin_grad_std": 0.010195734910666943, "beta_dpo/beta_margin_mean": 0.018971463665366173, "beta_dpo/beta_margin_std": 0.04081565514206886, "beta_dpo/beta_used": 0.09583413600921631, "beta_dpo/beta_used_raw": 0.09583413600921631, "beta_dpo/gap_mean": 0.1564980447292328, "beta_dpo/gap_std": 0.39692509174346924, "beta_dpo/loss_margin_mean": 0.1971401423215866, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05139833711262283, "grad_norm": 64.87889099121094, "learning_rate": 2.4626865671641786e-07, "logits/chosen": -3.4754815101623535, "logits/rejected": -3.48795747756958, "loss": 1.378, "step": 34 }, { "beta_dpo/beta": 0.10109281539916992, "beta_dpo/beta_margin_grad_mean": -0.49520573019981384, "beta_dpo/beta_margin_grad_std": 0.013709837570786476, "beta_dpo/beta_margin_mean": 0.019193029031157494, "beta_dpo/beta_margin_std": 0.054904498159885406, "beta_dpo/beta_used": 0.10109281539916992, "beta_dpo/beta_used_raw": 0.10109281539916992, "beta_dpo/gap_mean": 0.16002866625785828, "beta_dpo/gap_std": 0.4229516386985779, "beta_dpo/loss_margin_mean": 0.189878448843956, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05291005291005291, "grad_norm": 66.06998443603516, "learning_rate": 2.537313432835821e-07, "logits/chosen": -3.4892160892486572, "logits/rejected": -3.489070415496826, "loss": 1.3687, "step": 35 }, { "beta_dpo/beta": 0.09607753157615662, "beta_dpo/beta_margin_grad_mean": -0.49553588032722473, "beta_dpo/beta_margin_grad_std": 0.01821037009358406, "beta_dpo/beta_margin_mean": 0.017847422510385513, "beta_dpo/beta_margin_std": 0.07308873534202576, "beta_dpo/beta_used": 0.09607753157615662, "beta_dpo/beta_used_raw": 0.09607753157615662, "beta_dpo/gap_mean": 0.1608276665210724, "beta_dpo/gap_std": 0.47150135040283203, "beta_dpo/loss_margin_mean": 0.17752361297607422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05442176870748299, "grad_norm": 69.9321517944336, "learning_rate": 2.611940298507462e-07, "logits/chosen": -3.4815025329589844, "logits/rejected": -3.489722967147827, "loss": 1.3766, "step": 36 }, { "beta_dpo/beta": 0.10805092751979828, "beta_dpo/beta_margin_grad_mean": -0.48942428827285767, "beta_dpo/beta_margin_grad_std": 0.02250627428293228, "beta_dpo/beta_margin_mean": 0.04226859286427498, "beta_dpo/beta_margin_std": 0.09062261879444122, "beta_dpo/beta_used": 0.10805092751979828, "beta_dpo/beta_used_raw": 0.10805092751979828, "beta_dpo/gap_mean": 0.2018118053674698, "beta_dpo/gap_std": 0.5458605289459229, "beta_dpo/loss_margin_mean": 0.38961470127105713, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.055933484504913075, "grad_norm": 88.1091537475586, "learning_rate": 2.686567164179104e-07, "logits/chosen": -3.4984347820281982, "logits/rejected": -3.527602434158325, "loss": 1.3507, "step": 37 }, { "beta_dpo/beta": 0.09843359887599945, "beta_dpo/beta_margin_grad_mean": -0.49174827337265015, "beta_dpo/beta_margin_grad_std": 0.016898149624466896, "beta_dpo/beta_margin_mean": 0.03306391090154648, "beta_dpo/beta_margin_std": 0.06772169470787048, "beta_dpo/beta_used": 0.09843359887599945, "beta_dpo/beta_used_raw": 0.09843359887599945, "beta_dpo/gap_mean": 0.2306603342294693, "beta_dpo/gap_std": 0.57441246509552, "beta_dpo/loss_margin_mean": 0.3296223282814026, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05744520030234316, "grad_norm": 72.93190002441406, "learning_rate": 2.761194029850746e-07, "logits/chosen": -3.4699416160583496, "logits/rejected": -3.474957227706909, "loss": 1.366, "step": 38 }, { "beta_dpo/beta": 0.10100552439689636, "beta_dpo/beta_margin_grad_mean": -0.48797351121902466, "beta_dpo/beta_margin_grad_std": 0.02125493995845318, "beta_dpo/beta_margin_mean": 0.04824261739850044, "beta_dpo/beta_margin_std": 0.08532541245222092, "beta_dpo/beta_used": 0.10100552439689636, "beta_dpo/beta_used_raw": 0.10100552439689636, "beta_dpo/gap_mean": 0.26710981130599976, "beta_dpo/gap_std": 0.6145649552345276, "beta_dpo/loss_margin_mean": 0.47763076424598694, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05895691609977324, "grad_norm": 74.70683288574219, "learning_rate": 2.8358208955223876e-07, "logits/chosen": -3.4818758964538574, "logits/rejected": -3.4779043197631836, "loss": 1.3584, "step": 39 }, { "beta_dpo/beta": 0.10295109450817108, "beta_dpo/beta_margin_grad_mean": -0.48708972334861755, "beta_dpo/beta_margin_grad_std": 0.01960979588329792, "beta_dpo/beta_margin_mean": 0.051765426993370056, "beta_dpo/beta_margin_std": 0.07871639728546143, "beta_dpo/beta_used": 0.10295109450817108, "beta_dpo/beta_used_raw": 0.10295109450817108, "beta_dpo/gap_mean": 0.31209754943847656, "beta_dpo/gap_std": 0.6482617855072021, "beta_dpo/loss_margin_mean": 0.5017773509025574, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06046863189720333, "grad_norm": 104.72574615478516, "learning_rate": 2.9104477611940296e-07, "logits/chosen": -3.454075813293457, "logits/rejected": -3.4554495811462402, "loss": 1.3501, "step": 40 }, { "beta_dpo/beta": 0.11064809560775757, "beta_dpo/beta_margin_grad_mean": -0.47926777601242065, "beta_dpo/beta_margin_grad_std": 0.03134298324584961, "beta_dpo/beta_margin_mean": 0.08335942775011063, "beta_dpo/beta_margin_std": 0.1263163834810257, "beta_dpo/beta_used": 0.11064809560775757, "beta_dpo/beta_used_raw": 0.11064809560775757, "beta_dpo/gap_mean": 0.38600417971611023, "beta_dpo/gap_std": 0.731959342956543, "beta_dpo/loss_margin_mean": 0.7464129328727722, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06198034769463341, "grad_norm": 85.89790344238281, "learning_rate": 2.985074626865671e-07, "logits/chosen": -3.5105371475219727, "logits/rejected": -3.508096933364868, "loss": 1.3258, "step": 41 }, { "beta_dpo/beta": 0.09837515652179718, "beta_dpo/beta_margin_grad_mean": -0.48587504029273987, "beta_dpo/beta_margin_grad_std": 0.02415025420486927, "beta_dpo/beta_margin_mean": 0.056625593453645706, "beta_dpo/beta_margin_std": 0.09701266139745712, "beta_dpo/beta_used": 0.09837515652179718, "beta_dpo/beta_used_raw": 0.09837515652179718, "beta_dpo/gap_mean": 0.4286617040634155, "beta_dpo/gap_std": 0.7775646448135376, "beta_dpo/loss_margin_mean": 0.5715749859809875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06349206349206349, "grad_norm": 70.29313659667969, "learning_rate": 3.059701492537313e-07, "logits/chosen": -3.471945285797119, "logits/rejected": -3.475313186645508, "loss": 1.3477, "step": 42 }, { "beta_dpo/beta": 0.10859352350234985, "beta_dpo/beta_margin_grad_mean": -0.4790210723876953, "beta_dpo/beta_margin_grad_std": 0.03158368915319443, "beta_dpo/beta_margin_mean": 0.08437040448188782, "beta_dpo/beta_margin_std": 0.12720288336277008, "beta_dpo/beta_used": 0.10859352350234985, "beta_dpo/beta_used_raw": 0.10859352350234985, "beta_dpo/gap_mean": 0.4834892153739929, "beta_dpo/gap_std": 0.8535457849502563, "beta_dpo/loss_margin_mean": 0.7795947790145874, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06500377928949358, "grad_norm": 85.16110229492188, "learning_rate": 3.134328358208955e-07, "logits/chosen": -3.475992441177368, "logits/rejected": -3.490475654602051, "loss": 1.3202, "step": 43 }, { "beta_dpo/beta": 0.09638853371143341, "beta_dpo/beta_margin_grad_mean": -0.4809112548828125, "beta_dpo/beta_margin_grad_std": 0.032165560871362686, "beta_dpo/beta_margin_mean": 0.07681908458471298, "beta_dpo/beta_margin_std": 0.12961971759796143, "beta_dpo/beta_used": 0.09638853371143341, "beta_dpo/beta_used_raw": 0.09638853371143341, "beta_dpo/gap_mean": 0.5448230504989624, "beta_dpo/gap_std": 0.9320578575134277, "beta_dpo/loss_margin_mean": 0.7893993258476257, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06651549508692366, "grad_norm": 80.78567504882812, "learning_rate": 3.2089552238805965e-07, "logits/chosen": -3.471874952316284, "logits/rejected": -3.4676132202148438, "loss": 1.3411, "step": 44 }, { "beta_dpo/beta": 0.10317748785018921, "beta_dpo/beta_margin_grad_mean": -0.47540462017059326, "beta_dpo/beta_margin_grad_std": 0.02889878675341606, "beta_dpo/beta_margin_mean": 0.09890253841876984, "beta_dpo/beta_margin_std": 0.11674586683511734, "beta_dpo/beta_used": 0.10317748785018921, "beta_dpo/beta_used_raw": 0.10317748785018921, "beta_dpo/gap_mean": 0.614529013633728, "beta_dpo/gap_std": 0.9891531467437744, "beta_dpo/loss_margin_mean": 0.9499869346618652, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06802721088435375, "grad_norm": 82.41168975830078, "learning_rate": 3.2835820895522385e-07, "logits/chosen": -3.477660655975342, "logits/rejected": -3.4796509742736816, "loss": 1.3189, "step": 45 }, { "beta_dpo/beta": 0.10708046704530716, "beta_dpo/beta_margin_grad_mean": -0.4760693609714508, "beta_dpo/beta_margin_grad_std": 0.040219008922576904, "beta_dpo/beta_margin_mean": 0.09645616263151169, "beta_dpo/beta_margin_std": 0.16234652698040009, "beta_dpo/beta_used": 0.10708046704530716, "beta_dpo/beta_used_raw": 0.10708046704530716, "beta_dpo/gap_mean": 0.6618906855583191, "beta_dpo/gap_std": 1.0743083953857422, "beta_dpo/loss_margin_mean": 0.8980126976966858, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06953892668178382, "grad_norm": 82.76873779296875, "learning_rate": 3.3582089552238805e-07, "logits/chosen": -3.4865612983703613, "logits/rejected": -3.501478672027588, "loss": 1.3078, "step": 46 }, { "beta_dpo/beta": 0.1050390973687172, "beta_dpo/beta_margin_grad_mean": -0.47665971517562866, "beta_dpo/beta_margin_grad_std": 0.0366806834936142, "beta_dpo/beta_margin_mean": 0.09390005469322205, "beta_dpo/beta_margin_std": 0.14768268167972565, "beta_dpo/beta_used": 0.1050390973687172, "beta_dpo/beta_used_raw": 0.1050390973687172, "beta_dpo/gap_mean": 0.7110254764556885, "beta_dpo/gap_std": 1.1429574489593506, "beta_dpo/loss_margin_mean": 0.8911280632019043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0710506424792139, "grad_norm": 78.87757110595703, "learning_rate": 3.432835820895522e-07, "logits/chosen": -3.4726479053497314, "logits/rejected": -3.4852752685546875, "loss": 1.3059, "step": 47 }, { "beta_dpo/beta": 0.09973321855068207, "beta_dpo/beta_margin_grad_mean": -0.47354698181152344, "beta_dpo/beta_margin_grad_std": 0.03766561299562454, "beta_dpo/beta_margin_mean": 0.10683294385671616, "beta_dpo/beta_margin_std": 0.1530088633298874, "beta_dpo/beta_used": 0.09973321855068207, "beta_dpo/beta_used_raw": 0.09973321855068207, "beta_dpo/gap_mean": 0.7833503484725952, "beta_dpo/gap_std": 1.2213890552520752, "beta_dpo/loss_margin_mean": 1.0755493640899658, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07256235827664399, "grad_norm": 65.8699722290039, "learning_rate": 3.507462686567164e-07, "logits/chosen": -3.4720230102539062, "logits/rejected": -3.4713778495788574, "loss": 1.3125, "step": 48 }, { "beta_dpo/beta": 0.10683902353048325, "beta_dpo/beta_margin_grad_mean": -0.47896090149879456, "beta_dpo/beta_margin_grad_std": 0.0569651760160923, "beta_dpo/beta_margin_mean": 0.08544404804706573, "beta_dpo/beta_margin_std": 0.23137980699539185, "beta_dpo/beta_used": 0.10683902353048325, "beta_dpo/beta_used_raw": 0.10683902353048325, "beta_dpo/gap_mean": 0.7888141870498657, "beta_dpo/gap_std": 1.371895432472229, "beta_dpo/loss_margin_mean": 0.7935976982116699, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07407407407407407, "grad_norm": 76.08472442626953, "learning_rate": 3.5820895522388055e-07, "logits/chosen": -3.4482154846191406, "logits/rejected": -3.4542245864868164, "loss": 1.2973, "step": 49 }, { "beta_dpo/beta": 0.1023285984992981, "beta_dpo/beta_margin_grad_mean": -0.4778675138950348, "beta_dpo/beta_margin_grad_std": 0.043978314846754074, "beta_dpo/beta_margin_mean": 0.08967769891023636, "beta_dpo/beta_margin_std": 0.17872853577136993, "beta_dpo/beta_used": 0.1023285984992981, "beta_dpo/beta_used_raw": 0.1023285984992981, "beta_dpo/gap_mean": 0.8107864856719971, "beta_dpo/gap_std": 1.4515961408615112, "beta_dpo/loss_margin_mean": 0.8468451499938965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07558578987150416, "grad_norm": 77.416748046875, "learning_rate": 3.6567164179104475e-07, "logits/chosen": -3.4656267166137695, "logits/rejected": -3.474292039871216, "loss": 1.3056, "step": 50 }, { "beta_dpo/beta": 0.09330181777477264, "beta_dpo/beta_margin_grad_mean": -0.4759213626384735, "beta_dpo/beta_margin_grad_std": 0.05245961993932724, "beta_dpo/beta_margin_mean": 0.09779670089483261, "beta_dpo/beta_margin_std": 0.21340785920619965, "beta_dpo/beta_used": 0.09330181777477264, "beta_dpo/beta_used_raw": 0.09330181777477264, "beta_dpo/gap_mean": 0.8370683193206787, "beta_dpo/gap_std": 1.5868926048278809, "beta_dpo/loss_margin_mean": 1.0528600215911865, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07709750566893424, "grad_norm": 80.22843933105469, "learning_rate": 3.7313432835820895e-07, "logits/chosen": -3.4780006408691406, "logits/rejected": -3.4716055393218994, "loss": 1.324, "step": 51 }, { "beta_dpo/beta": 0.10757172107696533, "beta_dpo/beta_margin_grad_mean": -0.4713566303253174, "beta_dpo/beta_margin_grad_std": 0.06157148256897926, "beta_dpo/beta_margin_mean": 0.1164376363158226, "beta_dpo/beta_margin_std": 0.2508537769317627, "beta_dpo/beta_used": 0.10757172107696533, "beta_dpo/beta_used_raw": 0.10757172107696533, "beta_dpo/gap_mean": 0.879679799079895, "beta_dpo/gap_std": 1.7295918464660645, "beta_dpo/loss_margin_mean": 1.0719107389450073, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07860922146636433, "grad_norm": 90.09736633300781, "learning_rate": 3.805970149253731e-07, "logits/chosen": -3.493281841278076, "logits/rejected": -3.510999917984009, "loss": 1.2854, "step": 52 }, { "beta_dpo/beta": 0.11585356295108795, "beta_dpo/beta_margin_grad_mean": -0.44591474533081055, "beta_dpo/beta_margin_grad_std": 0.07729143649339676, "beta_dpo/beta_margin_mean": 0.2241854965686798, "beta_dpo/beta_margin_std": 0.3247769773006439, "beta_dpo/beta_used": 0.11585356295108795, "beta_dpo/beta_used_raw": 0.11585356295108795, "beta_dpo/gap_mean": 1.0488958358764648, "beta_dpo/gap_std": 1.8787274360656738, "beta_dpo/loss_margin_mean": 1.7876276969909668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0801209372637944, "grad_norm": 79.20471954345703, "learning_rate": 3.880597014925373e-07, "logits/chosen": -3.4753904342651367, "logits/rejected": -3.4808661937713623, "loss": 1.2475, "step": 53 }, { "beta_dpo/beta": 0.09257584810256958, "beta_dpo/beta_margin_grad_mean": -0.4745701849460602, "beta_dpo/beta_margin_grad_std": 0.05094355344772339, "beta_dpo/beta_margin_mean": 0.10317223519086838, "beta_dpo/beta_margin_std": 0.2069990187883377, "beta_dpo/beta_used": 0.09257584810256958, "beta_dpo/beta_used_raw": 0.09257584810256958, "beta_dpo/gap_mean": 1.0485970973968506, "beta_dpo/gap_std": 1.9558327198028564, "beta_dpo/loss_margin_mean": 1.1117992401123047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08163265306122448, "grad_norm": 61.774192810058594, "learning_rate": 3.9552238805970144e-07, "logits/chosen": -3.459986925125122, "logits/rejected": -3.4660282135009766, "loss": 1.3082, "step": 54 }, { "beta_dpo/beta": 0.12305180728435516, "beta_dpo/beta_margin_grad_mean": -0.436162531375885, "beta_dpo/beta_margin_grad_std": 0.0924496054649353, "beta_dpo/beta_margin_mean": 0.26776817440986633, "beta_dpo/beta_margin_std": 0.3947688043117523, "beta_dpo/beta_used": 0.12305180728435516, "beta_dpo/beta_used_raw": 0.12305180728435516, "beta_dpo/gap_mean": 1.2106801271438599, "beta_dpo/gap_std": 2.1652746200561523, "beta_dpo/loss_margin_mean": 2.1424360275268555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08314436885865457, "grad_norm": 85.41687774658203, "learning_rate": 4.0298507462686564e-07, "logits/chosen": -3.4503068923950195, "logits/rejected": -3.4672203063964844, "loss": 1.2096, "step": 55 }, { "beta_dpo/beta": 0.113812655210495, "beta_dpo/beta_margin_grad_mean": -0.43281856179237366, "beta_dpo/beta_margin_grad_std": 0.08880013972520828, "beta_dpo/beta_margin_mean": 0.28000012040138245, "beta_dpo/beta_margin_std": 0.3765174448490143, "beta_dpo/beta_used": 0.113812655210495, "beta_dpo/beta_used_raw": 0.113812655210495, "beta_dpo/gap_mean": 1.4440956115722656, "beta_dpo/gap_std": 2.366764545440674, "beta_dpo/loss_margin_mean": 2.447467803955078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08465608465608465, "grad_norm": 78.24384307861328, "learning_rate": 4.1044776119402984e-07, "logits/chosen": -3.474026679992676, "logits/rejected": -3.471069574356079, "loss": 1.2186, "step": 56 }, { "beta_dpo/beta": 0.12063010782003403, "beta_dpo/beta_margin_grad_mean": -0.4170219302177429, "beta_dpo/beta_margin_grad_std": 0.11409434676170349, "beta_dpo/beta_margin_mean": 0.35627323389053345, "beta_dpo/beta_margin_std": 0.49630510807037354, "beta_dpo/beta_used": 0.12063010782003403, "beta_dpo/beta_used_raw": 0.12063010782003403, "beta_dpo/gap_mean": 1.71101713180542, "beta_dpo/gap_std": 2.6714401245117188, "beta_dpo/loss_margin_mean": 2.9442179203033447, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08616780045351474, "grad_norm": 86.52571105957031, "learning_rate": 4.17910447761194e-07, "logits/chosen": -3.4749884605407715, "logits/rejected": -3.4794540405273438, "loss": 1.1814, "step": 57 }, { "beta_dpo/beta": 0.14217594265937805, "beta_dpo/beta_margin_grad_mean": -0.41697031259536743, "beta_dpo/beta_margin_grad_std": 0.12016920745372772, "beta_dpo/beta_margin_mean": 0.3542155623435974, "beta_dpo/beta_margin_std": 0.5216997861862183, "beta_dpo/beta_used": 0.14217594265937805, "beta_dpo/beta_used_raw": 0.14217594265937805, "beta_dpo/gap_mean": 1.9042582511901855, "beta_dpo/gap_std": 2.851851463317871, "beta_dpo/loss_margin_mean": 2.408334732055664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08767951625094482, "grad_norm": 109.38397979736328, "learning_rate": 4.253731343283582e-07, "logits/chosen": -3.4674854278564453, "logits/rejected": -3.4712154865264893, "loss": 1.0854, "step": 58 }, { "beta_dpo/beta": 0.13565833866596222, "beta_dpo/beta_margin_grad_mean": -0.40875622630119324, "beta_dpo/beta_margin_grad_std": 0.13919131457805634, "beta_dpo/beta_margin_mean": 0.4166140556335449, "beta_dpo/beta_margin_std": 0.6682167649269104, "beta_dpo/beta_used": 0.13565833866596222, "beta_dpo/beta_used_raw": 0.13565833866596222, "beta_dpo/gap_mean": 1.9503271579742432, "beta_dpo/gap_std": 2.9919891357421875, "beta_dpo/loss_margin_mean": 2.674647092819214, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08919123204837491, "grad_norm": 95.52985382080078, "learning_rate": 4.3283582089552234e-07, "logits/chosen": -3.4265875816345215, "logits/rejected": -3.443531036376953, "loss": 1.0797, "step": 59 }, { "beta_dpo/beta": 0.09772248566150665, "beta_dpo/beta_margin_grad_mean": -0.44351616501808167, "beta_dpo/beta_margin_grad_std": 0.10381980240345001, "beta_dpo/beta_margin_mean": 0.23878909647464752, "beta_dpo/beta_margin_std": 0.443925142288208, "beta_dpo/beta_used": 0.09772248566150665, "beta_dpo/beta_used_raw": 0.09772248566150665, "beta_dpo/gap_mean": 2.108832359313965, "beta_dpo/gap_std": 3.2798352241516113, "beta_dpo/loss_margin_mean": 2.4079318046569824, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09070294784580499, "grad_norm": 77.57376098632812, "learning_rate": 4.4029850746268654e-07, "logits/chosen": -3.4555981159210205, "logits/rejected": -3.449897050857544, "loss": 1.2118, "step": 60 }, { "beta_dpo/beta": 0.09358173608779907, "beta_dpo/beta_margin_grad_mean": -0.44476786255836487, "beta_dpo/beta_margin_grad_std": 0.12264274060726166, "beta_dpo/beta_margin_mean": 0.23790551722049713, "beta_dpo/beta_margin_std": 0.542164146900177, "beta_dpo/beta_used": 0.09358173608779907, "beta_dpo/beta_used_raw": 0.09358173608779907, "beta_dpo/gap_mean": 2.1635830402374268, "beta_dpo/gap_std": 3.625548839569092, "beta_dpo/loss_margin_mean": 2.532042980194092, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09221466364323508, "grad_norm": 69.81476593017578, "learning_rate": 4.4776119402985074e-07, "logits/chosen": -3.4459221363067627, "logits/rejected": -3.4527337551116943, "loss": 1.2301, "step": 61 }, { "beta_dpo/beta": 0.1260245144367218, "beta_dpo/beta_margin_grad_mean": -0.37954550981521606, "beta_dpo/beta_margin_grad_std": 0.14129452407360077, "beta_dpo/beta_margin_mean": 0.556390643119812, "beta_dpo/beta_margin_std": 0.6934806704521179, "beta_dpo/beta_used": 0.1260245144367218, "beta_dpo/beta_used_raw": 0.1260245144367218, "beta_dpo/gap_mean": 2.493056297302246, "beta_dpo/gap_std": 4.033053874969482, "beta_dpo/loss_margin_mean": 4.416388988494873, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09372637944066516, "grad_norm": 100.77880096435547, "learning_rate": 4.552238805970149e-07, "logits/chosen": -3.4615097045898438, "logits/rejected": -3.462885856628418, "loss": 1.11, "step": 62 }, { "beta_dpo/beta": 0.19291238486766815, "beta_dpo/beta_margin_grad_mean": -0.32733672857284546, "beta_dpo/beta_margin_grad_std": 0.210123673081398, "beta_dpo/beta_margin_mean": 0.8742516040802002, "beta_dpo/beta_margin_std": 1.2748658657073975, "beta_dpo/beta_used": 0.19291238486766815, "beta_dpo/beta_used_raw": 0.19291238486766815, "beta_dpo/gap_mean": 2.880000591278076, "beta_dpo/gap_std": 4.315876483917236, "beta_dpo/loss_margin_mean": 4.4908342361450195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09523809523809523, "grad_norm": 110.8193588256836, "learning_rate": 4.626865671641791e-07, "logits/chosen": -3.424607038497925, "logits/rejected": -3.427009105682373, "loss": 0.8325, "step": 63 }, { "beta_dpo/beta": 0.042021431028842926, "beta_dpo/beta_margin_grad_mean": -0.47857698798179626, "beta_dpo/beta_margin_grad_std": 0.05938207358121872, "beta_dpo/beta_margin_mean": 0.08715548366308212, "beta_dpo/beta_margin_std": 0.24319951236248016, "beta_dpo/beta_used": 0.042021431028842926, "beta_dpo/beta_used_raw": 0.042021431028842926, "beta_dpo/gap_mean": 2.88523268699646, "beta_dpo/gap_std": 4.664064407348633, "beta_dpo/loss_margin_mean": 2.17319917678833, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09674981103552532, "grad_norm": 35.73752975463867, "learning_rate": 4.701492537313433e-07, "logits/chosen": -3.4301936626434326, "logits/rejected": -3.4358339309692383, "loss": 1.3121, "step": 64 }, { "beta_dpo/beta": 0.10541808605194092, "beta_dpo/beta_margin_grad_mean": -0.40356844663619995, "beta_dpo/beta_margin_grad_std": 0.16677476465702057, "beta_dpo/beta_margin_mean": 0.44347381591796875, "beta_dpo/beta_margin_std": 0.7979322671890259, "beta_dpo/beta_used": 0.10541808605194092, "beta_dpo/beta_used_raw": 0.10541808605194092, "beta_dpo/gap_mean": 3.0799174308776855, "beta_dpo/gap_std": 5.104286193847656, "beta_dpo/loss_margin_mean": 4.116312503814697, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0982615268329554, "grad_norm": 98.1263656616211, "learning_rate": 4.776119402985074e-07, "logits/chosen": -3.4343981742858887, "logits/rejected": -3.4361720085144043, "loss": 1.1526, "step": 65 }, { "beta_dpo/beta": 0.1767566055059433, "beta_dpo/beta_margin_grad_mean": -0.35773029923439026, "beta_dpo/beta_margin_grad_std": 0.2308226078748703, "beta_dpo/beta_margin_mean": 0.7839902639389038, "beta_dpo/beta_margin_std": 1.4043595790863037, "beta_dpo/beta_used": 0.1767566055059433, "beta_dpo/beta_used_raw": 0.1767566055059433, "beta_dpo/gap_mean": 3.3063292503356934, "beta_dpo/gap_std": 5.49251651763916, "beta_dpo/loss_margin_mean": 4.658176898956299, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09977324263038549, "grad_norm": 141.7904815673828, "learning_rate": 4.850746268656717e-07, "logits/chosen": -3.467107057571411, "logits/rejected": -3.4679934978485107, "loss": 0.907, "step": 66 }, { "beta_dpo/beta": 0.09946347773075104, "beta_dpo/beta_margin_grad_mean": -0.40777695178985596, "beta_dpo/beta_margin_grad_std": 0.18374623358249664, "beta_dpo/beta_margin_mean": 0.4506603181362152, "beta_dpo/beta_margin_std": 0.9327126741409302, "beta_dpo/beta_used": 0.09946347773075104, "beta_dpo/beta_used_raw": 0.09946347773075104, "beta_dpo/gap_mean": 3.526148796081543, "beta_dpo/gap_std": 6.106088638305664, "beta_dpo/loss_margin_mean": 4.415529251098633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10128495842781557, "grad_norm": 99.5558090209961, "learning_rate": 4.925373134328357e-07, "logits/chosen": -3.4243123531341553, "logits/rejected": -3.4297595024108887, "loss": 1.16, "step": 67 }, { "beta_dpo/beta": 0.1491156965494156, "beta_dpo/beta_margin_grad_mean": -0.3757003843784332, "beta_dpo/beta_margin_grad_std": 0.2276735007762909, "beta_dpo/beta_margin_mean": 0.7526575326919556, "beta_dpo/beta_margin_std": 1.405693531036377, "beta_dpo/beta_used": 0.1491156965494156, "beta_dpo/beta_used_raw": 0.1491156965494156, "beta_dpo/gap_mean": 3.7691352367401123, "beta_dpo/gap_std": 6.529148101806641, "beta_dpo/loss_margin_mean": 4.495668411254883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10279667422524566, "grad_norm": 125.12950134277344, "learning_rate": 5e-07, "logits/chosen": -3.4447755813598633, "logits/rejected": -3.453941822052002, "loss": 0.985, "step": 68 }, { "beta_dpo/beta": 0.078451007604599, "beta_dpo/beta_margin_grad_mean": -0.41160911321640015, "beta_dpo/beta_margin_grad_std": 0.1623861938714981, "beta_dpo/beta_margin_mean": 0.42135173082351685, "beta_dpo/beta_margin_std": 0.80238938331604, "beta_dpo/beta_used": 0.078451007604599, "beta_dpo/beta_used_raw": 0.078451007604599, "beta_dpo/gap_mean": 3.8410778045654297, "beta_dpo/gap_std": 6.965381622314453, "beta_dpo/loss_margin_mean": 4.968033790588379, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10430839002267574, "grad_norm": 91.32450103759766, "learning_rate": 4.999965034812934e-07, "logits/chosen": -3.4167556762695312, "logits/rejected": -3.4244508743286133, "loss": 1.1994, "step": 69 }, { "beta_dpo/beta": 0.142277330160141, "beta_dpo/beta_margin_grad_mean": -0.3595953583717346, "beta_dpo/beta_margin_grad_std": 0.23513264954090118, "beta_dpo/beta_margin_mean": 0.8268713355064392, "beta_dpo/beta_margin_std": 1.4967344999313354, "beta_dpo/beta_used": 0.142277330160141, "beta_dpo/beta_used_raw": 0.142277330160141, "beta_dpo/gap_mean": 4.193761348724365, "beta_dpo/gap_std": 7.399883270263672, "beta_dpo/loss_margin_mean": 5.270178318023682, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10582010582010581, "grad_norm": 117.31448364257812, "learning_rate": 4.999860140229787e-07, "logits/chosen": -3.401844024658203, "logits/rejected": -3.400637149810791, "loss": 0.9858, "step": 70 }, { "beta_dpo/beta": 0.10826882719993591, "beta_dpo/beta_margin_grad_mean": -0.40783292055130005, "beta_dpo/beta_margin_grad_std": 0.1903211772441864, "beta_dpo/beta_margin_mean": 0.449673056602478, "beta_dpo/beta_margin_std": 0.9671619534492493, "beta_dpo/beta_used": 0.10826882719993591, "beta_dpo/beta_used_raw": 0.10826882719993591, "beta_dpo/gap_mean": 4.15489387512207, "beta_dpo/gap_std": 7.742700576782227, "beta_dpo/loss_margin_mean": 4.325258731842041, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1073318216175359, "grad_norm": 104.58674621582031, "learning_rate": 4.999685319184688e-07, "logits/chosen": -3.408806324005127, "logits/rejected": -3.406421184539795, "loss": 1.0901, "step": 71 }, { "beta_dpo/beta": 0.11632607132196426, "beta_dpo/beta_margin_grad_mean": -0.34931424260139465, "beta_dpo/beta_margin_grad_std": 0.23163333535194397, "beta_dpo/beta_margin_mean": 0.9699787497520447, "beta_dpo/beta_margin_std": 1.607363224029541, "beta_dpo/beta_used": 0.11632607132196426, "beta_dpo/beta_used_raw": 0.11632607132196426, "beta_dpo/gap_mean": 4.78761625289917, "beta_dpo/gap_std": 8.365766525268555, "beta_dpo/loss_margin_mean": 7.621516227722168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10884353741496598, "grad_norm": 114.22623443603516, "learning_rate": 4.999440576567755e-07, "logits/chosen": -3.3776087760925293, "logits/rejected": -3.390096664428711, "loss": 1.1005, "step": 72 }, { "beta_dpo/beta": 0.06673535704612732, "beta_dpo/beta_margin_grad_mean": -0.4608076512813568, "beta_dpo/beta_margin_grad_std": 0.17916692793369293, "beta_dpo/beta_margin_mean": 0.18453335762023926, "beta_dpo/beta_margin_std": 0.8172470927238464, "beta_dpo/beta_used": 0.06673535704612732, "beta_dpo/beta_used_raw": 0.06673535704612732, "beta_dpo/gap_mean": 4.497790336608887, "beta_dpo/gap_std": 9.094923973083496, "beta_dpo/loss_margin_mean": 2.876835584640503, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11035525321239607, "grad_norm": 74.64056396484375, "learning_rate": 4.999125919224965e-07, "logits/chosen": -3.3894991874694824, "logits/rejected": -3.391726016998291, "loss": 1.2207, "step": 73 }, { "beta_dpo/beta": 0.25261440873146057, "beta_dpo/beta_margin_grad_mean": -0.2889004051685333, "beta_dpo/beta_margin_grad_std": 0.2619003355503082, "beta_dpo/beta_margin_mean": 2.9105119705200195, "beta_dpo/beta_margin_std": 4.6201043128967285, "beta_dpo/beta_used": 0.25261440873146057, "beta_dpo/beta_used_raw": 0.25261440873146057, "beta_dpo/gap_mean": 4.9444684982299805, "beta_dpo/gap_std": 9.611654281616211, "beta_dpo/loss_margin_mean": 8.769340515136719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11186696900982615, "grad_norm": 182.684326171875, "learning_rate": 4.998741355957963e-07, "logits/chosen": -3.3527235984802246, "logits/rejected": -3.357039451599121, "loss": 0.8304, "step": 74 }, { "beta_dpo/beta": 0.08466437458992004, "beta_dpo/beta_margin_grad_mean": -0.4008246660232544, "beta_dpo/beta_margin_grad_std": 0.21186015009880066, "beta_dpo/beta_margin_mean": 0.5326976180076599, "beta_dpo/beta_margin_std": 1.2269001007080078, "beta_dpo/beta_used": 0.08466437458992004, "beta_dpo/beta_used_raw": 0.08466437458992004, "beta_dpo/gap_mean": 5.70491886138916, "beta_dpo/gap_std": 10.372549057006836, "beta_dpo/loss_margin_mean": 7.167640686035156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11337868480725624, "grad_norm": 102.36708068847656, "learning_rate": 4.998286897523808e-07, "logits/chosen": -3.351316213607788, "logits/rejected": -3.3699235916137695, "loss": 1.144, "step": 75 }, { "beta_dpo/beta": 0.18824619054794312, "beta_dpo/beta_margin_grad_mean": -0.2869580090045929, "beta_dpo/beta_margin_grad_std": 0.24816998839378357, "beta_dpo/beta_margin_mean": 2.1148622035980225, "beta_dpo/beta_margin_std": 3.13854718208313, "beta_dpo/beta_used": 0.18824619054794312, "beta_dpo/beta_used_raw": 0.18824619054794312, "beta_dpo/gap_mean": 6.352941513061523, "beta_dpo/gap_std": 10.743419647216797, "beta_dpo/loss_margin_mean": 9.534296989440918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11489040060468632, "grad_norm": 171.01199340820312, "learning_rate": 4.997762556634679e-07, "logits/chosen": -3.327301025390625, "logits/rejected": -3.3365983963012695, "loss": 1.0189, "step": 76 }, { "beta_dpo/beta": 0.14552097022533417, "beta_dpo/beta_margin_grad_mean": -0.304735392332077, "beta_dpo/beta_margin_grad_std": 0.25858816504478455, "beta_dpo/beta_margin_mean": 1.3738024234771729, "beta_dpo/beta_margin_std": 1.8939155340194702, "beta_dpo/beta_used": 0.14552097022533417, "beta_dpo/beta_used_raw": 0.14552097022533417, "beta_dpo/gap_mean": 6.843048095703125, "beta_dpo/gap_std": 11.032407760620117, "beta_dpo/loss_margin_mean": 9.01302433013916, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1164021164021164, "grad_norm": 120.21430969238281, "learning_rate": 4.99716834795752e-07, "logits/chosen": -3.3611555099487305, "logits/rejected": -3.373170852661133, "loss": 0.9049, "step": 77 }, { "beta_dpo/beta": 0.041494932025671005, "beta_dpo/beta_margin_grad_mean": -0.43122273683547974, "beta_dpo/beta_margin_grad_std": 0.1662428379058838, "beta_dpo/beta_margin_mean": 0.35608235001564026, "beta_dpo/beta_margin_std": 0.8586031794548035, "beta_dpo/beta_used": 0.041494932025671005, "beta_dpo/beta_used_raw": -0.01816452667117119, "beta_dpo/gap_mean": 6.814278602600098, "beta_dpo/gap_std": 11.402191162109375, "beta_dpo/loss_margin_mean": 5.548126697540283, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11791383219954649, "grad_norm": 72.10340118408203, "learning_rate": 4.996504288113623e-07, "logits/chosen": -3.3710875511169434, "logits/rejected": -3.3661742210388184, "loss": 1.252, "step": 78 }, { "beta_dpo/beta": 0.1977195143699646, "beta_dpo/beta_margin_grad_mean": -0.25049352645874023, "beta_dpo/beta_margin_grad_std": 0.2710304260253906, "beta_dpo/beta_margin_mean": 2.008237838745117, "beta_dpo/beta_margin_std": 2.7375316619873047, "beta_dpo/beta_used": 0.1977195143699646, "beta_dpo/beta_used_raw": 0.1977195143699646, "beta_dpo/gap_mean": 7.165606498718262, "beta_dpo/gap_std": 11.763540267944336, "beta_dpo/loss_margin_mean": 10.299997329711914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11942554799697656, "grad_norm": 199.5998077392578, "learning_rate": 4.995770395678171e-07, "logits/chosen": -3.3480472564697266, "logits/rejected": -3.3739683628082275, "loss": 0.6999, "step": 79 }, { "beta_dpo/beta": 0.17561544477939606, "beta_dpo/beta_margin_grad_mean": -0.36698946356773376, "beta_dpo/beta_margin_grad_std": 0.28652095794677734, "beta_dpo/beta_margin_mean": 1.9338186979293823, "beta_dpo/beta_margin_std": 4.184875965118408, "beta_dpo/beta_used": 0.17561544477939606, "beta_dpo/beta_used_raw": 0.14658761024475098, "beta_dpo/gap_mean": 7.466344833374023, "beta_dpo/gap_std": 12.135894775390625, "beta_dpo/loss_margin_mean": 9.14448070526123, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12093726379440665, "grad_norm": 242.46189880371094, "learning_rate": 4.994966691179711e-07, "logits/chosen": -3.362766981124878, "logits/rejected": -3.373871326446533, "loss": 1.2158, "step": 80 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4974750578403473, "beta_dpo/beta_margin_grad_std": 0.0035836591850966215, "beta_dpo/beta_margin_mean": 0.010100403800606728, "beta_dpo/beta_margin_std": 0.014335720799863338, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.018039202317595482, "beta_dpo/gap_mean": 8.088890075683594, "beta_dpo/gap_std": 12.616556167602539, "beta_dpo/loss_margin_mean": 10.100403785705566, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12244897959183673, "grad_norm": 1.2602194547653198, "learning_rate": 4.994093197099587e-07, "logits/chosen": -3.343071460723877, "logits/rejected": -3.3518471717834473, "loss": 1.3802, "step": 81 }, { "beta_dpo/beta": 0.1988849639892578, "beta_dpo/beta_margin_grad_mean": -0.2265806794166565, "beta_dpo/beta_margin_grad_std": 0.280320942401886, "beta_dpo/beta_margin_mean": 2.634272575378418, "beta_dpo/beta_margin_std": 2.8367440700531006, "beta_dpo/beta_used": 0.1988849639892578, "beta_dpo/beta_used_raw": 0.1988849639892578, "beta_dpo/gap_mean": 8.929794311523438, "beta_dpo/gap_std": 12.968416213989258, "beta_dpo/loss_margin_mean": 13.391870498657227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12396069538926682, "grad_norm": 192.18162536621094, "learning_rate": 4.993149937871306e-07, "logits/chosen": -3.3310623168945312, "logits/rejected": -3.341090202331543, "loss": 0.7932, "step": 82 }, { "beta_dpo/beta": 0.1843191534280777, "beta_dpo/beta_margin_grad_mean": -0.2833937108516693, "beta_dpo/beta_margin_grad_std": 0.28331419825553894, "beta_dpo/beta_margin_mean": 1.9241818189620972, "beta_dpo/beta_margin_std": 3.19730806350708, "beta_dpo/beta_used": 0.1843191534280777, "beta_dpo/beta_used_raw": 0.1843191534280777, "beta_dpo/gap_mean": 9.336427688598633, "beta_dpo/gap_std": 13.179704666137695, "beta_dpo/loss_margin_mean": 11.15524673461914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1254724111866969, "grad_norm": 238.143798828125, "learning_rate": 4.992136939879856e-07, "logits/chosen": -3.3477907180786133, "logits/rejected": -3.3669016361236572, "loss": 0.8472, "step": 83 }, { "beta_dpo/beta": 0.11267973482608795, "beta_dpo/beta_margin_grad_mean": -0.3190261721611023, "beta_dpo/beta_margin_grad_std": 0.23150216042995453, "beta_dpo/beta_margin_mean": 1.1134321689605713, "beta_dpo/beta_margin_std": 1.629770278930664, "beta_dpo/beta_used": 0.11267973482608795, "beta_dpo/beta_used_raw": 0.11267973482608795, "beta_dpo/gap_mean": 9.562017440795898, "beta_dpo/gap_std": 13.26020622253418, "beta_dpo/loss_margin_mean": 9.725201606750488, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12698412698412698, "grad_norm": 113.03916931152344, "learning_rate": 4.991054231460969e-07, "logits/chosen": -3.329392671585083, "logits/rejected": -3.3444466590881348, "loss": 0.8401, "step": 84 }, { "beta_dpo/beta": 0.09879438579082489, "beta_dpo/beta_margin_grad_mean": -0.383115291595459, "beta_dpo/beta_margin_grad_std": 0.24746352434158325, "beta_dpo/beta_margin_mean": 1.2028299570083618, "beta_dpo/beta_margin_std": 2.4348294734954834, "beta_dpo/beta_used": 0.09879438579082489, "beta_dpo/beta_used_raw": 0.08103566616773605, "beta_dpo/gap_mean": 9.678817749023438, "beta_dpo/gap_std": 13.180517196655273, "beta_dpo/loss_margin_mean": 10.931384086608887, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12849584278155707, "grad_norm": 111.39769744873047, "learning_rate": 4.989901842900325e-07, "logits/chosen": -3.321516990661621, "logits/rejected": -3.3222968578338623, "loss": 1.0688, "step": 85 }, { "beta_dpo/beta": 0.07136266678571701, "beta_dpo/beta_margin_grad_mean": -0.3871707320213318, "beta_dpo/beta_margin_grad_std": 0.23100006580352783, "beta_dpo/beta_margin_mean": 0.9229219555854797, "beta_dpo/beta_margin_std": 1.8082295656204224, "beta_dpo/beta_used": 0.07136266678571701, "beta_dpo/beta_used_raw": 0.015174761414527893, "beta_dpo/gap_mean": 9.801012992858887, "beta_dpo/gap_std": 13.513420104980469, "beta_dpo/loss_margin_mean": 8.277644157409668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13000755857898716, "grad_norm": 101.34297180175781, "learning_rate": 4.988679806432711e-07, "logits/chosen": -3.3395376205444336, "logits/rejected": -3.3449158668518066, "loss": 1.1039, "step": 86 }, { "beta_dpo/beta": 0.14920227229595184, "beta_dpo/beta_margin_grad_mean": -0.2833729684352875, "beta_dpo/beta_margin_grad_std": 0.2721627950668335, "beta_dpo/beta_margin_mean": 1.798041820526123, "beta_dpo/beta_margin_std": 2.643514633178711, "beta_dpo/beta_used": 0.14920227229595184, "beta_dpo/beta_used_raw": 0.14920227229595184, "beta_dpo/gap_mean": 9.804052352905273, "beta_dpo/gap_std": 13.875849723815918, "beta_dpo/loss_margin_mean": 11.038612365722656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13151927437641722, "grad_norm": 127.10075378417969, "learning_rate": 4.987388156241114e-07, "logits/chosen": -3.3276474475860596, "logits/rejected": -3.3481569290161133, "loss": 0.7641, "step": 87 }, { "beta_dpo/beta": 0.1592729240655899, "beta_dpo/beta_margin_grad_mean": -0.33554723858833313, "beta_dpo/beta_margin_grad_std": 0.23475810885429382, "beta_dpo/beta_margin_mean": 2.3355441093444824, "beta_dpo/beta_margin_std": 3.7071549892425537, "beta_dpo/beta_used": 0.1592729240655899, "beta_dpo/beta_used_raw": 0.10196053981781006, "beta_dpo/gap_mean": 10.079328536987305, "beta_dpo/gap_std": 14.221284866333008, "beta_dpo/loss_margin_mean": 10.8495454788208, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1330309901738473, "grad_norm": 137.05709838867188, "learning_rate": 4.986026928455767e-07, "logits/chosen": -3.3265223503112793, "logits/rejected": -3.321218729019165, "loss": 0.9096, "step": 88 }, { "beta_dpo/beta": 0.004432837013155222, "beta_dpo/beta_margin_grad_mean": -0.4919796288013458, "beta_dpo/beta_margin_grad_std": 0.023224812000989914, "beta_dpo/beta_margin_mean": 0.03219681233167648, "beta_dpo/beta_margin_std": 0.09327611327171326, "beta_dpo/beta_used": 0.004432837013155222, "beta_dpo/beta_used_raw": -0.05657649785280228, "beta_dpo/gap_mean": 9.50676155090332, "beta_dpo/gap_std": 14.471736907958984, "beta_dpo/loss_margin_mean": 6.9620680809021, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1345427059712774, "grad_norm": 8.242236137390137, "learning_rate": 4.984596161153135e-07, "logits/chosen": -3.310055732727051, "logits/rejected": -3.339357852935791, "loss": 1.3537, "step": 89 }, { "beta_dpo/beta": 0.20705099403858185, "beta_dpo/beta_margin_grad_mean": -0.33812034130096436, "beta_dpo/beta_margin_grad_std": 0.25459006428718567, "beta_dpo/beta_margin_mean": 2.6444900035858154, "beta_dpo/beta_margin_std": 5.163547992706299, "beta_dpo/beta_used": 0.20705099403858185, "beta_dpo/beta_used_raw": 0.14731192588806152, "beta_dpo/gap_mean": 9.444209098815918, "beta_dpo/gap_std": 14.568005561828613, "beta_dpo/loss_margin_mean": 10.64816665649414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1360544217687075, "grad_norm": 119.21234130859375, "learning_rate": 4.983095894354857e-07, "logits/chosen": -3.306340217590332, "logits/rejected": -3.3120946884155273, "loss": 0.8974, "step": 90 }, { "beta_dpo/beta": 0.03152452036738396, "beta_dpo/beta_margin_grad_mean": -0.4407292306423187, "beta_dpo/beta_margin_grad_std": 0.15332013368606567, "beta_dpo/beta_margin_mean": 0.2929452359676361, "beta_dpo/beta_margin_std": 0.7780600190162659, "beta_dpo/beta_used": 0.03152452036738396, "beta_dpo/beta_used_raw": 0.009608536958694458, "beta_dpo/gap_mean": 9.621158599853516, "beta_dpo/gap_std": 14.920358657836914, "beta_dpo/loss_margin_mean": 9.786998748779297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13756613756613756, "grad_norm": 38.26823043823242, "learning_rate": 4.98152617002662e-07, "logits/chosen": -3.3391175270080566, "logits/rejected": -3.3490490913391113, "loss": 1.2081, "step": 91 }, { "beta_dpo/beta": 0.13908042013645172, "beta_dpo/beta_margin_grad_mean": -0.35717082023620605, "beta_dpo/beta_margin_grad_std": 0.24447351694107056, "beta_dpo/beta_margin_mean": 1.8169987201690674, "beta_dpo/beta_margin_std": 3.39219069480896, "beta_dpo/beta_used": 0.13908042013645172, "beta_dpo/beta_used_raw": 0.041757889091968536, "beta_dpo/gap_mean": 9.578210830688477, "beta_dpo/gap_std": 14.993568420410156, "beta_dpo/loss_margin_mean": 10.055578231811523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13907785336356765, "grad_norm": 186.3566436767578, "learning_rate": 4.979887032076988e-07, "logits/chosen": -3.2997212409973145, "logits/rejected": -3.32633376121521, "loss": 1.0076, "step": 92 }, { "beta_dpo/beta": 0.10875581204891205, "beta_dpo/beta_margin_grad_mean": -0.35252755880355835, "beta_dpo/beta_margin_grad_std": 0.2674812376499176, "beta_dpo/beta_margin_mean": 0.9274733066558838, "beta_dpo/beta_margin_std": 1.7559343576431274, "beta_dpo/beta_used": 0.10875581204891205, "beta_dpo/beta_used_raw": 0.10875581204891205, "beta_dpo/gap_mean": 9.541094779968262, "beta_dpo/gap_std": 15.093782424926758, "beta_dpo/loss_margin_mean": 8.429204940795898, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14058956916099774, "grad_norm": 106.12804412841797, "learning_rate": 4.978178526356172e-07, "logits/chosen": -3.297229766845703, "logits/rejected": -3.302245616912842, "loss": 0.9099, "step": 93 }, { "beta_dpo/beta": 0.10703656077384949, "beta_dpo/beta_margin_grad_mean": -0.3893989324569702, "beta_dpo/beta_margin_grad_std": 0.2398282289505005, "beta_dpo/beta_margin_mean": 0.9909035563468933, "beta_dpo/beta_margin_std": 2.612060070037842, "beta_dpo/beta_used": 0.10703656077384949, "beta_dpo/beta_used_raw": 0.08424051105976105, "beta_dpo/gap_mean": 9.722146987915039, "beta_dpo/gap_std": 15.254247665405273, "beta_dpo/loss_margin_mean": 10.538039207458496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1421012849584278, "grad_norm": 116.64041137695312, "learning_rate": 4.976400700654751e-07, "logits/chosen": -3.3025875091552734, "logits/rejected": -3.3130154609680176, "loss": 0.9871, "step": 94 }, { "beta_dpo/beta": 0.3490155339241028, "beta_dpo/beta_margin_grad_mean": -0.2538135051727295, "beta_dpo/beta_margin_grad_std": 0.3552146255970001, "beta_dpo/beta_margin_mean": 4.290498733520508, "beta_dpo/beta_margin_std": 6.96637487411499, "beta_dpo/beta_used": 0.3490155339241028, "beta_dpo/beta_used_raw": 0.3490155339241028, "beta_dpo/gap_mean": 10.081417083740234, "beta_dpo/gap_std": 15.954303741455078, "beta_dpo/loss_margin_mean": 12.237871170043945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1436130007558579, "grad_norm": 406.6619567871094, "learning_rate": 4.974553604702332e-07, "logits/chosen": -3.3025193214416504, "logits/rejected": -3.310507297515869, "loss": 0.7284, "step": 95 }, { "beta_dpo/beta": 0.06385838240385056, "beta_dpo/beta_margin_grad_mean": -0.36486878991127014, "beta_dpo/beta_margin_grad_std": 0.2169518917798996, "beta_dpo/beta_margin_mean": 0.9240705370903015, "beta_dpo/beta_margin_std": 1.6645779609680176, "beta_dpo/beta_used": 0.06385838240385056, "beta_dpo/beta_used_raw": 0.06385838240385056, "beta_dpo/gap_mean": 10.47520923614502, "beta_dpo/gap_std": 16.70541763305664, "beta_dpo/loss_margin_mean": 12.856772422790527, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14512471655328799, "grad_norm": 71.98176574707031, "learning_rate": 4.972637290166157e-07, "logits/chosen": -3.3070459365844727, "logits/rejected": -3.322648048400879, "loss": 1.0676, "step": 96 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4981251657009125, "beta_dpo/beta_margin_grad_std": 0.004490617197006941, "beta_dpo/beta_margin_mean": 0.007500056177377701, "beta_dpo/beta_margin_std": 0.017964085564017296, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03594258427619934, "beta_dpo/gap_mean": 10.320394515991211, "beta_dpo/gap_std": 17.007476806640625, "beta_dpo/loss_margin_mean": 7.50005578994751, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14663643235071808, "grad_norm": 1.7046000957489014, "learning_rate": 4.970651810649666e-07, "logits/chosen": -3.3001856803894043, "logits/rejected": -3.3082635402679443, "loss": 1.3783, "step": 97 }, { "beta_dpo/beta": 0.1992720514535904, "beta_dpo/beta_margin_grad_mean": -0.37769633531570435, "beta_dpo/beta_margin_grad_std": 0.28409111499786377, "beta_dpo/beta_margin_mean": 2.664219856262207, "beta_dpo/beta_margin_std": 5.922903060913086, "beta_dpo/beta_used": 0.1992720514535904, "beta_dpo/beta_used_raw": 0.15579071640968323, "beta_dpo/gap_mean": 10.343599319458008, "beta_dpo/gap_std": 17.06980323791504, "beta_dpo/loss_margin_mean": 11.241314888000488, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14814814814814814, "grad_norm": 442.90679931640625, "learning_rate": 4.968597221690985e-07, "logits/chosen": -3.3171591758728027, "logits/rejected": -3.3124804496765137, "loss": 1.1151, "step": 98 }, { "beta_dpo/beta": 0.2299761325120926, "beta_dpo/beta_margin_grad_mean": -0.30642494559288025, "beta_dpo/beta_margin_grad_std": 0.2615741193294525, "beta_dpo/beta_margin_mean": 3.1031556129455566, "beta_dpo/beta_margin_std": 6.05756950378418, "beta_dpo/beta_used": 0.2299761325120926, "beta_dpo/beta_used_raw": 0.2299761325120926, "beta_dpo/gap_mean": 10.645599365234375, "beta_dpo/gap_std": 16.824813842773438, "beta_dpo/loss_margin_mean": 12.99911117553711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14965986394557823, "grad_norm": 276.3519287109375, "learning_rate": 4.966473580761389e-07, "logits/chosen": -3.28450870513916, "logits/rejected": -3.29331636428833, "loss": 1.0461, "step": 99 }, { "beta_dpo/beta": 0.13315755128860474, "beta_dpo/beta_margin_grad_mean": -0.3134312033653259, "beta_dpo/beta_margin_grad_std": 0.3042459487915039, "beta_dpo/beta_margin_mean": 1.7308166027069092, "beta_dpo/beta_margin_std": 2.553818702697754, "beta_dpo/beta_used": 0.13315755128860474, "beta_dpo/beta_used_raw": 0.13315755128860474, "beta_dpo/gap_mean": 11.017045974731445, "beta_dpo/gap_std": 17.152666091918945, "beta_dpo/loss_margin_mean": 12.954180717468262, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15117157974300832, "grad_norm": 196.86199951171875, "learning_rate": 4.964280947263676e-07, "logits/chosen": -3.333110809326172, "logits/rejected": -3.3278937339782715, "loss": 0.9953, "step": 100 }, { "epoch": 0.15117157974300832, "eval_beta_dpo/beta": 0.061328914016485214, "eval_beta_dpo/beta_margin_grad_mean": -0.42010626196861267, "eval_beta_dpo/beta_margin_grad_std": 0.12268673628568649, "eval_beta_dpo/beta_margin_mean": 0.7724127173423767, "eval_beta_dpo/beta_margin_std": 1.1312227249145508, "eval_beta_dpo/beta_used": 0.061328914016485214, "eval_beta_dpo/beta_used_raw": -0.026162950322031975, "eval_beta_dpo/gap_mean": 11.231735229492188, "eval_beta_dpo/gap_std": 17.3662052154541, "eval_beta_dpo/loss_margin_mean": 9.129016876220703, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -3.3065404891967773, "eval_logits/rejected": -3.3154821395874023, "eval_loss": 0.648465633392334, "eval_runtime": 36.8664, "eval_samples_per_second": 62.469, "eval_steps_per_second": 1.953, "step": 100 }, { "beta_dpo/beta": 0.10702672600746155, "beta_dpo/beta_margin_grad_mean": -0.33292368054389954, "beta_dpo/beta_margin_grad_std": 0.25112685561180115, "beta_dpo/beta_margin_mean": 1.6487940549850464, "beta_dpo/beta_margin_std": 2.924790620803833, "beta_dpo/beta_used": 0.10702672600746155, "beta_dpo/beta_used_raw": 0.04923146218061447, "beta_dpo/gap_mean": 11.476530075073242, "beta_dpo/gap_std": 17.14791488647461, "beta_dpo/loss_margin_mean": 13.512877464294434, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15268329554043839, "grad_norm": 109.97279357910156, "learning_rate": 4.96201938253052e-07, "logits/chosen": -3.2759666442871094, "logits/rejected": -3.2825520038604736, "loss": 1.1453, "step": 101 }, { "beta_dpo/beta": 0.3127828538417816, "beta_dpo/beta_margin_grad_mean": -0.28607505559921265, "beta_dpo/beta_margin_grad_std": 0.3804353177547455, "beta_dpo/beta_margin_mean": 3.4580235481262207, "beta_dpo/beta_margin_std": 7.335368633270264, "beta_dpo/beta_used": 0.3127828538417816, "beta_dpo/beta_used_raw": 0.3127828538417816, "beta_dpo/gap_mean": 11.59719181060791, "beta_dpo/gap_std": 17.798389434814453, "beta_dpo/loss_margin_mean": 11.870264053344727, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15419501133786848, "grad_norm": 446.07623291015625, "learning_rate": 4.959688949822748e-07, "logits/chosen": -3.313218593597412, "logits/rejected": -3.3073246479034424, "loss": 1.0181, "step": 102 }, { "beta_dpo/beta": 0.10902046412229538, "beta_dpo/beta_margin_grad_mean": -0.35592639446258545, "beta_dpo/beta_margin_grad_std": 0.24294497072696686, "beta_dpo/beta_margin_mean": 1.3516149520874023, "beta_dpo/beta_margin_std": 2.5189883708953857, "beta_dpo/beta_used": 0.10902046412229538, "beta_dpo/beta_used_raw": 0.10902046412229538, "beta_dpo/gap_mean": 11.740519523620605, "beta_dpo/gap_std": 17.701553344726562, "beta_dpo/loss_margin_mean": 11.213488578796387, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15570672713529857, "grad_norm": 145.86431884765625, "learning_rate": 4.957289714327572e-07, "logits/chosen": -3.2825355529785156, "logits/rejected": -3.2872812747955322, "loss": 0.9671, "step": 103 }, { "beta_dpo/beta": 0.053928766399621964, "beta_dpo/beta_margin_grad_mean": -0.40135452151298523, "beta_dpo/beta_margin_grad_std": 0.2233276218175888, "beta_dpo/beta_margin_mean": 0.8575385212898254, "beta_dpo/beta_margin_std": 1.8291181325912476, "beta_dpo/beta_used": 0.053928766399621964, "beta_dpo/beta_used_raw": 0.0007075034081935883, "beta_dpo/gap_mean": 11.516962051391602, "beta_dpo/gap_std": 17.790897369384766, "beta_dpo/loss_margin_mean": 12.073646545410156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15721844293272866, "grad_norm": 122.20460510253906, "learning_rate": 4.954821743156767e-07, "logits/chosen": -3.2585346698760986, "logits/rejected": -3.2844367027282715, "loss": 1.2087, "step": 104 }, { "beta_dpo/beta": 0.08077219873666763, "beta_dpo/beta_margin_grad_mean": -0.37532395124435425, "beta_dpo/beta_margin_grad_std": 0.25372788310050964, "beta_dpo/beta_margin_mean": 1.0991127490997314, "beta_dpo/beta_margin_std": 2.2974021434783936, "beta_dpo/beta_used": 0.08077219873666763, "beta_dpo/beta_used_raw": -0.011477030813694, "beta_dpo/gap_mean": 11.434713363647461, "beta_dpo/gap_std": 17.877893447875977, "beta_dpo/loss_margin_mean": 10.53282356262207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15873015873015872, "grad_norm": 186.44876098632812, "learning_rate": 4.952285105344791e-07, "logits/chosen": -3.2931785583496094, "logits/rejected": -3.3190624713897705, "loss": 1.2273, "step": 105 }, { "beta_dpo/beta": 0.12502261996269226, "beta_dpo/beta_margin_grad_mean": -0.32863035798072815, "beta_dpo/beta_margin_grad_std": 0.2691400945186615, "beta_dpo/beta_margin_mean": 1.324576497077942, "beta_dpo/beta_margin_std": 2.8297853469848633, "beta_dpo/beta_used": 0.12502261996269226, "beta_dpo/beta_used_raw": 0.12502261996269226, "beta_dpo/gap_mean": 11.368999481201172, "beta_dpo/gap_std": 18.08009147644043, "beta_dpo/loss_margin_mean": 9.703131675720215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1602418745275888, "grad_norm": 195.53463745117188, "learning_rate": 4.949679871846857e-07, "logits/chosen": -3.2830629348754883, "logits/rejected": -3.283719778060913, "loss": 0.9661, "step": 106 }, { "beta_dpo/beta": 0.16479669511318207, "beta_dpo/beta_margin_grad_mean": -0.4065779447555542, "beta_dpo/beta_margin_grad_std": 0.3104124367237091, "beta_dpo/beta_margin_mean": 1.5142831802368164, "beta_dpo/beta_margin_std": 5.243839740753174, "beta_dpo/beta_used": 0.16479669511318207, "beta_dpo/beta_used_raw": 0.14887505769729614, "beta_dpo/gap_mean": 11.298818588256836, "beta_dpo/gap_std": 18.31937599182129, "beta_dpo/loss_margin_mean": 11.326576232910156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1617535903250189, "grad_norm": 419.9315185546875, "learning_rate": 4.947006115536947e-07, "logits/chosen": -3.336994171142578, "logits/rejected": -3.33107852935791, "loss": 1.4917, "step": 107 }, { "beta_dpo/beta": 0.27067288756370544, "beta_dpo/beta_margin_grad_mean": -0.2779853641986847, "beta_dpo/beta_margin_grad_std": 0.36746031045913696, "beta_dpo/beta_margin_mean": 3.8700578212738037, "beta_dpo/beta_margin_std": 5.521655082702637, "beta_dpo/beta_used": 0.27067288756370544, "beta_dpo/beta_used_raw": 0.27067288756370544, "beta_dpo/gap_mean": 11.675544738769531, "beta_dpo/gap_std": 18.756027221679688, "beta_dpo/loss_margin_mean": 14.086289405822754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16326530612244897, "grad_norm": 398.2725830078125, "learning_rate": 4.944263911205772e-07, "logits/chosen": -3.319603681564331, "logits/rejected": -3.329835891723633, "loss": 1.0832, "step": 108 }, { "beta_dpo/beta": 0.13511931896209717, "beta_dpo/beta_margin_grad_mean": -0.2998422682285309, "beta_dpo/beta_margin_grad_std": 0.25949904322624207, "beta_dpo/beta_margin_mean": 1.695755124092102, "beta_dpo/beta_margin_std": 2.8789122104644775, "beta_dpo/beta_used": 0.13511931896209717, "beta_dpo/beta_used_raw": 0.13511931896209717, "beta_dpo/gap_mean": 12.130621910095215, "beta_dpo/gap_std": 18.512916564941406, "beta_dpo/loss_margin_mean": 13.88644027709961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16477702191987906, "grad_norm": 182.8151397705078, "learning_rate": 4.941453335558681e-07, "logits/chosen": -3.304962158203125, "logits/rejected": -3.3313684463500977, "loss": 0.8526, "step": 109 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4973180592060089, "beta_dpo/beta_margin_grad_std": 0.00407151784747839, "beta_dpo/beta_margin_mean": 0.010728972032666206, "beta_dpo/beta_margin_std": 0.01628948375582695, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.02168526127934456, "beta_dpo/gap_mean": 11.985815048217773, "beta_dpo/gap_std": 18.2330322265625, "beta_dpo/loss_margin_mean": 10.728971481323242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16628873771730915, "grad_norm": 1.6084506511688232, "learning_rate": 4.938574467213517e-07, "logits/chosen": -3.334700584411621, "logits/rejected": -3.315509796142578, "loss": 1.3764, "step": 110 }, { "beta_dpo/beta": 0.07999280840158463, "beta_dpo/beta_margin_grad_mean": -0.32643070816993713, "beta_dpo/beta_margin_grad_std": 0.22944535315036774, "beta_dpo/beta_margin_mean": 1.0365407466888428, "beta_dpo/beta_margin_std": 1.3925178050994873, "beta_dpo/beta_used": 0.07999280840158463, "beta_dpo/beta_used_raw": 0.07999280840158463, "beta_dpo/gap_mean": 11.973346710205078, "beta_dpo/gap_std": 17.88604736328125, "beta_dpo/loss_margin_mean": 12.886275291442871, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16780045351473924, "grad_norm": 101.35095977783203, "learning_rate": 4.935627386698418e-07, "logits/chosen": -3.304298162460327, "logits/rejected": -3.32550048828125, "loss": 0.9563, "step": 111 }, { "beta_dpo/beta": 0.30072295665740967, "beta_dpo/beta_margin_grad_mean": -0.2684202492237091, "beta_dpo/beta_margin_grad_std": 0.343357115983963, "beta_dpo/beta_margin_mean": 4.119093418121338, "beta_dpo/beta_margin_std": 7.785393238067627, "beta_dpo/beta_used": 0.30072295665740967, "beta_dpo/beta_used_raw": 0.30072295665740967, "beta_dpo/gap_mean": 12.52230167388916, "beta_dpo/gap_std": 18.061965942382812, "beta_dpo/loss_margin_mean": 14.822799682617188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1693121693121693, "grad_norm": 336.1982727050781, "learning_rate": 4.932612176449559e-07, "logits/chosen": -3.347121238708496, "logits/rejected": -3.3621816635131836, "loss": 0.7794, "step": 112 }, { "beta_dpo/beta": 0.09019893407821655, "beta_dpo/beta_margin_grad_mean": -0.3130282759666443, "beta_dpo/beta_margin_grad_std": 0.25264549255371094, "beta_dpo/beta_margin_mean": 1.2460495233535767, "beta_dpo/beta_margin_std": 1.8240975141525269, "beta_dpo/beta_used": 0.09019893407821655, "beta_dpo/beta_used_raw": 0.09019893407821655, "beta_dpo/gap_mean": 12.521149635314941, "beta_dpo/gap_std": 18.38436508178711, "beta_dpo/loss_margin_mean": 12.998438835144043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1708238851095994, "grad_norm": 93.59181213378906, "learning_rate": 4.929528920808854e-07, "logits/chosen": -3.287510633468628, "logits/rejected": -3.3015475273132324, "loss": 0.8636, "step": 113 }, { "beta_dpo/beta": 0.031618744134902954, "beta_dpo/beta_margin_grad_mean": -0.4364977180957794, "beta_dpo/beta_margin_grad_std": 0.1669948250055313, "beta_dpo/beta_margin_mean": 0.31983914971351624, "beta_dpo/beta_margin_std": 0.8308923244476318, "beta_dpo/beta_used": 0.031618744134902954, "beta_dpo/beta_used_raw": 0.031618744134902954, "beta_dpo/gap_mean": 12.35753345489502, "beta_dpo/gap_std": 18.50242042541504, "beta_dpo/loss_margin_mean": 10.953230857849121, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17233560090702948, "grad_norm": 62.89752197265625, "learning_rate": 4.92637770602159e-07, "logits/chosen": -3.289074659347534, "logits/rejected": -3.304262161254883, "loss": 1.1957, "step": 114 }, { "beta_dpo/beta": 0.06044984608888626, "beta_dpo/beta_margin_grad_mean": -0.3418780565261841, "beta_dpo/beta_margin_grad_std": 0.17799124121665955, "beta_dpo/beta_margin_mean": 0.7988328337669373, "beta_dpo/beta_margin_std": 0.9704313278198242, "beta_dpo/beta_used": 0.06044984608888626, "beta_dpo/beta_used_raw": 0.06044984608888626, "beta_dpo/gap_mean": 12.469953536987305, "beta_dpo/gap_std": 17.994335174560547, "beta_dpo/loss_margin_mean": 13.105659484863281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17384731670445955, "grad_norm": 57.57271194458008, "learning_rate": 4.923158620234019e-07, "logits/chosen": -3.319486618041992, "logits/rejected": -3.3349642753601074, "loss": 0.9164, "step": 115 }, { "beta_dpo/beta": 0.0968373566865921, "beta_dpo/beta_margin_grad_mean": -0.38010072708129883, "beta_dpo/beta_margin_grad_std": 0.2710416913032532, "beta_dpo/beta_margin_mean": 1.3985378742218018, "beta_dpo/beta_margin_std": 2.8638670444488525, "beta_dpo/beta_used": 0.0968373566865921, "beta_dpo/beta_used_raw": 0.09599150717258453, "beta_dpo/gap_mean": 12.721582412719727, "beta_dpo/gap_std": 17.726070404052734, "beta_dpo/loss_margin_mean": 14.203131675720215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17535903250188964, "grad_norm": 177.60740661621094, "learning_rate": 4.91987175349089e-07, "logits/chosen": -3.3052141666412354, "logits/rejected": -3.3214163780212402, "loss": 1.311, "step": 116 }, { "beta_dpo/beta": 0.132050022482872, "beta_dpo/beta_margin_grad_mean": -0.32548680901527405, "beta_dpo/beta_margin_grad_std": 0.23864923417568207, "beta_dpo/beta_margin_mean": 2.548675060272217, "beta_dpo/beta_margin_std": 3.935479164123535, "beta_dpo/beta_used": 0.132050022482872, "beta_dpo/beta_used_raw": 0.11093597859144211, "beta_dpo/gap_mean": 13.294087409973145, "beta_dpo/gap_std": 17.970184326171875, "beta_dpo/loss_margin_mean": 14.694787979125977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17687074829931973, "grad_norm": 204.1652069091797, "learning_rate": 4.916517197732933e-07, "logits/chosen": -3.3179454803466797, "logits/rejected": -3.3263401985168457, "loss": 0.9744, "step": 117 }, { "beta_dpo/beta": 0.05474819988012314, "beta_dpo/beta_margin_grad_mean": -0.35628294944763184, "beta_dpo/beta_margin_grad_std": 0.17293918132781982, "beta_dpo/beta_margin_mean": 0.6831283569335938, "beta_dpo/beta_margin_std": 0.8625761270523071, "beta_dpo/beta_used": 0.05474819988012314, "beta_dpo/beta_used_raw": 0.05474819988012314, "beta_dpo/gap_mean": 12.97573471069336, "beta_dpo/gap_std": 17.96988296508789, "beta_dpo/loss_margin_mean": 12.617765426635742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17838246409674982, "grad_norm": 64.94096374511719, "learning_rate": 4.913095046794281e-07, "logits/chosen": -3.3115124702453613, "logits/rejected": -3.3236846923828125, "loss": 0.9531, "step": 118 }, { "beta_dpo/beta": 0.02536691352725029, "beta_dpo/beta_margin_grad_mean": -0.4406428039073944, "beta_dpo/beta_margin_grad_std": 0.15547248721122742, "beta_dpo/beta_margin_mean": 0.2915641963481903, "beta_dpo/beta_margin_std": 0.7470220327377319, "beta_dpo/beta_used": 0.02536691352725029, "beta_dpo/beta_used_raw": -0.06982914358377457, "beta_dpo/gap_mean": 12.281299591064453, "beta_dpo/gap_std": 17.952869415283203, "beta_dpo/loss_margin_mean": 8.689970970153809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17989417989417988, "grad_norm": 43.38718795776367, "learning_rate": 4.909605396399855e-07, "logits/chosen": -3.361435890197754, "logits/rejected": -3.3659839630126953, "loss": 1.2301, "step": 119 }, { "beta_dpo/beta": 0.12747015058994293, "beta_dpo/beta_margin_grad_mean": -0.3678387701511383, "beta_dpo/beta_margin_grad_std": 0.2749040424823761, "beta_dpo/beta_margin_mean": 2.0166592597961426, "beta_dpo/beta_margin_std": 3.838064670562744, "beta_dpo/beta_used": 0.12747015058994293, "beta_dpo/beta_used_raw": 0.04408044368028641, "beta_dpo/gap_mean": 12.654379844665527, "beta_dpo/gap_std": 18.12213897705078, "beta_dpo/loss_margin_mean": 14.718048095703125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18140589569160998, "grad_norm": 216.78616333007812, "learning_rate": 4.906048344162676e-07, "logits/chosen": -3.328334331512451, "logits/rejected": -3.343393325805664, "loss": 1.1061, "step": 120 }, { "beta_dpo/beta": 0.04815954342484474, "beta_dpo/beta_margin_grad_mean": -0.40159502625465393, "beta_dpo/beta_margin_grad_std": 0.20640181005001068, "beta_dpo/beta_margin_mean": 0.5983519554138184, "beta_dpo/beta_margin_std": 1.435462474822998, "beta_dpo/beta_used": 0.04815954342484474, "beta_dpo/beta_used_raw": 0.04493497684597969, "beta_dpo/gap_mean": 12.72224235534668, "beta_dpo/gap_std": 18.192995071411133, "beta_dpo/loss_margin_mean": 13.013574600219727, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18291761148904007, "grad_norm": 50.60379409790039, "learning_rate": 4.902423989581143e-07, "logits/chosen": -3.313652276992798, "logits/rejected": -3.3476529121398926, "loss": 1.1083, "step": 121 }, { "beta_dpo/beta": 0.17443186044692993, "beta_dpo/beta_margin_grad_mean": -0.2969356179237366, "beta_dpo/beta_margin_grad_std": 0.27412667870521545, "beta_dpo/beta_margin_mean": 2.469672203063965, "beta_dpo/beta_margin_std": 3.7644035816192627, "beta_dpo/beta_used": 0.17443186044692993, "beta_dpo/beta_used_raw": 0.17443186044692993, "beta_dpo/gap_mean": 12.795064926147461, "beta_dpo/gap_std": 17.958881378173828, "beta_dpo/loss_margin_mean": 13.30368423461914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18442932728647016, "grad_norm": 171.00291442871094, "learning_rate": 4.898732434036243e-07, "logits/chosen": -3.323568105697632, "logits/rejected": -3.3388819694519043, "loss": 0.7791, "step": 122 }, { "beta_dpo/beta": 0.15132403373718262, "beta_dpo/beta_margin_grad_mean": -0.30665212869644165, "beta_dpo/beta_margin_grad_std": 0.23419080674648285, "beta_dpo/beta_margin_mean": 2.5235509872436523, "beta_dpo/beta_margin_std": 3.878403663635254, "beta_dpo/beta_used": 0.15132403373718262, "beta_dpo/beta_used_raw": 0.15132403373718262, "beta_dpo/gap_mean": 13.087821960449219, "beta_dpo/gap_std": 17.78058624267578, "beta_dpo/loss_margin_mean": 14.883736610412598, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18594104308390022, "grad_norm": 108.17544555664062, "learning_rate": 4.894973780788722e-07, "logits/chosen": -3.3349435329437256, "logits/rejected": -3.3474483489990234, "loss": 0.8074, "step": 123 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4965851604938507, "beta_dpo/beta_margin_grad_std": 0.004146179184317589, "beta_dpo/beta_margin_mean": 0.013660573400557041, "beta_dpo/beta_margin_std": 0.01658688299357891, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09001453220844269, "beta_dpo/gap_mean": 13.315803527832031, "beta_dpo/gap_std": 17.493318557739258, "beta_dpo/loss_margin_mean": 13.66057300567627, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1874527588813303, "grad_norm": 1.6013367176055908, "learning_rate": 4.89114813497619e-07, "logits/chosen": -3.3313074111938477, "logits/rejected": -3.355624198913574, "loss": 1.3762, "step": 124 }, { "beta_dpo/beta": 0.05969487130641937, "beta_dpo/beta_margin_grad_mean": -0.3555901050567627, "beta_dpo/beta_margin_grad_std": 0.19629493355751038, "beta_dpo/beta_margin_mean": 1.0195149183273315, "beta_dpo/beta_margin_std": 1.603021502494812, "beta_dpo/beta_used": 0.05969487130641937, "beta_dpo/beta_used_raw": -0.020462922751903534, "beta_dpo/gap_mean": 13.516692161560059, "beta_dpo/gap_std": 17.451478958129883, "beta_dpo/loss_margin_mean": 13.241378784179688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1889644746787604, "grad_norm": 64.26988983154297, "learning_rate": 4.887255603610184e-07, "logits/chosen": -3.3624677658081055, "logits/rejected": -3.3855817317962646, "loss": 0.9877, "step": 125 }, { "beta_dpo/beta": 0.004270387347787619, "beta_dpo/beta_margin_grad_mean": -0.4860527813434601, "beta_dpo/beta_margin_grad_std": 0.02659439854323864, "beta_dpo/beta_margin_mean": 0.05607512220740318, "beta_dpo/beta_margin_std": 0.10699854791164398, "beta_dpo/beta_used": 0.004270387347787619, "beta_dpo/beta_used_raw": -0.04096106067299843, "beta_dpo/gap_mean": 12.902446746826172, "beta_dpo/gap_std": 17.716262817382812, "beta_dpo/loss_margin_mean": 11.313679695129395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19047619047619047, "grad_norm": 8.320745468139648, "learning_rate": 4.883296295573176e-07, "logits/chosen": -3.352705478668213, "logits/rejected": -3.3358869552612305, "loss": 1.3413, "step": 126 }, { "beta_dpo/beta": 0.043115854263305664, "beta_dpo/beta_margin_grad_mean": -0.3736512064933777, "beta_dpo/beta_margin_grad_std": 0.1453510969877243, "beta_dpo/beta_margin_mean": 0.5749568939208984, "beta_dpo/beta_margin_std": 0.6788315176963806, "beta_dpo/beta_used": 0.043115854263305664, "beta_dpo/beta_used_raw": 0.043115854263305664, "beta_dpo/gap_mean": 13.043050765991211, "beta_dpo/gap_std": 17.539508819580078, "beta_dpo/loss_margin_mean": 13.345462799072266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19198790627362056, "grad_norm": 51.55445861816406, "learning_rate": 4.87927032161552e-07, "logits/chosen": -3.3343491554260254, "logits/rejected": -3.329026460647583, "loss": 1.0171, "step": 127 }, { "beta_dpo/beta": 0.14029397070407867, "beta_dpo/beta_margin_grad_mean": -0.36535340547561646, "beta_dpo/beta_margin_grad_std": 0.28294602036476135, "beta_dpo/beta_margin_mean": 1.8758174180984497, "beta_dpo/beta_margin_std": 3.9560065269470215, "beta_dpo/beta_used": 0.14029397070407867, "beta_dpo/beta_used_raw": 0.12094033509492874, "beta_dpo/gap_mean": 13.112518310546875, "beta_dpo/gap_std": 17.750701904296875, "beta_dpo/loss_margin_mean": 13.866514205932617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19349962207105065, "grad_norm": 200.18682861328125, "learning_rate": 4.875177794352363e-07, "logits/chosen": -3.3554821014404297, "logits/rejected": -3.3803372383117676, "loss": 0.883, "step": 128 }, { "beta_dpo/beta": 0.023648953065276146, "beta_dpo/beta_margin_grad_mean": -0.4368211328983307, "beta_dpo/beta_margin_grad_std": 0.1546202152967453, "beta_dpo/beta_margin_mean": 0.31830909848213196, "beta_dpo/beta_margin_std": 0.7734503149986267, "beta_dpo/beta_used": 0.023648953065276146, "beta_dpo/beta_used_raw": 0.008171427063643932, "beta_dpo/gap_mean": 13.18086051940918, "beta_dpo/gap_std": 18.537490844726562, "beta_dpo/loss_margin_mean": 13.067851066589355, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19501133786848074, "grad_norm": 43.7829475402832, "learning_rate": 4.871018828260491e-07, "logits/chosen": -3.345930576324463, "logits/rejected": -3.3373584747314453, "loss": 1.22, "step": 129 }, { "beta_dpo/beta": 0.04935254156589508, "beta_dpo/beta_margin_grad_mean": -0.3538309931755066, "beta_dpo/beta_margin_grad_std": 0.20434898138046265, "beta_dpo/beta_margin_mean": 0.751793622970581, "beta_dpo/beta_margin_std": 1.1252886056900024, "beta_dpo/beta_used": 0.04935254156589508, "beta_dpo/beta_used_raw": 0.04935254156589508, "beta_dpo/gap_mean": 13.687162399291992, "beta_dpo/gap_std": 18.96971893310547, "beta_dpo/loss_margin_mean": 16.04789161682129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1965230536659108, "grad_norm": 64.4195556640625, "learning_rate": 4.866793539675126e-07, "logits/chosen": -3.3322033882141113, "logits/rejected": -3.3460071086883545, "loss": 1.0315, "step": 130 }, { "beta_dpo/beta": 0.147065669298172, "beta_dpo/beta_margin_grad_mean": -0.342940092086792, "beta_dpo/beta_margin_grad_std": 0.2701457142829895, "beta_dpo/beta_margin_mean": 2.4948854446411133, "beta_dpo/beta_margin_std": 4.793673515319824, "beta_dpo/beta_used": 0.147065669298172, "beta_dpo/beta_used_raw": 0.13867664337158203, "beta_dpo/gap_mean": 13.932304382324219, "beta_dpo/gap_std": 19.49631118774414, "beta_dpo/loss_margin_mean": 15.796794891357422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1980347694633409, "grad_norm": 142.7205810546875, "learning_rate": 4.86250204678667e-07, "logits/chosen": -3.324023962020874, "logits/rejected": -3.3562068939208984, "loss": 1.0579, "step": 131 }, { "beta_dpo/beta": 0.09857457131147385, "beta_dpo/beta_margin_grad_mean": -0.3639555871486664, "beta_dpo/beta_margin_grad_std": 0.2656742036342621, "beta_dpo/beta_margin_mean": 1.3347876071929932, "beta_dpo/beta_margin_std": 2.7419517040252686, "beta_dpo/beta_used": 0.09857457131147385, "beta_dpo/beta_used_raw": 0.0864531397819519, "beta_dpo/gap_mean": 14.162351608276367, "beta_dpo/gap_std": 19.121501922607422, "beta_dpo/loss_margin_mean": 15.047807693481445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19954648526077098, "grad_norm": 107.42191314697266, "learning_rate": 4.858144469637408e-07, "logits/chosen": -3.3415815830230713, "logits/rejected": -3.3510897159576416, "loss": 1.047, "step": 132 }, { "beta_dpo/beta": 0.0017336343880742788, "beta_dpo/beta_margin_grad_mean": -0.4941368103027344, "beta_dpo/beta_margin_grad_std": 0.01007362175732851, "beta_dpo/beta_margin_mean": 0.023465832695364952, "beta_dpo/beta_margin_std": 0.04031944274902344, "beta_dpo/beta_used": 0.0017336343880742788, "beta_dpo/beta_used_raw": -0.04028265178203583, "beta_dpo/gap_mean": 14.131145477294922, "beta_dpo/gap_std": 19.188018798828125, "beta_dpo/loss_margin_mean": 12.276420593261719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20105820105820105, "grad_norm": 2.9273688793182373, "learning_rate": 4.853720930118138e-07, "logits/chosen": -3.3473305702209473, "logits/rejected": -3.345794677734375, "loss": 1.3656, "step": 133 }, { "beta_dpo/beta": 0.2282017022371292, "beta_dpo/beta_margin_grad_mean": -0.2386937141418457, "beta_dpo/beta_margin_grad_std": 0.3078445494174957, "beta_dpo/beta_margin_mean": 3.9094603061676025, "beta_dpo/beta_margin_std": 5.4880571365356445, "beta_dpo/beta_used": 0.2282017022371292, "beta_dpo/beta_used_raw": 0.2282017022371292, "beta_dpo/gap_mean": 14.498592376708984, "beta_dpo/gap_std": 19.41600227355957, "beta_dpo/loss_margin_mean": 17.716445922851562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20256991685563114, "grad_norm": 223.05345153808594, "learning_rate": 4.849231551964771e-07, "logits/chosen": -3.3360257148742676, "logits/rejected": -3.351398468017578, "loss": 0.7892, "step": 134 }, { "beta_dpo/beta": 0.04041796550154686, "beta_dpo/beta_margin_grad_mean": -0.4121745824813843, "beta_dpo/beta_margin_grad_std": 0.182328462600708, "beta_dpo/beta_margin_mean": 0.48748740553855896, "beta_dpo/beta_margin_std": 1.0515720844268799, "beta_dpo/beta_used": 0.04041796550154686, "beta_dpo/beta_used_raw": 0.04041796550154686, "beta_dpo/gap_mean": 14.310811996459961, "beta_dpo/gap_std": 19.547698974609375, "beta_dpo/loss_margin_mean": 12.49234390258789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20408163265306123, "grad_norm": 61.08085632324219, "learning_rate": 4.844676460754862e-07, "logits/chosen": -3.327956199645996, "logits/rejected": -3.3364243507385254, "loss": 1.0703, "step": 135 }, { "beta_dpo/beta": 0.1498934030532837, "beta_dpo/beta_margin_grad_mean": -0.31762951612472534, "beta_dpo/beta_margin_grad_std": 0.29164251685142517, "beta_dpo/beta_margin_mean": 2.570695400238037, "beta_dpo/beta_margin_std": 4.504148960113525, "beta_dpo/beta_used": 0.1498934030532837, "beta_dpo/beta_used_raw": 0.1498934030532837, "beta_dpo/gap_mean": 14.416141510009766, "beta_dpo/gap_std": 19.936372756958008, "beta_dpo/loss_margin_mean": 14.88786792755127, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20559334845049132, "grad_norm": 277.1800842285156, "learning_rate": 4.840055783904106e-07, "logits/chosen": -3.333883285522461, "logits/rejected": -3.3552680015563965, "loss": 1.1056, "step": 136 }, { "beta_dpo/beta": 0.08450040221214294, "beta_dpo/beta_margin_grad_mean": -0.37931200861930847, "beta_dpo/beta_margin_grad_std": 0.24967019259929657, "beta_dpo/beta_margin_mean": 1.2663687467575073, "beta_dpo/beta_margin_std": 2.509981155395508, "beta_dpo/beta_used": 0.08450040221214294, "beta_dpo/beta_used_raw": 0.06899924576282501, "beta_dpo/gap_mean": 13.820323944091797, "beta_dpo/gap_std": 20.457162857055664, "beta_dpo/loss_margin_mean": 11.979299545288086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20710506424792138, "grad_norm": 155.48312377929688, "learning_rate": 4.835369650662767e-07, "logits/chosen": -3.3146543502807617, "logits/rejected": -3.327342987060547, "loss": 1.1313, "step": 137 }, { "beta_dpo/beta": 0.09970663487911224, "beta_dpo/beta_margin_grad_mean": -0.3133697807788849, "beta_dpo/beta_margin_grad_std": 0.271743506193161, "beta_dpo/beta_margin_mean": 1.1647833585739136, "beta_dpo/beta_margin_std": 2.0706329345703125, "beta_dpo/beta_used": 0.09970663487911224, "beta_dpo/beta_used_raw": 0.09970663487911224, "beta_dpo/gap_mean": 13.604002952575684, "beta_dpo/gap_std": 20.282379150390625, "beta_dpo/loss_margin_mean": 11.7897310256958, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20861678004535147, "grad_norm": 70.18770599365234, "learning_rate": 4.830618192112065e-07, "logits/chosen": -3.335930347442627, "logits/rejected": -3.342681407928467, "loss": 0.7101, "step": 138 }, { "beta_dpo/beta": 0.1375807821750641, "beta_dpo/beta_margin_grad_mean": -0.35882195830345154, "beta_dpo/beta_margin_grad_std": 0.26474788784980774, "beta_dpo/beta_margin_mean": 2.2247705459594727, "beta_dpo/beta_margin_std": 4.507172584533691, "beta_dpo/beta_used": 0.1375807821750641, "beta_dpo/beta_used_raw": 0.13499371707439423, "beta_dpo/gap_mean": 13.861026763916016, "beta_dpo/gap_std": 20.320680618286133, "beta_dpo/loss_margin_mean": 16.075502395629883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21012849584278157, "grad_norm": 128.69879150390625, "learning_rate": 4.825801541160509e-07, "logits/chosen": -3.340639114379883, "logits/rejected": -3.3341877460479736, "loss": 0.9392, "step": 139 }, { "beta_dpo/beta": 0.3182719051837921, "beta_dpo/beta_margin_grad_mean": -0.2815694808959961, "beta_dpo/beta_margin_grad_std": 0.30072757601737976, "beta_dpo/beta_margin_mean": 6.113375186920166, "beta_dpo/beta_margin_std": 11.143556594848633, "beta_dpo/beta_used": 0.3182719051837921, "beta_dpo/beta_used_raw": 0.3182719051837921, "beta_dpo/gap_mean": 14.645448684692383, "beta_dpo/gap_std": 20.552196502685547, "beta_dpo/loss_margin_mean": 18.274456024169922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21164021164021163, "grad_norm": 366.61572265625, "learning_rate": 4.820919832540181e-07, "logits/chosen": -3.338654041290283, "logits/rejected": -3.350745439529419, "loss": 0.942, "step": 140 }, { "beta_dpo/beta": 0.008213422261178493, "beta_dpo/beta_margin_grad_mean": -0.4638515114784241, "beta_dpo/beta_margin_grad_std": 0.06582221388816833, "beta_dpo/beta_margin_mean": 0.15062931180000305, "beta_dpo/beta_margin_std": 0.2794095277786255, "beta_dpo/beta_used": 0.008213422261178493, "beta_dpo/beta_used_raw": -0.015965929254889488, "beta_dpo/gap_mean": 14.991787910461426, "beta_dpo/gap_std": 20.643108367919922, "beta_dpo/loss_margin_mean": 15.102987289428711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21315192743764172, "grad_norm": 16.13588523864746, "learning_rate": 4.815973202802966e-07, "logits/chosen": -3.3420023918151855, "logits/rejected": -3.356560707092285, "loss": 1.2876, "step": 141 }, { "beta_dpo/beta": 0.014141488820314407, "beta_dpo/beta_margin_grad_mean": -0.44556012749671936, "beta_dpo/beta_margin_grad_std": 0.09043306857347488, "beta_dpo/beta_margin_mean": 0.23377063870429993, "beta_dpo/beta_margin_std": 0.3970645070075989, "beta_dpo/beta_used": 0.014141488820314407, "beta_dpo/beta_used_raw": -0.04336583614349365, "beta_dpo/gap_mean": 14.939939498901367, "beta_dpo/gap_std": 20.174400329589844, "beta_dpo/loss_margin_mean": 15.348499298095703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2146636432350718, "grad_norm": 23.85763931274414, "learning_rate": 4.810961790316729e-07, "logits/chosen": -3.333073616027832, "logits/rejected": -3.3334572315216064, "loss": 1.228, "step": 142 }, { "beta_dpo/beta": 0.07208716869354248, "beta_dpo/beta_margin_grad_mean": -0.39046409726142883, "beta_dpo/beta_margin_grad_std": 0.2325820028781891, "beta_dpo/beta_margin_mean": 0.7290058732032776, "beta_dpo/beta_margin_std": 2.102476119995117, "beta_dpo/beta_used": 0.07208716869354248, "beta_dpo/beta_used_raw": 0.017211638391017914, "beta_dpo/gap_mean": 14.151898384094238, "beta_dpo/gap_std": 20.170135498046875, "beta_dpo/loss_margin_mean": 9.85452938079834, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2161753590325019, "grad_norm": 77.52479553222656, "learning_rate": 4.805885735261454e-07, "logits/chosen": -3.3221468925476074, "logits/rejected": -3.3139724731445312, "loss": 0.9909, "step": 143 }, { "beta_dpo/beta": 0.0595400407910347, "beta_dpo/beta_margin_grad_mean": -0.3856506943702698, "beta_dpo/beta_margin_grad_std": 0.22696533799171448, "beta_dpo/beta_margin_mean": 0.6182453632354736, "beta_dpo/beta_margin_std": 1.3108974695205688, "beta_dpo/beta_used": 0.0595400407910347, "beta_dpo/beta_used_raw": 0.0595400407910347, "beta_dpo/gap_mean": 13.700199127197266, "beta_dpo/gap_std": 20.346786499023438, "beta_dpo/loss_margin_mean": 11.316890716552734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21768707482993196, "grad_norm": 64.55220794677734, "learning_rate": 4.800745179625307e-07, "logits/chosen": -3.3223190307617188, "logits/rejected": -3.324495792388916, "loss": 0.9382, "step": 144 }, { "beta_dpo/beta": 0.14726224541664124, "beta_dpo/beta_margin_grad_mean": -0.27921420335769653, "beta_dpo/beta_margin_grad_std": 0.2893240749835968, "beta_dpo/beta_margin_mean": 2.3826241493225098, "beta_dpo/beta_margin_std": 3.6393394470214844, "beta_dpo/beta_used": 0.14726224541664124, "beta_dpo/beta_used_raw": 0.14726224541664124, "beta_dpo/gap_mean": 13.917230606079102, "beta_dpo/gap_std": 20.533353805541992, "beta_dpo/loss_margin_mean": 16.579933166503906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21919879062736206, "grad_norm": 189.30157470703125, "learning_rate": 4.795540267200686e-07, "logits/chosen": -3.346303939819336, "logits/rejected": -3.3454699516296387, "loss": 0.8055, "step": 145 }, { "beta_dpo/beta": 0.06543400138616562, "beta_dpo/beta_margin_grad_mean": -0.3946565091609955, "beta_dpo/beta_margin_grad_std": 0.23178449273109436, "beta_dpo/beta_margin_mean": 0.9500102996826172, "beta_dpo/beta_margin_std": 2.0022237300872803, "beta_dpo/beta_used": 0.06543400138616562, "beta_dpo/beta_used_raw": 0.05708005279302597, "beta_dpo/gap_mean": 13.826836585998535, "beta_dpo/gap_std": 20.62220001220703, "beta_dpo/loss_margin_mean": 13.209084510803223, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22071050642479215, "grad_norm": 102.3393783569336, "learning_rate": 4.790271143580173e-07, "logits/chosen": -3.3137712478637695, "logits/rejected": -3.308354139328003, "loss": 1.1162, "step": 146 }, { "beta_dpo/beta": 0.09549230337142944, "beta_dpo/beta_margin_grad_mean": -0.3872320353984833, "beta_dpo/beta_margin_grad_std": 0.2581307291984558, "beta_dpo/beta_margin_mean": 1.4723294973373413, "beta_dpo/beta_margin_std": 3.005223035812378, "beta_dpo/beta_used": 0.09549230337142944, "beta_dpo/beta_used_raw": 0.07679538428783417, "beta_dpo/gap_mean": 13.881373405456543, "beta_dpo/gap_std": 20.608814239501953, "beta_dpo/loss_margin_mean": 14.324885368347168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2222222222222222, "grad_norm": 163.24281311035156, "learning_rate": 4.784937956152489e-07, "logits/chosen": -3.3383469581604004, "logits/rejected": -3.337419033050537, "loss": 1.0803, "step": 147 }, { "beta_dpo/beta": 0.049887314438819885, "beta_dpo/beta_margin_grad_mean": -0.3772048354148865, "beta_dpo/beta_margin_grad_std": 0.20383627712726593, "beta_dpo/beta_margin_mean": 1.0147607326507568, "beta_dpo/beta_margin_std": 1.9053665399551392, "beta_dpo/beta_used": 0.049887314438819885, "beta_dpo/beta_used_raw": 0.009546924382448196, "beta_dpo/gap_mean": 14.160110473632812, "beta_dpo/gap_std": 20.884716033935547, "beta_dpo/loss_margin_mean": 16.530052185058594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2237339380196523, "grad_norm": 62.660152435302734, "learning_rate": 4.779540854098347e-07, "logits/chosen": -3.3094582557678223, "logits/rejected": -3.32807993888855, "loss": 1.0848, "step": 148 }, { "beta_dpo/beta": 0.0466022863984108, "beta_dpo/beta_margin_grad_mean": -0.41487088799476624, "beta_dpo/beta_margin_grad_std": 0.23305167257785797, "beta_dpo/beta_margin_mean": 0.6102204322814941, "beta_dpo/beta_margin_std": 1.6688002347946167, "beta_dpo/beta_used": 0.0466022863984108, "beta_dpo/beta_used_raw": 0.010175202041864395, "beta_dpo/gap_mean": 14.3313570022583, "beta_dpo/gap_std": 21.182098388671875, "beta_dpo/loss_margin_mean": 13.540119171142578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2252456538170824, "grad_norm": 96.15580749511719, "learning_rate": 4.774079988386296e-07, "logits/chosen": -3.2743802070617676, "logits/rejected": -3.284924030303955, "loss": 1.1845, "step": 149 }, { "beta_dpo/beta": 0.19884531199932098, "beta_dpo/beta_margin_grad_mean": -0.21449099481105804, "beta_dpo/beta_margin_grad_std": 0.30374783277511597, "beta_dpo/beta_margin_mean": 4.370296001434326, "beta_dpo/beta_margin_std": 5.271862506866455, "beta_dpo/beta_used": 0.19884531199932098, "beta_dpo/beta_used_raw": 0.19884531199932098, "beta_dpo/gap_mean": 15.378077507019043, "beta_dpo/gap_std": 21.256366729736328, "beta_dpo/loss_margin_mean": 20.71977996826172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22675736961451248, "grad_norm": 213.7008819580078, "learning_rate": 4.768555511768486e-07, "logits/chosen": -3.3375020027160645, "logits/rejected": -3.343487024307251, "loss": 0.8479, "step": 150 }, { "beta_dpo/beta": 0.2805306613445282, "beta_dpo/beta_margin_grad_mean": -0.25795936584472656, "beta_dpo/beta_margin_grad_std": 0.36935240030288696, "beta_dpo/beta_margin_mean": 4.957238674163818, "beta_dpo/beta_margin_std": 7.137806415557861, "beta_dpo/beta_used": 0.2805306613445282, "beta_dpo/beta_used_raw": 0.2805306613445282, "beta_dpo/gap_mean": 15.795341491699219, "beta_dpo/gap_std": 21.945510864257812, "beta_dpo/loss_margin_mean": 17.629186630249023, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22826908541194255, "grad_norm": 554.007568359375, "learning_rate": 4.762967578776406e-07, "logits/chosen": -3.322493076324463, "logits/rejected": -3.3346638679504395, "loss": 0.8538, "step": 151 }, { "beta_dpo/beta": 0.17714881896972656, "beta_dpo/beta_margin_grad_mean": -0.33929643034935, "beta_dpo/beta_margin_grad_std": 0.2884059250354767, "beta_dpo/beta_margin_mean": 3.272763729095459, "beta_dpo/beta_margin_std": 6.407361030578613, "beta_dpo/beta_used": 0.17714881896972656, "beta_dpo/beta_used_raw": 0.0768561065196991, "beta_dpo/gap_mean": 15.577496528625488, "beta_dpo/gap_std": 22.255632400512695, "beta_dpo/loss_margin_mean": 14.780200958251953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22978080120937264, "grad_norm": 280.85235595703125, "learning_rate": 4.757316345716553e-07, "logits/chosen": -3.323756217956543, "logits/rejected": -3.33445405960083, "loss": 1.0671, "step": 152 }, { "beta_dpo/beta": 0.08415161818265915, "beta_dpo/beta_margin_grad_mean": -0.3395489752292633, "beta_dpo/beta_margin_grad_std": 0.22877615690231323, "beta_dpo/beta_margin_mean": 1.5211905241012573, "beta_dpo/beta_margin_std": 2.5890166759490967, "beta_dpo/beta_used": 0.08415161818265915, "beta_dpo/beta_used_raw": 0.05796004831790924, "beta_dpo/gap_mean": 15.996269226074219, "beta_dpo/gap_std": 21.825340270996094, "beta_dpo/loss_margin_mean": 17.323328018188477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23129251700680273, "grad_norm": 151.55897521972656, "learning_rate": 4.751601970666064e-07, "logits/chosen": -3.3437438011169434, "logits/rejected": -3.3381764888763428, "loss": 0.9072, "step": 153 }, { "beta_dpo/beta": 0.06097334995865822, "beta_dpo/beta_margin_grad_mean": -0.39823541045188904, "beta_dpo/beta_margin_grad_std": 0.2587531805038452, "beta_dpo/beta_margin_mean": 0.762320339679718, "beta_dpo/beta_margin_std": 2.397218704223633, "beta_dpo/beta_used": 0.06097334995865822, "beta_dpo/beta_used_raw": 0.0043108463287353516, "beta_dpo/gap_mean": 15.49760627746582, "beta_dpo/gap_std": 22.214107513427734, "beta_dpo/loss_margin_mean": 12.32646369934082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2328042328042328, "grad_norm": 114.33980560302734, "learning_rate": 4.745824613468292e-07, "logits/chosen": -3.3356807231903076, "logits/rejected": -3.3275413513183594, "loss": 1.203, "step": 154 }, { "beta_dpo/beta": 0.20179255306720734, "beta_dpo/beta_margin_grad_mean": -0.3471258282661438, "beta_dpo/beta_margin_grad_std": 0.2985984981060028, "beta_dpo/beta_margin_mean": 4.433549404144287, "beta_dpo/beta_margin_std": 8.430581092834473, "beta_dpo/beta_used": 0.20179255306720734, "beta_dpo/beta_used_raw": 0.18239660561084747, "beta_dpo/gap_mean": 15.837427139282227, "beta_dpo/gap_std": 22.66343879699707, "beta_dpo/loss_margin_mean": 17.480371475219727, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23431594860166288, "grad_norm": 381.34991455078125, "learning_rate": 4.7399844357283393e-07, "logits/chosen": -3.342803478240967, "logits/rejected": -3.3430895805358887, "loss": 1.2675, "step": 155 }, { "beta_dpo/beta": 0.017000947147607803, "beta_dpo/beta_margin_grad_mean": -0.4466518759727478, "beta_dpo/beta_margin_grad_std": 0.10175792872905731, "beta_dpo/beta_margin_mean": 0.2248079627752304, "beta_dpo/beta_margin_std": 0.44073355197906494, "beta_dpo/beta_used": 0.017000947147607803, "beta_dpo/beta_used_raw": 0.017000947147607803, "beta_dpo/gap_mean": 15.509801864624023, "beta_dpo/gap_std": 22.530162811279297, "beta_dpo/loss_margin_mean": 14.006168365478516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23582766439909297, "grad_norm": 27.029804229736328, "learning_rate": 4.7340816008085305e-07, "logits/chosen": -3.3521556854248047, "logits/rejected": -3.353640079498291, "loss": 1.1947, "step": 156 }, { "beta_dpo/beta": 0.1710837334394455, "beta_dpo/beta_margin_grad_mean": -0.3583267629146576, "beta_dpo/beta_margin_grad_std": 0.2934838533401489, "beta_dpo/beta_margin_mean": 3.050497531890869, "beta_dpo/beta_margin_std": 5.634936332702637, "beta_dpo/beta_used": 0.1710837334394455, "beta_dpo/beta_used_raw": 0.07584992796182632, "beta_dpo/gap_mean": 15.374656677246094, "beta_dpo/gap_std": 22.11768341064453, "beta_dpo/loss_margin_mean": 14.452281951904297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23733938019652306, "grad_norm": 171.72964477539062, "learning_rate": 4.728116273823847e-07, "logits/chosen": -3.3176865577697754, "logits/rejected": -3.3212122917175293, "loss": 0.9895, "step": 157 }, { "beta_dpo/beta": 0.16476041078567505, "beta_dpo/beta_margin_grad_mean": -0.2862817645072937, "beta_dpo/beta_margin_grad_std": 0.3239341378211975, "beta_dpo/beta_margin_mean": 2.1851229667663574, "beta_dpo/beta_margin_std": 3.4346442222595215, "beta_dpo/beta_used": 0.16476041078567505, "beta_dpo/beta_used_raw": 0.16476041078567505, "beta_dpo/gap_mean": 14.694307327270508, "beta_dpo/gap_std": 21.618305206298828, "beta_dpo/loss_margin_mean": 12.814839363098145, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23885109599395313, "grad_norm": 192.72439575195312, "learning_rate": 4.7220886216373085e-07, "logits/chosen": -3.324219226837158, "logits/rejected": -3.3319594860076904, "loss": 0.8983, "step": 158 }, { "beta_dpo/beta": 0.007234493736177683, "beta_dpo/beta_margin_grad_mean": -0.47092530131340027, "beta_dpo/beta_margin_grad_std": 0.04833231866359711, "beta_dpo/beta_margin_mean": 0.11883436888456345, "beta_dpo/beta_margin_std": 0.20030085742473602, "beta_dpo/beta_used": 0.007234493736177683, "beta_dpo/beta_used_raw": -0.00955403782427311, "beta_dpo/gap_mean": 14.409493446350098, "beta_dpo/gap_std": 21.04364013671875, "beta_dpo/loss_margin_mean": 13.555635452270508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24036281179138322, "grad_norm": 13.922643661499023, "learning_rate": 4.715998812855304e-07, "logits/chosen": -3.317610263824463, "logits/rejected": -3.3311636447906494, "loss": 1.2992, "step": 159 }, { "beta_dpo/beta": 0.10042039304971695, "beta_dpo/beta_margin_grad_mean": -0.37785497307777405, "beta_dpo/beta_margin_grad_std": 0.2555430829524994, "beta_dpo/beta_margin_mean": 1.435318946838379, "beta_dpo/beta_margin_std": 3.2165510654449463, "beta_dpo/beta_used": 0.10042039304971695, "beta_dpo/beta_used_raw": -0.09143070876598358, "beta_dpo/gap_mean": 14.128339767456055, "beta_dpo/gap_std": 20.657943725585938, "beta_dpo/loss_margin_mean": 12.320213317871094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2418745275888133, "grad_norm": 115.22747802734375, "learning_rate": 4.7098470178228755e-07, "logits/chosen": -3.2877392768859863, "logits/rejected": -3.3001627922058105, "loss": 1.016, "step": 160 }, { "beta_dpo/beta": 0.058217551559209824, "beta_dpo/beta_margin_grad_mean": -0.4167746901512146, "beta_dpo/beta_margin_grad_std": 0.2455618679523468, "beta_dpo/beta_margin_mean": 0.7758399844169617, "beta_dpo/beta_margin_std": 2.212191104888916, "beta_dpo/beta_used": 0.058217551559209824, "beta_dpo/beta_used_raw": 0.01114998385310173, "beta_dpo/gap_mean": 13.952075958251953, "beta_dpo/gap_std": 20.97524642944336, "beta_dpo/loss_margin_mean": 12.638019561767578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24338624338624337, "grad_norm": 87.436279296875, "learning_rate": 4.703633408618955e-07, "logits/chosen": -3.3042478561401367, "logits/rejected": -3.316617965698242, "loss": 1.1296, "step": 161 }, { "beta_dpo/beta": 0.22748082876205444, "beta_dpo/beta_margin_grad_mean": -0.20840942859649658, "beta_dpo/beta_margin_grad_std": 0.28813979029655457, "beta_dpo/beta_margin_mean": 4.266569137573242, "beta_dpo/beta_margin_std": 5.0452094078063965, "beta_dpo/beta_used": 0.22748082876205444, "beta_dpo/beta_used_raw": 0.22748082876205444, "beta_dpo/gap_mean": 14.301603317260742, "beta_dpo/gap_std": 20.577198028564453, "beta_dpo/loss_margin_mean": 17.888410568237305, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24489795918367346, "grad_norm": 234.487060546875, "learning_rate": 4.697358159051549e-07, "logits/chosen": -3.3278586864471436, "logits/rejected": -3.3324155807495117, "loss": 0.8148, "step": 162 }, { "beta_dpo/beta": 0.1334669440984726, "beta_dpo/beta_margin_grad_mean": -0.32191550731658936, "beta_dpo/beta_margin_grad_std": 0.23917384445667267, "beta_dpo/beta_margin_mean": 2.9123542308807373, "beta_dpo/beta_margin_std": 4.524738788604736, "beta_dpo/beta_used": 0.1334669440984726, "beta_dpo/beta_used_raw": 0.0748547613620758, "beta_dpo/gap_mean": 14.945584297180176, "beta_dpo/gap_std": 20.219942092895508, "beta_dpo/loss_margin_mean": 17.941997528076172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24640967498110355, "grad_norm": 129.17434692382812, "learning_rate": 4.691021444652876e-07, "logits/chosen": -3.31915020942688, "logits/rejected": -3.3229875564575195, "loss": 0.9564, "step": 163 }, { "beta_dpo/beta": 0.09072095900774002, "beta_dpo/beta_margin_grad_mean": -0.32996121048927307, "beta_dpo/beta_margin_grad_std": 0.2191038280725479, "beta_dpo/beta_margin_mean": 1.4329532384872437, "beta_dpo/beta_margin_std": 2.1871438026428223, "beta_dpo/beta_used": 0.09072095900774002, "beta_dpo/beta_used_raw": 0.09072095900774002, "beta_dpo/gap_mean": 15.211969375610352, "beta_dpo/gap_std": 20.254531860351562, "beta_dpo/loss_margin_mean": 14.932231903076172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24792139077853365, "grad_norm": 70.2077865600586, "learning_rate": 4.6846234426744624e-07, "logits/chosen": -3.3396153450012207, "logits/rejected": -3.347288131713867, "loss": 0.8678, "step": 164 }, { "beta_dpo/beta": 0.12051883339881897, "beta_dpo/beta_margin_grad_mean": -0.2684549391269684, "beta_dpo/beta_margin_grad_std": 0.24503667652606964, "beta_dpo/beta_margin_mean": 1.8624266386032104, "beta_dpo/beta_margin_std": 2.4064364433288574, "beta_dpo/beta_used": 0.12051883339881897, "beta_dpo/beta_used_raw": 0.12051883339881897, "beta_dpo/gap_mean": 15.14497184753418, "beta_dpo/gap_std": 19.699504852294922, "beta_dpo/loss_margin_mean": 14.89899730682373, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2494331065759637, "grad_norm": 89.30416107177734, "learning_rate": 4.678164332082175e-07, "logits/chosen": -3.311272144317627, "logits/rejected": -3.313823938369751, "loss": 0.6259, "step": 165 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49724671244621277, "beta_dpo/beta_margin_grad_std": 0.0040176804177463055, "beta_dpo/beta_margin_mean": 0.011014166288077831, "beta_dpo/beta_margin_std": 0.016072683036327362, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1693364530801773, "beta_dpo/gap_mean": 14.756368637084961, "beta_dpo/gap_std": 19.05362319946289, "beta_dpo/loss_margin_mean": 11.014165878295898, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2509448223733938, "grad_norm": 1.6793140172958374, "learning_rate": 4.6716442935512214e-07, "logits/chosen": -3.310637950897217, "logits/rejected": -3.3459417819976807, "loss": 1.3761, "step": 166 }, { "beta_dpo/beta": 0.06182756647467613, "beta_dpo/beta_margin_grad_mean": -0.3825829327106476, "beta_dpo/beta_margin_grad_std": 0.2320934534072876, "beta_dpo/beta_margin_mean": 0.8981536626815796, "beta_dpo/beta_margin_std": 1.8215402364730835, "beta_dpo/beta_used": 0.06182756647467613, "beta_dpo/beta_used_raw": 0.020578034222126007, "beta_dpo/gap_mean": 14.261093139648438, "beta_dpo/gap_std": 18.644453048706055, "beta_dpo/loss_margin_mean": 13.281981468200684, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25245653817082386, "grad_norm": 78.32828521728516, "learning_rate": 4.6650635094610966e-07, "logits/chosen": -3.3038928508758545, "logits/rejected": -3.306029796600342, "loss": 1.0166, "step": 167 }, { "beta_dpo/beta": 0.027534862980246544, "beta_dpo/beta_margin_grad_mean": -0.4381820559501648, "beta_dpo/beta_margin_grad_std": 0.15032429993152618, "beta_dpo/beta_margin_mean": 0.2957206666469574, "beta_dpo/beta_margin_std": 0.7526847124099731, "beta_dpo/beta_used": 0.027534862980246544, "beta_dpo/beta_used_raw": 0.027534862980246544, "beta_dpo/gap_mean": 13.721107482910156, "beta_dpo/gap_std": 18.38541603088379, "beta_dpo/loss_margin_mean": 11.09146499633789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25396825396825395, "grad_norm": 39.39701461791992, "learning_rate": 4.6584221638904767e-07, "logits/chosen": -3.3236608505249023, "logits/rejected": -3.339106559753418, "loss": 1.146, "step": 168 }, { "beta_dpo/beta": 0.012150160036981106, "beta_dpo/beta_margin_grad_mean": -0.4621245861053467, "beta_dpo/beta_margin_grad_std": 0.07575680315494537, "beta_dpo/beta_margin_mean": 0.15889044106006622, "beta_dpo/beta_margin_std": 0.32937243580818176, "beta_dpo/beta_used": 0.012150160036981106, "beta_dpo/beta_used_raw": -0.017097095027565956, "beta_dpo/gap_mean": 13.431455612182617, "beta_dpo/gap_std": 18.825054168701172, "beta_dpo/loss_margin_mean": 12.781668663024902, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25547996976568405, "grad_norm": 19.16012191772461, "learning_rate": 4.651720442612075e-07, "logits/chosen": -3.2991950511932373, "logits/rejected": -3.3047733306884766, "loss": 1.2584, "step": 169 }, { "beta_dpo/beta": 0.2144870012998581, "beta_dpo/beta_margin_grad_mean": -0.20379450917243958, "beta_dpo/beta_margin_grad_std": 0.27501732110977173, "beta_dpo/beta_margin_mean": 3.4251480102539062, "beta_dpo/beta_margin_std": 4.072988033294678, "beta_dpo/beta_used": 0.2144870012998581, "beta_dpo/beta_used_raw": 0.2144870012998581, "beta_dpo/gap_mean": 13.796789169311523, "beta_dpo/gap_std": 18.566261291503906, "beta_dpo/loss_margin_mean": 15.981682777404785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25699168556311414, "grad_norm": 142.732666015625, "learning_rate": 4.6449585330874425e-07, "logits/chosen": -3.3022265434265137, "logits/rejected": -3.3024675846099854, "loss": 0.5073, "step": 170 }, { "beta_dpo/beta": 0.07076213508844376, "beta_dpo/beta_margin_grad_mean": -0.34848496317863464, "beta_dpo/beta_margin_grad_std": 0.24325984716415405, "beta_dpo/beta_margin_mean": 1.2304704189300537, "beta_dpo/beta_margin_std": 2.1269867420196533, "beta_dpo/beta_used": 0.07076213508844376, "beta_dpo/beta_used_raw": 0.03332207724452019, "beta_dpo/gap_mean": 14.206443786621094, "beta_dpo/gap_std": 18.874755859375, "beta_dpo/loss_margin_mean": 15.383790016174316, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2585034013605442, "grad_norm": 105.55207061767578, "learning_rate": 4.6381366244617224e-07, "logits/chosen": -3.3019609451293945, "logits/rejected": -3.3179931640625, "loss": 1.109, "step": 171 }, { "beta_dpo/beta": 0.0536719411611557, "beta_dpo/beta_margin_grad_mean": -0.33865830302238464, "beta_dpo/beta_margin_grad_std": 0.2031082957983017, "beta_dpo/beta_margin_mean": 0.841856062412262, "beta_dpo/beta_margin_std": 1.1628142595291138, "beta_dpo/beta_used": 0.0536719411611557, "beta_dpo/beta_used_raw": 0.0536719411611557, "beta_dpo/gap_mean": 14.301804542541504, "beta_dpo/gap_std": 19.452861785888672, "beta_dpo/loss_margin_mean": 16.217395782470703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2600151171579743, "grad_norm": 58.22761154174805, "learning_rate": 4.631254907558365e-07, "logits/chosen": -3.3333513736724854, "logits/rejected": -3.3494720458984375, "loss": 0.9375, "step": 172 }, { "beta_dpo/beta": 0.21331170201301575, "beta_dpo/beta_margin_grad_mean": -0.3400728702545166, "beta_dpo/beta_margin_grad_std": 0.2709886133670807, "beta_dpo/beta_margin_mean": 4.779065132141113, "beta_dpo/beta_margin_std": 7.826768398284912, "beta_dpo/beta_used": 0.21331170201301575, "beta_dpo/beta_used_raw": 0.18569156527519226, "beta_dpo/gap_mean": 15.125507354736328, "beta_dpo/gap_std": 20.31143569946289, "beta_dpo/loss_margin_mean": 16.441253662109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2615268329554044, "grad_norm": 292.74566650390625, "learning_rate": 4.624313574873786e-07, "logits/chosen": -3.304172992706299, "logits/rejected": -3.322803020477295, "loss": 1.0188, "step": 173 }, { "beta_dpo/beta": 0.0062421588227152824, "beta_dpo/beta_margin_grad_mean": -0.47547459602355957, "beta_dpo/beta_margin_grad_std": 0.03574493154883385, "beta_dpo/beta_margin_mean": 0.09859683364629745, "beta_dpo/beta_margin_std": 0.1440763771533966, "beta_dpo/beta_used": 0.0062421588227152824, "beta_dpo/beta_used_raw": 0.0062421588227152824, "beta_dpo/gap_mean": 14.888914108276367, "beta_dpo/gap_std": 21.216093063354492, "beta_dpo/loss_margin_mean": 15.613706588745117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26303854875283444, "grad_norm": 10.061347007751465, "learning_rate": 4.61731282057198e-07, "logits/chosen": -3.2965517044067383, "logits/rejected": -3.316274404525757, "loss": 1.3077, "step": 174 }, { "beta_dpo/beta": 0.2785711884498596, "beta_dpo/beta_margin_grad_mean": -0.22018657624721527, "beta_dpo/beta_margin_grad_std": 0.3459617793560028, "beta_dpo/beta_margin_mean": 5.1916093826293945, "beta_dpo/beta_margin_std": 6.421470642089844, "beta_dpo/beta_used": 0.2785711884498596, "beta_dpo/beta_used_raw": 0.2785711884498596, "beta_dpo/gap_mean": 15.445871353149414, "beta_dpo/gap_std": 21.41485595703125, "beta_dpo/loss_margin_mean": 18.914648056030273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26455026455026454, "grad_norm": 280.62322998046875, "learning_rate": 4.6102528404790965e-07, "logits/chosen": -3.3232455253601074, "logits/rejected": -3.315721035003662, "loss": 0.7615, "step": 175 }, { "beta_dpo/beta": 0.09155848622322083, "beta_dpo/beta_margin_grad_mean": -0.35378387570381165, "beta_dpo/beta_margin_grad_std": 0.24764010310173035, "beta_dpo/beta_margin_mean": 1.6799997091293335, "beta_dpo/beta_margin_std": 3.233999252319336, "beta_dpo/beta_used": 0.09155848622322083, "beta_dpo/beta_used_raw": -0.0362052246928215, "beta_dpo/gap_mean": 15.972650527954102, "beta_dpo/gap_std": 21.989521026611328, "beta_dpo/loss_margin_mean": 16.272171020507812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2660619803476946, "grad_norm": 125.4261474609375, "learning_rate": 4.603133832077953e-07, "logits/chosen": -3.324751615524292, "logits/rejected": -3.3269219398498535, "loss": 1.0249, "step": 176 }, { "beta_dpo/beta": 0.18628999590873718, "beta_dpo/beta_margin_grad_mean": -0.169387549161911, "beta_dpo/beta_margin_grad_std": 0.2704985439777374, "beta_dpo/beta_margin_mean": 4.050033092498779, "beta_dpo/beta_margin_std": 4.005526542663574, "beta_dpo/beta_used": 0.18628999590873718, "beta_dpo/beta_used_raw": 0.18628999590873718, "beta_dpo/gap_mean": 16.732494354248047, "beta_dpo/gap_std": 21.966861724853516, "beta_dpo/loss_margin_mean": 21.59198570251465, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2675736961451247, "grad_norm": 186.697265625, "learning_rate": 4.5959559945025183e-07, "logits/chosen": -3.311750888824463, "logits/rejected": -3.3363020420074463, "loss": 0.6518, "step": 177 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49686744809150696, "beta_dpo/beta_margin_grad_std": 0.005036898888647556, "beta_dpo/beta_margin_mean": 0.012532144784927368, "beta_dpo/beta_margin_std": 0.02015141397714615, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22806313633918762, "beta_dpo/gap_mean": 16.291656494140625, "beta_dpo/gap_std": 21.730037689208984, "beta_dpo/loss_margin_mean": 12.532143592834473, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2690854119425548, "grad_norm": 1.6683392524719238, "learning_rate": 4.588719528532341e-07, "logits/chosen": -3.304107666015625, "logits/rejected": -3.312753200531006, "loss": 1.3756, "step": 178 }, { "beta_dpo/beta": 0.08604112267494202, "beta_dpo/beta_margin_grad_mean": -0.36135733127593994, "beta_dpo/beta_margin_grad_std": 0.25707748532295227, "beta_dpo/beta_margin_mean": 1.8054059743881226, "beta_dpo/beta_margin_std": 3.473832845687866, "beta_dpo/beta_used": 0.08604112267494202, "beta_dpo/beta_used_raw": -0.0028993189334869385, "beta_dpo/gap_mean": 16.365154266357422, "beta_dpo/gap_std": 21.843692779541016, "beta_dpo/loss_margin_mean": 18.55828094482422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2705971277399849, "grad_norm": 118.9190902709961, "learning_rate": 4.581424636586928e-07, "logits/chosen": -3.2978873252868652, "logits/rejected": -3.296720266342163, "loss": 1.0073, "step": 179 }, { "beta_dpo/beta": 0.025969501584768295, "beta_dpo/beta_margin_grad_mean": -0.4097326695919037, "beta_dpo/beta_margin_grad_std": 0.15282127261161804, "beta_dpo/beta_margin_mean": 0.4748833477497101, "beta_dpo/beta_margin_std": 0.8532183170318604, "beta_dpo/beta_used": 0.025969501584768295, "beta_dpo/beta_used_raw": -0.14239290356636047, "beta_dpo/gap_mean": 16.275737762451172, "beta_dpo/gap_std": 22.001022338867188, "beta_dpo/loss_margin_mean": 15.27760124206543, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.272108843537415, "grad_norm": 30.002450942993164, "learning_rate": 4.5740715227200897e-07, "logits/chosen": -3.3086326122283936, "logits/rejected": -3.3033132553100586, "loss": 1.1487, "step": 180 }, { "beta_dpo/beta": 0.14988866448402405, "beta_dpo/beta_margin_grad_mean": -0.29601407051086426, "beta_dpo/beta_margin_grad_std": 0.2319822609424591, "beta_dpo/beta_margin_mean": 3.5474066734313965, "beta_dpo/beta_margin_std": 5.365967750549316, "beta_dpo/beta_used": 0.14988866448402405, "beta_dpo/beta_used_raw": 0.10269590467214584, "beta_dpo/gap_mean": 16.893951416015625, "beta_dpo/gap_std": 21.91824722290039, "beta_dpo/loss_margin_mean": 20.77993392944336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.273620559334845, "grad_norm": 72.06979370117188, "learning_rate": 4.566660392614228e-07, "logits/chosen": -3.303773880004883, "logits/rejected": -3.3182597160339355, "loss": 0.8003, "step": 181 }, { "beta_dpo/beta": 0.16635611653327942, "beta_dpo/beta_margin_grad_mean": -0.3139011263847351, "beta_dpo/beta_margin_grad_std": 0.28551292419433594, "beta_dpo/beta_margin_mean": 3.821103811264038, "beta_dpo/beta_margin_std": 7.0303215980529785, "beta_dpo/beta_used": 0.16635611653327942, "beta_dpo/beta_used_raw": 0.16635611653327942, "beta_dpo/gap_mean": 17.882640838623047, "beta_dpo/gap_std": 22.00394630432129, "beta_dpo/loss_margin_mean": 21.96054458618164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2751322751322751, "grad_norm": 389.318603515625, "learning_rate": 4.5591914535745817e-07, "logits/chosen": -3.2827534675598145, "logits/rejected": -3.3011820316314697, "loss": 1.3734, "step": 182 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4973997473716736, "beta_dpo/beta_margin_grad_std": 0.005496290512382984, "beta_dpo/beta_margin_mean": 0.010402663610875607, "beta_dpo/beta_margin_std": 0.021989716216921806, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2942630350589752, "beta_dpo/gap_mean": 17.208568572998047, "beta_dpo/gap_std": 22.319711685180664, "beta_dpo/loss_margin_mean": 10.402663230895996, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2766439909297052, "grad_norm": 1.7042104005813599, "learning_rate": 4.551664914523433e-07, "logits/chosen": -3.2784993648529053, "logits/rejected": -3.2837271690368652, "loss": 1.3758, "step": 183 }, { "beta_dpo/beta": 0.06759776175022125, "beta_dpo/beta_margin_grad_mean": -0.3611212968826294, "beta_dpo/beta_margin_grad_std": 0.2309117615222931, "beta_dpo/beta_margin_mean": 1.376258134841919, "beta_dpo/beta_margin_std": 2.513367176055908, "beta_dpo/beta_used": 0.06759776175022125, "beta_dpo/beta_used_raw": 0.04414837062358856, "beta_dpo/gap_mean": 17.1044921875, "beta_dpo/gap_std": 22.272825241088867, "beta_dpo/loss_margin_mean": 19.561098098754883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2781557067271353, "grad_norm": 130.62432861328125, "learning_rate": 4.544080985994258e-07, "logits/chosen": -3.2587945461273193, "logits/rejected": -3.2747392654418945, "loss": 1.1321, "step": 184 }, { "beta_dpo/beta": 0.29575276374816895, "beta_dpo/beta_margin_grad_mean": -0.273440957069397, "beta_dpo/beta_margin_grad_std": 0.36424484848976135, "beta_dpo/beta_margin_mean": 5.976340293884277, "beta_dpo/beta_margin_std": 9.795293807983398, "beta_dpo/beta_used": 0.29575276374816895, "beta_dpo/beta_used_raw": 0.29575276374816895, "beta_dpo/gap_mean": 17.594070434570312, "beta_dpo/gap_std": 22.862598419189453, "beta_dpo/loss_margin_mean": 19.736000061035156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2796674225245654, "grad_norm": 400.15191650390625, "learning_rate": 4.5364398801258394e-07, "logits/chosen": -3.2343192100524902, "logits/rejected": -3.2469515800476074, "loss": 1.1488, "step": 185 }, { "beta_dpo/beta": 0.13505233824253082, "beta_dpo/beta_margin_grad_mean": -0.3404614329338074, "beta_dpo/beta_margin_grad_std": 0.2723199725151062, "beta_dpo/beta_margin_mean": 3.0053255558013916, "beta_dpo/beta_margin_std": 5.504380226135254, "beta_dpo/beta_used": 0.13505233824253082, "beta_dpo/beta_used_raw": 0.07200624793767929, "beta_dpo/gap_mean": 18.211688995361328, "beta_dpo/gap_std": 23.061481475830078, "beta_dpo/loss_margin_mean": 21.62055015563965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2811791383219955, "grad_norm": 185.14002990722656, "learning_rate": 4.5287418106563354e-07, "logits/chosen": -3.274827003479004, "logits/rejected": -3.2913155555725098, "loss": 0.9694, "step": 186 }, { "beta_dpo/beta": 0.13739535212516785, "beta_dpo/beta_margin_grad_mean": -0.3313996195793152, "beta_dpo/beta_margin_grad_std": 0.2579861283302307, "beta_dpo/beta_margin_mean": 2.851552963256836, "beta_dpo/beta_margin_std": 5.720654010772705, "beta_dpo/beta_used": 0.13739535212516785, "beta_dpo/beta_used_raw": 0.13739535212516785, "beta_dpo/gap_mean": 18.755878448486328, "beta_dpo/gap_std": 23.751571655273438, "beta_dpo/loss_margin_mean": 20.688385009765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28269085411942557, "grad_norm": 96.4170150756836, "learning_rate": 4.520986992917297e-07, "logits/chosen": -3.285475730895996, "logits/rejected": -3.286539316177368, "loss": 0.8125, "step": 187 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.494864821434021, "beta_dpo/beta_margin_grad_std": 0.006589571945369244, "beta_dpo/beta_margin_mean": 0.02054525725543499, "beta_dpo/beta_margin_std": 0.02636602707207203, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11430029571056366, "beta_dpo/gap_mean": 19.172245025634766, "beta_dpo/gap_std": 24.501110076904297, "beta_dpo/loss_margin_mean": 20.545255661010742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2842025699168556, "grad_norm": 2.0644304752349854, "learning_rate": 4.5131756438276466e-07, "logits/chosen": -3.2795681953430176, "logits/rejected": -3.2722291946411133, "loss": 1.3709, "step": 188 }, { "beta_dpo/beta": 0.14308232069015503, "beta_dpo/beta_margin_grad_mean": -0.25916537642478943, "beta_dpo/beta_margin_grad_std": 0.3050540089607239, "beta_dpo/beta_margin_mean": 2.8555290699005127, "beta_dpo/beta_margin_std": 3.7916877269744873, "beta_dpo/beta_used": 0.14308232069015503, "beta_dpo/beta_used_raw": 0.14308232069015503, "beta_dpo/gap_mean": 19.251052856445312, "beta_dpo/gap_std": 24.474946975708008, "beta_dpo/loss_margin_mean": 18.743436813354492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2857142857142857, "grad_norm": 160.66934204101562, "learning_rate": 4.5053079818876096e-07, "logits/chosen": -3.262721061706543, "logits/rejected": -3.2614657878875732, "loss": 0.6836, "step": 189 }, { "beta_dpo/beta": 0.03282923623919487, "beta_dpo/beta_margin_grad_mean": -0.38839754462242126, "beta_dpo/beta_margin_grad_std": 0.19055677950382233, "beta_dpo/beta_margin_mean": 0.6823714971542358, "beta_dpo/beta_margin_std": 1.2347010374069214, "beta_dpo/beta_used": 0.03282923623919487, "beta_dpo/beta_used_raw": 0.02927407994866371, "beta_dpo/gap_mean": 19.367252349853516, "beta_dpo/gap_std": 24.682527542114258, "beta_dpo/loss_margin_mean": 21.144336700439453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2872260015117158, "grad_norm": 47.77622604370117, "learning_rate": 4.4973842271726024e-07, "logits/chosen": -3.2488441467285156, "logits/rejected": -3.2798590660095215, "loss": 1.0755, "step": 190 }, { "beta_dpo/beta": 0.11748480051755905, "beta_dpo/beta_margin_grad_mean": -0.29674074053764343, "beta_dpo/beta_margin_grad_std": 0.2827965021133423, "beta_dpo/beta_margin_mean": 2.221395254135132, "beta_dpo/beta_margin_std": 3.81189227104187, "beta_dpo/beta_used": 0.11748480051755905, "beta_dpo/beta_used_raw": 0.11748480051755905, "beta_dpo/gap_mean": 19.401161193847656, "beta_dpo/gap_std": 25.253219604492188, "beta_dpo/loss_margin_mean": 19.080123901367188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2887377173091459, "grad_norm": 155.86167907714844, "learning_rate": 4.48940460132708e-07, "logits/chosen": -3.2814383506774902, "logits/rejected": -3.2890639305114746, "loss": 0.7357, "step": 191 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4971839189529419, "beta_dpo/beta_margin_grad_std": 0.006073605734854937, "beta_dpo/beta_margin_mean": 0.011266072280704975, "beta_dpo/beta_margin_std": 0.024298807606101036, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2048242688179016, "beta_dpo/gap_mean": 18.151763916015625, "beta_dpo/gap_std": 25.211868286132812, "beta_dpo/loss_margin_mean": 11.266072273254395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29024943310657597, "grad_norm": 1.9646292924880981, "learning_rate": 4.481369327558329e-07, "logits/chosen": -3.246117115020752, "logits/rejected": -3.2485339641571045, "loss": 1.3733, "step": 192 }, { "beta_dpo/beta": 0.11741842329502106, "beta_dpo/beta_margin_grad_mean": -0.35161691904067993, "beta_dpo/beta_margin_grad_std": 0.277233749628067, "beta_dpo/beta_margin_mean": 2.721064805984497, "beta_dpo/beta_margin_std": 5.209236145019531, "beta_dpo/beta_used": 0.11741842329502106, "beta_dpo/beta_used_raw": 0.10825469344854355, "beta_dpo/gap_mean": 18.415042877197266, "beta_dpo/gap_std": 25.374204635620117, "beta_dpo/loss_margin_mean": 21.18407440185547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29176114890400606, "grad_norm": 230.92539978027344, "learning_rate": 4.47327863063023e-07, "logits/chosen": -3.228006362915039, "logits/rejected": -3.219388723373413, "loss": 1.2094, "step": 193 }, { "beta_dpo/beta": 0.23822082579135895, "beta_dpo/beta_margin_grad_mean": -0.39058759808540344, "beta_dpo/beta_margin_grad_std": 0.3258950114250183, "beta_dpo/beta_margin_mean": 6.6397705078125, "beta_dpo/beta_margin_std": 12.369970321655273, "beta_dpo/beta_used": 0.23822082579135895, "beta_dpo/beta_used_raw": 0.13888207077980042, "beta_dpo/gap_mean": 19.15418243408203, "beta_dpo/gap_std": 25.954063415527344, "beta_dpo/loss_margin_mean": 21.19514274597168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29327286470143615, "grad_norm": 779.953369140625, "learning_rate": 4.4651327368569684e-07, "logits/chosen": -3.2417874336242676, "logits/rejected": -3.2450783252716064, "loss": 1.9997, "step": 194 }, { "beta_dpo/beta": 0.2709110677242279, "beta_dpo/beta_margin_grad_mean": -0.17781004309654236, "beta_dpo/beta_margin_grad_std": 0.30291855335235596, "beta_dpo/beta_margin_mean": 6.938320636749268, "beta_dpo/beta_margin_std": 8.197758674621582, "beta_dpo/beta_used": 0.2709110677242279, "beta_dpo/beta_used_raw": 0.2709110677242279, "beta_dpo/gap_mean": 19.833911895751953, "beta_dpo/gap_std": 25.908788681030273, "beta_dpo/loss_margin_mean": 25.43059539794922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2947845804988662, "grad_norm": 454.9778137207031, "learning_rate": 4.4569318740967043e-07, "logits/chosen": -3.2771263122558594, "logits/rejected": -3.2649998664855957, "loss": 1.4759, "step": 195 }, { "beta_dpo/beta": 0.006062302738428116, "beta_dpo/beta_margin_grad_mean": -0.47434884309768677, "beta_dpo/beta_margin_grad_std": 0.05719894543290138, "beta_dpo/beta_margin_mean": 0.10431405156850815, "beta_dpo/beta_margin_std": 0.23538783192634583, "beta_dpo/beta_used": 0.006062302738428116, "beta_dpo/beta_used_raw": -0.002708437852561474, "beta_dpo/gap_mean": 20.151742935180664, "beta_dpo/gap_std": 26.28466796875, "beta_dpo/loss_margin_mean": 21.88735580444336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2962962962962963, "grad_norm": 15.817523956298828, "learning_rate": 4.448676271745197e-07, "logits/chosen": -3.2736239433288574, "logits/rejected": -3.2925784587860107, "loss": 1.2881, "step": 196 }, { "beta_dpo/beta": 0.29121875762939453, "beta_dpo/beta_margin_grad_mean": -0.20116354525089264, "beta_dpo/beta_margin_grad_std": 0.3477545380592346, "beta_dpo/beta_margin_mean": 6.0863165855407715, "beta_dpo/beta_margin_std": 8.08215045928955, "beta_dpo/beta_used": 0.29121875762939453, "beta_dpo/beta_used_raw": 0.29121875762939453, "beta_dpo/gap_mean": 20.43471908569336, "beta_dpo/gap_std": 26.642892837524414, "beta_dpo/loss_margin_mean": 20.978351593017578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29780801209372637, "grad_norm": 315.002685546875, "learning_rate": 4.440366160729392e-07, "logits/chosen": -3.2312936782836914, "logits/rejected": -3.2578063011169434, "loss": 0.7178, "step": 197 }, { "beta_dpo/beta": 0.26085951924324036, "beta_dpo/beta_margin_grad_mean": -0.35325923562049866, "beta_dpo/beta_margin_grad_std": 0.3079518675804138, "beta_dpo/beta_margin_mean": 6.148621559143066, "beta_dpo/beta_margin_std": 10.981660842895508, "beta_dpo/beta_used": 0.26085951924324036, "beta_dpo/beta_used_raw": 0.17097902297973633, "beta_dpo/gap_mean": 20.784318923950195, "beta_dpo/gap_std": 26.447837829589844, "beta_dpo/loss_margin_mean": 22.010072708129883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29931972789115646, "grad_norm": 468.2619323730469, "learning_rate": 4.432001773500957e-07, "logits/chosen": -3.237609386444092, "logits/rejected": -3.2565200328826904, "loss": 1.0384, "step": 198 }, { "beta_dpo/beta": 0.1917811632156372, "beta_dpo/beta_margin_grad_mean": -0.29717642068862915, "beta_dpo/beta_margin_grad_std": 0.27208179235458374, "beta_dpo/beta_margin_mean": 4.629688739776611, "beta_dpo/beta_margin_std": 8.40585708618164, "beta_dpo/beta_used": 0.1917811632156372, "beta_dpo/beta_used_raw": 0.1917811632156372, "beta_dpo/gap_mean": 20.602859497070312, "beta_dpo/gap_std": 26.541152954101562, "beta_dpo/loss_margin_mean": 20.05705451965332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30083144368858655, "grad_norm": 306.9058532714844, "learning_rate": 4.4235833440297856e-07, "logits/chosen": -3.2496700286865234, "logits/rejected": -3.2558960914611816, "loss": 1.1221, "step": 199 }, { "beta_dpo/beta": 0.13134683668613434, "beta_dpo/beta_margin_grad_mean": -0.36728352308273315, "beta_dpo/beta_margin_grad_std": 0.28023678064346313, "beta_dpo/beta_margin_mean": 3.4423487186431885, "beta_dpo/beta_margin_std": 6.672379493713379, "beta_dpo/beta_used": 0.13134683668613434, "beta_dpo/beta_used_raw": 0.1196913868188858, "beta_dpo/gap_mean": 21.225582122802734, "beta_dpo/gap_std": 27.08563995361328, "beta_dpo/loss_margin_mean": 22.524049758911133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30234315948601664, "grad_norm": 221.3130340576172, "learning_rate": 4.415111107797445e-07, "logits/chosen": -3.266768455505371, "logits/rejected": -3.2701830863952637, "loss": 1.0952, "step": 200 }, { "epoch": 0.30234315948601664, "eval_beta_dpo/beta": 0.016000408679246902, "eval_beta_dpo/beta_margin_grad_mean": -0.46594667434692383, "eval_beta_dpo/beta_margin_grad_std": 0.04558912664651871, "eval_beta_dpo/beta_margin_mean": 0.34341442584991455, "eval_beta_dpo/beta_margin_std": 0.4801006615161896, "eval_beta_dpo/beta_used": 0.016000408679246902, "eval_beta_dpo/beta_used_raw": -0.2764070928096771, "eval_beta_dpo/gap_mean": 21.098539352416992, "eval_beta_dpo/gap_std": 27.064327239990234, "eval_beta_dpo/loss_margin_mean": 14.82508659362793, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -3.2892954349517822, "eval_logits/rejected": -3.294360637664795, "eval_loss": 0.6745692491531372, "eval_runtime": 36.2978, "eval_samples_per_second": 63.447, "eval_steps_per_second": 1.984, "step": 200 }, { "beta_dpo/beta": 0.16286759078502655, "beta_dpo/beta_margin_grad_mean": -0.3552103340625763, "beta_dpo/beta_margin_grad_std": 0.28962671756744385, "beta_dpo/beta_margin_mean": 4.282315254211426, "beta_dpo/beta_margin_std": 8.11639404296875, "beta_dpo/beta_used": 0.16286759078502655, "beta_dpo/beta_used_raw": 0.07481355965137482, "beta_dpo/gap_mean": 21.18798065185547, "beta_dpo/gap_std": 27.247983932495117, "beta_dpo/loss_margin_mean": 19.636842727661133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30385487528344673, "grad_norm": 338.0337829589844, "learning_rate": 4.4065853017905953e-07, "logits/chosen": -3.2681736946105957, "logits/rejected": -3.267396926879883, "loss": 1.281, "step": 201 }, { "beta_dpo/beta": 0.041739847511053085, "beta_dpo/beta_margin_grad_mean": -0.37753400206565857, "beta_dpo/beta_margin_grad_std": 0.21136833727359772, "beta_dpo/beta_margin_mean": 0.6857576966285706, "beta_dpo/beta_margin_std": 1.3818042278289795, "beta_dpo/beta_used": 0.041739847511053085, "beta_dpo/beta_used_raw": 0.041739847511053085, "beta_dpo/gap_mean": 19.68051528930664, "beta_dpo/gap_std": 27.123966217041016, "beta_dpo/loss_margin_mean": 14.209870338439941, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30536659108087677, "grad_norm": 62.69889450073242, "learning_rate": 4.3980061644943575e-07, "logits/chosen": -3.2481625080108643, "logits/rejected": -3.2584476470947266, "loss": 0.9603, "step": 202 }, { "beta_dpo/beta": 0.07021359354257584, "beta_dpo/beta_margin_grad_mean": -0.3578731417655945, "beta_dpo/beta_margin_grad_std": 0.2622828483581543, "beta_dpo/beta_margin_mean": 1.4660753011703491, "beta_dpo/beta_margin_std": 3.2228591442108154, "beta_dpo/beta_used": 0.07021359354257584, "beta_dpo/beta_used_raw": -0.050061143934726715, "beta_dpo/gap_mean": 19.621105194091797, "beta_dpo/gap_std": 27.237117767333984, "beta_dpo/loss_margin_mean": 19.756534576416016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30687830687830686, "grad_norm": 130.79299926757812, "learning_rate": 4.3893739358856455e-07, "logits/chosen": -3.278709650039673, "logits/rejected": -3.3059582710266113, "loss": 1.0217, "step": 203 }, { "beta_dpo/beta": 0.29017174243927, "beta_dpo/beta_margin_grad_mean": -0.3226993680000305, "beta_dpo/beta_margin_grad_std": 0.29813989996910095, "beta_dpo/beta_margin_mean": 7.778024196624756, "beta_dpo/beta_margin_std": 12.823948860168457, "beta_dpo/beta_used": 0.29017174243927, "beta_dpo/beta_used_raw": 0.13998277485370636, "beta_dpo/gap_mean": 19.98705291748047, "beta_dpo/gap_std": 26.805776596069336, "beta_dpo/loss_margin_mean": 20.49005699157715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30839002267573695, "grad_norm": 338.6097412109375, "learning_rate": 4.380688857426449e-07, "logits/chosen": -3.238096237182617, "logits/rejected": -3.2416350841522217, "loss": 1.2486, "step": 204 }, { "beta_dpo/beta": 0.2872874438762665, "beta_dpo/beta_margin_grad_mean": -0.19735579192638397, "beta_dpo/beta_margin_grad_std": 0.34152576327323914, "beta_dpo/beta_margin_mean": 6.9864420890808105, "beta_dpo/beta_margin_std": 8.581732749938965, "beta_dpo/beta_used": 0.2872874438762665, "beta_dpo/beta_used_raw": 0.2872874438762665, "beta_dpo/gap_mean": 20.530967712402344, "beta_dpo/gap_std": 26.722898483276367, "beta_dpo/loss_margin_mean": 24.720294952392578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30990173847316704, "grad_norm": 366.4891357421875, "learning_rate": 4.3719511720570814e-07, "logits/chosen": -3.26456880569458, "logits/rejected": -3.279543876647949, "loss": 0.9758, "step": 205 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49615031480789185, "beta_dpo/beta_margin_grad_std": 0.0071373567916452885, "beta_dpo/beta_margin_mean": 0.015402463264763355, "beta_dpo/beta_margin_std": 0.02855776436626911, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22389058768749237, "beta_dpo/gap_mean": 19.731658935546875, "beta_dpo/gap_std": 27.339256286621094, "beta_dpo/loss_margin_mean": 15.402462005615234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31141345427059713, "grad_norm": 1.963984489440918, "learning_rate": 4.363161124189387e-07, "logits/chosen": -3.260132312774658, "logits/rejected": -3.271867275238037, "loss": 1.3721, "step": 206 }, { "beta_dpo/beta": 0.22665925323963165, "beta_dpo/beta_margin_grad_mean": -0.2844540476799011, "beta_dpo/beta_margin_grad_std": 0.2940859794616699, "beta_dpo/beta_margin_mean": 4.840798854827881, "beta_dpo/beta_margin_std": 8.58215618133545, "beta_dpo/beta_used": 0.22665925323963165, "beta_dpo/beta_used_raw": 0.22665925323963165, "beta_dpo/gap_mean": 19.807737350463867, "beta_dpo/gap_std": 27.32258415222168, "beta_dpo/loss_margin_mean": 20.418903350830078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3129251700680272, "grad_norm": 443.6466064453125, "learning_rate": 4.3543189596998986e-07, "logits/chosen": -3.270188808441162, "logits/rejected": -3.292062759399414, "loss": 1.324, "step": 207 }, { "beta_dpo/beta": 0.19777607917785645, "beta_dpo/beta_margin_grad_mean": -0.3751469850540161, "beta_dpo/beta_margin_grad_std": 0.304066926240921, "beta_dpo/beta_margin_mean": 4.046874046325684, "beta_dpo/beta_margin_std": 8.362845420837402, "beta_dpo/beta_used": 0.19777607917785645, "beta_dpo/beta_used_raw": 0.18612337112426758, "beta_dpo/gap_mean": 19.386688232421875, "beta_dpo/gap_std": 27.094337463378906, "beta_dpo/loss_margin_mean": 17.85321807861328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3144368858654573, "grad_norm": 328.3226623535156, "learning_rate": 4.3454249259229664e-07, "logits/chosen": -3.2676501274108887, "logits/rejected": -3.263576030731201, "loss": 1.2187, "step": 208 }, { "beta_dpo/beta": 0.25237298011779785, "beta_dpo/beta_margin_grad_mean": -0.22381648421287537, "beta_dpo/beta_margin_grad_std": 0.31607723236083984, "beta_dpo/beta_margin_mean": 5.337110996246338, "beta_dpo/beta_margin_std": 8.909046173095703, "beta_dpo/beta_used": 0.25237298011779785, "beta_dpo/beta_used_raw": 0.25237298011779785, "beta_dpo/gap_mean": 19.410289764404297, "beta_dpo/gap_std": 27.153701782226562, "beta_dpo/loss_margin_mean": 19.8518009185791, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31594860166288735, "grad_norm": 313.1184387207031, "learning_rate": 4.336479271643833e-07, "logits/chosen": -3.266294002532959, "logits/rejected": -3.2863268852233887, "loss": 0.6238, "step": 209 }, { "beta_dpo/beta": 0.17239555716514587, "beta_dpo/beta_margin_grad_mean": -0.3229129910469055, "beta_dpo/beta_margin_grad_std": 0.2825334370136261, "beta_dpo/beta_margin_mean": 4.328238010406494, "beta_dpo/beta_margin_std": 7.7128071784973145, "beta_dpo/beta_used": 0.17239555716514587, "beta_dpo/beta_used_raw": 0.17239555716514587, "beta_dpo/gap_mean": 19.874401092529297, "beta_dpo/gap_std": 27.465452194213867, "beta_dpo/loss_margin_mean": 22.63663673400879, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31746031746031744, "grad_norm": 297.1786804199219, "learning_rate": 4.327482247091679e-07, "logits/chosen": -3.2740461826324463, "logits/rejected": -3.299710512161255, "loss": 1.2982, "step": 210 }, { "beta_dpo/beta": 0.0435391403734684, "beta_dpo/beta_margin_grad_mean": -0.338135302066803, "beta_dpo/beta_margin_grad_std": 0.18870781362056732, "beta_dpo/beta_margin_mean": 0.8430750370025635, "beta_dpo/beta_margin_std": 1.062753677368164, "beta_dpo/beta_used": 0.0435391403734684, "beta_dpo/beta_used_raw": 0.0435391403734684, "beta_dpo/gap_mean": 19.97136878967285, "beta_dpo/gap_std": 27.011962890625, "beta_dpo/loss_margin_mean": 19.604583740234375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31897203325774753, "grad_norm": 67.0111312866211, "learning_rate": 4.3184341039326217e-07, "logits/chosen": -3.2469844818115234, "logits/rejected": -3.279109001159668, "loss": 0.8721, "step": 211 }, { "beta_dpo/beta": 0.20144376158714294, "beta_dpo/beta_margin_grad_mean": -0.23173686861991882, "beta_dpo/beta_margin_grad_std": 0.32392561435699463, "beta_dpo/beta_margin_mean": 4.304144382476807, "beta_dpo/beta_margin_std": 5.742385387420654, "beta_dpo/beta_used": 0.20144376158714294, "beta_dpo/beta_used_raw": 0.20144376158714294, "beta_dpo/gap_mean": 20.256540298461914, "beta_dpo/gap_std": 27.046674728393555, "beta_dpo/loss_margin_mean": 20.608171463012695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3204837490551776, "grad_norm": 216.76519775390625, "learning_rate": 4.309335095262675e-07, "logits/chosen": -3.25557804107666, "logits/rejected": -3.262904644012451, "loss": 0.7518, "step": 212 }, { "beta_dpo/beta": 0.04288367182016373, "beta_dpo/beta_margin_grad_mean": -0.39783409237861633, "beta_dpo/beta_margin_grad_std": 0.21729709208011627, "beta_dpo/beta_margin_mean": 0.784849226474762, "beta_dpo/beta_margin_std": 1.8496593236923218, "beta_dpo/beta_used": 0.04288367182016373, "beta_dpo/beta_used_raw": -0.05440632253885269, "beta_dpo/gap_mean": 19.813491821289062, "beta_dpo/gap_std": 27.621681213378906, "beta_dpo/loss_margin_mean": 17.58714485168457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3219954648526077, "grad_norm": 79.05302429199219, "learning_rate": 4.3001854756006724e-07, "logits/chosen": -3.295498847961426, "logits/rejected": -3.2903060913085938, "loss": 1.025, "step": 213 }, { "beta_dpo/beta": 0.14685657620429993, "beta_dpo/beta_margin_grad_mean": -0.2411939799785614, "beta_dpo/beta_margin_grad_std": 0.33106184005737305, "beta_dpo/beta_margin_mean": 3.089200973510742, "beta_dpo/beta_margin_std": 4.400245189666748, "beta_dpo/beta_used": 0.14685657620429993, "beta_dpo/beta_used_raw": 0.14685657620429993, "beta_dpo/gap_mean": 19.749910354614258, "beta_dpo/gap_std": 28.07058334350586, "beta_dpo/loss_margin_mean": 20.78150177001953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3235071806500378, "grad_norm": 147.87237548828125, "learning_rate": 4.290985500881143e-07, "logits/chosen": -3.282254457473755, "logits/rejected": -3.2713708877563477, "loss": 0.7766, "step": 214 }, { "beta_dpo/beta": 0.3372025489807129, "beta_dpo/beta_margin_grad_mean": -0.20430360734462738, "beta_dpo/beta_margin_grad_std": 0.3473961651325226, "beta_dpo/beta_margin_mean": 8.567434310913086, "beta_dpo/beta_margin_std": 10.161273956298828, "beta_dpo/beta_used": 0.3372025489807129, "beta_dpo/beta_used_raw": 0.3372025489807129, "beta_dpo/gap_mean": 20.45541763305664, "beta_dpo/gap_std": 28.54821014404297, "beta_dpo/loss_margin_mean": 24.713115692138672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3250188964474679, "grad_norm": 478.9543151855469, "learning_rate": 4.281735428447157e-07, "logits/chosen": -3.2881035804748535, "logits/rejected": -3.295775890350342, "loss": 0.9709, "step": 215 }, { "beta_dpo/beta": 0.029055660590529442, "beta_dpo/beta_margin_grad_mean": -0.40032076835632324, "beta_dpo/beta_margin_grad_std": 0.19772301614284515, "beta_dpo/beta_margin_mean": 0.6975875496864319, "beta_dpo/beta_margin_std": 1.4651895761489868, "beta_dpo/beta_used": 0.029055660590529442, "beta_dpo/beta_used_raw": 0.009354954585433006, "beta_dpo/gap_mean": 20.790666580200195, "beta_dpo/gap_std": 28.88433265686035, "beta_dpo/loss_margin_mean": 18.89692497253418, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.32653061224489793, "grad_norm": 56.74131393432617, "learning_rate": 4.2724355170431247e-07, "logits/chosen": -3.3080687522888184, "logits/rejected": -3.325687885284424, "loss": 1.0933, "step": 216 }, { "beta_dpo/beta": 0.11183890700340271, "beta_dpo/beta_margin_grad_mean": -0.3460896909236908, "beta_dpo/beta_margin_grad_std": 0.2733393907546997, "beta_dpo/beta_margin_mean": 2.4672842025756836, "beta_dpo/beta_margin_std": 5.133706092834473, "beta_dpo/beta_used": 0.11183890700340271, "beta_dpo/beta_used_raw": 0.11183890700340271, "beta_dpo/gap_mean": 20.666297912597656, "beta_dpo/gap_std": 28.4071044921875, "beta_dpo/loss_margin_mean": 22.13884162902832, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.328042328042328, "grad_norm": 205.36830139160156, "learning_rate": 4.26308602680756e-07, "logits/chosen": -3.277308940887451, "logits/rejected": -3.2828803062438965, "loss": 1.0462, "step": 217 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49692150950431824, "beta_dpo/beta_margin_grad_std": 0.007039310876280069, "beta_dpo/beta_margin_mean": 0.012317297048866749, "beta_dpo/beta_margin_std": 0.028165044263005257, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15160530805587769, "beta_dpo/gap_mean": 19.414508819580078, "beta_dpo/gap_std": 28.22842025756836, "beta_dpo/loss_margin_mean": 12.317296981811523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3295540438397581, "grad_norm": 2.0457305908203125, "learning_rate": 4.253687219265803e-07, "logits/chosen": -3.293532133102417, "logits/rejected": -3.290022611618042, "loss": 1.3712, "step": 218 }, { "beta_dpo/beta": 0.011915156617760658, "beta_dpo/beta_margin_grad_mean": -0.45243147015571594, "beta_dpo/beta_margin_grad_std": 0.0773100033402443, "beta_dpo/beta_margin_mean": 0.19927677512168884, "beta_dpo/beta_margin_std": 0.328296422958374, "beta_dpo/beta_used": 0.011915156617760658, "beta_dpo/beta_used_raw": -0.004701080732047558, "beta_dpo/gap_mean": 18.749467849731445, "beta_dpo/gap_std": 27.01136016845703, "beta_dpo/loss_margin_mean": 16.104183197021484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3310657596371882, "grad_norm": 25.289398193359375, "learning_rate": 4.2442393573227043e-07, "logits/chosen": -3.290285110473633, "logits/rejected": -3.2973287105560303, "loss": 1.211, "step": 219 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49513691663742065, "beta_dpo/beta_margin_grad_std": 0.0066979266703128815, "beta_dpo/beta_margin_mean": 0.019456947222352028, "beta_dpo/beta_margin_std": 0.026801228523254395, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16698744893074036, "beta_dpo/gap_mean": 18.785911560058594, "beta_dpo/gap_std": 26.530315399169922, "beta_dpo/loss_margin_mean": 19.456945419311523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3325774754346183, "grad_norm": 1.7459897994995117, "learning_rate": 4.234742705255272e-07, "logits/chosen": -3.255657196044922, "logits/rejected": -3.2655575275421143, "loss": 1.3721, "step": 220 }, { "beta_dpo/beta": 0.17717352509498596, "beta_dpo/beta_margin_grad_mean": -0.28028541803359985, "beta_dpo/beta_margin_grad_std": 0.3005788326263428, "beta_dpo/beta_margin_mean": 3.4370474815368652, "beta_dpo/beta_margin_std": 6.133222579956055, "beta_dpo/beta_used": 0.17717352509498596, "beta_dpo/beta_used_raw": 0.17717352509498596, "beta_dpo/gap_mean": 18.612773895263672, "beta_dpo/gap_std": 26.989688873291016, "beta_dpo/loss_margin_mean": 18.417015075683594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3340891912320484, "grad_norm": 160.68197631835938, "learning_rate": 4.22519752870528e-07, "logits/chosen": -3.317051410675049, "logits/rejected": -3.3476691246032715, "loss": 0.6941, "step": 221 }, { "beta_dpo/beta": 0.057558316737413406, "beta_dpo/beta_margin_grad_mean": -0.34050142765045166, "beta_dpo/beta_margin_grad_std": 0.21491679549217224, "beta_dpo/beta_margin_mean": 1.4228301048278809, "beta_dpo/beta_margin_std": 2.227675676345825, "beta_dpo/beta_used": 0.057558316737413406, "beta_dpo/beta_used_raw": -0.017362136393785477, "beta_dpo/gap_mean": 19.714611053466797, "beta_dpo/gap_std": 27.128393173217773, "beta_dpo/loss_margin_mean": 26.319135665893555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3356009070294785, "grad_norm": 81.41806030273438, "learning_rate": 4.2156040946718343e-07, "logits/chosen": -3.302428722381592, "logits/rejected": -3.330812454223633, "loss": 0.9621, "step": 222 }, { "beta_dpo/beta": 0.11043448746204376, "beta_dpo/beta_margin_grad_mean": -0.3506692051887512, "beta_dpo/beta_margin_grad_std": 0.27599218487739563, "beta_dpo/beta_margin_mean": 2.4996182918548584, "beta_dpo/beta_margin_std": 4.717770099639893, "beta_dpo/beta_used": 0.11043448746204376, "beta_dpo/beta_used_raw": 0.0481397770345211, "beta_dpo/gap_mean": 20.32034683227539, "beta_dpo/gap_std": 27.67890167236328, "beta_dpo/loss_margin_mean": 20.74583625793457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3371126228269085, "grad_norm": 229.6683349609375, "learning_rate": 4.2059626715039065e-07, "logits/chosen": -3.2972192764282227, "logits/rejected": -3.313932418823242, "loss": 1.323, "step": 223 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4962630271911621, "beta_dpo/beta_margin_grad_std": 0.006226621102541685, "beta_dpo/beta_margin_mean": 0.014951368793845177, "beta_dpo/beta_margin_std": 0.02491535060107708, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22395360469818115, "beta_dpo/gap_mean": 19.58163070678711, "beta_dpo/gap_std": 27.313385009765625, "beta_dpo/loss_margin_mean": 14.95136833190918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3386243386243386, "grad_norm": 1.8963252305984497, "learning_rate": 4.1962735288928304e-07, "logits/chosen": -3.3100385665893555, "logits/rejected": -3.324625253677368, "loss": 1.3722, "step": 224 }, { "beta_dpo/beta": 0.11167913675308228, "beta_dpo/beta_margin_grad_mean": -0.3448824882507324, "beta_dpo/beta_margin_grad_std": 0.28536394238471985, "beta_dpo/beta_margin_mean": 2.633960247039795, "beta_dpo/beta_margin_std": 4.381831169128418, "beta_dpo/beta_used": 0.11167913675308228, "beta_dpo/beta_used_raw": -0.005499660968780518, "beta_dpo/gap_mean": 19.675987243652344, "beta_dpo/gap_std": 26.83489227294922, "beta_dpo/loss_margin_mean": 21.864295959472656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3401360544217687, "grad_norm": 215.1800079345703, "learning_rate": 4.186536937864752e-07, "logits/chosen": -3.2780818939208984, "logits/rejected": -3.2924938201904297, "loss": 1.1357, "step": 225 }, { "beta_dpo/beta": 0.21144188940525055, "beta_dpo/beta_margin_grad_mean": -0.2692214846611023, "beta_dpo/beta_margin_grad_std": 0.3168647885322571, "beta_dpo/beta_margin_mean": 5.076745986938477, "beta_dpo/beta_margin_std": 9.32989501953125, "beta_dpo/beta_used": 0.21144188940525055, "beta_dpo/beta_used_raw": 0.21144188940525055, "beta_dpo/gap_mean": 20.018922805786133, "beta_dpo/gap_std": 26.834392547607422, "beta_dpo/loss_margin_mean": 22.54623031616211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3416477702191988, "grad_norm": 314.6019287109375, "learning_rate": 4.176753170773052e-07, "logits/chosen": -3.272228240966797, "logits/rejected": -3.277193069458008, "loss": 1.3053, "step": 226 }, { "beta_dpo/beta": 0.2749040126800537, "beta_dpo/beta_margin_grad_mean": -0.23924539983272552, "beta_dpo/beta_margin_grad_std": 0.35665369033813477, "beta_dpo/beta_margin_mean": 5.016239166259766, "beta_dpo/beta_margin_std": 7.605776309967041, "beta_dpo/beta_used": 0.2749040126800537, "beta_dpo/beta_used_raw": 0.2749040126800537, "beta_dpo/gap_mean": 20.03274154663086, "beta_dpo/gap_std": 27.213176727294922, "beta_dpo/loss_margin_mean": 17.853012084960938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3431594860166289, "grad_norm": 239.6156768798828, "learning_rate": 4.166922501290729e-07, "logits/chosen": -3.2811617851257324, "logits/rejected": -3.2786879539489746, "loss": 0.5139, "step": 227 }, { "beta_dpo/beta": 0.09486433863639832, "beta_dpo/beta_margin_grad_mean": -0.35757672786712646, "beta_dpo/beta_margin_grad_std": 0.27951931953430176, "beta_dpo/beta_margin_mean": 2.100579261779785, "beta_dpo/beta_margin_std": 4.581786632537842, "beta_dpo/beta_used": 0.09486433863639832, "beta_dpo/beta_used_raw": 0.039446499198675156, "beta_dpo/gap_mean": 20.004276275634766, "beta_dpo/gap_std": 27.34688949584961, "beta_dpo/loss_margin_mean": 21.535043716430664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34467120181405897, "grad_norm": 125.40672302246094, "learning_rate": 4.1570452044027405e-07, "logits/chosen": -3.26662015914917, "logits/rejected": -3.273747205734253, "loss": 0.9515, "step": 228 }, { "beta_dpo/beta": 0.15300440788269043, "beta_dpo/beta_margin_grad_mean": -0.29611581563949585, "beta_dpo/beta_margin_grad_std": 0.3123247027397156, "beta_dpo/beta_margin_mean": 3.213453531265259, "beta_dpo/beta_margin_std": 5.308069229125977, "beta_dpo/beta_used": 0.15300440788269043, "beta_dpo/beta_used_raw": 0.15300440788269043, "beta_dpo/gap_mean": 19.881502151489258, "beta_dpo/gap_std": 27.33250617980957, "beta_dpo/loss_margin_mean": 19.248979568481445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34618291761148906, "grad_norm": 204.28244018554688, "learning_rate": 4.147121556398312e-07, "logits/chosen": -3.281358242034912, "logits/rejected": -3.281156301498413, "loss": 0.8386, "step": 229 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4952212870121002, "beta_dpo/beta_margin_grad_std": 0.007806302979588509, "beta_dpo/beta_margin_mean": 0.019118983298540115, "beta_dpo/beta_margin_std": 0.03124173916876316, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08556269854307175, "beta_dpo/gap_mean": 19.63062286376953, "beta_dpo/gap_std": 27.971975326538086, "beta_dpo/loss_margin_mean": 19.118982315063477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3476946334089191, "grad_norm": 1.7689595222473145, "learning_rate": 4.137151834863213e-07, "logits/chosen": -3.2919821739196777, "logits/rejected": -3.292602062225342, "loss": 1.3699, "step": 230 }, { "beta_dpo/beta": 0.48542118072509766, "beta_dpo/beta_margin_grad_mean": -0.16059984266757965, "beta_dpo/beta_margin_grad_std": 0.3215428292751312, "beta_dpo/beta_margin_mean": 13.145872116088867, "beta_dpo/beta_margin_std": 14.5763521194458, "beta_dpo/beta_used": 0.48542118072509766, "beta_dpo/beta_used_raw": 0.48542118072509766, "beta_dpo/gap_mean": 20.86912727355957, "beta_dpo/gap_std": 27.646385192871094, "beta_dpo/loss_margin_mean": 25.624082565307617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3492063492063492, "grad_norm": 487.8958435058594, "learning_rate": 4.1271363186719835e-07, "logits/chosen": -3.275543689727783, "logits/rejected": -3.2655463218688965, "loss": 1.0957, "step": 231 }, { "beta_dpo/beta": 0.11720205843448639, "beta_dpo/beta_margin_grad_mean": -0.3646947145462036, "beta_dpo/beta_margin_grad_std": 0.286128968000412, "beta_dpo/beta_margin_mean": 2.2955892086029053, "beta_dpo/beta_margin_std": 4.999972343444824, "beta_dpo/beta_used": 0.11720205843448639, "beta_dpo/beta_used_raw": 0.02803657203912735, "beta_dpo/gap_mean": 20.76835823059082, "beta_dpo/gap_std": 27.87753677368164, "beta_dpo/loss_margin_mean": 20.00889015197754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3507180650037793, "grad_norm": 175.38694763183594, "learning_rate": 4.1170752879801436e-07, "logits/chosen": -3.2884902954101562, "logits/rejected": -3.3059821128845215, "loss": 1.1498, "step": 232 }, { "beta_dpo/beta": 0.03464564308524132, "beta_dpo/beta_margin_grad_mean": -0.39322006702423096, "beta_dpo/beta_margin_grad_std": 0.20333139598369598, "beta_dpo/beta_margin_mean": 0.709579586982727, "beta_dpo/beta_margin_std": 1.3930869102478027, "beta_dpo/beta_used": 0.03464564308524132, "beta_dpo/beta_used_raw": -0.08792220056056976, "beta_dpo/gap_mean": 20.235342025756836, "beta_dpo/gap_std": 27.681325912475586, "beta_dpo/loss_margin_mean": 15.653010368347168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35222978080120937, "grad_norm": 55.460296630859375, "learning_rate": 4.106969024216348e-07, "logits/chosen": -3.2439560890197754, "logits/rejected": -3.2535340785980225, "loss": 1.0405, "step": 233 }, { "beta_dpo/beta": 0.1527194380760193, "beta_dpo/beta_margin_grad_mean": -0.3214091956615448, "beta_dpo/beta_margin_grad_std": 0.26886996626853943, "beta_dpo/beta_margin_mean": 3.466355085372925, "beta_dpo/beta_margin_std": 6.77988338470459, "beta_dpo/beta_used": 0.1527194380760193, "beta_dpo/beta_used_raw": -0.0909288078546524, "beta_dpo/gap_mean": 18.938209533691406, "beta_dpo/gap_std": 27.582704544067383, "beta_dpo/loss_margin_mean": 16.264286041259766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35374149659863946, "grad_norm": 266.7095947265625, "learning_rate": 4.09681781007452e-07, "logits/chosen": -3.269228935241699, "logits/rejected": -3.269357442855835, "loss": 0.9565, "step": 234 }, { "beta_dpo/beta": 0.30560052394866943, "beta_dpo/beta_margin_grad_mean": -0.14003857970237732, "beta_dpo/beta_margin_grad_std": 0.27959078550338745, "beta_dpo/beta_margin_mean": 7.613363742828369, "beta_dpo/beta_margin_std": 7.636228084564209, "beta_dpo/beta_used": 0.30560052394866943, "beta_dpo/beta_used_raw": 0.30560052394866943, "beta_dpo/gap_mean": 19.932910919189453, "beta_dpo/gap_std": 27.26492691040039, "beta_dpo/loss_margin_mean": 25.049949645996094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35525321239606955, "grad_norm": 323.9731140136719, "learning_rate": 4.08662192950594e-07, "logits/chosen": -3.3123879432678223, "logits/rejected": -3.316612720489502, "loss": 0.4978, "step": 235 }, { "beta_dpo/beta": 0.37364262342453003, "beta_dpo/beta_margin_grad_mean": -0.20121777057647705, "beta_dpo/beta_margin_grad_std": 0.36329779028892517, "beta_dpo/beta_margin_mean": 8.671346664428711, "beta_dpo/beta_margin_std": 10.987950325012207, "beta_dpo/beta_used": 0.37364262342453003, "beta_dpo/beta_used_raw": 0.37364262342453003, "beta_dpo/gap_mean": 20.939083099365234, "beta_dpo/gap_std": 27.456195831298828, "beta_dpo/loss_margin_mean": 23.402854919433594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35676492819349964, "grad_norm": 661.39013671875, "learning_rate": 4.076381667711306e-07, "logits/chosen": -3.2854065895080566, "logits/rejected": -3.274115800857544, "loss": 0.9594, "step": 236 }, { "beta_dpo/beta": 0.09945555031299591, "beta_dpo/beta_margin_grad_mean": -0.27587732672691345, "beta_dpo/beta_margin_grad_std": 0.28971001505851746, "beta_dpo/beta_margin_mean": 2.4051265716552734, "beta_dpo/beta_margin_std": 3.4533803462982178, "beta_dpo/beta_used": 0.09945555031299591, "beta_dpo/beta_used_raw": 0.09945555031299591, "beta_dpo/gap_mean": 21.28778076171875, "beta_dpo/gap_std": 27.852872848510742, "beta_dpo/loss_margin_mean": 23.50334358215332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35827664399092973, "grad_norm": 211.1277313232422, "learning_rate": 4.066097311132753e-07, "logits/chosen": -3.2915210723876953, "logits/rejected": -3.293576955795288, "loss": 0.856, "step": 237 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4955797791481018, "beta_dpo/beta_margin_grad_std": 0.006702260579913855, "beta_dpo/beta_margin_mean": 0.017684506252408028, "beta_dpo/beta_margin_std": 0.026815088465809822, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2128760814666748, "beta_dpo/gap_mean": 20.925275802612305, "beta_dpo/gap_std": 27.556137084960938, "beta_dpo/loss_margin_mean": 17.684505462646484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35978835978835977, "grad_norm": 7.044036865234375, "learning_rate": 4.0557691474458414e-07, "logits/chosen": -3.2529449462890625, "logits/rejected": -3.262035608291626, "loss": 1.3708, "step": 238 }, { "beta_dpo/beta": 0.05461409315466881, "beta_dpo/beta_margin_grad_mean": -0.38040855526924133, "beta_dpo/beta_margin_grad_std": 0.2407120168209076, "beta_dpo/beta_margin_mean": 1.208554983139038, "beta_dpo/beta_margin_std": 2.6001651287078857, "beta_dpo/beta_used": 0.05461409315466881, "beta_dpo/beta_used_raw": 0.03262433409690857, "beta_dpo/gap_mean": 20.93581199645996, "beta_dpo/gap_std": 27.82280731201172, "beta_dpo/loss_margin_mean": 23.322006225585938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36130007558578986, "grad_norm": 84.934326171875, "learning_rate": 4.045397465551513e-07, "logits/chosen": -3.267176628112793, "logits/rejected": -3.295515537261963, "loss": 1.0566, "step": 239 }, { "beta_dpo/beta": 0.4454389214515686, "beta_dpo/beta_margin_grad_mean": -0.1757676601409912, "beta_dpo/beta_margin_grad_std": 0.33982476592063904, "beta_dpo/beta_margin_mean": 10.719533920288086, "beta_dpo/beta_margin_std": 10.665504455566406, "beta_dpo/beta_used": 0.4454389214515686, "beta_dpo/beta_used_raw": 0.4454389214515686, "beta_dpo/gap_mean": 21.506826400756836, "beta_dpo/gap_std": 27.147459030151367, "beta_dpo/loss_margin_mean": 24.314071655273438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36281179138321995, "grad_norm": 513.2327880859375, "learning_rate": 4.0349825555680045e-07, "logits/chosen": -3.2560102939605713, "logits/rejected": -3.2747344970703125, "loss": 0.5211, "step": 240 }, { "beta_dpo/beta": 0.06813672184944153, "beta_dpo/beta_margin_grad_mean": -0.363924503326416, "beta_dpo/beta_margin_grad_std": 0.23285789787769318, "beta_dpo/beta_margin_mean": 1.3553699254989624, "beta_dpo/beta_margin_std": 2.7104809284210205, "beta_dpo/beta_used": 0.06813672184944153, "beta_dpo/beta_used_raw": -0.07167855650186539, "beta_dpo/gap_mean": 21.41180419921875, "beta_dpo/gap_std": 26.821788787841797, "beta_dpo/loss_margin_mean": 19.534835815429688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36432350718065004, "grad_norm": 66.69193267822266, "learning_rate": 4.0245247088227377e-07, "logits/chosen": -3.25724720954895, "logits/rejected": -3.2746810913085938, "loss": 0.9479, "step": 241 }, { "beta_dpo/beta": 0.05317524075508118, "beta_dpo/beta_margin_grad_mean": -0.29675236344337463, "beta_dpo/beta_margin_grad_std": 0.21193645894527435, "beta_dpo/beta_margin_mean": 1.4541407823562622, "beta_dpo/beta_margin_std": 1.9665377140045166, "beta_dpo/beta_used": 0.05317524075508118, "beta_dpo/beta_used_raw": 0.05317524075508118, "beta_dpo/gap_mean": 22.155099868774414, "beta_dpo/gap_std": 27.43655776977539, "beta_dpo/loss_margin_mean": 25.84035873413086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36583522297808013, "grad_norm": 67.37097930908203, "learning_rate": 4.0140242178441665e-07, "logits/chosen": -3.2400145530700684, "logits/rejected": -3.2478156089782715, "loss": 0.8496, "step": 242 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4958913326263428, "beta_dpo/beta_margin_grad_std": 0.006393721327185631, "beta_dpo/beta_margin_mean": 0.016438335180282593, "beta_dpo/beta_margin_std": 0.02558121271431446, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09002360701560974, "beta_dpo/gap_mean": 21.35116958618164, "beta_dpo/gap_std": 27.41248321533203, "beta_dpo/loss_margin_mean": 16.43833351135254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3673469387755102, "grad_norm": 1.8394923210144043, "learning_rate": 4.003481376353596e-07, "logits/chosen": -3.2727673053741455, "logits/rejected": -3.2656126022338867, "loss": 1.3683, "step": 243 }, { "beta_dpo/beta": 0.32955145835876465, "beta_dpo/beta_margin_grad_mean": -0.1536446064710617, "beta_dpo/beta_margin_grad_std": 0.2893112897872925, "beta_dpo/beta_margin_mean": 8.640202522277832, "beta_dpo/beta_margin_std": 7.9720234870910645, "beta_dpo/beta_used": 0.32955145835876465, "beta_dpo/beta_used_raw": 0.32955145835876465, "beta_dpo/gap_mean": 21.91408348083496, "beta_dpo/gap_std": 26.89801025390625, "beta_dpo/loss_margin_mean": 26.282230377197266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3688586545729403, "grad_norm": 358.5315856933594, "learning_rate": 3.9928964792569654e-07, "logits/chosen": -3.2303824424743652, "logits/rejected": -3.2301125526428223, "loss": 0.642, "step": 244 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4936436414718628, "beta_dpo/beta_margin_grad_std": 0.006070741917937994, "beta_dpo/beta_margin_mean": 0.025431012734770775, "beta_dpo/beta_margin_std": 0.024291541427373886, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06644029915332794, "beta_dpo/gap_mean": 22.572145462036133, "beta_dpo/gap_std": 26.450565338134766, "beta_dpo/loss_margin_mean": 25.431011199951172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37037037037037035, "grad_norm": 2.086357831954956, "learning_rate": 3.982269822636601e-07, "logits/chosen": -3.228754997253418, "logits/rejected": -3.2338013648986816, "loss": 1.3667, "step": 245 }, { "beta_dpo/beta": 0.04253571480512619, "beta_dpo/beta_margin_grad_mean": -0.37704116106033325, "beta_dpo/beta_margin_grad_std": 0.24304994940757751, "beta_dpo/beta_margin_mean": 0.9785481691360474, "beta_dpo/beta_margin_std": 2.127553701400757, "beta_dpo/beta_used": 0.04253571480512619, "beta_dpo/beta_used_raw": -0.015344377607107162, "beta_dpo/gap_mean": 22.191429138183594, "beta_dpo/gap_std": 27.405582427978516, "beta_dpo/loss_margin_mean": 20.26742935180664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37188208616780044, "grad_norm": 85.47932434082031, "learning_rate": 3.971601703742932e-07, "logits/chosen": -3.2285194396972656, "logits/rejected": -3.2426650524139404, "loss": 1.072, "step": 246 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4956916272640228, "beta_dpo/beta_margin_grad_std": 0.007161261048167944, "beta_dpo/beta_margin_mean": 0.017237938940525055, "beta_dpo/beta_margin_std": 0.028652969747781754, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20848971605300903, "beta_dpo/gap_mean": 21.35602569580078, "beta_dpo/gap_std": 28.036100387573242, "beta_dpo/loss_margin_mean": 17.237937927246094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37339380196523053, "grad_norm": 2.1120619773864746, "learning_rate": 3.960892420986177e-07, "logits/chosen": -3.240222930908203, "logits/rejected": -3.24595046043396, "loss": 1.3703, "step": 247 }, { "beta_dpo/beta": 0.16819608211517334, "beta_dpo/beta_margin_grad_mean": -0.32370662689208984, "beta_dpo/beta_margin_grad_std": 0.2876364588737488, "beta_dpo/beta_margin_mean": 4.243810176849365, "beta_dpo/beta_margin_std": 8.333431243896484, "beta_dpo/beta_used": 0.16819608211517334, "beta_dpo/beta_used_raw": 0.15640771389007568, "beta_dpo/gap_mean": 21.29971694946289, "beta_dpo/gap_std": 28.30411720275879, "beta_dpo/loss_margin_mean": 22.132286071777344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3749055177626606, "grad_norm": 158.7293243408203, "learning_rate": 3.9501422739279953e-07, "logits/chosen": -3.2170867919921875, "logits/rejected": -3.1951141357421875, "loss": 0.8385, "step": 248 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4974122941493988, "beta_dpo/beta_margin_grad_std": 0.007395448163151741, "beta_dpo/beta_margin_mean": 0.010353420861065388, "beta_dpo/beta_margin_std": 0.029589040204882622, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.43927276134490967, "beta_dpo/gap_mean": 19.619159698486328, "beta_dpo/gap_std": 28.558395385742188, "beta_dpo/loss_margin_mean": 10.35342025756836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3764172335600907, "grad_norm": 1.9444804191589355, "learning_rate": 3.9393515632731094e-07, "logits/chosen": -3.195737361907959, "logits/rejected": -3.178128957748413, "loss": 1.3758, "step": 249 }, { "beta_dpo/beta": 0.6326093673706055, "beta_dpo/beta_margin_grad_mean": -0.18713483214378357, "beta_dpo/beta_margin_grad_std": 0.357028067111969, "beta_dpo/beta_margin_mean": 17.81401252746582, "beta_dpo/beta_margin_std": 19.00658416748047, "beta_dpo/beta_used": 0.6326093673706055, "beta_dpo/beta_used_raw": 0.6326093673706055, "beta_dpo/gap_mean": 20.576045989990234, "beta_dpo/gap_std": 28.874229431152344, "beta_dpo/loss_margin_mean": 27.96128273010254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3779289493575208, "grad_norm": 770.96875, "learning_rate": 3.9285205908608934e-07, "logits/chosen": -3.1680002212524414, "logits/rejected": -3.1686441898345947, "loss": 0.6203, "step": 250 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4964104890823364, "beta_dpo/beta_margin_grad_std": 0.007367901504039764, "beta_dpo/beta_margin_mean": 0.014362086541950703, "beta_dpo/beta_margin_std": 0.02948029339313507, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.33875617384910583, "beta_dpo/gap_mean": 20.447429656982422, "beta_dpo/gap_std": 28.979272842407227, "beta_dpo/loss_margin_mean": 14.362086296081543, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3794406651549509, "grad_norm": 2.251450300216675, "learning_rate": 3.9176496596569265e-07, "logits/chosen": -3.1883554458618164, "logits/rejected": -3.1973702907562256, "loss": 1.3733, "step": 251 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4954353868961334, "beta_dpo/beta_margin_grad_std": 0.008493933826684952, "beta_dpo/beta_margin_mean": 0.018263807520270348, "beta_dpo/beta_margin_std": 0.033991072326898575, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20695188641548157, "beta_dpo/gap_mean": 19.295875549316406, "beta_dpo/gap_std": 29.833553314208984, "beta_dpo/loss_margin_mean": 18.26380729675293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.38095238095238093, "grad_norm": 1.9482089281082153, "learning_rate": 3.9067390737445254e-07, "logits/chosen": -3.1611921787261963, "logits/rejected": -3.1702466011047363, "loss": 1.3723, "step": 252 }, { "beta_dpo/beta": 0.15635497868061066, "beta_dpo/beta_margin_grad_mean": -0.36020636558532715, "beta_dpo/beta_margin_grad_std": 0.2878256142139435, "beta_dpo/beta_margin_mean": 3.688851833343506, "beta_dpo/beta_margin_std": 8.520410537719727, "beta_dpo/beta_used": 0.15635497868061066, "beta_dpo/beta_used_raw": 0.11544579267501831, "beta_dpo/gap_mean": 19.734420776367188, "beta_dpo/gap_std": 30.259693145751953, "beta_dpo/loss_margin_mean": 19.922733306884766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.382464096749811, "grad_norm": 350.47784423828125, "learning_rate": 3.8957891383162304e-07, "logits/chosen": -3.1174440383911133, "logits/rejected": -3.1282904148101807, "loss": 1.8085, "step": 253 }, { "beta_dpo/beta": 0.08370675146579742, "beta_dpo/beta_margin_grad_mean": -0.3806697130203247, "beta_dpo/beta_margin_grad_std": 0.2770395576953888, "beta_dpo/beta_margin_mean": 2.0539305210113525, "beta_dpo/beta_margin_std": 4.707830905914307, "beta_dpo/beta_used": 0.08370675146579742, "beta_dpo/beta_used_raw": -0.06456176191568375, "beta_dpo/gap_mean": 19.630905151367188, "beta_dpo/gap_std": 30.923583984375, "beta_dpo/loss_margin_mean": 18.080299377441406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3839758125472411, "grad_norm": 216.11676025390625, "learning_rate": 3.884800159665276e-07, "logits/chosen": -3.170478343963623, "logits/rejected": -3.1861348152160645, "loss": 1.1379, "step": 254 }, { "beta_dpo/beta": 0.22263258695602417, "beta_dpo/beta_margin_grad_mean": -0.32336732745170593, "beta_dpo/beta_margin_grad_std": 0.28287842869758606, "beta_dpo/beta_margin_mean": 7.826587677001953, "beta_dpo/beta_margin_std": 14.295151710510254, "beta_dpo/beta_used": 0.22263258695602417, "beta_dpo/beta_used_raw": 0.11471735686063766, "beta_dpo/gap_mean": 20.62637710571289, "beta_dpo/gap_std": 32.07659912109375, "beta_dpo/loss_margin_mean": 26.220294952392578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3854875283446712, "grad_norm": 650.1557006835938, "learning_rate": 3.873772445177015e-07, "logits/chosen": -3.1712052822113037, "logits/rejected": -3.179037094116211, "loss": 2.1495, "step": 255 }, { "beta_dpo/beta": 0.16558979451656342, "beta_dpo/beta_margin_grad_mean": -0.3782199025154114, "beta_dpo/beta_margin_grad_std": 0.307743638753891, "beta_dpo/beta_margin_mean": 4.105532169342041, "beta_dpo/beta_margin_std": 8.570287704467773, "beta_dpo/beta_used": 0.16558979451656342, "beta_dpo/beta_used_raw": 0.1294323205947876, "beta_dpo/gap_mean": 21.077714920043945, "beta_dpo/gap_std": 32.54633331298828, "beta_dpo/loss_margin_mean": 24.78595542907715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3869992441421013, "grad_norm": 582.988037109375, "learning_rate": 3.862706303320329e-07, "logits/chosen": -3.144796848297119, "logits/rejected": -3.1616220474243164, "loss": 2.1925, "step": 256 }, { "beta_dpo/beta": 0.2947536110877991, "beta_dpo/beta_margin_grad_mean": -0.17713895440101624, "beta_dpo/beta_margin_grad_std": 0.3150961697101593, "beta_dpo/beta_margin_mean": 8.477952003479004, "beta_dpo/beta_margin_std": 12.130805015563965, "beta_dpo/beta_used": 0.2947536110877991, "beta_dpo/beta_used_raw": 0.2947536110877991, "beta_dpo/gap_mean": 22.49274444580078, "beta_dpo/gap_std": 33.52637481689453, "beta_dpo/loss_margin_mean": 29.009363174438477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3885109599395314, "grad_norm": 413.6116027832031, "learning_rate": 3.851602043638994e-07, "logits/chosen": -3.1539788246154785, "logits/rejected": -3.1706466674804688, "loss": 0.7214, "step": 257 }, { "beta_dpo/beta": 0.2633950710296631, "beta_dpo/beta_margin_grad_mean": -0.29857465624809265, "beta_dpo/beta_margin_grad_std": 0.2786755859851837, "beta_dpo/beta_margin_mean": 8.789876937866211, "beta_dpo/beta_margin_std": 13.870450019836426, "beta_dpo/beta_used": 0.2633950710296631, "beta_dpo/beta_used_raw": -0.04980570077896118, "beta_dpo/gap_mean": 23.006622314453125, "beta_dpo/gap_std": 33.62416076660156, "beta_dpo/loss_margin_mean": 26.75441551208496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3900226757369615, "grad_norm": 657.8723754882812, "learning_rate": 3.840459976743023e-07, "logits/chosen": -3.138174295425415, "logits/rejected": -3.165945053100586, "loss": 1.6342, "step": 258 }, { "beta_dpo/beta": 0.34684550762176514, "beta_dpo/beta_margin_grad_mean": -0.20287807285785675, "beta_dpo/beta_margin_grad_std": 0.3509141206741333, "beta_dpo/beta_margin_mean": 11.281720161437988, "beta_dpo/beta_margin_std": 12.954914093017578, "beta_dpo/beta_used": 0.34684550762176514, "beta_dpo/beta_used_raw": 0.34684550762176514, "beta_dpo/gap_mean": 24.934921264648438, "beta_dpo/gap_std": 33.59513473510742, "beta_dpo/loss_margin_mean": 31.152149200439453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3915343915343915, "grad_norm": 723.6175537109375, "learning_rate": 3.8292804142999796e-07, "logits/chosen": -3.1428725719451904, "logits/rejected": -3.1634674072265625, "loss": 1.524, "step": 259 }, { "beta_dpo/beta": 0.049117155373096466, "beta_dpo/beta_margin_grad_mean": -0.3828244209289551, "beta_dpo/beta_margin_grad_std": 0.2421586811542511, "beta_dpo/beta_margin_mean": 1.1651880741119385, "beta_dpo/beta_margin_std": 2.5332014560699463, "beta_dpo/beta_used": 0.049117155373096466, "beta_dpo/beta_used_raw": -0.10537585616111755, "beta_dpo/gap_mean": 25.15224266052246, "beta_dpo/gap_std": 33.792938232421875, "beta_dpo/loss_margin_mean": 25.78077507019043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3930461073318216, "grad_norm": 107.15544128417969, "learning_rate": 3.818063669026256e-07, "logits/chosen": -3.123897075653076, "logits/rejected": -3.1420979499816895, "loss": 1.153, "step": 260 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49438992142677307, "beta_dpo/beta_margin_grad_std": 0.007078561000525951, "beta_dpo/beta_margin_mean": 0.022446028888225555, "beta_dpo/beta_margin_std": 0.028323406353592873, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12177471816539764, "beta_dpo/gap_mean": 24.7503662109375, "beta_dpo/gap_std": 33.19297409057617, "beta_dpo/loss_margin_mean": 22.446027755737305, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3945578231292517, "grad_norm": 2.29372501373291, "learning_rate": 3.806810054678331e-07, "logits/chosen": -3.1444356441497803, "logits/rejected": -3.134612560272217, "loss": 1.3655, "step": 261 }, { "beta_dpo/beta": 0.14830927550792694, "beta_dpo/beta_margin_grad_mean": -0.35249003767967224, "beta_dpo/beta_margin_grad_std": 0.2969633638858795, "beta_dpo/beta_margin_mean": 3.340433120727539, "beta_dpo/beta_margin_std": 6.50543737411499, "beta_dpo/beta_used": 0.14830927550792694, "beta_dpo/beta_used_raw": 0.07512392103672028, "beta_dpo/gap_mean": 24.296005249023438, "beta_dpo/gap_std": 32.12373352050781, "beta_dpo/loss_margin_mean": 22.67593765258789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3960695389266818, "grad_norm": 253.0749969482422, "learning_rate": 3.7955198860439887e-07, "logits/chosen": -3.1409544944763184, "logits/rejected": -3.160794258117676, "loss": 1.0158, "step": 262 }, { "beta_dpo/beta": 0.002609849674627185, "beta_dpo/beta_margin_grad_mean": -0.4891809821128845, "beta_dpo/beta_margin_grad_std": 0.023940352723002434, "beta_dpo/beta_margin_mean": 0.04340985417366028, "beta_dpo/beta_margin_std": 0.09623526781797409, "beta_dpo/beta_used": 0.002609849674627185, "beta_dpo/beta_used_raw": -0.1014128252863884, "beta_dpo/gap_mean": 23.623592376708984, "beta_dpo/gap_std": 31.66000747680664, "beta_dpo/loss_margin_mean": 19.01215934753418, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3975812547241119, "grad_norm": 6.745489120483398, "learning_rate": 3.784193478933516e-07, "logits/chosen": -3.1504030227661133, "logits/rejected": -3.1817703247070312, "loss": 1.3338, "step": 263 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49462515115737915, "beta_dpo/beta_margin_grad_std": 0.007510695606470108, "beta_dpo/beta_margin_mean": 0.021505767479538918, "beta_dpo/beta_margin_std": 0.03005310706794262, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3539937138557434, "beta_dpo/gap_mean": 22.857873916625977, "beta_dpo/gap_std": 31.427021026611328, "beta_dpo/loss_margin_mean": 21.505767822265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.39909297052154197, "grad_norm": 2.348557472229004, "learning_rate": 3.7728311501708674e-07, "logits/chosen": -3.163296699523926, "logits/rejected": -3.170408248901367, "loss": 1.3712, "step": 264 }, { "beta_dpo/beta": 0.3959474563598633, "beta_dpo/beta_margin_grad_mean": -0.26147815585136414, "beta_dpo/beta_margin_grad_std": 0.40265628695487976, "beta_dpo/beta_margin_mean": 9.483842849731445, "beta_dpo/beta_margin_std": 16.816068649291992, "beta_dpo/beta_used": 0.3959474563598633, "beta_dpo/beta_used_raw": 0.3959474563598633, "beta_dpo/gap_mean": 22.634410858154297, "beta_dpo/gap_std": 31.731470108032227, "beta_dpo/loss_margin_mean": 22.369054794311523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40060468631897206, "grad_norm": 776.9564208984375, "learning_rate": 3.7614332175848027e-07, "logits/chosen": -3.119457244873047, "logits/rejected": -3.1327133178710938, "loss": 2.2347, "step": 265 }, { "beta_dpo/beta": 0.005646655801683664, "beta_dpo/beta_margin_grad_mean": -0.47482630610466003, "beta_dpo/beta_margin_grad_std": 0.06348370015621185, "beta_dpo/beta_margin_mean": 0.10376403480768204, "beta_dpo/beta_margin_std": 0.26494264602661133, "beta_dpo/beta_used": 0.005646655801683664, "beta_dpo/beta_used_raw": -0.04304119572043419, "beta_dpo/gap_mean": 22.353633880615234, "beta_dpo/gap_std": 31.866960525512695, "beta_dpo/loss_margin_mean": 19.274166107177734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4021164021164021, "grad_norm": 15.863897323608398, "learning_rate": 3.75e-07, "logits/chosen": -3.150226354598999, "logits/rejected": -3.180527687072754, "loss": 1.283, "step": 266 }, { "beta_dpo/beta": 0.05279780179262161, "beta_dpo/beta_margin_grad_mean": -0.35302603244781494, "beta_dpo/beta_margin_grad_std": 0.21988117694854736, "beta_dpo/beta_margin_mean": 1.6686517000198364, "beta_dpo/beta_margin_std": 2.897858142852783, "beta_dpo/beta_used": 0.05279780179262161, "beta_dpo/beta_used_raw": -0.19854456186294556, "beta_dpo/gap_mean": 21.90056610107422, "beta_dpo/gap_std": 31.87493324279785, "beta_dpo/loss_margin_mean": 23.186372756958008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4036281179138322, "grad_norm": 69.50460815429688, "learning_rate": 3.738531817228131e-07, "logits/chosen": -3.114777088165283, "logits/rejected": -3.127145290374756, "loss": 1.0439, "step": 267 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4949630796909332, "beta_dpo/beta_margin_grad_std": 0.007268788758665323, "beta_dpo/beta_margin_mean": 0.02015492133796215, "beta_dpo/beta_margin_std": 0.029093481600284576, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20411354303359985, "beta_dpo/gap_mean": 21.996845245361328, "beta_dpo/gap_std": 31.68831443786621, "beta_dpo/loss_margin_mean": 20.15492057800293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4051398337112623, "grad_norm": 2.0732991695404053, "learning_rate": 3.7270289900589204e-07, "logits/chosen": -3.1337013244628906, "logits/rejected": -3.14176607131958, "loss": 1.3696, "step": 268 }, { "beta_dpo/beta": 0.13603897392749786, "beta_dpo/beta_margin_grad_mean": -0.33785995841026306, "beta_dpo/beta_margin_grad_std": 0.27836501598358154, "beta_dpo/beta_margin_mean": 3.901336908340454, "beta_dpo/beta_margin_std": 6.962329387664795, "beta_dpo/beta_used": 0.13603897392749786, "beta_dpo/beta_used_raw": -0.08372128009796143, "beta_dpo/gap_mean": 21.401334762573242, "beta_dpo/gap_std": 31.724838256835938, "beta_dpo/loss_margin_mean": 20.39788246154785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40665154950869237, "grad_norm": 313.16552734375, "learning_rate": 3.7154918402511714e-07, "logits/chosen": -3.145228862762451, "logits/rejected": -3.1478958129882812, "loss": 1.3126, "step": 269 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49513691663742065, "beta_dpo/beta_margin_grad_std": 0.007251319475471973, "beta_dpo/beta_margin_mean": 0.019458677619695663, "beta_dpo/beta_margin_std": 0.02901856042444706, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1537623405456543, "beta_dpo/gap_mean": 21.666542053222656, "beta_dpo/gap_std": 31.45153045654297, "beta_dpo/loss_margin_mean": 19.458675384521484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40816326530612246, "grad_norm": 2.4263079166412354, "learning_rate": 3.7039206905237656e-07, "logits/chosen": -3.1230382919311523, "logits/rejected": -3.15578293800354, "loss": 1.3691, "step": 270 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49473121762275696, "beta_dpo/beta_margin_grad_std": 0.009824409149587154, "beta_dpo/beta_margin_mean": 0.021087976172566414, "beta_dpo/beta_margin_std": 0.03932539001107216, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3831254541873932, "beta_dpo/gap_mean": 21.330623626708984, "beta_dpo/gap_std": 32.055625915527344, "beta_dpo/loss_margin_mean": 21.087974548339844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40967498110355255, "grad_norm": 2.2488882541656494, "learning_rate": 3.692315864546635e-07, "logits/chosen": -3.1240901947021484, "logits/rejected": -3.150813579559326, "loss": 1.3732, "step": 271 }, { "beta_dpo/beta": 0.5425400733947754, "beta_dpo/beta_margin_grad_mean": -0.13226215541362762, "beta_dpo/beta_margin_grad_std": 0.2805606424808502, "beta_dpo/beta_margin_mean": 17.538484573364258, "beta_dpo/beta_margin_std": 15.896415710449219, "beta_dpo/beta_used": 0.5425400733947754, "beta_dpo/beta_used_raw": 0.5425400733947754, "beta_dpo/gap_mean": 22.86958122253418, "beta_dpo/gap_std": 32.0460205078125, "beta_dpo/loss_margin_mean": 32.03363037109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41118669690098264, "grad_norm": 575.8597412109375, "learning_rate": 3.6806776869317067e-07, "logits/chosen": -3.1413285732269287, "logits/rejected": -3.1322383880615234, "loss": 0.9124, "step": 272 }, { "beta_dpo/beta": 0.025581976398825645, "beta_dpo/beta_margin_grad_mean": -0.38602912425994873, "beta_dpo/beta_margin_grad_std": 0.19099541008472443, "beta_dpo/beta_margin_mean": 0.7089114189147949, "beta_dpo/beta_margin_std": 1.3611912727355957, "beta_dpo/beta_used": 0.025581976398825645, "beta_dpo/beta_used_raw": -0.055108197033405304, "beta_dpo/gap_mean": 23.795854568481445, "beta_dpo/gap_std": 32.6051139831543, "beta_dpo/loss_margin_mean": 26.4256649017334, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4126984126984127, "grad_norm": 50.3040885925293, "learning_rate": 3.669006483223828e-07, "logits/chosen": -3.1269006729125977, "logits/rejected": -3.153369426727295, "loss": 1.0815, "step": 273 }, { "beta_dpo/beta": 0.10980037599802017, "beta_dpo/beta_margin_grad_mean": -0.33080747723579407, "beta_dpo/beta_margin_grad_std": 0.2663941979408264, "beta_dpo/beta_margin_mean": 3.319112539291382, "beta_dpo/beta_margin_std": 6.285099983215332, "beta_dpo/beta_used": 0.10980037599802017, "beta_dpo/beta_used_raw": 0.0991683080792427, "beta_dpo/gap_mean": 23.904796600341797, "beta_dpo/gap_std": 33.53688049316406, "beta_dpo/loss_margin_mean": 25.596914291381836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41421012849584277, "grad_norm": 172.63458251953125, "learning_rate": 3.657302579891656e-07, "logits/chosen": -3.1327056884765625, "logits/rejected": -3.134082794189453, "loss": 0.9537, "step": 274 }, { "beta_dpo/beta": 0.04865710437297821, "beta_dpo/beta_margin_grad_mean": -0.35247406363487244, "beta_dpo/beta_margin_grad_std": 0.22889384627342224, "beta_dpo/beta_margin_mean": 1.5195780992507935, "beta_dpo/beta_margin_std": 2.562974452972412, "beta_dpo/beta_used": 0.04865710437297821, "beta_dpo/beta_used_raw": 0.03514295443892479, "beta_dpo/gap_mean": 24.787761688232422, "beta_dpo/gap_std": 33.226646423339844, "beta_dpo/loss_margin_mean": 28.79128074645996, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41572184429327286, "grad_norm": 89.05858612060547, "learning_rate": 3.645566304318526e-07, "logits/chosen": -3.131340742111206, "logits/rejected": -3.1462135314941406, "loss": 0.9959, "step": 275 }, { "beta_dpo/beta": 0.2305426448583603, "beta_dpo/beta_margin_grad_mean": -0.35385870933532715, "beta_dpo/beta_margin_grad_std": 0.2957158386707306, "beta_dpo/beta_margin_mean": 6.705293655395508, "beta_dpo/beta_margin_std": 12.562973976135254, "beta_dpo/beta_used": 0.2305426448583603, "beta_dpo/beta_used_raw": 0.22652789950370789, "beta_dpo/gap_mean": 25.334609985351562, "beta_dpo/gap_std": 33.08951187133789, "beta_dpo/loss_margin_mean": 25.393156051635742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41723356009070295, "grad_norm": 458.3935241699219, "learning_rate": 3.633797984793294e-07, "logits/chosen": -3.112989664077759, "logits/rejected": -3.117621660232544, "loss": 1.2243, "step": 276 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49625149369239807, "beta_dpo/beta_margin_grad_std": 0.009044786915183067, "beta_dpo/beta_margin_mean": 0.014998854137957096, "beta_dpo/beta_margin_std": 0.036193422973155975, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21672311425209045, "beta_dpo/gap_mean": 23.66399383544922, "beta_dpo/gap_std": 33.6298713684082, "beta_dpo/loss_margin_mean": 14.998852729797363, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41874527588813304, "grad_norm": 2.0547492504119873, "learning_rate": 3.6219979505011555e-07, "logits/chosen": -3.115434169769287, "logits/rejected": -3.1027560234069824, "loss": 1.3682, "step": 277 }, { "beta_dpo/beta": 0.2880277633666992, "beta_dpo/beta_margin_grad_mean": -0.3204805850982666, "beta_dpo/beta_margin_grad_std": 0.29095593094825745, "beta_dpo/beta_margin_mean": 8.897863388061523, "beta_dpo/beta_margin_std": 15.400246620178223, "beta_dpo/beta_used": 0.2880277633666992, "beta_dpo/beta_used_raw": 0.2747938930988312, "beta_dpo/gap_mean": 22.691707611083984, "beta_dpo/gap_std": 33.781612396240234, "beta_dpo/loss_margin_mean": 22.27745819091797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42025699168556313, "grad_norm": 1209.379150390625, "learning_rate": 3.6101665315144353e-07, "logits/chosen": -3.124952554702759, "logits/rejected": -3.1554203033447266, "loss": 1.7403, "step": 278 }, { "beta_dpo/beta": 0.2570856809616089, "beta_dpo/beta_margin_grad_mean": -0.19151908159255981, "beta_dpo/beta_margin_grad_std": 0.2793121933937073, "beta_dpo/beta_margin_mean": 8.134492874145508, "beta_dpo/beta_margin_std": 10.537945747375488, "beta_dpo/beta_used": 0.2570856809616089, "beta_dpo/beta_used_raw": 0.2570856809616089, "beta_dpo/gap_mean": 23.83153533935547, "beta_dpo/gap_std": 33.45591735839844, "beta_dpo/loss_margin_mean": 29.187664031982422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4217687074829932, "grad_norm": 237.06866455078125, "learning_rate": 3.5983040587833563e-07, "logits/chosen": -3.129159450531006, "logits/rejected": -3.1331429481506348, "loss": 0.574, "step": 279 }, { "beta_dpo/beta": 0.16108359396457672, "beta_dpo/beta_margin_grad_mean": -0.2740139365196228, "beta_dpo/beta_margin_grad_std": 0.26515132188796997, "beta_dpo/beta_margin_mean": 4.792131423950195, "beta_dpo/beta_margin_std": 7.665543079376221, "beta_dpo/beta_used": 0.16108359396457672, "beta_dpo/beta_used_raw": 0.16108359396457672, "beta_dpo/gap_mean": 24.996429443359375, "beta_dpo/gap_std": 33.09587860107422, "beta_dpo/loss_margin_mean": 28.564146041870117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42328042328042326, "grad_norm": 321.95233154296875, "learning_rate": 3.586410864126781e-07, "logits/chosen": -3.11377215385437, "logits/rejected": -3.1314854621887207, "loss": 1.1929, "step": 280 }, { "beta_dpo/beta": 0.1646243929862976, "beta_dpo/beta_margin_grad_mean": -0.2770417332649231, "beta_dpo/beta_margin_grad_std": 0.2952728867530823, "beta_dpo/beta_margin_mean": 4.605935096740723, "beta_dpo/beta_margin_std": 8.153287887573242, "beta_dpo/beta_used": 0.1646243929862976, "beta_dpo/beta_used_raw": 0.1646243929862976, "beta_dpo/gap_mean": 25.285266876220703, "beta_dpo/gap_std": 33.30162811279297, "beta_dpo/loss_margin_mean": 26.789043426513672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42479213907785335, "grad_norm": 241.8627471923828, "learning_rate": 3.574487280222929e-07, "logits/chosen": -3.115567684173584, "logits/rejected": -3.0991907119750977, "loss": 0.8431, "step": 281 }, { "beta_dpo/beta": 0.05279150977730751, "beta_dpo/beta_margin_grad_mean": -0.3536136746406555, "beta_dpo/beta_margin_grad_std": 0.24429401755332947, "beta_dpo/beta_margin_mean": 1.628803014755249, "beta_dpo/beta_margin_std": 3.227506637573242, "beta_dpo/beta_used": 0.05279150977730751, "beta_dpo/beta_used_raw": 0.018425598740577698, "beta_dpo/gap_mean": 25.778846740722656, "beta_dpo/gap_std": 33.7174072265625, "beta_dpo/loss_margin_mean": 28.50662612915039, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42630385487528344, "grad_norm": 129.214111328125, "learning_rate": 3.562533640600075e-07, "logits/chosen": -3.127708673477173, "logits/rejected": -3.144028663635254, "loss": 1.1211, "step": 282 }, { "beta_dpo/beta": 0.037356920540332794, "beta_dpo/beta_margin_grad_mean": -0.38511744141578674, "beta_dpo/beta_margin_grad_std": 0.23253542184829712, "beta_dpo/beta_margin_mean": 0.8937662243843079, "beta_dpo/beta_margin_std": 1.8212085962295532, "beta_dpo/beta_used": 0.037356920540332794, "beta_dpo/beta_used_raw": -0.00982586294412613, "beta_dpo/gap_mean": 25.79236602783203, "beta_dpo/gap_std": 33.630287170410156, "beta_dpo/loss_margin_mean": 24.450763702392578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42781557067271353, "grad_norm": 77.45944213867188, "learning_rate": 3.550550279627215e-07, "logits/chosen": -3.1351306438446045, "logits/rejected": -3.1759040355682373, "loss": 1.0516, "step": 283 }, { "beta_dpo/beta": 0.23237170279026031, "beta_dpo/beta_margin_grad_mean": -0.2390763908624649, "beta_dpo/beta_margin_grad_std": 0.34266528487205505, "beta_dpo/beta_margin_mean": 5.581095218658447, "beta_dpo/beta_margin_std": 8.388540267944336, "beta_dpo/beta_used": 0.23237170279026031, "beta_dpo/beta_used_raw": 0.23237170279026031, "beta_dpo/gap_mean": 25.81268310546875, "beta_dpo/gap_std": 33.67734146118164, "beta_dpo/loss_margin_mean": 24.30912208557129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4293272864701436, "grad_norm": 394.56317138671875, "learning_rate": 3.5385375325047163e-07, "logits/chosen": -3.1143360137939453, "logits/rejected": -3.1349940299987793, "loss": 0.8281, "step": 284 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4946158826351166, "beta_dpo/beta_margin_grad_std": 0.008639446459710598, "beta_dpo/beta_margin_mean": 0.02154547907412052, "beta_dpo/beta_margin_std": 0.03457494452595711, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3157495856285095, "beta_dpo/gap_mean": 24.874420166015625, "beta_dpo/gap_std": 33.82136535644531, "beta_dpo/loss_margin_mean": 21.54547882080078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4308390022675737, "grad_norm": 2.731858968734741, "learning_rate": 3.5264957352549375e-07, "logits/chosen": -3.119109630584717, "logits/rejected": -3.1246259212493896, "loss": 1.3686, "step": 285 }, { "beta_dpo/beta": 0.3855738937854767, "beta_dpo/beta_margin_grad_mean": -0.16172371804714203, "beta_dpo/beta_margin_grad_std": 0.32219523191452026, "beta_dpo/beta_margin_mean": 14.698241233825684, "beta_dpo/beta_margin_std": 21.05354118347168, "beta_dpo/beta_used": 0.3855738937854767, "beta_dpo/beta_used_raw": 0.3855738937854767, "beta_dpo/gap_mean": 25.75153160095215, "beta_dpo/gap_std": 34.764095306396484, "beta_dpo/loss_margin_mean": 34.22718048095703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4323507180650038, "grad_norm": 403.9075012207031, "learning_rate": 3.514425224712835e-07, "logits/chosen": -3.1518945693969727, "logits/rejected": -3.187169075012207, "loss": 0.7101, "step": 286 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49369317293167114, "beta_dpo/beta_margin_grad_std": 0.00864699762314558, "beta_dpo/beta_margin_mean": 0.025238126516342163, "beta_dpo/beta_margin_std": 0.03460447117686272, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18191684782505035, "beta_dpo/gap_mean": 26.148000717163086, "beta_dpo/gap_std": 35.01335144042969, "beta_dpo/loss_margin_mean": 25.23812484741211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43386243386243384, "grad_norm": 1.7744338512420654, "learning_rate": 3.502326338516534e-07, "logits/chosen": -3.075338840484619, "logits/rejected": -3.087643623352051, "loss": 1.3652, "step": 287 }, { "beta_dpo/beta": 0.07703159749507904, "beta_dpo/beta_margin_grad_mean": -0.3526802957057953, "beta_dpo/beta_margin_grad_std": 0.26365530490875244, "beta_dpo/beta_margin_mean": 1.9095759391784668, "beta_dpo/beta_margin_std": 3.8526155948638916, "beta_dpo/beta_used": 0.07703159749507904, "beta_dpo/beta_used_raw": 0.07703159749507904, "beta_dpo/gap_mean": 25.93368148803711, "beta_dpo/gap_std": 35.061134338378906, "beta_dpo/loss_margin_mean": 23.770599365234375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43537414965986393, "grad_norm": 240.46803283691406, "learning_rate": 3.490199415097892e-07, "logits/chosen": -3.124175548553467, "logits/rejected": -3.1412646770477295, "loss": 1.1649, "step": 288 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49461233615875244, "beta_dpo/beta_margin_grad_std": 0.008139989338815212, "beta_dpo/beta_margin_mean": 0.021558493375778198, "beta_dpo/beta_margin_std": 0.03257429599761963, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16580015420913696, "beta_dpo/gap_mean": 25.205562591552734, "beta_dpo/gap_std": 34.782997131347656, "beta_dpo/loss_margin_mean": 21.558490753173828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.436885865457294, "grad_norm": 2.0100018978118896, "learning_rate": 3.4780447936730247e-07, "logits/chosen": -3.0957653522491455, "logits/rejected": -3.1041059494018555, "loss": 1.3658, "step": 289 }, { "beta_dpo/beta": 0.08695121854543686, "beta_dpo/beta_margin_grad_mean": -0.2400132119655609, "beta_dpo/beta_margin_grad_std": 0.26995664834976196, "beta_dpo/beta_margin_mean": 2.7945497035980225, "beta_dpo/beta_margin_std": 3.4169130325317383, "beta_dpo/beta_used": 0.08695121854543686, "beta_dpo/beta_used_raw": 0.08695121854543686, "beta_dpo/gap_mean": 26.052141189575195, "beta_dpo/gap_std": 34.53221893310547, "beta_dpo/loss_margin_mean": 31.12602424621582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4383975812547241, "grad_norm": 127.68869018554688, "learning_rate": 3.465862814232821e-07, "logits/chosen": -3.113938808441162, "logits/rejected": -3.1347272396087646, "loss": 0.7997, "step": 290 }, { "beta_dpo/beta": 0.39539340138435364, "beta_dpo/beta_margin_grad_mean": -0.17899632453918457, "beta_dpo/beta_margin_grad_std": 0.31650540232658386, "beta_dpo/beta_margin_mean": 13.287332534790039, "beta_dpo/beta_margin_std": 20.05366325378418, "beta_dpo/beta_used": 0.39539340138435364, "beta_dpo/beta_used_raw": 0.39539340138435364, "beta_dpo/gap_mean": 26.921337127685547, "beta_dpo/gap_std": 35.714210510253906, "beta_dpo/loss_margin_mean": 32.074607849121094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4399092970521542, "grad_norm": 124.12476348876953, "learning_rate": 3.4536538175334343e-07, "logits/chosen": -3.1052820682525635, "logits/rejected": -3.1392335891723633, "loss": 0.3092, "step": 291 }, { "beta_dpo/beta": 0.2568642795085907, "beta_dpo/beta_margin_grad_mean": -0.32715895771980286, "beta_dpo/beta_margin_grad_std": 0.28328001499176025, "beta_dpo/beta_margin_mean": 7.2955803871154785, "beta_dpo/beta_margin_std": 13.455894470214844, "beta_dpo/beta_used": 0.2568642795085907, "beta_dpo/beta_used_raw": 0.14661070704460144, "beta_dpo/gap_mean": 26.693593978881836, "beta_dpo/gap_std": 35.771141052246094, "beta_dpo/loss_margin_mean": 24.197370529174805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4414210128495843, "grad_norm": 232.77931213378906, "learning_rate": 3.4414181450867465e-07, "logits/chosen": -3.114081382751465, "logits/rejected": -3.1203083992004395, "loss": 0.8163, "step": 292 }, { "beta_dpo/beta": 0.16329149901866913, "beta_dpo/beta_margin_grad_mean": -0.21914827823638916, "beta_dpo/beta_margin_grad_std": 0.32468345761299133, "beta_dpo/beta_margin_mean": 4.863969802856445, "beta_dpo/beta_margin_std": 6.511205673217773, "beta_dpo/beta_used": 0.16329149901866913, "beta_dpo/beta_used_raw": 0.16329149901866913, "beta_dpo/gap_mean": 27.06631088256836, "beta_dpo/gap_std": 36.21523666381836, "beta_dpo/loss_margin_mean": 29.423086166381836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4429327286470144, "grad_norm": 210.0855255126953, "learning_rate": 3.4291561391508185e-07, "logits/chosen": -3.091078281402588, "logits/rejected": -3.123889207839966, "loss": 0.8069, "step": 293 }, { "beta_dpo/beta": 0.2324911504983902, "beta_dpo/beta_margin_grad_mean": -0.290340781211853, "beta_dpo/beta_margin_grad_std": 0.2663319408893585, "beta_dpo/beta_margin_mean": 7.494677543640137, "beta_dpo/beta_margin_std": 11.694056510925293, "beta_dpo/beta_used": 0.2324911504983902, "beta_dpo/beta_used_raw": -0.005689352750778198, "beta_dpo/gap_mean": 27.724380493164062, "beta_dpo/gap_std": 35.48869705200195, "beta_dpo/loss_margin_mean": 29.247159957885742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4444444444444444, "grad_norm": 25.350112915039062, "learning_rate": 3.4168681427203153e-07, "logits/chosen": -3.119723081588745, "logits/rejected": -3.12868070602417, "loss": 0.7012, "step": 294 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4931543469429016, "beta_dpo/beta_margin_grad_std": 0.007606986910104752, "beta_dpo/beta_margin_mean": 0.027391238138079643, "beta_dpo/beta_margin_std": 0.030441265553236008, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04372384026646614, "beta_dpo/gap_mean": 27.602527618408203, "beta_dpo/gap_std": 34.61024475097656, "beta_dpo/loss_margin_mean": 27.391237258911133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4459561602418745, "grad_norm": 2.2545580863952637, "learning_rate": 3.4045544995169125e-07, "logits/chosen": -3.111898422241211, "logits/rejected": -3.147273540496826, "loss": 1.3614, "step": 295 }, { "beta_dpo/beta": 0.3043525218963623, "beta_dpo/beta_margin_grad_mean": -0.23906980454921722, "beta_dpo/beta_margin_grad_std": 0.30588892102241516, "beta_dpo/beta_margin_mean": 9.003376007080078, "beta_dpo/beta_margin_std": 15.752115249633789, "beta_dpo/beta_used": 0.3043525218963623, "beta_dpo/beta_used_raw": 0.3043525218963623, "beta_dpo/gap_mean": 28.519481658935547, "beta_dpo/gap_std": 35.11084747314453, "beta_dpo/loss_margin_mean": 32.819244384765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4474678760393046, "grad_norm": 623.9177856445312, "learning_rate": 3.392215553979679e-07, "logits/chosen": -3.124897003173828, "logits/rejected": -3.137674331665039, "loss": 1.2411, "step": 296 }, { "beta_dpo/beta": 0.23584313690662384, "beta_dpo/beta_margin_grad_mean": -0.21231801807880402, "beta_dpo/beta_margin_grad_std": 0.330021470785141, "beta_dpo/beta_margin_mean": 6.7015581130981445, "beta_dpo/beta_margin_std": 7.194095611572266, "beta_dpo/beta_used": 0.23584313690662384, "beta_dpo/beta_used_raw": 0.23584313690662384, "beta_dpo/gap_mean": 28.46847152709961, "beta_dpo/gap_std": 34.36936950683594, "beta_dpo/loss_margin_mean": 29.151132583618164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4489795918367347, "grad_norm": 383.8264465332031, "learning_rate": 3.3798516512554485e-07, "logits/chosen": -3.0995192527770996, "logits/rejected": -3.104422092437744, "loss": 0.7781, "step": 297 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49327847361564636, "beta_dpo/beta_margin_grad_std": 0.00809707585722208, "beta_dpo/beta_margin_mean": 0.026895053684711456, "beta_dpo/beta_margin_std": 0.032401006668806076, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1754533052444458, "beta_dpo/gap_mean": 28.4078369140625, "beta_dpo/gap_std": 34.15364456176758, "beta_dpo/loss_margin_mean": 26.89505386352539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4504913076341648, "grad_norm": 2.5030081272125244, "learning_rate": 3.367463137189156e-07, "logits/chosen": -3.1021337509155273, "logits/rejected": -3.110154628753662, "loss": 1.3628, "step": 298 }, { "beta_dpo/beta": 0.027304884046316147, "beta_dpo/beta_margin_grad_mean": -0.37381458282470703, "beta_dpo/beta_margin_grad_std": 0.20344248414039612, "beta_dpo/beta_margin_mean": 0.8136813640594482, "beta_dpo/beta_margin_std": 1.3851598501205444, "beta_dpo/beta_used": 0.027304884046316147, "beta_dpo/beta_used_raw": -0.10461076349020004, "beta_dpo/gap_mean": 28.38265037536621, "beta_dpo/gap_std": 34.259788513183594, "beta_dpo/loss_margin_mean": 27.814064025878906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4520030234315949, "grad_norm": 46.56258010864258, "learning_rate": 3.355050358314172e-07, "logits/chosen": -3.117727756500244, "logits/rejected": -3.119908571243286, "loss": 1.0292, "step": 299 }, { "beta_dpo/beta": 0.03998471051454544, "beta_dpo/beta_margin_grad_mean": -0.3602712154388428, "beta_dpo/beta_margin_grad_std": 0.21306991577148438, "beta_dpo/beta_margin_mean": 1.1096277236938477, "beta_dpo/beta_margin_std": 1.8668972253799438, "beta_dpo/beta_used": 0.03998471051454544, "beta_dpo/beta_used_raw": -0.006209302693605423, "beta_dpo/gap_mean": 27.957683563232422, "beta_dpo/gap_std": 34.10755157470703, "beta_dpo/loss_margin_mean": 25.57542610168457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.45351473922902497, "grad_norm": 72.46057891845703, "learning_rate": 3.3426136618426043e-07, "logits/chosen": -3.077998399734497, "logits/rejected": -3.09999942779541, "loss": 0.957, "step": 300 }, { "epoch": 0.45351473922902497, "eval_beta_dpo/beta": 0.008930782787501812, "eval_beta_dpo/beta_margin_grad_mean": -0.48181334137916565, "eval_beta_dpo/beta_margin_grad_std": 0.02493375353515148, "eval_beta_dpo/beta_margin_mean": 0.24725361168384552, "eval_beta_dpo/beta_margin_std": 0.31334593892097473, "eval_beta_dpo/beta_used": 0.008930782787501812, "eval_beta_dpo/beta_used_raw": -0.4499760866165161, "eval_beta_dpo/gap_mean": 27.71484375, "eval_beta_dpo/gap_std": 34.235591888427734, "eval_beta_dpo/loss_margin_mean": 18.54857635498047, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -3.1462209224700928, "eval_logits/rejected": -3.1521239280700684, "eval_loss": 0.6778478622436523, "eval_runtime": 36.3038, "eval_samples_per_second": 63.437, "eval_steps_per_second": 1.983, "step": 300 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4945734441280365, "beta_dpo/beta_margin_grad_std": 0.008833258412778378, "beta_dpo/beta_margin_mean": 0.02171691320836544, "beta_dpo/beta_margin_std": 0.035356417298316956, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.43761324882507324, "beta_dpo/gap_mean": 27.043323516845703, "beta_dpo/gap_std": 34.602012634277344, "beta_dpo/loss_margin_mean": 21.7169132232666, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.455026455026455, "grad_norm": 2.3677213191986084, "learning_rate": 3.3301533956555885e-07, "logits/chosen": -3.0917038917541504, "logits/rejected": -3.093848705291748, "loss": 1.3685, "step": 301 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4958673417568207, "beta_dpo/beta_margin_grad_std": 0.00956540647894144, "beta_dpo/beta_margin_mean": 0.016539258882403374, "beta_dpo/beta_margin_std": 0.03828737139701843, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4286280870437622, "beta_dpo/gap_mean": 25.154075622558594, "beta_dpo/gap_std": 35.083038330078125, "beta_dpo/loss_margin_mean": 16.53925895690918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4565381708238851, "grad_norm": 2.3276662826538086, "learning_rate": 3.317669908293554e-07, "logits/chosen": -3.0932130813598633, "logits/rejected": -3.1112184524536133, "loss": 1.3702, "step": 302 }, { "beta_dpo/beta": 0.22752873599529266, "beta_dpo/beta_margin_grad_mean": -0.2653931975364685, "beta_dpo/beta_margin_grad_std": 0.3641763925552368, "beta_dpo/beta_margin_mean": 5.339740753173828, "beta_dpo/beta_margin_std": 10.414875030517578, "beta_dpo/beta_used": 0.22752873599529266, "beta_dpo/beta_used_raw": 0.22752873599529266, "beta_dpo/gap_mean": 25.054428100585938, "beta_dpo/gap_std": 35.5212287902832, "beta_dpo/loss_margin_mean": 26.166316986083984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4580498866213152, "grad_norm": 575.6218872070312, "learning_rate": 3.3051635489464793e-07, "logits/chosen": -3.102856397628784, "logits/rejected": -3.1255390644073486, "loss": 1.4269, "step": 303 }, { "beta_dpo/beta": 0.2756160795688629, "beta_dpo/beta_margin_grad_mean": -0.14261303842067719, "beta_dpo/beta_margin_grad_std": 0.25643348693847656, "beta_dpo/beta_margin_mean": 8.64633560180664, "beta_dpo/beta_margin_std": 8.03923511505127, "beta_dpo/beta_used": 0.2756160795688629, "beta_dpo/beta_used_raw": 0.2756160795688629, "beta_dpo/gap_mean": 25.673105239868164, "beta_dpo/gap_std": 34.77922821044922, "beta_dpo/loss_margin_mean": 31.018600463867188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4595616024187453, "grad_norm": 258.0193786621094, "learning_rate": 3.292634667444117e-07, "logits/chosen": -3.096405267715454, "logits/rejected": -3.104170799255371, "loss": 0.4785, "step": 304 }, { "beta_dpo/beta": 0.024367928504943848, "beta_dpo/beta_margin_grad_mean": -0.4015085995197296, "beta_dpo/beta_margin_grad_std": 0.1910669356584549, "beta_dpo/beta_margin_mean": 0.6269474625587463, "beta_dpo/beta_margin_std": 1.2529315948486328, "beta_dpo/beta_used": 0.024367928504943848, "beta_dpo/beta_used_raw": 0.008822512812912464, "beta_dpo/gap_mean": 25.67166519165039, "beta_dpo/gap_std": 34.137535095214844, "beta_dpo/loss_margin_mean": 22.330886840820312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46107331821617537, "grad_norm": 56.8546028137207, "learning_rate": 3.280083614246217e-07, "logits/chosen": -3.1241707801818848, "logits/rejected": -3.1085455417633057, "loss": 1.1314, "step": 305 }, { "beta_dpo/beta": 0.12957826256752014, "beta_dpo/beta_margin_grad_mean": -0.3111119866371155, "beta_dpo/beta_margin_grad_std": 0.25792446732521057, "beta_dpo/beta_margin_mean": 3.537123203277588, "beta_dpo/beta_margin_std": 5.821466445922852, "beta_dpo/beta_used": 0.12957826256752014, "beta_dpo/beta_used_raw": -0.16957488656044006, "beta_dpo/gap_mean": 25.289710998535156, "beta_dpo/gap_std": 33.20673751831055, "beta_dpo/loss_margin_mean": 24.207487106323242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46258503401360546, "grad_norm": 100.6487045288086, "learning_rate": 3.267510740432719e-07, "logits/chosen": -3.0455853939056396, "logits/rejected": -3.0711987018585205, "loss": 0.8367, "step": 306 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49670496582984924, "beta_dpo/beta_margin_grad_std": 0.00668869586661458, "beta_dpo/beta_margin_mean": 0.013183152303099632, "beta_dpo/beta_margin_std": 0.02676079049706459, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22974413633346558, "beta_dpo/gap_mean": 23.26692771911621, "beta_dpo/gap_std": 32.258201599121094, "beta_dpo/loss_margin_mean": 13.183152198791504, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46409674981103555, "grad_norm": 2.3683605194091797, "learning_rate": 3.2549163976939285e-07, "logits/chosen": -3.0820600986480713, "logits/rejected": -3.088656425476074, "loss": 1.3688, "step": 307 }, { "beta_dpo/beta": 0.17736674845218658, "beta_dpo/beta_margin_grad_mean": -0.3689655363559723, "beta_dpo/beta_margin_grad_std": 0.2941977381706238, "beta_dpo/beta_margin_mean": 4.363021373748779, "beta_dpo/beta_margin_std": 8.855971336364746, "beta_dpo/beta_used": 0.17736674845218658, "beta_dpo/beta_used_raw": 0.05397101491689682, "beta_dpo/gap_mean": 22.642669677734375, "beta_dpo/gap_std": 31.97774314880371, "beta_dpo/loss_margin_mean": 20.16625213623047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4656084656084656, "grad_norm": 431.4309997558594, "learning_rate": 3.2423009383206874e-07, "logits/chosen": -3.1188559532165527, "logits/rejected": -3.113797426223755, "loss": 1.2251, "step": 308 }, { "beta_dpo/beta": 0.31901365518569946, "beta_dpo/beta_margin_grad_mean": -0.19414803385734558, "beta_dpo/beta_margin_grad_std": 0.33326566219329834, "beta_dpo/beta_margin_mean": 8.445496559143066, "beta_dpo/beta_margin_std": 9.520452499389648, "beta_dpo/beta_used": 0.31901365518569946, "beta_dpo/beta_used_raw": 0.31901365518569946, "beta_dpo/gap_mean": 22.634998321533203, "beta_dpo/gap_std": 31.254989624023438, "beta_dpo/loss_margin_mean": 25.834514617919922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4671201814058957, "grad_norm": 631.443603515625, "learning_rate": 3.229664715194511e-07, "logits/chosen": -3.1062450408935547, "logits/rejected": -3.117509126663208, "loss": 1.2065, "step": 309 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49598562717437744, "beta_dpo/beta_margin_grad_std": 0.007933667860925198, "beta_dpo/beta_margin_mean": 0.016062457114458084, "beta_dpo/beta_margin_std": 0.03174532577395439, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21138682961463928, "beta_dpo/gap_mean": 21.895801544189453, "beta_dpo/gap_std": 31.27450180053711, "beta_dpo/loss_margin_mean": 16.062456130981445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46863189720332576, "grad_norm": 2.2562851905822754, "learning_rate": 3.2170080817777257e-07, "logits/chosen": -3.1440863609313965, "logits/rejected": -3.1391515731811523, "loss": 1.3698, "step": 310 }, { "beta_dpo/beta": 0.04605334252119064, "beta_dpo/beta_margin_grad_mean": -0.3821720480918884, "beta_dpo/beta_margin_grad_std": 0.22194671630859375, "beta_dpo/beta_margin_mean": 0.9799299836158752, "beta_dpo/beta_margin_std": 2.2561373710632324, "beta_dpo/beta_used": 0.04605334252119064, "beta_dpo/beta_used_raw": -0.2480972558259964, "beta_dpo/gap_mean": 21.276159286499023, "beta_dpo/gap_std": 31.672595977783203, "beta_dpo/loss_margin_mean": 17.68033218383789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47014361300075586, "grad_norm": 90.18292236328125, "learning_rate": 3.204331392103574e-07, "logits/chosen": -3.092625141143799, "logits/rejected": -3.1545519828796387, "loss": 1.0585, "step": 311 }, { "beta_dpo/beta": 0.19550317525863647, "beta_dpo/beta_margin_grad_mean": -0.32613497972488403, "beta_dpo/beta_margin_grad_std": 0.2660830020904541, "beta_dpo/beta_margin_mean": 5.628912448883057, "beta_dpo/beta_margin_std": 9.664520263671875, "beta_dpo/beta_used": 0.19550317525863647, "beta_dpo/beta_used_raw": 0.18216973543167114, "beta_dpo/gap_mean": 21.88336944580078, "beta_dpo/gap_std": 31.547531127929688, "beta_dpo/loss_margin_mean": 27.235628128051758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47165532879818595, "grad_norm": 248.5424041748047, "learning_rate": 3.1916350007663176e-07, "logits/chosen": -3.0510053634643555, "logits/rejected": -3.0680179595947266, "loss": 0.8905, "step": 312 }, { "beta_dpo/beta": 0.07774581015110016, "beta_dpo/beta_margin_grad_mean": -0.3854230046272278, "beta_dpo/beta_margin_grad_std": 0.26763999462127686, "beta_dpo/beta_margin_mean": 1.5867866277694702, "beta_dpo/beta_margin_std": 4.212478160858154, "beta_dpo/beta_used": 0.07774581015110016, "beta_dpo/beta_used_raw": -0.010134972631931305, "beta_dpo/gap_mean": 21.556020736694336, "beta_dpo/gap_std": 30.96898651123047, "beta_dpo/loss_margin_mean": 18.918975830078125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47316704459561604, "grad_norm": 182.7671356201172, "learning_rate": 3.178919262911314e-07, "logits/chosen": -3.0897302627563477, "logits/rejected": -3.090061664581299, "loss": 1.2014, "step": 313 }, { "beta_dpo/beta": 0.3596024513244629, "beta_dpo/beta_margin_grad_mean": -0.3246336877346039, "beta_dpo/beta_margin_grad_std": 0.27894648909568787, "beta_dpo/beta_margin_mean": 10.439764022827148, "beta_dpo/beta_margin_std": 17.289690017700195, "beta_dpo/beta_used": 0.3596024513244629, "beta_dpo/beta_used_raw": 0.1643376499414444, "beta_dpo/gap_mean": 22.33397102355957, "beta_dpo/gap_std": 30.98306655883789, "beta_dpo/loss_margin_mean": 26.475399017333984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47467876039304613, "grad_norm": 748.7551879882812, "learning_rate": 3.166184534225087e-07, "logits/chosen": -3.0986547470092773, "logits/rejected": -3.084320545196533, "loss": 1.0381, "step": 314 }, { "beta_dpo/beta": 0.10931921005249023, "beta_dpo/beta_margin_grad_mean": -0.23580533266067505, "beta_dpo/beta_margin_grad_std": 0.2497079223394394, "beta_dpo/beta_margin_mean": 2.5652661323547363, "beta_dpo/beta_margin_std": 2.8321359157562256, "beta_dpo/beta_used": 0.10931921005249023, "beta_dpo/beta_used_raw": 0.10931921005249023, "beta_dpo/gap_mean": 22.541690826416016, "beta_dpo/gap_std": 30.285381317138672, "beta_dpo/loss_margin_mean": 24.391767501831055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47619047619047616, "grad_norm": 132.3946533203125, "learning_rate": 3.1534311709253723e-07, "logits/chosen": -3.1117889881134033, "logits/rejected": -3.1100306510925293, "loss": 0.7136, "step": 315 }, { "beta_dpo/beta": 0.24178995192050934, "beta_dpo/beta_margin_grad_mean": -0.28057804703712463, "beta_dpo/beta_margin_grad_std": 0.2632658779621124, "beta_dpo/beta_margin_mean": 8.426165580749512, "beta_dpo/beta_margin_std": 13.852724075317383, "beta_dpo/beta_used": 0.24178995192050934, "beta_dpo/beta_used_raw": 0.24178995192050934, "beta_dpo/gap_mean": 22.66561508178711, "beta_dpo/gap_std": 29.550121307373047, "beta_dpo/loss_margin_mean": 24.974931716918945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47770219198790626, "grad_norm": 232.22964477539062, "learning_rate": 3.1406595297511564e-07, "logits/chosen": -3.0905113220214844, "logits/rejected": -3.130472183227539, "loss": 0.8534, "step": 316 }, { "beta_dpo/beta": 0.05425131693482399, "beta_dpo/beta_margin_grad_mean": -0.34794288873672485, "beta_dpo/beta_margin_grad_std": 0.22771987318992615, "beta_dpo/beta_margin_mean": 1.4577387571334839, "beta_dpo/beta_margin_std": 2.5639567375183105, "beta_dpo/beta_used": 0.05425131693482399, "beta_dpo/beta_used_raw": -0.01501971110701561, "beta_dpo/gap_mean": 23.502927780151367, "beta_dpo/gap_std": 29.66475486755371, "beta_dpo/loss_margin_mean": 23.33938217163086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47921390778533635, "grad_norm": 134.48892211914062, "learning_rate": 3.1278699679526975e-07, "logits/chosen": -3.080479621887207, "logits/rejected": -3.094151020050049, "loss": 0.9584, "step": 317 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4947490096092224, "beta_dpo/beta_margin_grad_std": 0.007676298264414072, "beta_dpo/beta_margin_mean": 0.021011577919125557, "beta_dpo/beta_margin_std": 0.03071926161646843, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3076367974281311, "beta_dpo/gap_mean": 23.12640380859375, "beta_dpo/gap_std": 30.08043670654297, "beta_dpo/loss_margin_mean": 21.011577606201172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48072562358276644, "grad_norm": 2.19538950920105, "learning_rate": 3.1150628432815336e-07, "logits/chosen": -3.1016392707824707, "logits/rejected": -3.126415491104126, "loss": 1.3702, "step": 318 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49478694796562195, "beta_dpo/beta_margin_grad_std": 0.007146132178604603, "beta_dpo/beta_margin_mean": 0.020857563242316246, "beta_dpo/beta_margin_std": 0.028595075011253357, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11540670692920685, "beta_dpo/gap_mean": 22.578601837158203, "beta_dpo/gap_std": 29.50411605834961, "beta_dpo/loss_margin_mean": 20.857563018798828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48223733938019653, "grad_norm": 2.7037503719329834, "learning_rate": 3.1022385139804707e-07, "logits/chosen": -3.122105121612549, "logits/rejected": -3.1292996406555176, "loss": 1.3676, "step": 319 }, { "beta_dpo/beta": 0.013909117318689823, "beta_dpo/beta_margin_grad_mean": -0.4385174512863159, "beta_dpo/beta_margin_grad_std": 0.12322162836790085, "beta_dpo/beta_margin_mean": 0.2849900722503662, "beta_dpo/beta_margin_std": 0.5963538289070129, "beta_dpo/beta_used": 0.013909117318689823, "beta_dpo/beta_used_raw": -0.02531212382018566, "beta_dpo/gap_mean": 21.585987091064453, "beta_dpo/gap_std": 29.407155990600586, "beta_dpo/loss_margin_mean": 17.481781005859375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4837490551776266, "grad_norm": 31.11320686340332, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -3.0919084548950195, "logits/rejected": -3.0990891456604004, "loss": 1.1796, "step": 320 }, { "beta_dpo/beta": 0.10938524454832077, "beta_dpo/beta_margin_grad_mean": -0.3188154101371765, "beta_dpo/beta_margin_grad_std": 0.26359623670578003, "beta_dpo/beta_margin_mean": 2.4428858757019043, "beta_dpo/beta_margin_std": 5.073835372924805, "beta_dpo/beta_used": 0.10938524454832077, "beta_dpo/beta_used_raw": -0.029399722814559937, "beta_dpo/gap_mean": 21.24488067626953, "beta_dpo/gap_std": 29.5472469329834, "beta_dpo/loss_margin_mean": 18.210344314575195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4852607709750567, "grad_norm": 81.23643493652344, "learning_rate": 3.0765396768561004e-07, "logits/chosen": -3.050200939178467, "logits/rejected": -3.052530288696289, "loss": 0.816, "step": 321 }, { "beta_dpo/beta": 0.33700641989707947, "beta_dpo/beta_margin_grad_mean": -0.19358323514461517, "beta_dpo/beta_margin_grad_std": 0.3171708583831787, "beta_dpo/beta_margin_mean": 9.05479621887207, "beta_dpo/beta_margin_std": 12.222949028015137, "beta_dpo/beta_used": 0.33700641989707947, "beta_dpo/beta_used_raw": 0.33700641989707947, "beta_dpo/gap_mean": 21.0283203125, "beta_dpo/gap_std": 29.169437408447266, "beta_dpo/loss_margin_mean": 23.73666763305664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48677248677248675, "grad_norm": 504.5341796875, "learning_rate": 3.063665887884511e-07, "logits/chosen": -3.0815377235412598, "logits/rejected": -3.115631341934204, "loss": 0.6304, "step": 322 }, { "beta_dpo/beta": 0.04038708657026291, "beta_dpo/beta_margin_grad_mean": -0.40935397148132324, "beta_dpo/beta_margin_grad_std": 0.24039596319198608, "beta_dpo/beta_margin_mean": 0.7104516625404358, "beta_dpo/beta_margin_std": 2.1563162803649902, "beta_dpo/beta_used": 0.04038708657026291, "beta_dpo/beta_used_raw": -0.09349645674228668, "beta_dpo/gap_mean": 21.222349166870117, "beta_dpo/gap_std": 30.24327850341797, "beta_dpo/loss_margin_mean": 20.792327880859375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48828420256991684, "grad_norm": 89.33271789550781, "learning_rate": 3.0507763319663517e-07, "logits/chosen": -3.0897750854492188, "logits/rejected": -3.1133480072021484, "loss": 1.0797, "step": 323 }, { "beta_dpo/beta": 0.06554640829563141, "beta_dpo/beta_margin_grad_mean": -0.34972235560417175, "beta_dpo/beta_margin_grad_std": 0.22968170046806335, "beta_dpo/beta_margin_mean": 1.5705469846725464, "beta_dpo/beta_margin_std": 2.733186960220337, "beta_dpo/beta_used": 0.06554640829563141, "beta_dpo/beta_used_raw": 0.030260443687438965, "beta_dpo/gap_mean": 21.68192481994629, "beta_dpo/gap_std": 30.1425724029541, "beta_dpo/loss_margin_mean": 23.29752540588379, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4897959183673469, "grad_norm": 141.7134246826172, "learning_rate": 3.0378713696502097e-07, "logits/chosen": -3.0900096893310547, "logits/rejected": -3.1014513969421387, "loss": 0.9758, "step": 324 }, { "beta_dpo/beta": 0.1374254822731018, "beta_dpo/beta_margin_grad_mean": -0.33930516242980957, "beta_dpo/beta_margin_grad_std": 0.26967304944992065, "beta_dpo/beta_margin_mean": 4.346703052520752, "beta_dpo/beta_margin_std": 7.695768356323242, "beta_dpo/beta_used": 0.1374254822731018, "beta_dpo/beta_used_raw": 0.08679935336112976, "beta_dpo/gap_mean": 22.803340911865234, "beta_dpo/gap_std": 30.76717758178711, "beta_dpo/loss_margin_mean": 27.983734130859375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.491307634164777, "grad_norm": 266.2857971191406, "learning_rate": 3.0249513619156206e-07, "logits/chosen": -3.080242156982422, "logits/rejected": -3.102264881134033, "loss": 1.0491, "step": 325 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49647924304008484, "beta_dpo/beta_margin_grad_std": 0.0077100652270019054, "beta_dpo/beta_margin_mean": 0.014087283983826637, "beta_dpo/beta_margin_std": 0.03085058182477951, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.25530245900154114, "beta_dpo/gap_mean": 21.672607421875, "beta_dpo/gap_std": 30.98316192626953, "beta_dpo/loss_margin_mean": 14.08728313446045, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4928193499622071, "grad_norm": 2.3197293281555176, "learning_rate": 3.012016670162977e-07, "logits/chosen": -3.093902587890625, "logits/rejected": -3.079530715942383, "loss": 1.3708, "step": 326 }, { "beta_dpo/beta": 0.08799388259649277, "beta_dpo/beta_margin_grad_mean": -0.360331654548645, "beta_dpo/beta_margin_grad_std": 0.24783527851104736, "beta_dpo/beta_margin_mean": 2.034658670425415, "beta_dpo/beta_margin_std": 4.489603042602539, "beta_dpo/beta_used": 0.08799388259649277, "beta_dpo/beta_used_raw": -0.13810209929943085, "beta_dpo/gap_mean": 21.196407318115234, "beta_dpo/gap_std": 31.41318702697754, "beta_dpo/loss_margin_mean": 20.30933380126953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4943310657596372, "grad_norm": 131.3867645263672, "learning_rate": 2.99906765620341e-07, "logits/chosen": -3.1136116981506348, "logits/rejected": -3.109332799911499, "loss": 0.9219, "step": 327 }, { "beta_dpo/beta": 0.18263430893421173, "beta_dpo/beta_margin_grad_mean": -0.27841395139694214, "beta_dpo/beta_margin_grad_std": 0.3048444986343384, "beta_dpo/beta_margin_mean": 3.9657938480377197, "beta_dpo/beta_margin_std": 6.533381938934326, "beta_dpo/beta_used": 0.18263430893421173, "beta_dpo/beta_used_raw": 0.18263430893421173, "beta_dpo/gap_mean": 21.557958602905273, "beta_dpo/gap_std": 31.18227767944336, "beta_dpo/loss_margin_mean": 23.918094635009766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4958427815570673, "grad_norm": 292.41162109375, "learning_rate": 2.9861046822486766e-07, "logits/chosen": -3.1102488040924072, "logits/rejected": -3.1353330612182617, "loss": 0.9977, "step": 328 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49358275532722473, "beta_dpo/beta_margin_grad_std": 0.008446736261248589, "beta_dpo/beta_margin_mean": 0.025679970160126686, "beta_dpo/beta_margin_std": 0.03380631282925606, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14240968227386475, "beta_dpo/gap_mean": 22.238048553466797, "beta_dpo/gap_std": 31.547744750976562, "beta_dpo/loss_margin_mean": 25.679967880249023, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4973544973544973, "grad_norm": 2.604764461517334, "learning_rate": 2.9731281109010253e-07, "logits/chosen": -3.081068992614746, "logits/rejected": -3.1000638008117676, "loss": 1.3684, "step": 329 }, { "beta_dpo/beta": 0.2137632519006729, "beta_dpo/beta_margin_grad_mean": -0.3222753703594208, "beta_dpo/beta_margin_grad_std": 0.26691463589668274, "beta_dpo/beta_margin_mean": 6.505101203918457, "beta_dpo/beta_margin_std": 10.973447799682617, "beta_dpo/beta_used": 0.2137632519006729, "beta_dpo/beta_used_raw": 0.034893929958343506, "beta_dpo/gap_mean": 22.187713623046875, "beta_dpo/gap_std": 31.6619930267334, "beta_dpo/loss_margin_mean": 23.690584182739258, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4988662131519274, "grad_norm": 177.8109130859375, "learning_rate": 2.9601383051430505e-07, "logits/chosen": -3.0457208156585693, "logits/rejected": -3.0508902072906494, "loss": 0.7966, "step": 330 }, { "beta_dpo/beta": 0.422149658203125, "beta_dpo/beta_margin_grad_mean": -0.16604149341583252, "beta_dpo/beta_margin_grad_std": 0.31206512451171875, "beta_dpo/beta_margin_mean": 15.053980827331543, "beta_dpo/beta_margin_std": 18.832624435424805, "beta_dpo/beta_used": 0.422149658203125, "beta_dpo/beta_used_raw": 0.422149658203125, "beta_dpo/gap_mean": 24.266925811767578, "beta_dpo/gap_std": 31.902421951293945, "beta_dpo/loss_margin_mean": 34.640830993652344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5003779289493575, "grad_norm": 952.3717651367188, "learning_rate": 2.947135628327544e-07, "logits/chosen": -3.0645158290863037, "logits/rejected": -3.066699981689453, "loss": 1.2058, "step": 331 }, { "beta_dpo/beta": 0.08614806830883026, "beta_dpo/beta_margin_grad_mean": -0.24676524102687836, "beta_dpo/beta_margin_grad_std": 0.25958746671676636, "beta_dpo/beta_margin_mean": 2.62640118598938, "beta_dpo/beta_margin_std": 3.29146671295166, "beta_dpo/beta_used": 0.08614806830883026, "beta_dpo/beta_used_raw": 0.08614806830883026, "beta_dpo/gap_mean": 25.25523567199707, "beta_dpo/gap_std": 32.544822692871094, "beta_dpo/loss_margin_mean": 28.859230041503906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5018896447467877, "grad_norm": 113.77040100097656, "learning_rate": 2.934120444167326e-07, "logits/chosen": -3.1057910919189453, "logits/rejected": -3.105198383331299, "loss": 0.8209, "step": 332 }, { "beta_dpo/beta": 0.46624481678009033, "beta_dpo/beta_margin_grad_mean": -0.19371409714221954, "beta_dpo/beta_margin_grad_std": 0.36706358194351196, "beta_dpo/beta_margin_mean": 15.309916496276855, "beta_dpo/beta_margin_std": 15.727444648742676, "beta_dpo/beta_used": 0.46624481678009033, "beta_dpo/beta_used_raw": 0.46624481678009033, "beta_dpo/gap_mean": 26.6208553314209, "beta_dpo/gap_std": 32.83556365966797, "beta_dpo/loss_margin_mean": 32.52452850341797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5034013605442177, "grad_norm": 676.2604370117188, "learning_rate": 2.921093116725076e-07, "logits/chosen": -3.084399461746216, "logits/rejected": -3.111693859100342, "loss": 1.1624, "step": 333 }, { "beta_dpo/beta": 0.13088715076446533, "beta_dpo/beta_margin_grad_mean": -0.27541545033454895, "beta_dpo/beta_margin_grad_std": 0.3252545893192291, "beta_dpo/beta_margin_mean": 3.337428092956543, "beta_dpo/beta_margin_std": 5.199645519256592, "beta_dpo/beta_used": 0.13088715076446533, "beta_dpo/beta_used_raw": 0.13088715076446533, "beta_dpo/gap_mean": 26.728729248046875, "beta_dpo/gap_std": 33.057777404785156, "beta_dpo/loss_margin_mean": 25.13127899169922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5049130763416477, "grad_norm": 203.92538452148438, "learning_rate": 2.9080540104031484e-07, "logits/chosen": -3.062560796737671, "logits/rejected": -3.090407609939575, "loss": 0.7656, "step": 334 }, { "beta_dpo/beta": 0.09639487415552139, "beta_dpo/beta_margin_grad_mean": -0.34168577194213867, "beta_dpo/beta_margin_grad_std": 0.2604123055934906, "beta_dpo/beta_margin_mean": 2.6311426162719727, "beta_dpo/beta_margin_std": 4.993647575378418, "beta_dpo/beta_used": 0.09639487415552139, "beta_dpo/beta_used_raw": -0.053624749183654785, "beta_dpo/gap_mean": 26.683143615722656, "beta_dpo/gap_std": 32.86904525756836, "beta_dpo/loss_margin_mean": 26.375513076782227, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5064247921390779, "grad_norm": 98.15483093261719, "learning_rate": 2.895003489933375e-07, "logits/chosen": -3.034583568572998, "logits/rejected": -3.0533030033111572, "loss": 0.9138, "step": 335 }, { "beta_dpo/beta": 0.02283310517668724, "beta_dpo/beta_margin_grad_mean": -0.36344635486602783, "beta_dpo/beta_margin_grad_std": 0.1789086014032364, "beta_dpo/beta_margin_mean": 0.8441472053527832, "beta_dpo/beta_margin_std": 1.3744276762008667, "beta_dpo/beta_used": 0.02283310517668724, "beta_dpo/beta_used_raw": -0.07438748329877853, "beta_dpo/gap_mean": 27.459758758544922, "beta_dpo/gap_std": 33.12605285644531, "beta_dpo/loss_margin_mean": 30.238086700439453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5079365079365079, "grad_norm": 54.78199005126953, "learning_rate": 2.8819419203668675e-07, "logits/chosen": -3.0631165504455566, "logits/rejected": -3.077131748199463, "loss": 1.0358, "step": 336 }, { "beta_dpo/beta": 0.2168283760547638, "beta_dpo/beta_margin_grad_mean": -0.2655988335609436, "beta_dpo/beta_margin_grad_std": 0.370185524225235, "beta_dpo/beta_margin_mean": 5.198827743530273, "beta_dpo/beta_margin_std": 8.442659378051758, "beta_dpo/beta_used": 0.2168283760547638, "beta_dpo/beta_used_raw": 0.2168283760547638, "beta_dpo/gap_mean": 26.88589096069336, "beta_dpo/gap_std": 34.229557037353516, "beta_dpo/loss_margin_mean": 23.979177474975586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.509448223733938, "grad_norm": 383.6023254394531, "learning_rate": 2.8688696670638053e-07, "logits/chosen": -3.093763828277588, "logits/rejected": -3.103431224822998, "loss": 0.8139, "step": 337 }, { "beta_dpo/beta": 0.15525385737419128, "beta_dpo/beta_margin_grad_mean": -0.22968794405460358, "beta_dpo/beta_margin_grad_std": 0.3068045377731323, "beta_dpo/beta_margin_mean": 4.072103977203369, "beta_dpo/beta_margin_std": 6.031183242797852, "beta_dpo/beta_used": 0.15525385737419128, "beta_dpo/beta_used_raw": 0.15525385737419128, "beta_dpo/gap_mean": 26.548250198364258, "beta_dpo/gap_std": 34.126678466796875, "beta_dpo/loss_margin_mean": 26.27375030517578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5109599395313681, "grad_norm": 221.94122314453125, "learning_rate": 2.8557870956832133e-07, "logits/chosen": -3.0307488441467285, "logits/rejected": -3.0351829528808594, "loss": 0.6189, "step": 338 }, { "beta_dpo/beta": 0.09032527357339859, "beta_dpo/beta_margin_grad_mean": -0.35613423585891724, "beta_dpo/beta_margin_grad_std": 0.28700828552246094, "beta_dpo/beta_margin_mean": 2.6703038215637207, "beta_dpo/beta_margin_std": 5.1349921226501465, "beta_dpo/beta_used": 0.09032527357339859, "beta_dpo/beta_used_raw": -0.07975070178508759, "beta_dpo/gap_mean": 26.73078155517578, "beta_dpo/gap_std": 34.21294021606445, "beta_dpo/loss_margin_mean": 27.1737117767334, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5124716553287982, "grad_norm": 277.2163391113281, "learning_rate": 2.842694572172736e-07, "logits/chosen": -3.0108513832092285, "logits/rejected": -3.042417526245117, "loss": 1.2379, "step": 339 }, { "beta_dpo/beta": 0.04224841296672821, "beta_dpo/beta_margin_grad_mean": -0.38084977865219116, "beta_dpo/beta_margin_grad_std": 0.24013683199882507, "beta_dpo/beta_margin_mean": 1.1847941875457764, "beta_dpo/beta_margin_std": 2.4049177169799805, "beta_dpo/beta_used": 0.04224841296672821, "beta_dpo/beta_used_raw": 0.038572296500205994, "beta_dpo/gap_mean": 26.897045135498047, "beta_dpo/gap_std": 34.5772819519043, "beta_dpo/loss_margin_mean": 28.697805404663086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5139833711262283, "grad_norm": 86.58854675292969, "learning_rate": 2.8295924627584004e-07, "logits/chosen": -3.0347347259521484, "logits/rejected": -3.033782958984375, "loss": 1.1256, "step": 340 }, { "beta_dpo/beta": 0.3806644678115845, "beta_dpo/beta_margin_grad_mean": -0.33302900195121765, "beta_dpo/beta_margin_grad_std": 0.3049252927303314, "beta_dpo/beta_margin_mean": 13.137605667114258, "beta_dpo/beta_margin_std": 22.449344635009766, "beta_dpo/beta_used": 0.3806644678115845, "beta_dpo/beta_used_raw": 0.15880805253982544, "beta_dpo/gap_mean": 27.218364715576172, "beta_dpo/gap_std": 34.92621994018555, "beta_dpo/loss_margin_mean": 25.560880661010742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5154950869236583, "grad_norm": 676.5250244140625, "learning_rate": 2.816481133934373e-07, "logits/chosen": -3.041550397872925, "logits/rejected": -3.067988395690918, "loss": 1.6666, "step": 341 }, { "beta_dpo/beta": 0.12396855652332306, "beta_dpo/beta_margin_grad_mean": -0.39350685477256775, "beta_dpo/beta_margin_grad_std": 0.29025062918663025, "beta_dpo/beta_margin_mean": 3.8081181049346924, "beta_dpo/beta_margin_std": 7.531540870666504, "beta_dpo/beta_used": 0.12396855652332306, "beta_dpo/beta_used_raw": -0.028024710714817047, "beta_dpo/gap_mean": 27.129390716552734, "beta_dpo/gap_std": 35.468772888183594, "beta_dpo/loss_margin_mean": 29.53389549255371, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5170068027210885, "grad_norm": 300.1034851074219, "learning_rate": 2.8033609524527046e-07, "logits/chosen": -2.9938724040985107, "logits/rejected": -3.0116021633148193, "loss": 1.3446, "step": 342 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4942605793476105, "beta_dpo/beta_margin_grad_std": 0.007328690029680729, "beta_dpo/beta_margin_mean": 0.022965017706155777, "beta_dpo/beta_margin_std": 0.02932850830256939, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.37825995683670044, "beta_dpo/gap_mean": 26.96743392944336, "beta_dpo/gap_std": 34.50664520263672, "beta_dpo/loss_margin_mean": 22.965015411376953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5185185185185185, "grad_norm": 2.441251039505005, "learning_rate": 2.7902322853130753e-07, "logits/chosen": -3.1121671199798584, "logits/rejected": -3.112107753753662, "loss": 1.3675, "step": 343 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4937654137611389, "beta_dpo/beta_margin_grad_std": 0.008288533426821232, "beta_dpo/beta_margin_mean": 0.024948405101895332, "beta_dpo/beta_margin_std": 0.03317340835928917, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12604382634162903, "beta_dpo/gap_mean": 26.214689254760742, "beta_dpo/gap_std": 34.25294494628906, "beta_dpo/loss_margin_mean": 24.94840431213379, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5200302343159486, "grad_norm": 2.634648323059082, "learning_rate": 2.7770954997525274e-07, "logits/chosen": -3.0305612087249756, "logits/rejected": -3.0571539402008057, "loss": 1.3642, "step": 344 }, { "beta_dpo/beta": 0.4966987371444702, "beta_dpo/beta_margin_grad_mean": -0.27788203954696655, "beta_dpo/beta_margin_grad_std": 0.40353062748908997, "beta_dpo/beta_margin_mean": 13.674936294555664, "beta_dpo/beta_margin_std": 20.05881690979004, "beta_dpo/beta_used": 0.4966987371444702, "beta_dpo/beta_used_raw": 0.4966987371444702, "beta_dpo/gap_mean": 26.283050537109375, "beta_dpo/gap_std": 34.696529388427734, "beta_dpo/loss_margin_mean": 26.915929794311523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5215419501133787, "grad_norm": 643.2703857421875, "learning_rate": 2.7639509632351927e-07, "logits/chosen": -3.063925266265869, "logits/rejected": -3.0827674865722656, "loss": 1.4227, "step": 345 }, { "beta_dpo/beta": 0.12412844598293304, "beta_dpo/beta_margin_grad_mean": -0.27781710028648376, "beta_dpo/beta_margin_grad_std": 0.3056802749633789, "beta_dpo/beta_margin_mean": 3.6865780353546143, "beta_dpo/beta_margin_std": 6.440756797790527, "beta_dpo/beta_used": 0.12412844598293304, "beta_dpo/beta_used_raw": 0.12412844598293304, "beta_dpo/gap_mean": 26.50796127319336, "beta_dpo/gap_std": 34.49790954589844, "beta_dpo/loss_margin_mean": 28.906465530395508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5230536659108088, "grad_norm": 330.2554016113281, "learning_rate": 2.7507990434420123e-07, "logits/chosen": -3.0775437355041504, "logits/rejected": -3.0960750579833984, "loss": 0.9698, "step": 346 }, { "beta_dpo/beta": 0.006778246723115444, "beta_dpo/beta_margin_grad_mean": -0.450185626745224, "beta_dpo/beta_margin_grad_std": 0.08872085064649582, "beta_dpo/beta_margin_mean": 0.21163569390773773, "beta_dpo/beta_margin_std": 0.3837166428565979, "beta_dpo/beta_used": 0.006778246723115444, "beta_dpo/beta_used_raw": -0.2274744063615799, "beta_dpo/gap_mean": 26.954315185546875, "beta_dpo/gap_std": 35.01631164550781, "beta_dpo/loss_margin_mean": 26.906822204589844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5245653817082389, "grad_norm": 16.952098846435547, "learning_rate": 2.737640108260456e-07, "logits/chosen": -3.0027999877929688, "logits/rejected": -3.021895170211792, "loss": 1.2436, "step": 347 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49310407042503357, "beta_dpo/beta_margin_grad_std": 0.008573891595005989, "beta_dpo/beta_margin_mean": 0.027595363557338715, "beta_dpo/beta_margin_std": 0.03431578353047371, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10145647823810577, "beta_dpo/gap_mean": 27.0013427734375, "beta_dpo/gap_std": 34.96954345703125, "beta_dpo/loss_margin_mean": 27.59536361694336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5260770975056689, "grad_norm": 2.287372589111328, "learning_rate": 2.724474525774229e-07, "logits/chosen": -3.0348544120788574, "logits/rejected": -3.048654556274414, "loss": 1.363, "step": 348 }, { "beta_dpo/beta": 0.43581968545913696, "beta_dpo/beta_margin_grad_mean": -0.2282140702009201, "beta_dpo/beta_margin_grad_std": 0.3532249629497528, "beta_dpo/beta_margin_mean": 15.442870140075684, "beta_dpo/beta_margin_std": 23.58800506591797, "beta_dpo/beta_used": 0.43581968545913696, "beta_dpo/beta_used_raw": 0.43581968545913696, "beta_dpo/gap_mean": 27.023746490478516, "beta_dpo/gap_std": 34.86962127685547, "beta_dpo/loss_margin_mean": 30.510757446289062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.527588813303099, "grad_norm": 881.6084594726562, "learning_rate": 2.711302664252973e-07, "logits/chosen": -3.040865898132324, "logits/rejected": -3.05991792678833, "loss": 1.1449, "step": 349 }, { "beta_dpo/beta": 0.18699753284454346, "beta_dpo/beta_margin_grad_mean": -0.20494963228702545, "beta_dpo/beta_margin_grad_std": 0.3321399390697479, "beta_dpo/beta_margin_mean": 5.577596187591553, "beta_dpo/beta_margin_std": 6.836084365844727, "beta_dpo/beta_used": 0.18699753284454346, "beta_dpo/beta_used_raw": 0.18699753284454346, "beta_dpo/gap_mean": 28.044925689697266, "beta_dpo/gap_std": 35.316062927246094, "beta_dpo/loss_margin_mean": 29.924606323242188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5291005291005291, "grad_norm": 306.10894775390625, "learning_rate": 2.698124892141971e-07, "logits/chosen": -3.0329248905181885, "logits/rejected": -3.0526227951049805, "loss": 1.1619, "step": 350 }, { "beta_dpo/beta": 0.1541515588760376, "beta_dpo/beta_margin_grad_mean": -0.25190550088882446, "beta_dpo/beta_margin_grad_std": 0.3161279857158661, "beta_dpo/beta_margin_mean": 4.117360591888428, "beta_dpo/beta_margin_std": 8.284092903137207, "beta_dpo/beta_used": 0.1541515588760376, "beta_dpo/beta_used_raw": 0.1541515588760376, "beta_dpo/gap_mean": 28.019916534423828, "beta_dpo/gap_std": 36.171875, "beta_dpo/loss_margin_mean": 27.668113708496094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5306122448979592, "grad_norm": 276.933837890625, "learning_rate": 2.6849415780518357e-07, "logits/chosen": -3.0552468299865723, "logits/rejected": -3.074361801147461, "loss": 0.9665, "step": 351 }, { "beta_dpo/beta": 0.08424170315265656, "beta_dpo/beta_margin_grad_mean": -0.3242875039577484, "beta_dpo/beta_margin_grad_std": 0.2602542042732239, "beta_dpo/beta_margin_mean": 2.619900703430176, "beta_dpo/beta_margin_std": 4.4611945152282715, "beta_dpo/beta_used": 0.08424170315265656, "beta_dpo/beta_used_raw": 0.0368683747947216, "beta_dpo/gap_mean": 28.124879837036133, "beta_dpo/gap_std": 35.932098388671875, "beta_dpo/loss_margin_mean": 28.422788619995117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5321239606953893, "grad_norm": 190.7087860107422, "learning_rate": 2.6717530907482024e-07, "logits/chosen": -3.0320374965667725, "logits/rejected": -3.0459280014038086, "loss": 1.0425, "step": 352 }, { "beta_dpo/beta": 0.18638579547405243, "beta_dpo/beta_margin_grad_mean": -0.29680606722831726, "beta_dpo/beta_margin_grad_std": 0.2870534360408783, "beta_dpo/beta_margin_mean": 6.207462787628174, "beta_dpo/beta_margin_std": 10.147928237915039, "beta_dpo/beta_used": 0.18638579547405243, "beta_dpo/beta_used_raw": 0.18638579547405243, "beta_dpo/gap_mean": 27.78827667236328, "beta_dpo/gap_std": 35.74189758300781, "beta_dpo/loss_margin_mean": 28.24288558959961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5336356764928194, "grad_norm": 242.77536010742188, "learning_rate": 2.658559799141411e-07, "logits/chosen": -3.063218355178833, "logits/rejected": -3.0526952743530273, "loss": 0.9444, "step": 353 }, { "beta_dpo/beta": 0.23877619206905365, "beta_dpo/beta_margin_grad_mean": -0.34234434366226196, "beta_dpo/beta_margin_grad_std": 0.31028464436531067, "beta_dpo/beta_margin_mean": 8.543641090393066, "beta_dpo/beta_margin_std": 14.812517166137695, "beta_dpo/beta_used": 0.23877619206905365, "beta_dpo/beta_used_raw": -0.002296730875968933, "beta_dpo/gap_mean": 28.852535247802734, "beta_dpo/gap_std": 35.82675552368164, "beta_dpo/loss_margin_mean": 32.170711517333984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5351473922902494, "grad_norm": 581.1064453125, "learning_rate": 2.6453620722761895e-07, "logits/chosen": -3.008025646209717, "logits/rejected": -3.0519351959228516, "loss": 1.4547, "step": 354 }, { "beta_dpo/beta": 0.22954122722148895, "beta_dpo/beta_margin_grad_mean": -0.24678458273410797, "beta_dpo/beta_margin_grad_std": 0.3609658479690552, "beta_dpo/beta_margin_mean": 6.767334461212158, "beta_dpo/beta_margin_std": 9.707292556762695, "beta_dpo/beta_used": 0.22954122722148895, "beta_dpo/beta_used_raw": 0.22954122722148895, "beta_dpo/gap_mean": 28.857769012451172, "beta_dpo/gap_std": 36.830535888671875, "beta_dpo/loss_margin_mean": 30.060108184814453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5366591080876795, "grad_norm": 343.97576904296875, "learning_rate": 2.632160279321328e-07, "logits/chosen": -3.011507749557495, "logits/rejected": -3.050220489501953, "loss": 0.9037, "step": 355 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4941824674606323, "beta_dpo/beta_margin_grad_std": 0.010640624910593033, "beta_dpo/beta_margin_mean": 0.023282045498490334, "beta_dpo/beta_margin_std": 0.0425870306789875, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.31006890535354614, "beta_dpo/gap_mean": 28.550745010375977, "beta_dpo/gap_std": 37.39855194091797, "beta_dpo/loss_margin_mean": 23.28204345703125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5381708238851096, "grad_norm": 2.3557276725769043, "learning_rate": 2.618954789559356e-07, "logits/chosen": -3.0701308250427246, "logits/rejected": -3.1028971672058105, "loss": 1.365, "step": 356 }, { "beta_dpo/beta": 0.20575466752052307, "beta_dpo/beta_margin_grad_mean": -0.3113231658935547, "beta_dpo/beta_margin_grad_std": 0.2871396243572235, "beta_dpo/beta_margin_mean": 7.273690223693848, "beta_dpo/beta_margin_std": 11.427718162536621, "beta_dpo/beta_used": 0.20575466752052307, "beta_dpo/beta_used_raw": 0.0213395357131958, "beta_dpo/gap_mean": 28.180965423583984, "beta_dpo/gap_std": 36.696205139160156, "beta_dpo/loss_margin_mean": 27.05858612060547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5396825396825397, "grad_norm": 166.8899688720703, "learning_rate": 2.6057459723762076e-07, "logits/chosen": -3.0578231811523438, "logits/rejected": -3.068431854248047, "loss": 0.8761, "step": 357 }, { "beta_dpo/beta": 0.33314621448516846, "beta_dpo/beta_margin_grad_mean": -0.1788199245929718, "beta_dpo/beta_margin_grad_std": 0.3286896347999573, "beta_dpo/beta_margin_mean": 11.201356887817383, "beta_dpo/beta_margin_std": 12.418009757995605, "beta_dpo/beta_used": 0.33314621448516846, "beta_dpo/beta_used_raw": 0.33314621448516846, "beta_dpo/gap_mean": 28.484966278076172, "beta_dpo/gap_std": 36.438079833984375, "beta_dpo/loss_margin_mean": 33.80877685546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5411942554799698, "grad_norm": 385.5603942871094, "learning_rate": 2.5925341972508954e-07, "logits/chosen": -3.0701661109924316, "logits/rejected": -3.071199893951416, "loss": 0.7726, "step": 358 }, { "beta_dpo/beta": 0.12201699614524841, "beta_dpo/beta_margin_grad_mean": -0.36096030473709106, "beta_dpo/beta_margin_grad_std": 0.28947874903678894, "beta_dpo/beta_margin_mean": 3.56563138961792, "beta_dpo/beta_margin_std": 7.321877479553223, "beta_dpo/beta_used": 0.12201699614524841, "beta_dpo/beta_used_raw": -0.1470046043395996, "beta_dpo/gap_mean": 27.712411880493164, "beta_dpo/gap_std": 35.134971618652344, "beta_dpo/loss_margin_mean": 22.88962745666504, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5427059712773998, "grad_norm": 316.13494873046875, "learning_rate": 2.579319833745169e-07, "logits/chosen": -3.070073366165161, "logits/rejected": -3.0710277557373047, "loss": 1.2098, "step": 359 }, { "beta_dpo/beta": 0.05261658504605293, "beta_dpo/beta_margin_grad_mean": -0.35027578473091125, "beta_dpo/beta_margin_grad_std": 0.22255617380142212, "beta_dpo/beta_margin_mean": 1.4933468103408813, "beta_dpo/beta_margin_std": 2.535097122192383, "beta_dpo/beta_used": 0.05261658504605293, "beta_dpo/beta_used_raw": 0.05161638185381889, "beta_dpo/gap_mean": 27.89132308959961, "beta_dpo/gap_std": 35.49271774291992, "beta_dpo/loss_margin_mean": 28.442256927490234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54421768707483, "grad_norm": 103.88701629638672, "learning_rate": 2.5661032514931834e-07, "logits/chosen": -3.0532474517822266, "logits/rejected": -3.082444190979004, "loss": 0.9755, "step": 360 }, { "beta_dpo/beta": 0.16896192729473114, "beta_dpo/beta_margin_grad_mean": -0.25768980383872986, "beta_dpo/beta_margin_grad_std": 0.34136348962783813, "beta_dpo/beta_margin_mean": 4.6439409255981445, "beta_dpo/beta_margin_std": 6.361680030822754, "beta_dpo/beta_used": 0.16896192729473114, "beta_dpo/beta_used_raw": 0.16896192729473114, "beta_dpo/gap_mean": 27.861595153808594, "beta_dpo/gap_std": 35.75341796875, "beta_dpo/loss_margin_mean": 27.495941162109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54572940287226, "grad_norm": 217.03408813476562, "learning_rate": 2.552884820191154e-07, "logits/chosen": -3.047079086303711, "logits/rejected": -3.0629630088806152, "loss": 0.8215, "step": 361 }, { "beta_dpo/beta": 0.023837152868509293, "beta_dpo/beta_margin_grad_mean": -0.4032648205757141, "beta_dpo/beta_margin_grad_std": 0.2020334005355835, "beta_dpo/beta_margin_mean": 0.6487870812416077, "beta_dpo/beta_margin_std": 1.410294771194458, "beta_dpo/beta_used": 0.023837152868509293, "beta_dpo/beta_used_raw": -0.07430331408977509, "beta_dpo/gap_mean": 27.96929359436035, "beta_dpo/gap_std": 36.01551818847656, "beta_dpo/loss_margin_mean": 29.493749618530273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54724111866969, "grad_norm": 45.558921813964844, "learning_rate": 2.53966490958702e-07, "logits/chosen": -3.0698437690734863, "logits/rejected": -3.108633518218994, "loss": 1.1054, "step": 362 }, { "beta_dpo/beta": 0.012740159407258034, "beta_dpo/beta_margin_grad_mean": -0.4181475043296814, "beta_dpo/beta_margin_grad_std": 0.1294555813074112, "beta_dpo/beta_margin_mean": 0.4015863537788391, "beta_dpo/beta_margin_std": 0.7025443911552429, "beta_dpo/beta_used": 0.012740159407258034, "beta_dpo/beta_used_raw": -0.04291588068008423, "beta_dpo/gap_mean": 28.640567779541016, "beta_dpo/gap_std": 36.29180908203125, "beta_dpo/loss_margin_mean": 31.320804595947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5487528344671202, "grad_norm": 26.676973342895508, "learning_rate": 2.526443889470099e-07, "logits/chosen": -3.0346968173980713, "logits/rejected": -3.092362642288208, "loss": 1.1394, "step": 363 }, { "beta_dpo/beta": 0.16321328282356262, "beta_dpo/beta_margin_grad_mean": -0.26698994636535645, "beta_dpo/beta_margin_grad_std": 0.33409571647644043, "beta_dpo/beta_margin_mean": 5.1491007804870605, "beta_dpo/beta_margin_std": 7.250192165374756, "beta_dpo/beta_used": 0.16321328282356262, "beta_dpo/beta_used_raw": 0.16321328282356262, "beta_dpo/gap_mean": 29.141733169555664, "beta_dpo/gap_std": 37.76863098144531, "beta_dpo/loss_margin_mean": 32.329715728759766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5502645502645502, "grad_norm": 212.32305908203125, "learning_rate": 2.513222129660744e-07, "logits/chosen": -3.0427498817443848, "logits/rejected": -3.0514578819274902, "loss": 0.8291, "step": 364 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49362868070602417, "beta_dpo/beta_margin_grad_std": 0.00868096761405468, "beta_dpo/beta_margin_mean": 0.025498641654849052, "beta_dpo/beta_margin_std": 0.03475683555006981, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3776160776615143, "beta_dpo/gap_mean": 28.81844139099121, "beta_dpo/gap_std": 37.955848693847656, "beta_dpo/loss_margin_mean": 25.498640060424805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5517762660619804, "grad_norm": 2.6512248516082764, "learning_rate": 2.5e-07, "logits/chosen": -3.06278133392334, "logits/rejected": -3.0636744499206543, "loss": 1.3657, "step": 365 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49392661452293396, "beta_dpo/beta_margin_grad_std": 0.008409538306295872, "beta_dpo/beta_margin_mean": 0.024302352219820023, "beta_dpo/beta_margin_std": 0.03365331143140793, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2066284716129303, "beta_dpo/gap_mean": 27.96464729309082, "beta_dpo/gap_std": 37.263710021972656, "beta_dpo/loss_margin_mean": 24.302350997924805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5532879818594104, "grad_norm": 2.931002378463745, "learning_rate": 2.486777870339255e-07, "logits/chosen": -3.0443501472473145, "logits/rejected": -3.047083854675293, "loss": 1.3638, "step": 366 }, { "beta_dpo/beta": 0.02233603037893772, "beta_dpo/beta_margin_grad_mean": -0.4081335961818695, "beta_dpo/beta_margin_grad_std": 0.19261126220226288, "beta_dpo/beta_margin_mean": 0.5572895407676697, "beta_dpo/beta_margin_std": 1.2073376178741455, "beta_dpo/beta_used": 0.02233603037893772, "beta_dpo/beta_used_raw": -0.29430317878723145, "beta_dpo/gap_mean": 27.48886489868164, "beta_dpo/gap_std": 36.262733459472656, "beta_dpo/loss_margin_mean": 24.916200637817383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5547996976568406, "grad_norm": 69.64478302001953, "learning_rate": 2.4735561105299014e-07, "logits/chosen": -3.0140490531921387, "logits/rejected": -3.0410256385803223, "loss": 1.0924, "step": 367 }, { "beta_dpo/beta": 0.26728492975234985, "beta_dpo/beta_margin_grad_mean": -0.34833693504333496, "beta_dpo/beta_margin_grad_std": 0.301403284072876, "beta_dpo/beta_margin_mean": 8.373762130737305, "beta_dpo/beta_margin_std": 16.855165481567383, "beta_dpo/beta_used": 0.26728492975234985, "beta_dpo/beta_used_raw": 0.2518630623817444, "beta_dpo/gap_mean": 27.578670501708984, "beta_dpo/gap_std": 36.712608337402344, "beta_dpo/loss_margin_mean": 27.794593811035156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5563114134542706, "grad_norm": 449.7203369140625, "learning_rate": 2.46033509041298e-07, "logits/chosen": -3.0827462673187256, "logits/rejected": -3.080749750137329, "loss": 1.1387, "step": 368 }, { "beta_dpo/beta": 0.29284167289733887, "beta_dpo/beta_margin_grad_mean": -0.21756578981876373, "beta_dpo/beta_margin_grad_std": 0.34182238578796387, "beta_dpo/beta_margin_mean": 8.50714111328125, "beta_dpo/beta_margin_std": 10.777283668518066, "beta_dpo/beta_used": 0.29284167289733887, "beta_dpo/beta_used_raw": 0.29284167289733887, "beta_dpo/gap_mean": 27.435359954833984, "beta_dpo/gap_std": 37.216773986816406, "beta_dpo/loss_margin_mean": 28.464027404785156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5578231292517006, "grad_norm": 283.1849365234375, "learning_rate": 2.447115179808846e-07, "logits/chosen": -3.03106689453125, "logits/rejected": -3.041107177734375, "loss": 0.8772, "step": 369 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49333760142326355, "beta_dpo/beta_margin_grad_std": 0.01029953919351101, "beta_dpo/beta_margin_mean": 0.026663921773433685, "beta_dpo/beta_margin_std": 0.04122249782085419, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.25127974152565, "beta_dpo/gap_mean": 27.672956466674805, "beta_dpo/gap_std": 37.77960205078125, "beta_dpo/loss_margin_mean": 26.663921356201172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5593348450491308, "grad_norm": 2.7624871730804443, "learning_rate": 2.4338967485068164e-07, "logits/chosen": -2.998018503189087, "logits/rejected": -3.0110349655151367, "loss": 1.3648, "step": 370 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4933054745197296, "beta_dpo/beta_margin_grad_std": 0.00889476016163826, "beta_dpo/beta_margin_mean": 0.026788493618369102, "beta_dpo/beta_margin_std": 0.035594578832387924, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10879316926002502, "beta_dpo/gap_mean": 27.25868797302246, "beta_dpo/gap_std": 37.62822723388672, "beta_dpo/loss_margin_mean": 26.78849220275879, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5608465608465608, "grad_norm": 2.443422794342041, "learning_rate": 2.420680166254831e-07, "logits/chosen": -3.0092287063598633, "logits/rejected": -3.0086848735809326, "loss": 1.3629, "step": 371 }, { "beta_dpo/beta": 0.2747513949871063, "beta_dpo/beta_margin_grad_mean": -0.3540157377719879, "beta_dpo/beta_margin_grad_std": 0.29853445291519165, "beta_dpo/beta_margin_mean": 9.724706649780273, "beta_dpo/beta_margin_std": 18.833621978759766, "beta_dpo/beta_used": 0.2747513949871063, "beta_dpo/beta_used_raw": -0.10304847359657288, "beta_dpo/gap_mean": 26.12897300720215, "beta_dpo/gap_std": 37.682151794433594, "beta_dpo/loss_margin_mean": 23.0501766204834, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.562358276643991, "grad_norm": 932.7671508789062, "learning_rate": 2.4074658027491044e-07, "logits/chosen": -3.0054891109466553, "logits/rejected": -3.0314760208129883, "loss": 2.2511, "step": 372 }, { "beta_dpo/beta": 0.2764260172843933, "beta_dpo/beta_margin_grad_mean": -0.3450475335121155, "beta_dpo/beta_margin_grad_std": 0.29912158846855164, "beta_dpo/beta_margin_mean": 7.09043025970459, "beta_dpo/beta_margin_std": 18.69756507873535, "beta_dpo/beta_used": 0.2764260172843933, "beta_dpo/beta_used_raw": 0.21868909895420074, "beta_dpo/gap_mean": 26.588321685791016, "beta_dpo/gap_std": 38.78236389160156, "beta_dpo/loss_margin_mean": 26.749818801879883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.563869992441421, "grad_norm": 524.5596923828125, "learning_rate": 2.394254027623792e-07, "logits/chosen": -3.018575668334961, "logits/rejected": -3.024758815765381, "loss": 1.8229, "step": 373 }, { "beta_dpo/beta": 0.38422077894210815, "beta_dpo/beta_margin_grad_mean": -0.15745118260383606, "beta_dpo/beta_margin_grad_std": 0.32486772537231445, "beta_dpo/beta_margin_mean": 14.442462921142578, "beta_dpo/beta_margin_std": 14.842865943908691, "beta_dpo/beta_used": 0.38422077894210815, "beta_dpo/beta_used_raw": 0.38422077894210815, "beta_dpo/gap_mean": 28.249542236328125, "beta_dpo/gap_std": 38.90614318847656, "beta_dpo/loss_margin_mean": 37.55119705200195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5653817082388511, "grad_norm": 693.2622680664062, "learning_rate": 2.381045210440644e-07, "logits/chosen": -3.0473880767822266, "logits/rejected": -3.0371317863464355, "loss": 1.7168, "step": 374 }, { "beta_dpo/beta": 0.057004883885383606, "beta_dpo/beta_margin_grad_mean": -0.3686520457267761, "beta_dpo/beta_margin_grad_std": 0.2601342499256134, "beta_dpo/beta_margin_mean": 1.6653286218643188, "beta_dpo/beta_margin_std": 3.5258891582489014, "beta_dpo/beta_used": 0.057004883885383606, "beta_dpo/beta_used_raw": -0.18324482440948486, "beta_dpo/gap_mean": 28.376663208007812, "beta_dpo/gap_std": 38.72953796386719, "beta_dpo/loss_margin_mean": 25.014204025268555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5668934240362812, "grad_norm": 185.9263916015625, "learning_rate": 2.3678397206786715e-07, "logits/chosen": -3.0153164863586426, "logits/rejected": -3.0256357192993164, "loss": 1.2298, "step": 375 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49177441000938416, "beta_dpo/beta_margin_grad_std": 0.01139663252979517, "beta_dpo/beta_margin_mean": 0.03292226418852806, "beta_dpo/beta_margin_std": 0.04562165588140488, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.01881096512079239, "beta_dpo/gap_mean": 28.85112762451172, "beta_dpo/gap_std": 39.73705291748047, "beta_dpo/loss_margin_mean": 32.92226028442383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5684051398337112, "grad_norm": 2.6528704166412354, "learning_rate": 2.3546379277238103e-07, "logits/chosen": -2.9775443077087402, "logits/rejected": -3.0031423568725586, "loss": 1.36, "step": 376 }, { "beta_dpo/beta": 0.12980836629867554, "beta_dpo/beta_margin_grad_mean": -0.38568049669265747, "beta_dpo/beta_margin_grad_std": 0.30072495341300964, "beta_dpo/beta_margin_mean": 3.0573172569274902, "beta_dpo/beta_margin_std": 8.1069917678833, "beta_dpo/beta_used": 0.12980836629867554, "beta_dpo/beta_used_raw": 0.06634081900119781, "beta_dpo/gap_mean": 28.19588851928711, "beta_dpo/gap_std": 40.07501220703125, "beta_dpo/loss_margin_mean": 24.444398880004883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5699168556311414, "grad_norm": 490.80743408203125, "learning_rate": 2.3414402008585886e-07, "logits/chosen": -2.9569051265716553, "logits/rejected": -2.962362289428711, "loss": 1.138, "step": 377 }, { "beta_dpo/beta": 0.2009427845478058, "beta_dpo/beta_margin_grad_mean": -0.39364734292030334, "beta_dpo/beta_margin_grad_std": 0.31605786085128784, "beta_dpo/beta_margin_mean": 5.381883144378662, "beta_dpo/beta_margin_std": 13.935928344726562, "beta_dpo/beta_used": 0.2009427845478058, "beta_dpo/beta_used_raw": 0.021990105509757996, "beta_dpo/gap_mean": 26.762958526611328, "beta_dpo/gap_std": 39.15788269042969, "beta_dpo/loss_margin_mean": 21.002058029174805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5714285714285714, "grad_norm": 779.2059326171875, "learning_rate": 2.3282469092517977e-07, "logits/chosen": -2.988287925720215, "logits/rejected": -2.997986316680908, "loss": 1.8012, "step": 378 }, { "beta_dpo/beta": 0.29767459630966187, "beta_dpo/beta_margin_grad_mean": -0.20546753704547882, "beta_dpo/beta_margin_grad_std": 0.3395988643169403, "beta_dpo/beta_margin_mean": 9.896245002746582, "beta_dpo/beta_margin_std": 15.230827331542969, "beta_dpo/beta_used": 0.29767459630966187, "beta_dpo/beta_used_raw": 0.29767459630966187, "beta_dpo/gap_mean": 27.635543823242188, "beta_dpo/gap_std": 39.90657043457031, "beta_dpo/loss_margin_mean": 32.11721420288086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5729402872260015, "grad_norm": 325.6419372558594, "learning_rate": 2.3150584219481643e-07, "logits/chosen": -3.069061040878296, "logits/rejected": -3.1075985431671143, "loss": 0.7379, "step": 379 }, { "beta_dpo/beta": 0.39250868558883667, "beta_dpo/beta_margin_grad_mean": -0.1666214019060135, "beta_dpo/beta_margin_grad_std": 0.3332377076148987, "beta_dpo/beta_margin_mean": 14.069883346557617, "beta_dpo/beta_margin_std": 16.81194305419922, "beta_dpo/beta_used": 0.39250868558883667, "beta_dpo/beta_used_raw": 0.39250868558883667, "beta_dpo/gap_mean": 28.805389404296875, "beta_dpo/gap_std": 40.448875427246094, "beta_dpo/loss_margin_mean": 35.44111251831055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5744520030234316, "grad_norm": 638.1084594726562, "learning_rate": 2.3018751078580283e-07, "logits/chosen": -2.9937350749969482, "logits/rejected": -2.993100881576538, "loss": 1.7685, "step": 380 }, { "beta_dpo/beta": 0.32273223996162415, "beta_dpo/beta_margin_grad_mean": -0.400060772895813, "beta_dpo/beta_margin_grad_std": 0.32593268156051636, "beta_dpo/beta_margin_mean": 6.122303009033203, "beta_dpo/beta_margin_std": 22.122623443603516, "beta_dpo/beta_used": 0.32273223996162415, "beta_dpo/beta_used_raw": 0.09878802299499512, "beta_dpo/gap_mean": 27.545970916748047, "beta_dpo/gap_std": 40.81670379638672, "beta_dpo/loss_margin_mean": 17.91020965576172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5759637188208617, "grad_norm": 668.0379028320312, "learning_rate": 2.288697335747027e-07, "logits/chosen": -2.9997167587280273, "logits/rejected": -2.9869627952575684, "loss": 1.4539, "step": 381 }, { "beta_dpo/beta": 0.3062552213668823, "beta_dpo/beta_margin_grad_mean": -0.2633870244026184, "beta_dpo/beta_margin_grad_std": 0.3874484598636627, "beta_dpo/beta_margin_mean": 9.107242584228516, "beta_dpo/beta_margin_std": 14.23564338684082, "beta_dpo/beta_used": 0.3062552213668823, "beta_dpo/beta_used_raw": 0.3062552213668823, "beta_dpo/gap_mean": 26.631755828857422, "beta_dpo/gap_std": 40.66786575317383, "beta_dpo/loss_margin_mean": 26.08523941040039, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5774754346182918, "grad_norm": 366.4364318847656, "learning_rate": 2.2755254742257706e-07, "logits/chosen": -3.0169169902801514, "logits/rejected": -3.03079891204834, "loss": 0.9439, "step": 382 }, { "beta_dpo/beta": 0.03148249536752701, "beta_dpo/beta_margin_grad_mean": -0.4024004638195038, "beta_dpo/beta_margin_grad_std": 0.2572653889656067, "beta_dpo/beta_margin_mean": 0.9517198204994202, "beta_dpo/beta_margin_std": 2.287767171859741, "beta_dpo/beta_used": 0.03148249536752701, "beta_dpo/beta_used_raw": -0.11997513473033905, "beta_dpo/gap_mean": 27.111793518066406, "beta_dpo/gap_std": 41.19513702392578, "beta_dpo/loss_margin_mean": 26.842315673828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5789871504157218, "grad_norm": 132.0910186767578, "learning_rate": 2.2623598917395436e-07, "logits/chosen": -2.999669313430786, "logits/rejected": -2.9849891662597656, "loss": 1.3339, "step": 383 }, { "beta_dpo/beta": 0.11498643457889557, "beta_dpo/beta_margin_grad_mean": -0.30243608355522156, "beta_dpo/beta_margin_grad_std": 0.2668159008026123, "beta_dpo/beta_margin_mean": 4.041726589202881, "beta_dpo/beta_margin_std": 6.5240478515625, "beta_dpo/beta_used": 0.11498643457889557, "beta_dpo/beta_used_raw": 0.11375071108341217, "beta_dpo/gap_mean": 27.615703582763672, "beta_dpo/gap_std": 41.406524658203125, "beta_dpo/loss_margin_mean": 32.565528869628906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5804988662131519, "grad_norm": 219.2327423095703, "learning_rate": 2.2492009565579875e-07, "logits/chosen": -2.9780848026275635, "logits/rejected": -2.9932589530944824, "loss": 1.013, "step": 384 }, { "beta_dpo/beta": 0.3769190013408661, "beta_dpo/beta_margin_grad_mean": -0.2724689245223999, "beta_dpo/beta_margin_grad_std": 0.2645536959171295, "beta_dpo/beta_margin_mean": 14.097354888916016, "beta_dpo/beta_margin_std": 20.308109283447266, "beta_dpo/beta_used": 0.3769190013408661, "beta_dpo/beta_used_raw": 0.36033499240875244, "beta_dpo/gap_mean": 29.11199378967285, "beta_dpo/gap_std": 39.8790283203125, "beta_dpo/loss_margin_mean": 34.771392822265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.582010582010582, "grad_norm": 285.1043701171875, "learning_rate": 2.2360490367648084e-07, "logits/chosen": -3.0191240310668945, "logits/rejected": -3.0336661338806152, "loss": 0.7273, "step": 385 }, { "beta_dpo/beta": 0.19765952229499817, "beta_dpo/beta_margin_grad_mean": -0.3405255973339081, "beta_dpo/beta_margin_grad_std": 0.30749204754829407, "beta_dpo/beta_margin_mean": 4.965733528137207, "beta_dpo/beta_margin_std": 11.302127838134766, "beta_dpo/beta_used": 0.19765952229499817, "beta_dpo/beta_used_raw": -0.18348905444145203, "beta_dpo/gap_mean": 28.022891998291016, "beta_dpo/gap_std": 40.65996170043945, "beta_dpo/loss_margin_mean": 21.723342895507812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5835222978080121, "grad_norm": 191.1682586669922, "learning_rate": 2.2229045002474724e-07, "logits/chosen": -3.016357183456421, "logits/rejected": -3.035351276397705, "loss": 1.1637, "step": 386 }, { "beta_dpo/beta": 0.3863077163696289, "beta_dpo/beta_margin_grad_mean": -0.17502714693546295, "beta_dpo/beta_margin_grad_std": 0.3491981327533722, "beta_dpo/beta_margin_mean": 14.741477966308594, "beta_dpo/beta_margin_std": 15.115537643432617, "beta_dpo/beta_used": 0.3863077163696289, "beta_dpo/beta_used_raw": 0.3863077163696289, "beta_dpo/gap_mean": 29.337215423583984, "beta_dpo/gap_std": 40.27283477783203, "beta_dpo/loss_margin_mean": 38.22383499145508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5850340136054422, "grad_norm": 652.61865234375, "learning_rate": 2.209767714686924e-07, "logits/chosen": -2.9998183250427246, "logits/rejected": -3.038905620574951, "loss": 1.2516, "step": 387 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4945504367351532, "beta_dpo/beta_margin_grad_std": 0.010549246333539486, "beta_dpo/beta_margin_mean": 0.021811524406075478, "beta_dpo/beta_margin_std": 0.04222576692700386, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2677161395549774, "beta_dpo/gap_mean": 28.763246536254883, "beta_dpo/gap_std": 40.887359619140625, "beta_dpo/loss_margin_mean": 21.811521530151367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5865457294028723, "grad_norm": 2.4893360137939453, "learning_rate": 2.1966390475472954e-07, "logits/chosen": -3.0375568866729736, "logits/rejected": -3.0330111980438232, "loss": 1.364, "step": 388 }, { "beta_dpo/beta": 0.2958033084869385, "beta_dpo/beta_margin_grad_mean": -0.21837277710437775, "beta_dpo/beta_margin_grad_std": 0.37667161226272583, "beta_dpo/beta_margin_mean": 10.13884449005127, "beta_dpo/beta_margin_std": 13.346165657043457, "beta_dpo/beta_used": 0.2958033084869385, "beta_dpo/beta_used_raw": 0.2958033084869385, "beta_dpo/gap_mean": 29.039878845214844, "beta_dpo/gap_std": 40.79185485839844, "beta_dpo/loss_margin_mean": 33.79916000366211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5880574452003023, "grad_norm": 706.0709838867188, "learning_rate": 2.1835188660656265e-07, "logits/chosen": -3.015286445617676, "logits/rejected": -3.031208038330078, "loss": 2.2031, "step": 389 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4932987093925476, "beta_dpo/beta_margin_grad_std": 0.009519056417047977, "beta_dpo/beta_margin_mean": 0.026817994192242622, "beta_dpo/beta_margin_std": 0.03809916600584984, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2738952040672302, "beta_dpo/gap_mean": 29.062936782836914, "beta_dpo/gap_std": 40.41197967529297, "beta_dpo/loss_margin_mean": 26.8179931640625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5895691609977324, "grad_norm": 2.219215154647827, "learning_rate": 2.170407537241599e-07, "logits/chosen": -2.981475353240967, "logits/rejected": -2.988084316253662, "loss": 1.3638, "step": 390 }, { "beta_dpo/beta": 0.625146746635437, "beta_dpo/beta_margin_grad_mean": -0.23137876391410828, "beta_dpo/beta_margin_grad_std": 0.37583622336387634, "beta_dpo/beta_margin_mean": 23.871423721313477, "beta_dpo/beta_margin_std": 28.901975631713867, "beta_dpo/beta_used": 0.625146746635437, "beta_dpo/beta_used_raw": 0.625146746635437, "beta_dpo/gap_mean": 30.184946060180664, "beta_dpo/gap_std": 40.649269104003906, "beta_dpo/loss_margin_mean": 37.41094970703125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5910808767951625, "grad_norm": 1463.567626953125, "learning_rate": 2.1573054278272636e-07, "logits/chosen": -3.0244569778442383, "logits/rejected": -3.035482883453369, "loss": 1.5648, "step": 391 }, { "beta_dpo/beta": 0.5046176314353943, "beta_dpo/beta_margin_grad_mean": -0.16831432282924652, "beta_dpo/beta_margin_grad_std": 0.3403349220752716, "beta_dpo/beta_margin_mean": 19.103736877441406, "beta_dpo/beta_margin_std": 22.98224639892578, "beta_dpo/beta_used": 0.5046176314353943, "beta_dpo/beta_used_raw": 0.5046176314353943, "beta_dpo/gap_mean": 30.835647583007812, "beta_dpo/gap_std": 41.523895263671875, "beta_dpo/loss_margin_mean": 36.13017654418945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5925925925925926, "grad_norm": 592.4343872070312, "learning_rate": 2.1442129043167873e-07, "logits/chosen": -3.0418100357055664, "logits/rejected": -3.0473031997680664, "loss": 1.8052, "step": 392 }, { "beta_dpo/beta": 0.2411310076713562, "beta_dpo/beta_margin_grad_mean": -0.3177638351917267, "beta_dpo/beta_margin_grad_std": 0.27696797251701355, "beta_dpo/beta_margin_mean": 10.78797721862793, "beta_dpo/beta_margin_std": 16.936288833618164, "beta_dpo/beta_used": 0.2411310076713562, "beta_dpo/beta_used_raw": 0.0982653796672821, "beta_dpo/gap_mean": 32.383819580078125, "beta_dpo/gap_std": 40.91410827636719, "beta_dpo/loss_margin_mean": 33.14302062988281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5941043083900227, "grad_norm": 452.4428405761719, "learning_rate": 2.131130332936195e-07, "logits/chosen": -3.0123586654663086, "logits/rejected": -3.02311110496521, "loss": 0.966, "step": 393 }, { "beta_dpo/beta": 0.22652791440486908, "beta_dpo/beta_margin_grad_mean": -0.33459609746932983, "beta_dpo/beta_margin_grad_std": 0.29433995485305786, "beta_dpo/beta_margin_mean": 8.418416023254395, "beta_dpo/beta_margin_std": 15.686582565307617, "beta_dpo/beta_used": 0.22652791440486908, "beta_dpo/beta_used_raw": -0.014699012041091919, "beta_dpo/gap_mean": 31.08023452758789, "beta_dpo/gap_std": 40.0783805847168, "beta_dpo/loss_margin_mean": 29.241104125976562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5956160241874527, "grad_norm": 492.2149658203125, "learning_rate": 2.1180580796331323e-07, "logits/chosen": -3.041472911834717, "logits/rejected": -3.0596466064453125, "loss": 1.4461, "step": 394 }, { "beta_dpo/beta": 0.14701415598392487, "beta_dpo/beta_margin_grad_mean": -0.32391709089279175, "beta_dpo/beta_margin_grad_std": 0.26708272099494934, "beta_dpo/beta_margin_mean": 5.0513153076171875, "beta_dpo/beta_margin_std": 9.305150032043457, "beta_dpo/beta_used": 0.14701415598392487, "beta_dpo/beta_used_raw": -0.07249976694583893, "beta_dpo/gap_mean": 30.491722106933594, "beta_dpo/gap_std": 39.65027618408203, "beta_dpo/loss_margin_mean": 27.321931838989258, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5971277399848829, "grad_norm": 392.08123779296875, "learning_rate": 2.104996510066625e-07, "logits/chosen": -2.976252555847168, "logits/rejected": -3.0006847381591797, "loss": 1.0728, "step": 395 }, { "beta_dpo/beta": 0.41436514258384705, "beta_dpo/beta_margin_grad_mean": -0.285800963640213, "beta_dpo/beta_margin_grad_std": 0.2666924297809601, "beta_dpo/beta_margin_mean": 18.4917049407959, "beta_dpo/beta_margin_std": 25.92902946472168, "beta_dpo/beta_used": 0.41436514258384705, "beta_dpo/beta_used_raw": 0.09369680285453796, "beta_dpo/gap_mean": 31.421274185180664, "beta_dpo/gap_std": 38.407318115234375, "beta_dpo/loss_margin_mean": 31.650283813476562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5986394557823129, "grad_norm": 255.34703063964844, "learning_rate": 2.0919459895968517e-07, "logits/chosen": -3.0301437377929688, "logits/rejected": -3.054694414138794, "loss": 0.8463, "step": 396 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49589958786964417, "beta_dpo/beta_margin_grad_std": 0.010871745645999908, "beta_dpo/beta_margin_mean": 0.016410168260335922, "beta_dpo/beta_margin_std": 0.04352058470249176, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2955280542373657, "beta_dpo/gap_mean": 28.55809211730957, "beta_dpo/gap_std": 38.24231719970703, "beta_dpo/loss_margin_mean": 16.410167694091797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.600151171579743, "grad_norm": 2.5978617668151855, "learning_rate": 2.078906883274924e-07, "logits/chosen": -3.0240797996520996, "logits/rejected": -3.0396976470947266, "loss": 1.3646, "step": 397 }, { "beta_dpo/beta": 0.13738001883029938, "beta_dpo/beta_margin_grad_mean": -0.23586338758468628, "beta_dpo/beta_margin_grad_std": 0.3398585915565491, "beta_dpo/beta_margin_mean": 4.376008033752441, "beta_dpo/beta_margin_std": 5.610034942626953, "beta_dpo/beta_used": 0.13738001883029938, "beta_dpo/beta_used_raw": 0.13738001883029938, "beta_dpo/gap_mean": 28.59261703491211, "beta_dpo/gap_std": 39.029197692871094, "beta_dpo/loss_margin_mean": 32.331600189208984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6016628873771731, "grad_norm": 248.6461639404297, "learning_rate": 2.065879555832674e-07, "logits/chosen": -3.0186073780059814, "logits/rejected": -3.0323636531829834, "loss": 0.874, "step": 398 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4914765954017639, "beta_dpo/beta_margin_grad_std": 0.010397281497716904, "beta_dpo/beta_margin_mean": 0.0341142974793911, "beta_dpo/beta_margin_std": 0.041627272963523865, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07908609509468079, "beta_dpo/gap_mean": 29.60391616821289, "beta_dpo/gap_std": 39.34052658081055, "beta_dpo/loss_margin_mean": 34.114295959472656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6031746031746031, "grad_norm": 2.631343364715576, "learning_rate": 2.052864371672457e-07, "logits/chosen": -3.0201542377471924, "logits/rejected": -3.059485673904419, "loss": 1.3601, "step": 399 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49394434690475464, "beta_dpo/beta_margin_grad_std": 0.008329670876264572, "beta_dpo/beta_margin_mean": 0.02423146180808544, "beta_dpo/beta_margin_std": 0.033334020525217056, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.24517488479614258, "beta_dpo/gap_mean": 29.253145217895508, "beta_dpo/gap_std": 38.769901275634766, "beta_dpo/loss_margin_mean": 24.231460571289062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6046863189720333, "grad_norm": 2.8842198848724365, "learning_rate": 2.0398616948569493e-07, "logits/chosen": -3.052142381668091, "logits/rejected": -3.0777342319488525, "loss": 1.3632, "step": 400 }, { "epoch": 0.6046863189720333, "eval_beta_dpo/beta": 0.029446229338645935, "eval_beta_dpo/beta_margin_grad_mean": -0.4573783874511719, "eval_beta_dpo/beta_margin_grad_std": 0.056579407304525375, "eval_beta_dpo/beta_margin_mean": 0.9100630879402161, "eval_beta_dpo/beta_margin_std": 1.195685625076294, "eval_beta_dpo/beta_used": 0.029446229338645935, "eval_beta_dpo/beta_used_raw": -0.3581295311450958, "eval_beta_dpo/gap_mean": 28.76643180847168, "eval_beta_dpo/gap_std": 38.37847137451172, "eval_beta_dpo/loss_margin_mean": 21.130935668945312, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -3.0744099617004395, "eval_logits/rejected": -3.0810747146606445, "eval_loss": 0.7274584174156189, "eval_runtime": 36.2627, "eval_samples_per_second": 63.509, "eval_steps_per_second": 1.986, "step": 400 }, { "beta_dpo/beta": 0.11721974611282349, "beta_dpo/beta_margin_grad_mean": -0.35206279158592224, "beta_dpo/beta_margin_grad_std": 0.28108495473861694, "beta_dpo/beta_margin_mean": 3.3964405059814453, "beta_dpo/beta_margin_std": 7.061282634735107, "beta_dpo/beta_used": 0.11721974611282349, "beta_dpo/beta_used_raw": 0.09774797409772873, "beta_dpo/gap_mean": 29.4639892578125, "beta_dpo/gap_std": 37.854820251464844, "beta_dpo/loss_margin_mean": 32.480064392089844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6061980347694633, "grad_norm": 187.5461883544922, "learning_rate": 2.0268718890989752e-07, "logits/chosen": -2.9957950115203857, "logits/rejected": -3.0160136222839355, "loss": 1.2168, "step": 401 }, { "beta_dpo/beta": 0.05155543237924576, "beta_dpo/beta_margin_grad_mean": -0.3367193043231964, "beta_dpo/beta_margin_grad_std": 0.2195734679698944, "beta_dpo/beta_margin_mean": 1.6674182415008545, "beta_dpo/beta_margin_std": 2.7201528549194336, "beta_dpo/beta_used": 0.05155543237924576, "beta_dpo/beta_used_raw": -0.07895001769065857, "beta_dpo/gap_mean": 29.160747528076172, "beta_dpo/gap_std": 37.533660888671875, "beta_dpo/loss_margin_mean": 25.307361602783203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6077097505668935, "grad_norm": 86.19723510742188, "learning_rate": 2.013895317751323e-07, "logits/chosen": -3.0285935401916504, "logits/rejected": -3.0081098079681396, "loss": 0.9101, "step": 402 }, { "beta_dpo/beta": 0.22628659009933472, "beta_dpo/beta_margin_grad_mean": -0.3428897559642792, "beta_dpo/beta_margin_grad_std": 0.28803175687789917, "beta_dpo/beta_margin_mean": 8.585755348205566, "beta_dpo/beta_margin_std": 15.24824047088623, "beta_dpo/beta_used": 0.22628659009933472, "beta_dpo/beta_used_raw": 0.15616539120674133, "beta_dpo/gap_mean": 29.993492126464844, "beta_dpo/gap_std": 38.41106414794922, "beta_dpo/loss_margin_mean": 38.23483657836914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6092214663643235, "grad_norm": 382.0389404296875, "learning_rate": 2.0009323437965898e-07, "logits/chosen": -2.9947726726531982, "logits/rejected": -3.021214485168457, "loss": 1.1869, "step": 403 }, { "beta_dpo/beta": 0.29033389687538147, "beta_dpo/beta_margin_grad_mean": -0.31872478127479553, "beta_dpo/beta_margin_grad_std": 0.27382490038871765, "beta_dpo/beta_margin_mean": 12.520002365112305, "beta_dpo/beta_margin_std": 21.002424240112305, "beta_dpo/beta_used": 0.29033389687538147, "beta_dpo/beta_used_raw": 0.09338931739330292, "beta_dpo/gap_mean": 31.210363388061523, "beta_dpo/gap_std": 38.91590118408203, "beta_dpo/loss_margin_mean": 32.66154479980469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6107331821617535, "grad_norm": 643.2323608398438, "learning_rate": 1.9879833298370237e-07, "logits/chosen": -3.054755210876465, "logits/rejected": -3.0784716606140137, "loss": 2.2771, "step": 404 }, { "beta_dpo/beta": 0.08867108821868896, "beta_dpo/beta_margin_grad_mean": -0.35953488945961, "beta_dpo/beta_margin_grad_std": 0.26592856645584106, "beta_dpo/beta_margin_mean": 3.2900259494781494, "beta_dpo/beta_margin_std": 5.851770401000977, "beta_dpo/beta_used": 0.08867108821868896, "beta_dpo/beta_used_raw": -0.21567538380622864, "beta_dpo/gap_mean": 30.805099487304688, "beta_dpo/gap_std": 38.458656311035156, "beta_dpo/loss_margin_mean": 28.532512664794922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6122448979591837, "grad_norm": 219.62863159179688, "learning_rate": 1.975048638084379e-07, "logits/chosen": -2.99334979057312, "logits/rejected": -3.013242721557617, "loss": 1.1381, "step": 405 }, { "beta_dpo/beta": 0.48128390312194824, "beta_dpo/beta_margin_grad_mean": -0.36179736256599426, "beta_dpo/beta_margin_grad_std": 0.3154648244380951, "beta_dpo/beta_margin_mean": 17.335620880126953, "beta_dpo/beta_margin_std": 34.6451416015625, "beta_dpo/beta_used": 0.48128390312194824, "beta_dpo/beta_used_raw": 0.39883124828338623, "beta_dpo/gap_mean": 30.695274353027344, "beta_dpo/gap_std": 38.449737548828125, "beta_dpo/loss_margin_mean": 32.28055953979492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6137566137566137, "grad_norm": 532.7105712890625, "learning_rate": 1.9621286303497914e-07, "logits/chosen": -3.000096082687378, "logits/rejected": -3.0485429763793945, "loss": 0.9884, "step": 406 }, { "beta_dpo/beta": 0.18012605607509613, "beta_dpo/beta_margin_grad_mean": -0.2155662477016449, "beta_dpo/beta_margin_grad_std": 0.3102184534072876, "beta_dpo/beta_margin_mean": 5.451420783996582, "beta_dpo/beta_margin_std": 7.179676055908203, "beta_dpo/beta_used": 0.18012605607509613, "beta_dpo/beta_used_raw": 0.18012605607509613, "beta_dpo/gap_mean": 30.456653594970703, "beta_dpo/gap_std": 37.66099166870117, "beta_dpo/loss_margin_mean": 29.90361213684082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6152683295540439, "grad_norm": 104.91793060302734, "learning_rate": 1.9492236680336483e-07, "logits/chosen": -3.095395088195801, "logits/rejected": -3.1127164363861084, "loss": 0.4795, "step": 407 }, { "beta_dpo/beta": 0.1678144782781601, "beta_dpo/beta_margin_grad_mean": -0.27898651361465454, "beta_dpo/beta_margin_grad_std": 0.2516646981239319, "beta_dpo/beta_margin_mean": 7.00124454498291, "beta_dpo/beta_margin_std": 10.054356575012207, "beta_dpo/beta_used": 0.1678144782781601, "beta_dpo/beta_used_raw": 0.11202029883861542, "beta_dpo/gap_mean": 30.880258560180664, "beta_dpo/gap_std": 36.1904182434082, "beta_dpo/loss_margin_mean": 35.354042053222656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6167800453514739, "grad_norm": 217.7000274658203, "learning_rate": 1.9363341121154895e-07, "logits/chosen": -3.02614688873291, "logits/rejected": -3.044498920440674, "loss": 0.9372, "step": 408 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4943253993988037, "beta_dpo/beta_margin_grad_std": 0.00914519652724266, "beta_dpo/beta_margin_mean": 0.022709691897034645, "beta_dpo/beta_margin_std": 0.03660096228122711, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5368869304656982, "beta_dpo/gap_mean": 30.207996368408203, "beta_dpo/gap_std": 36.25111389160156, "beta_dpo/loss_margin_mean": 22.709692001342773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.618291761148904, "grad_norm": 2.2463226318359375, "learning_rate": 1.9234603231438994e-07, "logits/chosen": -3.043461799621582, "logits/rejected": -3.0443849563598633, "loss": 1.367, "step": 409 }, { "beta_dpo/beta": 0.08402707427740097, "beta_dpo/beta_margin_grad_mean": -0.28692978620529175, "beta_dpo/beta_margin_grad_std": 0.24823537468910217, "beta_dpo/beta_margin_mean": 3.4619128704071045, "beta_dpo/beta_margin_std": 5.12234354019165, "beta_dpo/beta_used": 0.08402707427740097, "beta_dpo/beta_used_raw": -0.09456826746463776, "beta_dpo/gap_mean": 30.54737091064453, "beta_dpo/gap_std": 35.517127990722656, "beta_dpo/loss_margin_mean": 32.21046829223633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6198034769463341, "grad_norm": 120.86051940917969, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -3.0650954246520996, "logits/rejected": -3.070061206817627, "loss": 0.9332, "step": 410 }, { "beta_dpo/beta": 0.027193760499358177, "beta_dpo/beta_margin_grad_mean": -0.37177857756614685, "beta_dpo/beta_margin_grad_std": 0.2116282731294632, "beta_dpo/beta_margin_mean": 0.9016957879066467, "beta_dpo/beta_margin_std": 1.7975363731384277, "beta_dpo/beta_used": 0.027193760499358177, "beta_dpo/beta_used_raw": -0.04066654294729233, "beta_dpo/gap_mean": 30.096771240234375, "beta_dpo/gap_std": 36.01396179199219, "beta_dpo/loss_margin_mean": 28.125648498535156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6213151927437641, "grad_norm": 66.50321960449219, "learning_rate": 1.8977614860195296e-07, "logits/chosen": -3.014916181564331, "logits/rejected": -3.0385901927948, "loss": 0.999, "step": 411 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4926305115222931, "beta_dpo/beta_margin_grad_std": 0.0076973093673586845, "beta_dpo/beta_margin_mean": 0.029488172382116318, "beta_dpo/beta_margin_std": 0.030803833156824112, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2814818024635315, "beta_dpo/gap_mean": 29.791940689086914, "beta_dpo/gap_std": 35.42867660522461, "beta_dpo/loss_margin_mean": 29.488170623779297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6228269085411943, "grad_norm": 2.461094379425049, "learning_rate": 1.8849371567184662e-07, "logits/chosen": -3.009608745574951, "logits/rejected": -3.0188848972320557, "loss": 1.3632, "step": 412 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49362313747406006, "beta_dpo/beta_margin_grad_std": 0.010564255528151989, "beta_dpo/beta_margin_mean": 0.025521280243992805, "beta_dpo/beta_margin_std": 0.04228663817048073, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4660704731941223, "beta_dpo/gap_mean": 29.05756378173828, "beta_dpo/gap_std": 36.2657470703125, "beta_dpo/loss_margin_mean": 25.521278381347656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6243386243386243, "grad_norm": 2.5777804851531982, "learning_rate": 1.872130032047302e-07, "logits/chosen": -3.073596477508545, "logits/rejected": -3.076204776763916, "loss": 1.367, "step": 413 }, { "beta_dpo/beta": 0.0779917985200882, "beta_dpo/beta_margin_grad_mean": -0.3404553532600403, "beta_dpo/beta_margin_grad_std": 0.25039440393447876, "beta_dpo/beta_margin_mean": 2.3758316040039062, "beta_dpo/beta_margin_std": 4.421693325042725, "beta_dpo/beta_used": 0.0779917985200882, "beta_dpo/beta_used_raw": -0.03603484481573105, "beta_dpo/gap_mean": 29.065872192382812, "beta_dpo/gap_std": 36.048789978027344, "beta_dpo/loss_margin_mean": 30.15563201904297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6258503401360545, "grad_norm": 127.85913848876953, "learning_rate": 1.8593404702488436e-07, "logits/chosen": -3.034503221511841, "logits/rejected": -3.0492706298828125, "loss": 0.9021, "step": 414 }, { "beta_dpo/beta": 0.05307367071509361, "beta_dpo/beta_margin_grad_mean": -0.35378262400627136, "beta_dpo/beta_margin_grad_std": 0.23300494253635406, "beta_dpo/beta_margin_mean": 1.6274826526641846, "beta_dpo/beta_margin_std": 2.9165117740631104, "beta_dpo/beta_used": 0.05307367071509361, "beta_dpo/beta_used_raw": 0.05307367071509361, "beta_dpo/gap_mean": 29.417644500732422, "beta_dpo/gap_std": 35.23187255859375, "beta_dpo/loss_margin_mean": 31.10089111328125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6273620559334845, "grad_norm": 176.93101501464844, "learning_rate": 1.846568829074628e-07, "logits/chosen": -2.9848177433013916, "logits/rejected": -2.995086669921875, "loss": 1.0372, "step": 415 }, { "beta_dpo/beta": 0.3304360508918762, "beta_dpo/beta_margin_grad_mean": -0.31155529618263245, "beta_dpo/beta_margin_grad_std": 0.28502368927001953, "beta_dpo/beta_margin_mean": 11.89297103881836, "beta_dpo/beta_margin_std": 19.53858184814453, "beta_dpo/beta_used": 0.3304360508918762, "beta_dpo/beta_used_raw": 0.20077964663505554, "beta_dpo/gap_mean": 28.080665588378906, "beta_dpo/gap_std": 35.113502502441406, "beta_dpo/loss_margin_mean": 23.66207504272461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6288737717309146, "grad_norm": 763.5574340820312, "learning_rate": 1.8338154657749128e-07, "logits/chosen": -3.023198127746582, "logits/rejected": -3.030958652496338, "loss": 1.4262, "step": 416 }, { "beta_dpo/beta": 0.14367084205150604, "beta_dpo/beta_margin_grad_mean": -0.19789853692054749, "beta_dpo/beta_margin_grad_std": 0.30493274331092834, "beta_dpo/beta_margin_mean": 4.5488409996032715, "beta_dpo/beta_margin_std": 5.6324896812438965, "beta_dpo/beta_used": 0.14367084205150604, "beta_dpo/beta_used_raw": 0.14367084205150604, "beta_dpo/gap_mean": 28.43101692199707, "beta_dpo/gap_std": 35.292335510253906, "beta_dpo/loss_margin_mean": 30.2642879486084, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6303854875283447, "grad_norm": 211.81895446777344, "learning_rate": 1.8210807370886849e-07, "logits/chosen": -2.993131637573242, "logits/rejected": -3.003584861755371, "loss": 0.7942, "step": 417 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4951647222042084, "beta_dpo/beta_margin_grad_std": 0.00956287793815136, "beta_dpo/beta_margin_mean": 0.019352145493030548, "beta_dpo/beta_margin_std": 0.03827900066971779, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5479909181594849, "beta_dpo/gap_mean": 28.049537658691406, "beta_dpo/gap_std": 35.543701171875, "beta_dpo/loss_margin_mean": 19.352144241333008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6318972033257747, "grad_norm": 2.309333324432373, "learning_rate": 1.8083649992336825e-07, "logits/chosen": -3.0097949504852295, "logits/rejected": -3.013964891433716, "loss": 1.3693, "step": 418 }, { "beta_dpo/beta": 0.24485455453395844, "beta_dpo/beta_margin_grad_mean": -0.3431459069252014, "beta_dpo/beta_margin_grad_std": 0.3013096749782562, "beta_dpo/beta_margin_mean": 9.05129623413086, "beta_dpo/beta_margin_std": 15.675983428955078, "beta_dpo/beta_used": 0.24485455453395844, "beta_dpo/beta_used_raw": 0.19044339656829834, "beta_dpo/gap_mean": 28.351285934448242, "beta_dpo/gap_std": 36.18492126464844, "beta_dpo/loss_margin_mean": 36.2522087097168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6334089191232048, "grad_norm": 535.1361694335938, "learning_rate": 1.7956686078964255e-07, "logits/chosen": -3.008601188659668, "logits/rejected": -3.037018299102783, "loss": 1.331, "step": 419 }, { "beta_dpo/beta": 0.059331752359867096, "beta_dpo/beta_margin_grad_mean": -0.38873177766799927, "beta_dpo/beta_margin_grad_std": 0.2625668942928314, "beta_dpo/beta_margin_mean": 1.455504298210144, "beta_dpo/beta_margin_std": 3.291039228439331, "beta_dpo/beta_used": 0.059331752359867096, "beta_dpo/beta_used_raw": -0.013141274452209473, "beta_dpo/gap_mean": 27.697145462036133, "beta_dpo/gap_std": 36.58726119995117, "beta_dpo/loss_margin_mean": 20.262928009033203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6349206349206349, "grad_norm": 120.16482543945312, "learning_rate": 1.782991918222275e-07, "logits/chosen": -3.011108875274658, "logits/rejected": -3.016571044921875, "loss": 1.1169, "step": 420 }, { "beta_dpo/beta": 0.41717052459716797, "beta_dpo/beta_margin_grad_mean": -0.3158358931541443, "beta_dpo/beta_margin_grad_std": 0.292575865983963, "beta_dpo/beta_margin_mean": 13.852155685424805, "beta_dpo/beta_margin_std": 28.46140480041504, "beta_dpo/beta_used": 0.41717052459716797, "beta_dpo/beta_used_raw": 0.26456567645072937, "beta_dpo/gap_mean": 27.4444580078125, "beta_dpo/gap_std": 37.893394470214844, "beta_dpo/loss_margin_mean": 27.985321044921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.636432350718065, "grad_norm": 733.0397338867188, "learning_rate": 1.7703352848054887e-07, "logits/chosen": -2.9851346015930176, "logits/rejected": -3.0097498893737793, "loss": 2.0376, "step": 421 }, { "beta_dpo/beta": 0.05253172665834427, "beta_dpo/beta_margin_grad_mean": -0.35536831617355347, "beta_dpo/beta_margin_grad_std": 0.2513173818588257, "beta_dpo/beta_margin_mean": 1.5365536212921143, "beta_dpo/beta_margin_std": 2.828378915786743, "beta_dpo/beta_used": 0.05253172665834427, "beta_dpo/beta_used_raw": -0.20523816347122192, "beta_dpo/gap_mean": 26.63359832763672, "beta_dpo/gap_std": 37.69291687011719, "beta_dpo/loss_margin_mean": 24.736282348632812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6379440665154951, "grad_norm": 144.82969665527344, "learning_rate": 1.7576990616793137e-07, "logits/chosen": -3.0424623489379883, "logits/rejected": -3.045719623565674, "loss": 1.0531, "step": 422 }, { "beta_dpo/beta": 0.31688183546066284, "beta_dpo/beta_margin_grad_mean": -0.19457308948040009, "beta_dpo/beta_margin_grad_std": 0.30100706219673157, "beta_dpo/beta_margin_mean": 11.033187866210938, "beta_dpo/beta_margin_std": 17.46510887145996, "beta_dpo/beta_used": 0.31688183546066284, "beta_dpo/beta_used_raw": 0.31688183546066284, "beta_dpo/gap_mean": 27.839336395263672, "beta_dpo/gap_std": 37.669700622558594, "beta_dpo/loss_margin_mean": 33.5172233581543, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6394557823129252, "grad_norm": 999.29296875, "learning_rate": 1.745083602306071e-07, "logits/chosen": -2.9774489402770996, "logits/rejected": -3.0142605304718018, "loss": 1.3781, "step": 423 }, { "beta_dpo/beta": 0.09759822487831116, "beta_dpo/beta_margin_grad_mean": -0.28802889585494995, "beta_dpo/beta_margin_grad_std": 0.22548428177833557, "beta_dpo/beta_margin_mean": 3.754995584487915, "beta_dpo/beta_margin_std": 5.6953959465026855, "beta_dpo/beta_used": 0.09759822487831116, "beta_dpo/beta_used_raw": 0.06761516630649567, "beta_dpo/gap_mean": 29.380714416503906, "beta_dpo/gap_std": 37.17816162109375, "beta_dpo/loss_margin_mean": 36.763824462890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6409674981103552, "grad_norm": 84.32976531982422, "learning_rate": 1.7324892595672804e-07, "logits/chosen": -3.0425024032592773, "logits/rejected": -3.0614097118377686, "loss": 0.8042, "step": 424 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4941061735153198, "beta_dpo/beta_margin_grad_std": 0.00797404907643795, "beta_dpo/beta_margin_mean": 0.02358274534344673, "beta_dpo/beta_margin_std": 0.03190897777676582, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13788114488124847, "beta_dpo/gap_mean": 28.752634048461914, "beta_dpo/gap_std": 36.62226867675781, "beta_dpo/loss_margin_mean": 23.582744598388672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6424792139077853, "grad_norm": 2.602902412414551, "learning_rate": 1.7199163857537824e-07, "logits/chosen": -2.9604344367980957, "logits/rejected": -2.958548069000244, "loss": 1.3619, "step": 425 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4955054521560669, "beta_dpo/beta_margin_grad_std": 0.009816068224608898, "beta_dpo/beta_margin_mean": 0.017988240346312523, "beta_dpo/beta_margin_std": 0.039290908724069595, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.49269723892211914, "beta_dpo/gap_mean": 27.00345230102539, "beta_dpo/gap_std": 36.83736038208008, "beta_dpo/loss_margin_mean": 17.98824119567871, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6439909297052154, "grad_norm": 2.686110734939575, "learning_rate": 1.7073653325558828e-07, "logits/chosen": -2.993440628051758, "logits/rejected": -2.9941649436950684, "loss": 1.3694, "step": 426 }, { "beta_dpo/beta": 0.21917250752449036, "beta_dpo/beta_margin_grad_mean": -0.30823761224746704, "beta_dpo/beta_margin_grad_std": 0.2705315351486206, "beta_dpo/beta_margin_mean": 7.877319812774658, "beta_dpo/beta_margin_std": 13.52176284790039, "beta_dpo/beta_used": 0.21917250752449036, "beta_dpo/beta_used_raw": 0.17343935370445251, "beta_dpo/gap_mean": 27.387710571289062, "beta_dpo/gap_std": 37.20250701904297, "beta_dpo/loss_margin_mean": 33.293609619140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6455026455026455, "grad_norm": 284.2796630859375, "learning_rate": 1.6948364510535218e-07, "logits/chosen": -3.02168345451355, "logits/rejected": -3.048999786376953, "loss": 0.8126, "step": 427 }, { "beta_dpo/beta": 0.3071382939815521, "beta_dpo/beta_margin_grad_mean": -0.351523220539093, "beta_dpo/beta_margin_grad_std": 0.3131656348705292, "beta_dpo/beta_margin_mean": 13.213186264038086, "beta_dpo/beta_margin_std": 23.171049118041992, "beta_dpo/beta_used": 0.3071382939815521, "beta_dpo/beta_used_raw": 0.019071310758590698, "beta_dpo/gap_mean": 27.952465057373047, "beta_dpo/gap_std": 37.053226470947266, "beta_dpo/loss_margin_mean": 32.0423583984375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6470143613000756, "grad_norm": 785.1513671875, "learning_rate": 1.6823300917064458e-07, "logits/chosen": -3.028686285018921, "logits/rejected": -3.0231781005859375, "loss": 2.2194, "step": 428 }, { "beta_dpo/beta": 0.40533530712127686, "beta_dpo/beta_margin_grad_mean": -0.23250898718833923, "beta_dpo/beta_margin_grad_std": 0.35834386944770813, "beta_dpo/beta_margin_mean": 12.332311630249023, "beta_dpo/beta_margin_std": 21.895021438598633, "beta_dpo/beta_used": 0.40533530712127686, "beta_dpo/beta_used_raw": 0.40533530712127686, "beta_dpo/gap_mean": 28.197933197021484, "beta_dpo/gap_std": 37.31829833984375, "beta_dpo/loss_margin_mean": 26.527587890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6485260770975056, "grad_norm": 386.2767639160156, "learning_rate": 1.669846604344412e-07, "logits/chosen": -2.974191904067993, "logits/rejected": -2.967235565185547, "loss": 0.6899, "step": 429 }, { "beta_dpo/beta": 0.24217864871025085, "beta_dpo/beta_margin_grad_mean": -0.19283412396907806, "beta_dpo/beta_margin_grad_std": 0.3163716197013855, "beta_dpo/beta_margin_mean": 8.279568672180176, "beta_dpo/beta_margin_std": 10.411409378051758, "beta_dpo/beta_used": 0.24217864871025085, "beta_dpo/beta_used_raw": 0.24217864871025085, "beta_dpo/gap_mean": 29.146041870117188, "beta_dpo/gap_std": 36.97269058227539, "beta_dpo/loss_margin_mean": 31.7739200592041, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6500377928949358, "grad_norm": 472.55072021484375, "learning_rate": 1.6573863381573954e-07, "logits/chosen": -2.986631393432617, "logits/rejected": -2.97794771194458, "loss": 0.663, "step": 430 }, { "beta_dpo/beta": 0.10047898441553116, "beta_dpo/beta_margin_grad_mean": -0.3267359733581543, "beta_dpo/beta_margin_grad_std": 0.24772731959819794, "beta_dpo/beta_margin_mean": 2.857672929763794, "beta_dpo/beta_margin_std": 4.949996471405029, "beta_dpo/beta_used": 0.10047898441553116, "beta_dpo/beta_used_raw": -0.2611073851585388, "beta_dpo/gap_mean": 28.47191619873047, "beta_dpo/gap_std": 36.198570251464844, "beta_dpo/loss_margin_mean": 26.27246856689453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6515495086923658, "grad_norm": 55.17536544799805, "learning_rate": 1.6449496416858282e-07, "logits/chosen": -3.016430377960205, "logits/rejected": -3.0456418991088867, "loss": 0.8078, "step": 431 }, { "beta_dpo/beta": 0.22804366052150726, "beta_dpo/beta_margin_grad_mean": -0.20772166550159454, "beta_dpo/beta_margin_grad_std": 0.3391772210597992, "beta_dpo/beta_margin_mean": 6.465082168579102, "beta_dpo/beta_margin_std": 8.044727325439453, "beta_dpo/beta_used": 0.22804366052150726, "beta_dpo/beta_used_raw": 0.22804366052150726, "beta_dpo/gap_mean": 28.469558715820312, "beta_dpo/gap_std": 35.54988098144531, "beta_dpo/loss_margin_mean": 27.847829818725586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6530612244897959, "grad_norm": 517.2797241210938, "learning_rate": 1.632536862810844e-07, "logits/chosen": -3.02810001373291, "logits/rejected": -3.046234130859375, "loss": 1.0916, "step": 432 }, { "beta_dpo/beta": 0.5039178729057312, "beta_dpo/beta_margin_grad_mean": -0.18501636385917664, "beta_dpo/beta_margin_grad_std": 0.3553808629512787, "beta_dpo/beta_margin_mean": 18.54363250732422, "beta_dpo/beta_margin_std": 20.644752502441406, "beta_dpo/beta_used": 0.5039178729057312, "beta_dpo/beta_used_raw": 0.5039178729057312, "beta_dpo/gap_mean": 29.425575256347656, "beta_dpo/gap_std": 36.11632537841797, "beta_dpo/loss_margin_mean": 36.797874450683594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.654572940287226, "grad_norm": 758.8828735351562, "learning_rate": 1.6201483487445515e-07, "logits/chosen": -2.982597589492798, "logits/rejected": -2.9782962799072266, "loss": 1.5323, "step": 433 }, { "beta_dpo/beta": 0.04688744619488716, "beta_dpo/beta_margin_grad_mean": -0.3494400978088379, "beta_dpo/beta_margin_grad_std": 0.2307935357093811, "beta_dpo/beta_margin_mean": 1.8151441812515259, "beta_dpo/beta_margin_std": 3.081648349761963, "beta_dpo/beta_used": 0.04688744619488716, "beta_dpo/beta_used_raw": -0.08956971764564514, "beta_dpo/gap_mean": 30.386577606201172, "beta_dpo/gap_std": 36.93446350097656, "beta_dpo/loss_margin_mean": 30.103981018066406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.656084656084656, "grad_norm": 147.99685668945312, "learning_rate": 1.6077844460203204e-07, "logits/chosen": -2.9807381629943848, "logits/rejected": -3.004141330718994, "loss": 1.0338, "step": 434 }, { "beta_dpo/beta": 0.1664637327194214, "beta_dpo/beta_margin_grad_mean": -0.21661928296089172, "beta_dpo/beta_margin_grad_std": 0.2999856173992157, "beta_dpo/beta_margin_mean": 5.165381908416748, "beta_dpo/beta_margin_std": 6.401003360748291, "beta_dpo/beta_used": 0.1664637327194214, "beta_dpo/beta_used_raw": 0.1664637327194214, "beta_dpo/gap_mean": 29.894763946533203, "beta_dpo/gap_std": 37.02755355834961, "beta_dpo/loss_margin_mean": 30.329971313476562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6575963718820862, "grad_norm": 153.4250030517578, "learning_rate": 1.5954455004830878e-07, "logits/chosen": -2.9743881225585938, "logits/rejected": -2.9850637912750244, "loss": 0.5309, "step": 435 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4939667284488678, "beta_dpo/beta_margin_grad_std": 0.009263965301215649, "beta_dpo/beta_margin_mean": 0.024143004789948463, "beta_dpo/beta_margin_std": 0.03707313910126686, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08352816104888916, "beta_dpo/gap_mean": 29.20709991455078, "beta_dpo/gap_std": 36.946022033691406, "beta_dpo/loss_margin_mean": 24.143003463745117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6591080876795162, "grad_norm": 2.9641666412353516, "learning_rate": 1.5831318572796847e-07, "logits/chosen": -2.9855284690856934, "logits/rejected": -2.9985339641571045, "loss": 1.3605, "step": 436 }, { "beta_dpo/beta": 0.12375855445861816, "beta_dpo/beta_margin_grad_mean": -0.31876465678215027, "beta_dpo/beta_margin_grad_std": 0.2591817378997803, "beta_dpo/beta_margin_mean": 4.931870937347412, "beta_dpo/beta_margin_std": 8.040031433105469, "beta_dpo/beta_used": 0.12375855445861816, "beta_dpo/beta_used_raw": -0.06239933520555496, "beta_dpo/gap_mean": 29.688655853271484, "beta_dpo/gap_std": 37.500450134277344, "beta_dpo/loss_margin_mean": 32.000179290771484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6606198034769464, "grad_norm": 202.52598571777344, "learning_rate": 1.5708438608491815e-07, "logits/chosen": -2.9799609184265137, "logits/rejected": -3.0229856967926025, "loss": 0.9282, "step": 437 }, { "beta_dpo/beta": 0.15766139328479767, "beta_dpo/beta_margin_grad_mean": -0.2849089503288269, "beta_dpo/beta_margin_grad_std": 0.24615149199962616, "beta_dpo/beta_margin_mean": 5.739384174346924, "beta_dpo/beta_margin_std": 8.78180980682373, "beta_dpo/beta_used": 0.15766139328479767, "beta_dpo/beta_used_raw": 0.023802101612091064, "beta_dpo/gap_mean": 28.854013442993164, "beta_dpo/gap_std": 37.47654724121094, "beta_dpo/loss_margin_mean": 28.225496292114258, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6621315192743764, "grad_norm": 140.12808227539062, "learning_rate": 1.558581854913253e-07, "logits/chosen": -2.959249496459961, "logits/rejected": -2.969465970993042, "loss": 0.7395, "step": 438 }, { "beta_dpo/beta": 0.2725057303905487, "beta_dpo/beta_margin_grad_mean": -0.22866128385066986, "beta_dpo/beta_margin_grad_std": 0.32659637928009033, "beta_dpo/beta_margin_mean": 9.943758010864258, "beta_dpo/beta_margin_std": 15.145366668701172, "beta_dpo/beta_used": 0.2725057303905487, "beta_dpo/beta_used_raw": 0.2725057303905487, "beta_dpo/gap_mean": 29.03016471862793, "beta_dpo/gap_std": 37.50492477416992, "beta_dpo/loss_margin_mean": 30.412309646606445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6636432350718064, "grad_norm": 166.89295959472656, "learning_rate": 1.5463461824665658e-07, "logits/chosen": -3.059014320373535, "logits/rejected": -3.0698959827423096, "loss": 0.5287, "step": 439 }, { "beta_dpo/beta": 0.15036743879318237, "beta_dpo/beta_margin_grad_mean": -0.3440133035182953, "beta_dpo/beta_margin_grad_std": 0.29287025332450867, "beta_dpo/beta_margin_mean": 4.517815589904785, "beta_dpo/beta_margin_std": 8.261082649230957, "beta_dpo/beta_used": 0.15036743879318237, "beta_dpo/beta_used_raw": 0.10082431882619858, "beta_dpo/gap_mean": 30.075698852539062, "beta_dpo/gap_std": 37.652427673339844, "beta_dpo/loss_margin_mean": 32.3798713684082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6651549508692366, "grad_norm": 454.4780578613281, "learning_rate": 1.534137185767178e-07, "logits/chosen": -2.961894989013672, "logits/rejected": -2.9933114051818848, "loss": 1.3947, "step": 440 }, { "beta_dpo/beta": 0.06749773770570755, "beta_dpo/beta_margin_grad_mean": -0.3118380308151245, "beta_dpo/beta_margin_grad_std": 0.22741641104221344, "beta_dpo/beta_margin_mean": 2.2036492824554443, "beta_dpo/beta_margin_std": 3.3733274936676025, "beta_dpo/beta_used": 0.06749773770570755, "beta_dpo/beta_used_raw": -0.059857144951820374, "beta_dpo/gap_mean": 30.235416412353516, "beta_dpo/gap_std": 36.371238708496094, "beta_dpo/loss_margin_mean": 31.540372848510742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6666666666666666, "grad_norm": 61.62382888793945, "learning_rate": 1.521955206326976e-07, "logits/chosen": -2.981652021408081, "logits/rejected": -3.013087749481201, "loss": 0.8221, "step": 441 }, { "beta_dpo/beta": 0.08830945193767548, "beta_dpo/beta_margin_grad_mean": -0.3359754979610443, "beta_dpo/beta_margin_grad_std": 0.26238736510276794, "beta_dpo/beta_margin_mean": 2.6826870441436768, "beta_dpo/beta_margin_std": 4.5776567459106445, "beta_dpo/beta_used": 0.08830945193767548, "beta_dpo/beta_used_raw": -0.2134900987148285, "beta_dpo/gap_mean": 29.962265014648438, "beta_dpo/gap_std": 35.663848876953125, "beta_dpo/loss_margin_mean": 28.262073516845703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6681783824640968, "grad_norm": 205.87567138671875, "learning_rate": 1.5098005849021078e-07, "logits/chosen": -3.003020763397217, "logits/rejected": -3.0152530670166016, "loss": 1.0074, "step": 442 }, { "beta_dpo/beta": 0.14007267355918884, "beta_dpo/beta_margin_grad_mean": -0.35146015882492065, "beta_dpo/beta_margin_grad_std": 0.2797658443450928, "beta_dpo/beta_margin_mean": 4.511338710784912, "beta_dpo/beta_margin_std": 8.455418586730957, "beta_dpo/beta_used": 0.14007267355918884, "beta_dpo/beta_used_raw": -0.07340739667415619, "beta_dpo/gap_mean": 29.874935150146484, "beta_dpo/gap_std": 35.72840118408203, "beta_dpo/loss_margin_mean": 29.933927536010742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6696900982615268, "grad_norm": 320.9236145019531, "learning_rate": 1.4976736614834662e-07, "logits/chosen": -2.984293222427368, "logits/rejected": -3.0145263671875, "loss": 1.2493, "step": 443 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4955978989601135, "beta_dpo/beta_margin_grad_std": 0.009687363170087337, "beta_dpo/beta_margin_mean": 0.017617596313357353, "beta_dpo/beta_margin_std": 0.03877225145697594, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3052191138267517, "beta_dpo/gap_mean": 28.419496536254883, "beta_dpo/gap_std": 36.03498840332031, "beta_dpo/loss_margin_mean": 17.617595672607422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.671201814058957, "grad_norm": 2.4595789909362793, "learning_rate": 1.4855747752871654e-07, "logits/chosen": -2.986158847808838, "logits/rejected": -3.02463960647583, "loss": 1.365, "step": 444 }, { "beta_dpo/beta": 0.22040501236915588, "beta_dpo/beta_margin_grad_mean": -0.23939883708953857, "beta_dpo/beta_margin_grad_std": 0.3706842064857483, "beta_dpo/beta_margin_mean": 6.504096031188965, "beta_dpo/beta_margin_std": 8.581731796264648, "beta_dpo/beta_used": 0.22040501236915588, "beta_dpo/beta_used_raw": 0.22040501236915588, "beta_dpo/gap_mean": 27.95121192932129, "beta_dpo/gap_std": 36.85621643066406, "beta_dpo/loss_margin_mean": 29.340797424316406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.672713529856387, "grad_norm": 393.7691955566406, "learning_rate": 1.473504264745062e-07, "logits/chosen": -2.961167335510254, "logits/rejected": -2.9654603004455566, "loss": 1.0141, "step": 445 }, { "beta_dpo/beta": 0.340822696685791, "beta_dpo/beta_margin_grad_mean": -0.2775871157646179, "beta_dpo/beta_margin_grad_std": 0.2622576355934143, "beta_dpo/beta_margin_mean": 16.909029006958008, "beta_dpo/beta_margin_std": 24.973739624023438, "beta_dpo/beta_used": 0.340822696685791, "beta_dpo/beta_used_raw": 0.294472873210907, "beta_dpo/gap_mean": 28.8350830078125, "beta_dpo/gap_std": 36.57999038696289, "beta_dpo/loss_margin_mean": 37.94091033935547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.674225245653817, "grad_norm": 358.96331787109375, "learning_rate": 1.461462467495284e-07, "logits/chosen": -2.9686427116394043, "logits/rejected": -2.9872775077819824, "loss": 1.6237, "step": 446 }, { "beta_dpo/beta": 0.44366323947906494, "beta_dpo/beta_margin_grad_mean": -0.1775507628917694, "beta_dpo/beta_margin_grad_std": 0.3296290636062622, "beta_dpo/beta_margin_mean": 16.851099014282227, "beta_dpo/beta_margin_std": 23.538476943969727, "beta_dpo/beta_used": 0.44366323947906494, "beta_dpo/beta_used_raw": 0.44366323947906494, "beta_dpo/gap_mean": 30.866836547851562, "beta_dpo/gap_std": 37.17848587036133, "beta_dpo/loss_margin_mean": 36.8920783996582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6757369614512472, "grad_norm": 504.9360656738281, "learning_rate": 1.4494497203727843e-07, "logits/chosen": -2.979090690612793, "logits/rejected": -3.029752492904663, "loss": 1.3514, "step": 447 }, { "beta_dpo/beta": 0.1897728443145752, "beta_dpo/beta_margin_grad_mean": -0.18827150762081146, "beta_dpo/beta_margin_grad_std": 0.3237147629261017, "beta_dpo/beta_margin_mean": 6.277503967285156, "beta_dpo/beta_margin_std": 6.867645263671875, "beta_dpo/beta_used": 0.1897728443145752, "beta_dpo/beta_used_raw": 0.1897728443145752, "beta_dpo/gap_mean": 31.5931396484375, "beta_dpo/gap_std": 37.23057556152344, "beta_dpo/loss_margin_mean": 33.25175094604492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6772486772486772, "grad_norm": 248.3333740234375, "learning_rate": 1.4374663593999256e-07, "logits/chosen": -2.9631669521331787, "logits/rejected": -2.976621150970459, "loss": 0.8557, "step": 448 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49524787068367004, "beta_dpo/beta_margin_grad_std": 0.0076606497168540955, "beta_dpo/beta_margin_mean": 0.019013898447155952, "beta_dpo/beta_margin_std": 0.03065245971083641, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4072267711162567, "beta_dpo/gap_mean": 29.96826171875, "beta_dpo/gap_std": 36.12980651855469, "beta_dpo/loss_margin_mean": 19.013896942138672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6787603930461074, "grad_norm": 2.6026723384857178, "learning_rate": 1.4255127197770707e-07, "logits/chosen": -3.0260300636291504, "logits/rejected": -3.0174269676208496, "loss": 1.365, "step": 449 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4947327673435211, "beta_dpo/beta_margin_grad_std": 0.008629199117422104, "beta_dpo/beta_margin_mean": 0.02107871323823929, "beta_dpo/beta_margin_std": 0.03453676775097847, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6087408065795898, "beta_dpo/gap_mean": 28.068256378173828, "beta_dpo/gap_std": 35.76659393310547, "beta_dpo/loss_margin_mean": 21.078712463378906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6802721088435374, "grad_norm": 2.508104085922241, "learning_rate": 1.4135891358732205e-07, "logits/chosen": -2.941275119781494, "logits/rejected": -2.997265577316284, "loss": 1.3703, "step": 450 }, { "beta_dpo/beta": 0.051626212894916534, "beta_dpo/beta_margin_grad_mean": -0.3609742522239685, "beta_dpo/beta_margin_grad_std": 0.22043851017951965, "beta_dpo/beta_margin_mean": 1.607721209526062, "beta_dpo/beta_margin_std": 3.0264225006103516, "beta_dpo/beta_used": 0.051626212894916534, "beta_dpo/beta_used_raw": -0.07908162474632263, "beta_dpo/gap_mean": 27.535343170166016, "beta_dpo/gap_std": 35.23435974121094, "beta_dpo/loss_margin_mean": 27.886857986450195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6817838246409675, "grad_norm": 67.10790252685547, "learning_rate": 1.4016959412166437e-07, "logits/chosen": -2.9985599517822266, "logits/rejected": -3.016568183898926, "loss": 0.983, "step": 451 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49271759390830994, "beta_dpo/beta_margin_grad_std": 0.009384777396917343, "beta_dpo/beta_margin_mean": 0.029145939275622368, "beta_dpo/beta_margin_std": 0.037574782967567444, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1551307588815689, "beta_dpo/gap_mean": 27.912967681884766, "beta_dpo/gap_std": 35.67900848388672, "beta_dpo/loss_margin_mean": 29.145936965942383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6832955404383976, "grad_norm": 2.733677387237549, "learning_rate": 1.3898334684855645e-07, "logits/chosen": -2.980307102203369, "logits/rejected": -3.0058016777038574, "loss": 1.363, "step": 452 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4922681748867035, "beta_dpo/beta_margin_grad_std": 0.009023171849548817, "beta_dpo/beta_margin_mean": 0.030942685902118683, "beta_dpo/beta_margin_std": 0.03612072020769119, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.34397417306900024, "beta_dpo/gap_mean": 28.23801040649414, "beta_dpo/gap_std": 35.68151092529297, "beta_dpo/loss_margin_mean": 30.942684173583984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6848072562358276, "grad_norm": 2.8947949409484863, "learning_rate": 1.3780020494988445e-07, "logits/chosen": -2.9505789279937744, "logits/rejected": -2.9637527465820312, "loss": 1.3658, "step": 453 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49272945523262024, "beta_dpo/beta_margin_grad_std": 0.00977272354066372, "beta_dpo/beta_margin_mean": 0.029096750542521477, "beta_dpo/beta_margin_std": 0.039117682725191116, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2079845815896988, "beta_dpo/gap_mean": 28.801530838012695, "beta_dpo/gap_std": 36.700557708740234, "beta_dpo/loss_margin_mean": 29.096750259399414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6863189720332578, "grad_norm": 2.9485905170440674, "learning_rate": 1.366202015206706e-07, "logits/chosen": -2.9418320655822754, "logits/rejected": -2.953274726867676, "loss": 1.363, "step": 454 }, { "beta_dpo/beta": 0.04326212778687477, "beta_dpo/beta_margin_grad_mean": -0.3606717586517334, "beta_dpo/beta_margin_grad_std": 0.23235774040222168, "beta_dpo/beta_margin_mean": 1.6770412921905518, "beta_dpo/beta_margin_std": 2.9725475311279297, "beta_dpo/beta_used": 0.04326212778687477, "beta_dpo/beta_used_raw": -0.25650086998939514, "beta_dpo/gap_mean": 29.796241760253906, "beta_dpo/gap_std": 37.28318405151367, "beta_dpo/loss_margin_mean": 35.360774993896484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6878306878306878, "grad_norm": 123.70121002197266, "learning_rate": 1.354433695681474e-07, "logits/chosen": -2.949462413787842, "logits/rejected": -2.9476213455200195, "loss": 1.1219, "step": 455 }, { "beta_dpo/beta": 0.05017132684588432, "beta_dpo/beta_margin_grad_mean": -0.40448522567749023, "beta_dpo/beta_margin_grad_std": 0.27879607677459717, "beta_dpo/beta_margin_mean": 1.2627125978469849, "beta_dpo/beta_margin_std": 3.3613815307617188, "beta_dpo/beta_used": 0.05017132684588432, "beta_dpo/beta_used_raw": 0.045974329113960266, "beta_dpo/gap_mean": 29.84475326538086, "beta_dpo/gap_std": 38.46610641479492, "beta_dpo/loss_margin_mean": 28.446945190429688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6893424036281179, "grad_norm": 131.1843719482422, "learning_rate": 1.3426974201083439e-07, "logits/chosen": -2.9341540336608887, "logits/rejected": -2.959643602371216, "loss": 1.1835, "step": 456 }, { "beta_dpo/beta": 0.2062607854604721, "beta_dpo/beta_margin_grad_mean": -0.32546162605285645, "beta_dpo/beta_margin_grad_std": 0.282577782869339, "beta_dpo/beta_margin_mean": 8.263957023620605, "beta_dpo/beta_margin_std": 12.933406829833984, "beta_dpo/beta_used": 0.2062607854604721, "beta_dpo/beta_used_raw": -0.04059891402721405, "beta_dpo/gap_mean": 30.066280364990234, "beta_dpo/gap_std": 38.42988586425781, "beta_dpo/loss_margin_mean": 29.572206497192383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.690854119425548, "grad_norm": 404.93133544921875, "learning_rate": 1.3309935167761717e-07, "logits/chosen": -2.8899693489074707, "logits/rejected": -2.927942991256714, "loss": 1.0866, "step": 457 }, { "beta_dpo/beta": 0.06866870075464249, "beta_dpo/beta_margin_grad_mean": -0.3728755712509155, "beta_dpo/beta_margin_grad_std": 0.2513536214828491, "beta_dpo/beta_margin_mean": 2.0318596363067627, "beta_dpo/beta_margin_std": 4.29193639755249, "beta_dpo/beta_used": 0.06866870075464249, "beta_dpo/beta_used_raw": -0.02202005684375763, "beta_dpo/gap_mean": 30.349742889404297, "beta_dpo/gap_std": 39.295570373535156, "beta_dpo/loss_margin_mean": 34.04402542114258, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6923658352229781, "grad_norm": 130.452880859375, "learning_rate": 1.3193223130682936e-07, "logits/chosen": -2.879459857940674, "logits/rejected": -2.9372262954711914, "loss": 0.9576, "step": 458 }, { "beta_dpo/beta": 0.14638648927211761, "beta_dpo/beta_margin_grad_mean": -0.28407731652259827, "beta_dpo/beta_margin_grad_std": 0.25495174527168274, "beta_dpo/beta_margin_mean": 7.257359504699707, "beta_dpo/beta_margin_std": 11.758667945861816, "beta_dpo/beta_used": 0.14638648927211761, "beta_dpo/beta_used_raw": 0.1360008269548416, "beta_dpo/gap_mean": 29.87301254272461, "beta_dpo/gap_std": 40.10071563720703, "beta_dpo/loss_margin_mean": 32.61771011352539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6938775510204082, "grad_norm": 211.43577575683594, "learning_rate": 1.3076841354533658e-07, "logits/chosen": -2.912708282470703, "logits/rejected": -2.9239847660064697, "loss": 0.9333, "step": 459 }, { "beta_dpo/beta": 0.14245007932186127, "beta_dpo/beta_margin_grad_mean": -0.38360172510147095, "beta_dpo/beta_margin_grad_std": 0.30201467871665955, "beta_dpo/beta_margin_mean": 4.727894306182861, "beta_dpo/beta_margin_std": 9.328141212463379, "beta_dpo/beta_used": 0.14245007932186127, "beta_dpo/beta_used_raw": 0.05454842001199722, "beta_dpo/gap_mean": 32.104637145996094, "beta_dpo/gap_std": 40.898643493652344, "beta_dpo/loss_margin_mean": 37.70212936401367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6953892668178382, "grad_norm": 312.2969055175781, "learning_rate": 1.2960793094762345e-07, "logits/chosen": -2.8840432167053223, "logits/rejected": -2.932039260864258, "loss": 1.1696, "step": 460 }, { "beta_dpo/beta": 0.1491033136844635, "beta_dpo/beta_margin_grad_mean": -0.3280187249183655, "beta_dpo/beta_margin_grad_std": 0.2687358558177948, "beta_dpo/beta_margin_mean": 6.421268463134766, "beta_dpo/beta_margin_std": 12.01246166229248, "beta_dpo/beta_used": 0.1491033136844635, "beta_dpo/beta_used_raw": -0.07928402721881866, "beta_dpo/gap_mean": 33.18560791015625, "beta_dpo/gap_std": 41.554012298583984, "beta_dpo/loss_margin_mean": 37.490901947021484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6969009826152683, "grad_norm": 274.51336669921875, "learning_rate": 1.2845081597488286e-07, "logits/chosen": -2.8532447814941406, "logits/rejected": -2.8888659477233887, "loss": 0.8844, "step": 461 }, { "beta_dpo/beta": 0.3675364553928375, "beta_dpo/beta_margin_grad_mean": -0.24762766063213348, "beta_dpo/beta_margin_grad_std": 0.2799862027168274, "beta_dpo/beta_margin_mean": 15.530708312988281, "beta_dpo/beta_margin_std": 25.592716217041016, "beta_dpo/beta_used": 0.3675364553928375, "beta_dpo/beta_used_raw": 0.3675364553928375, "beta_dpo/gap_mean": 34.30308532714844, "beta_dpo/gap_std": 41.602821350097656, "beta_dpo/loss_margin_mean": 40.83872985839844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6984126984126984, "grad_norm": 1062.2481689453125, "learning_rate": 1.27297100994108e-07, "logits/chosen": -2.857814311981201, "logits/rejected": -2.865307569503784, "loss": 1.7849, "step": 462 }, { "beta_dpo/beta": 0.012533308006823063, "beta_dpo/beta_margin_grad_mean": -0.40381601452827454, "beta_dpo/beta_margin_grad_std": 0.1651657372713089, "beta_dpo/beta_margin_mean": 0.5023635029792786, "beta_dpo/beta_margin_std": 0.8901291489601135, "beta_dpo/beta_used": 0.012533308006823063, "beta_dpo/beta_used_raw": -0.2637104094028473, "beta_dpo/gap_mean": 34.78700256347656, "beta_dpo/gap_std": 41.961753845214844, "beta_dpo/loss_margin_mean": 34.03054428100586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6999244142101285, "grad_norm": 31.238935470581055, "learning_rate": 1.2614681827718695e-07, "logits/chosen": -2.890806198120117, "logits/rejected": -2.878354549407959, "loss": 1.1205, "step": 463 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49027693271636963, "beta_dpo/beta_margin_grad_std": 0.012361356988549232, "beta_dpo/beta_margin_mean": 0.03892575949430466, "beta_dpo/beta_margin_std": 0.04949839040637016, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3940258026123047, "beta_dpo/gap_mean": 34.684288024902344, "beta_dpo/gap_std": 43.00639343261719, "beta_dpo/loss_margin_mean": 38.925758361816406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7014361300075586, "grad_norm": 4.210781574249268, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -2.8452868461608887, "logits/rejected": -2.8584518432617188, "loss": 1.3604, "step": 464 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49044856429100037, "beta_dpo/beta_margin_grad_std": 0.012582222931087017, "beta_dpo/beta_margin_mean": 0.03824080526828766, "beta_dpo/beta_margin_std": 0.05038909986615181, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4250331521034241, "beta_dpo/gap_mean": 35.82416915893555, "beta_dpo/gap_std": 44.42228698730469, "beta_dpo/loss_margin_mean": 38.24080276489258, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7029478458049887, "grad_norm": 3.7858383655548096, "learning_rate": 1.238566782415197e-07, "logits/chosen": -2.8738555908203125, "logits/rejected": -2.898587226867676, "loss": 1.3598, "step": 465 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4945682883262634, "beta_dpo/beta_margin_grad_std": 0.01177225448191166, "beta_dpo/beta_margin_mean": 0.021742146462202072, "beta_dpo/beta_margin_std": 0.04713207110762596, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6088250875473022, "beta_dpo/gap_mean": 34.10150909423828, "beta_dpo/gap_std": 45.46623229980469, "beta_dpo/loss_margin_mean": 21.742145538330078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7044595616024187, "grad_norm": 3.8012847900390625, "learning_rate": 1.2271688498291334e-07, "logits/chosen": -2.8463258743286133, "logits/rejected": -2.841275215148926, "loss": 1.3644, "step": 466 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48994848132133484, "beta_dpo/beta_margin_grad_std": 0.01253302488476038, "beta_dpo/beta_margin_mean": 0.04024511203169823, "beta_dpo/beta_margin_std": 0.050207603722810745, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18654143810272217, "beta_dpo/gap_mean": 34.15043640136719, "beta_dpo/gap_std": 45.88709259033203, "beta_dpo/loss_margin_mean": 40.24510955810547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7059712773998488, "grad_norm": 4.15172815322876, "learning_rate": 1.2158065210664848e-07, "logits/chosen": -2.7906570434570312, "logits/rejected": -2.8386688232421875, "loss": 1.3574, "step": 467 }, { "beta_dpo/beta": 0.4981394112110138, "beta_dpo/beta_margin_grad_mean": -0.18455180525779724, "beta_dpo/beta_margin_grad_std": 0.33006787300109863, "beta_dpo/beta_margin_mean": 25.867090225219727, "beta_dpo/beta_margin_std": 44.431175231933594, "beta_dpo/beta_used": 0.4981394112110138, "beta_dpo/beta_used_raw": 0.4981394112110138, "beta_dpo/gap_mean": 36.898414611816406, "beta_dpo/gap_std": 47.32759094238281, "beta_dpo/loss_margin_mean": 51.3314094543457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7074829931972789, "grad_norm": 1016.423583984375, "learning_rate": 1.204480113956011e-07, "logits/chosen": -2.839628219604492, "logits/rejected": -2.8357181549072266, "loss": 1.5654, "step": 468 }, { "beta_dpo/beta": 0.01080307736992836, "beta_dpo/beta_margin_grad_mean": -0.4090682566165924, "beta_dpo/beta_margin_grad_std": 0.14942912757396698, "beta_dpo/beta_margin_mean": 0.48849332332611084, "beta_dpo/beta_margin_std": 0.8895741105079651, "beta_dpo/beta_used": 0.01080307736992836, "beta_dpo/beta_used_raw": -0.01903732866048813, "beta_dpo/gap_mean": 37.885498046875, "beta_dpo/gap_std": 48.62867736816406, "beta_dpo/loss_margin_mean": 35.774200439453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.708994708994709, "grad_norm": 43.972171783447266, "learning_rate": 1.1931899453216697e-07, "logits/chosen": -2.8145484924316406, "logits/rejected": -2.826870918273926, "loss": 1.1359, "step": 469 }, { "beta_dpo/beta": 0.005608946550637484, "beta_dpo/beta_margin_grad_mean": -0.44782423973083496, "beta_dpo/beta_margin_grad_std": 0.08384717255830765, "beta_dpo/beta_margin_mean": 0.22157049179077148, "beta_dpo/beta_margin_std": 0.3649922013282776, "beta_dpo/beta_used": 0.005608946550637484, "beta_dpo/beta_used_raw": -0.051736194640398026, "beta_dpo/gap_mean": 37.81916427612305, "beta_dpo/gap_std": 48.586944580078125, "beta_dpo/loss_margin_mean": 42.41155242919922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7105064247921391, "grad_norm": 26.199756622314453, "learning_rate": 1.1819363309737438e-07, "logits/chosen": -2.7599127292633057, "logits/rejected": -2.787717819213867, "loss": 1.2242, "step": 470 }, { "beta_dpo/beta": 0.5907325148582458, "beta_dpo/beta_margin_grad_mean": -0.11496514827013016, "beta_dpo/beta_margin_grad_std": 0.292705237865448, "beta_dpo/beta_margin_mean": 33.369873046875, "beta_dpo/beta_margin_std": 42.265533447265625, "beta_dpo/beta_used": 0.5907325148582458, "beta_dpo/beta_used_raw": 0.5907325148582458, "beta_dpo/gap_mean": 40.03142166137695, "beta_dpo/gap_std": 48.31740188598633, "beta_dpo/loss_margin_mean": 52.58283996582031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7120181405895691, "grad_norm": 1157.378662109375, "learning_rate": 1.1707195857000215e-07, "logits/chosen": -2.7885494232177734, "logits/rejected": -2.8036623001098633, "loss": 2.6333, "step": 471 }, { "beta_dpo/beta": 0.07265999913215637, "beta_dpo/beta_margin_grad_mean": -0.3512791395187378, "beta_dpo/beta_margin_grad_std": 0.2786218822002411, "beta_dpo/beta_margin_mean": 4.18007230758667, "beta_dpo/beta_margin_std": 7.867624759674072, "beta_dpo/beta_used": 0.07265999913215637, "beta_dpo/beta_used_raw": -0.3325417637825012, "beta_dpo/gap_mean": 41.11811065673828, "beta_dpo/gap_std": 50.19427490234375, "beta_dpo/loss_margin_mean": 45.41288757324219, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7135298563869993, "grad_norm": 421.1426086425781, "learning_rate": 1.1595400232569768e-07, "logits/chosen": -2.7796170711517334, "logits/rejected": -2.7934298515319824, "loss": 1.4017, "step": 472 }, { "beta_dpo/beta": 0.23743751645088196, "beta_dpo/beta_margin_grad_mean": -0.37403836846351624, "beta_dpo/beta_margin_grad_std": 0.30802056193351746, "beta_dpo/beta_margin_mean": 9.776598930358887, "beta_dpo/beta_margin_std": 21.84530258178711, "beta_dpo/beta_used": 0.23743751645088196, "beta_dpo/beta_used_raw": -0.2949034571647644, "beta_dpo/gap_mean": 41.25324249267578, "beta_dpo/gap_std": 52.74125671386719, "beta_dpo/loss_margin_mean": 37.91928482055664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7150415721844293, "grad_norm": 874.90771484375, "learning_rate": 1.1483979563610069e-07, "logits/chosen": -2.7615790367126465, "logits/rejected": -2.819824695587158, "loss": 2.1061, "step": 473 }, { "beta_dpo/beta": 0.3522808253765106, "beta_dpo/beta_margin_grad_mean": -0.40077441930770874, "beta_dpo/beta_margin_grad_std": 0.3069063127040863, "beta_dpo/beta_margin_mean": 15.927785873413086, "beta_dpo/beta_margin_std": 33.118064880371094, "beta_dpo/beta_used": 0.3522808253765106, "beta_dpo/beta_used_raw": 0.16157013177871704, "beta_dpo/gap_mean": 41.311546325683594, "beta_dpo/gap_std": 54.17529296875, "beta_dpo/loss_margin_mean": 40.076534271240234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7165532879818595, "grad_norm": 1089.6541748046875, "learning_rate": 1.1372936966796709e-07, "logits/chosen": -2.758329153060913, "logits/rejected": -2.801651954650879, "loss": 1.42, "step": 474 }, { "beta_dpo/beta": 0.6577058434486389, "beta_dpo/beta_margin_grad_mean": -0.14120624959468842, "beta_dpo/beta_margin_grad_std": 0.31994450092315674, "beta_dpo/beta_margin_mean": 38.25525665283203, "beta_dpo/beta_margin_std": 46.2388801574707, "beta_dpo/beta_used": 0.6577058434486389, "beta_dpo/beta_used_raw": 0.6577058434486389, "beta_dpo/gap_mean": 43.44903564453125, "beta_dpo/gap_std": 54.750579833984375, "beta_dpo/loss_margin_mean": 58.09202194213867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7180650037792895, "grad_norm": 1087.662353515625, "learning_rate": 1.126227554822985e-07, "logits/chosen": -2.7532217502593994, "logits/rejected": -2.7668323516845703, "loss": 0.8006, "step": 475 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4904637038707733, "beta_dpo/beta_margin_grad_std": 0.01358871627599001, "beta_dpo/beta_margin_mean": 0.0381828173995018, "beta_dpo/beta_margin_std": 0.0544172078371048, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6799896359443665, "beta_dpo/gap_mean": 43.584678649902344, "beta_dpo/gap_std": 55.18130111694336, "beta_dpo/loss_margin_mean": 38.18281555175781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7195767195767195, "grad_norm": 3.9220597743988037, "learning_rate": 1.1151998403347243e-07, "logits/chosen": -2.7706384658813477, "logits/rejected": -2.7767281532287598, "loss": 1.3565, "step": 476 }, { "beta_dpo/beta": 0.0515187531709671, "beta_dpo/beta_margin_grad_mean": -0.3912544250488281, "beta_dpo/beta_margin_grad_std": 0.2792060077190399, "beta_dpo/beta_margin_mean": 2.122750759124756, "beta_dpo/beta_margin_std": 5.050689220428467, "beta_dpo/beta_used": 0.0515187531709671, "beta_dpo/beta_used_raw": -0.3054247200489044, "beta_dpo/gap_mean": 42.680397033691406, "beta_dpo/gap_std": 56.65534210205078, "beta_dpo/loss_margin_mean": 40.76393508911133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7210884353741497, "grad_norm": 268.27996826171875, "learning_rate": 1.1042108616837692e-07, "logits/chosen": -2.7462360858917236, "logits/rejected": -2.781938076019287, "loss": 1.5907, "step": 477 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4899767339229584, "beta_dpo/beta_margin_grad_std": 0.014997678808867931, "beta_dpo/beta_margin_mean": 0.040140360593795776, "beta_dpo/beta_margin_std": 0.06006384268403053, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3576127588748932, "beta_dpo/gap_mean": 42.49079895019531, "beta_dpo/gap_std": 57.602779388427734, "beta_dpo/loss_margin_mean": 40.140357971191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7226001511715797, "grad_norm": 3.5894758701324463, "learning_rate": 1.0932609262554746e-07, "logits/chosen": -2.7310919761657715, "logits/rejected": -2.729006767272949, "loss": 1.3525, "step": 478 }, { "beta_dpo/beta": 0.02417217753827572, "beta_dpo/beta_margin_grad_mean": -0.3980015814304352, "beta_dpo/beta_margin_grad_std": 0.21414901316165924, "beta_dpo/beta_margin_mean": 0.8745595216751099, "beta_dpo/beta_margin_std": 1.965387225151062, "beta_dpo/beta_used": 0.02417217753827572, "beta_dpo/beta_used_raw": -0.27406013011932373, "beta_dpo/gap_mean": 40.88475036621094, "beta_dpo/gap_std": 57.084991455078125, "beta_dpo/loss_margin_mean": 32.70804977416992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7241118669690099, "grad_norm": 64.1123046875, "learning_rate": 1.0823503403430734e-07, "logits/chosen": -2.707150936126709, "logits/rejected": -2.7152490615844727, "loss": 1.0242, "step": 479 }, { "beta_dpo/beta": 0.973773181438446, "beta_dpo/beta_margin_grad_mean": -0.22256873548030853, "beta_dpo/beta_margin_grad_std": 0.4061585068702698, "beta_dpo/beta_margin_mean": 50.128910064697266, "beta_dpo/beta_margin_std": 66.56196594238281, "beta_dpo/beta_used": 0.973773181438446, "beta_dpo/beta_used_raw": 0.973773181438446, "beta_dpo/gap_mean": 40.585201263427734, "beta_dpo/gap_std": 57.819732666015625, "beta_dpo/loss_margin_mean": 46.63157272338867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7256235827664399, "grad_norm": 2886.19775390625, "learning_rate": 1.0714794091391072e-07, "logits/chosen": -2.7781190872192383, "logits/rejected": -2.77724027633667, "loss": 3.4544, "step": 480 }, { "beta_dpo/beta": 0.1825007051229477, "beta_dpo/beta_margin_grad_mean": -0.20635956525802612, "beta_dpo/beta_margin_grad_std": 0.33250176906585693, "beta_dpo/beta_margin_mean": 7.717591285705566, "beta_dpo/beta_margin_std": 9.530905723571777, "beta_dpo/beta_used": 0.1825007051229477, "beta_dpo/beta_used_raw": 0.1825007051229477, "beta_dpo/gap_mean": 42.10963439941406, "beta_dpo/gap_std": 57.092857360839844, "beta_dpo/loss_margin_mean": 43.11017990112305, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.72713529856387, "grad_norm": 440.2613220214844, "learning_rate": 1.0606484367268906e-07, "logits/chosen": -2.787419319152832, "logits/rejected": -2.780320644378662, "loss": 1.0162, "step": 481 }, { "beta_dpo/beta": 0.1774512678384781, "beta_dpo/beta_margin_grad_mean": -0.3469817638397217, "beta_dpo/beta_margin_grad_std": 0.3088356852531433, "beta_dpo/beta_margin_mean": 7.2963032722473145, "beta_dpo/beta_margin_std": 18.42062759399414, "beta_dpo/beta_used": 0.1774512678384781, "beta_dpo/beta_used_raw": -0.19875358045101166, "beta_dpo/gap_mean": 41.351280212402344, "beta_dpo/gap_std": 57.856937408447266, "beta_dpo/loss_margin_mean": 37.356292724609375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7286470143613001, "grad_norm": 362.2626037597656, "learning_rate": 1.0498577260720048e-07, "logits/chosen": -2.7591712474823, "logits/rejected": -2.8289999961853027, "loss": 1.0535, "step": 482 }, { "beta_dpo/beta": 0.15078911185264587, "beta_dpo/beta_margin_grad_mean": -0.19500455260276794, "beta_dpo/beta_margin_grad_std": 0.319975346326828, "beta_dpo/beta_margin_mean": 6.7245588302612305, "beta_dpo/beta_margin_std": 9.449666023254395, "beta_dpo/beta_used": 0.15078911185264587, "beta_dpo/beta_used_raw": 0.15078911185264587, "beta_dpo/gap_mean": 41.55865478515625, "beta_dpo/gap_std": 57.50669479370117, "beta_dpo/loss_margin_mean": 46.46189880371094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7301587301587301, "grad_norm": 398.8206481933594, "learning_rate": 1.0391075790138232e-07, "logits/chosen": -2.689525604248047, "logits/rejected": -2.752610206604004, "loss": 0.741, "step": 483 }, { "beta_dpo/beta": 0.09219953417778015, "beta_dpo/beta_margin_grad_mean": -0.3376636207103729, "beta_dpo/beta_margin_grad_std": 0.3007601499557495, "beta_dpo/beta_margin_mean": 3.813218593597412, "beta_dpo/beta_margin_std": 7.575406074523926, "beta_dpo/beta_used": 0.09219953417778015, "beta_dpo/beta_used_raw": -0.1167166456580162, "beta_dpo/gap_mean": 41.57493209838867, "beta_dpo/gap_std": 56.57539367675781, "beta_dpo/loss_margin_mean": 39.47372055053711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7316704459561603, "grad_norm": 492.06988525390625, "learning_rate": 1.0283982962570681e-07, "logits/chosen": -2.6704444885253906, "logits/rejected": -2.6830573081970215, "loss": 1.4066, "step": 484 }, { "beta_dpo/beta": 0.40363559126853943, "beta_dpo/beta_margin_grad_mean": -0.31707045435905457, "beta_dpo/beta_margin_grad_std": 0.29281070828437805, "beta_dpo/beta_margin_mean": 21.213218688964844, "beta_dpo/beta_margin_std": 39.091835021972656, "beta_dpo/beta_used": 0.40363559126853943, "beta_dpo/beta_used_raw": -0.09030476212501526, "beta_dpo/gap_mean": 41.633697509765625, "beta_dpo/gap_std": 56.02287292480469, "beta_dpo/loss_margin_mean": 37.29250717163086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7331821617535903, "grad_norm": 2081.590576171875, "learning_rate": 1.0177301773633992e-07, "logits/chosen": -2.696157932281494, "logits/rejected": -2.7043862342834473, "loss": 2.5686, "step": 485 }, { "beta_dpo/beta": 0.10442067682743073, "beta_dpo/beta_margin_grad_mean": -0.3749229609966278, "beta_dpo/beta_margin_grad_std": 0.3003118634223938, "beta_dpo/beta_margin_mean": 3.722522020339966, "beta_dpo/beta_margin_std": 8.481477737426758, "beta_dpo/beta_used": 0.10442067682743073, "beta_dpo/beta_used_raw": -0.01430542767047882, "beta_dpo/gap_mean": 40.47251892089844, "beta_dpo/gap_std": 55.193885803222656, "beta_dpo/loss_margin_mean": 41.57960891723633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7346938775510204, "grad_norm": 276.54681396484375, "learning_rate": 1.007103520743035e-07, "logits/chosen": -2.6733226776123047, "logits/rejected": -2.7371678352355957, "loss": 1.174, "step": 486 }, { "beta_dpo/beta": 0.07994943112134933, "beta_dpo/beta_margin_grad_mean": -0.3323688805103302, "beta_dpo/beta_margin_grad_std": 0.2599465847015381, "beta_dpo/beta_margin_mean": 2.785461902618408, "beta_dpo/beta_margin_std": 5.125765800476074, "beta_dpo/beta_used": 0.07994943112134933, "beta_dpo/beta_used_raw": -0.12181156873703003, "beta_dpo/gap_mean": 39.985862731933594, "beta_dpo/gap_std": 54.396183013916016, "beta_dpo/loss_margin_mean": 34.94841384887695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7362055933484505, "grad_norm": 163.62640380859375, "learning_rate": 9.965186236464046e-08, "logits/chosen": -2.6691818237304688, "logits/rejected": -2.7056899070739746, "loss": 0.8702, "step": 487 }, { "beta_dpo/beta": 0.5008640885353088, "beta_dpo/beta_margin_grad_mean": -0.3386906087398529, "beta_dpo/beta_margin_grad_std": 0.30464500188827515, "beta_dpo/beta_margin_mean": 28.58328628540039, "beta_dpo/beta_margin_std": 49.5888557434082, "beta_dpo/beta_used": 0.5008640885353088, "beta_dpo/beta_used_raw": 0.34469401836395264, "beta_dpo/gap_mean": 40.6063346862793, "beta_dpo/gap_std": 54.928916931152344, "beta_dpo/loss_margin_mean": 48.46849060058594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7377173091458806, "grad_norm": 1919.3291015625, "learning_rate": 9.859757821558337e-08, "logits/chosen": -2.689507484436035, "logits/rejected": -2.7045979499816895, "loss": 3.0325, "step": 488 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4926265478134155, "beta_dpo/beta_margin_grad_std": 0.012725806795060635, "beta_dpo/beta_margin_mean": 0.029519159346818924, "beta_dpo/beta_margin_std": 0.05095401778817177, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.43343132734298706, "beta_dpo/gap_mean": 39.814117431640625, "beta_dpo/gap_std": 54.60805130004883, "beta_dpo/loss_margin_mean": 29.51915740966797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7392290249433107, "grad_norm": 4.167266845703125, "learning_rate": 9.754752911772615e-08, "logits/chosen": -2.7549619674682617, "logits/rejected": -2.792065143585205, "loss": 1.3561, "step": 489 }, { "beta_dpo/beta": 0.32752177119255066, "beta_dpo/beta_margin_grad_mean": -0.39252033829689026, "beta_dpo/beta_margin_grad_std": 0.3261478543281555, "beta_dpo/beta_margin_mean": 12.92074203491211, "beta_dpo/beta_margin_std": 32.389190673828125, "beta_dpo/beta_used": 0.32752177119255066, "beta_dpo/beta_used_raw": 0.2527086138725281, "beta_dpo/gap_mean": 39.23323059082031, "beta_dpo/gap_std": 56.25166320800781, "beta_dpo/loss_margin_mean": 39.49768829345703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7407407407407407, "grad_norm": 1155.421630859375, "learning_rate": 9.650174444319956e-08, "logits/chosen": -2.711261749267578, "logits/rejected": -2.7159712314605713, "loss": 3.4279, "step": 490 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4900553822517395, "beta_dpo/beta_margin_grad_std": 0.013235099613666534, "beta_dpo/beta_margin_mean": 0.03981310874223709, "beta_dpo/beta_margin_std": 0.05299828574061394, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2943836748600006, "beta_dpo/gap_mean": 39.666542053222656, "beta_dpo/gap_std": 56.23136901855469, "beta_dpo/loss_margin_mean": 39.813106536865234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7422524565381708, "grad_norm": 3.6654624938964844, "learning_rate": 9.546025344484868e-08, "logits/chosen": -2.712951421737671, "logits/rejected": -2.7393627166748047, "loss": 1.3539, "step": 491 }, { "beta_dpo/beta": 0.5559797286987305, "beta_dpo/beta_margin_grad_mean": -0.3582126796245575, "beta_dpo/beta_margin_grad_std": 0.31471025943756104, "beta_dpo/beta_margin_mean": 29.00932502746582, "beta_dpo/beta_margin_std": 51.71617889404297, "beta_dpo/beta_used": 0.5559797286987305, "beta_dpo/beta_used_raw": 0.42610257863998413, "beta_dpo/gap_mean": 38.09209060668945, "beta_dpo/gap_std": 57.30845260620117, "beta_dpo/loss_margin_mean": 36.10185623168945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7437641723356009, "grad_norm": 1250.3416748046875, "learning_rate": 9.442308525541589e-08, "logits/chosen": -2.703014850616455, "logits/rejected": -2.743101119995117, "loss": 2.3617, "step": 492 }, { "beta_dpo/beta": 0.15209481120109558, "beta_dpo/beta_margin_grad_mean": -0.31275543570518494, "beta_dpo/beta_margin_grad_std": 0.2850077450275421, "beta_dpo/beta_margin_mean": 7.864630699157715, "beta_dpo/beta_margin_std": 15.609386444091797, "beta_dpo/beta_used": 0.15209481120109558, "beta_dpo/beta_used_raw": 0.09873668849468231, "beta_dpo/gap_mean": 39.294410705566406, "beta_dpo/gap_std": 56.776641845703125, "beta_dpo/loss_margin_mean": 44.313941955566406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.745275888133031, "grad_norm": 693.7211303710938, "learning_rate": 9.339026888672468e-08, "logits/chosen": -2.722355842590332, "logits/rejected": -2.7542717456817627, "loss": 2.4299, "step": 493 }, { "beta_dpo/beta": 0.07930776476860046, "beta_dpo/beta_margin_grad_mean": -0.35822421312332153, "beta_dpo/beta_margin_grad_std": 0.29255738854408264, "beta_dpo/beta_margin_mean": 3.3953824043273926, "beta_dpo/beta_margin_std": 7.435774803161621, "beta_dpo/beta_used": 0.07930776476860046, "beta_dpo/beta_used_raw": -0.20857512950897217, "beta_dpo/gap_mean": 39.77642822265625, "beta_dpo/gap_std": 57.30767822265625, "beta_dpo/loss_margin_mean": 36.8887939453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7467876039304611, "grad_norm": 316.9527587890625, "learning_rate": 9.236183322886945e-08, "logits/chosen": -2.739696502685547, "logits/rejected": -2.7579593658447266, "loss": 1.1247, "step": 494 }, { "beta_dpo/beta": 0.33298665285110474, "beta_dpo/beta_margin_grad_mean": -0.32901203632354736, "beta_dpo/beta_margin_grad_std": 0.2863346338272095, "beta_dpo/beta_margin_mean": 15.57753849029541, "beta_dpo/beta_margin_std": 29.28587532043457, "beta_dpo/beta_used": 0.33298665285110474, "beta_dpo/beta_used_raw": -0.00022649765014648438, "beta_dpo/gap_mean": 38.87417221069336, "beta_dpo/gap_std": 55.89385986328125, "beta_dpo/loss_margin_mean": 38.79734420776367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7482993197278912, "grad_norm": 713.429443359375, "learning_rate": 9.133780704940594e-08, "logits/chosen": -2.6686229705810547, "logits/rejected": -2.7093167304992676, "loss": 0.9881, "step": 495 }, { "beta_dpo/beta": 0.02213066816329956, "beta_dpo/beta_margin_grad_mean": -0.4068155586719513, "beta_dpo/beta_margin_grad_std": 0.2209998369216919, "beta_dpo/beta_margin_mean": 0.7566680312156677, "beta_dpo/beta_margin_std": 1.8222705125808716, "beta_dpo/beta_used": 0.02213066816329956, "beta_dpo/beta_used_raw": -0.06461194157600403, "beta_dpo/gap_mean": 38.767662048339844, "beta_dpo/gap_std": 56.514461517333984, "beta_dpo/loss_margin_mean": 35.313419342041016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7498110355253212, "grad_norm": 87.11608123779297, "learning_rate": 9.031821899254797e-08, "logits/chosen": -2.733874559402466, "logits/rejected": -2.7735462188720703, "loss": 1.1315, "step": 496 }, { "beta_dpo/beta": 0.12018337845802307, "beta_dpo/beta_margin_grad_mean": -0.15946322679519653, "beta_dpo/beta_margin_grad_std": 0.2790553867816925, "beta_dpo/beta_margin_mean": 7.110559463500977, "beta_dpo/beta_margin_std": 6.706122875213623, "beta_dpo/beta_used": 0.12018337845802307, "beta_dpo/beta_used_raw": 0.12018337845802307, "beta_dpo/gap_mean": 41.55035400390625, "beta_dpo/gap_std": 56.61823272705078, "beta_dpo/loss_margin_mean": 59.153717041015625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7513227513227513, "grad_norm": 290.6864013671875, "learning_rate": 8.930309757836516e-08, "logits/chosen": -2.71683931350708, "logits/rejected": -2.7388899326324463, "loss": 0.8565, "step": 497 }, { "beta_dpo/beta": 0.2165343165397644, "beta_dpo/beta_margin_grad_mean": -0.3753109276294708, "beta_dpo/beta_margin_grad_std": 0.30145102739334106, "beta_dpo/beta_margin_mean": 10.487258911132812, "beta_dpo/beta_margin_std": 21.614362716674805, "beta_dpo/beta_used": 0.2165343165397644, "beta_dpo/beta_used_raw": 0.09735321253538132, "beta_dpo/gap_mean": 42.46245193481445, "beta_dpo/gap_std": 57.141075134277344, "beta_dpo/loss_margin_mean": 44.00096893310547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7528344671201814, "grad_norm": 961.116455078125, "learning_rate": 8.829247120198563e-08, "logits/chosen": -2.7068562507629395, "logits/rejected": -2.713627815246582, "loss": 2.3702, "step": 498 }, { "beta_dpo/beta": 0.19605065882205963, "beta_dpo/beta_margin_grad_mean": -0.39544767141342163, "beta_dpo/beta_margin_grad_std": 0.3248600363731384, "beta_dpo/beta_margin_mean": 9.355982780456543, "beta_dpo/beta_margin_std": 21.50864028930664, "beta_dpo/beta_used": 0.19605065882205963, "beta_dpo/beta_used_raw": 0.004853472113609314, "beta_dpo/gap_mean": 43.18915557861328, "beta_dpo/gap_std": 58.87891387939453, "beta_dpo/loss_margin_mean": 44.659873962402344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7543461829176115, "grad_norm": 743.588623046875, "learning_rate": 8.728636813280163e-08, "logits/chosen": -2.692368507385254, "logits/rejected": -2.726922035217285, "loss": 3.2996, "step": 499 }, { "beta_dpo/beta": 0.1744510680437088, "beta_dpo/beta_margin_grad_mean": -0.30420351028442383, "beta_dpo/beta_margin_grad_std": 0.2772481441497803, "beta_dpo/beta_margin_mean": 8.870034217834473, "beta_dpo/beta_margin_std": 15.547243118286133, "beta_dpo/beta_used": 0.1744510680437088, "beta_dpo/beta_used_raw": -0.004995211958885193, "beta_dpo/gap_mean": 43.78472900390625, "beta_dpo/gap_std": 59.16692352294922, "beta_dpo/loss_margin_mean": 48.520931243896484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7558578987150416, "grad_norm": 359.8203125, "learning_rate": 8.628481651367875e-08, "logits/chosen": -2.729337692260742, "logits/rejected": -2.7200241088867188, "loss": 1.2415, "step": 500 }, { "epoch": 0.7558578987150416, "eval_beta_dpo/beta": 0.06070829555392265, "eval_beta_dpo/beta_margin_grad_mean": -0.43526893854141235, "eval_beta_dpo/beta_margin_grad_std": 0.08183299005031586, "eval_beta_dpo/beta_margin_mean": 2.933407783508301, "eval_beta_dpo/beta_margin_std": 3.5973763465881348, "eval_beta_dpo/beta_used": 0.06070829555392265, "eval_beta_dpo/beta_used_raw": -0.4269829988479614, "eval_beta_dpo/gap_mean": 44.15704345703125, "eval_beta_dpo/gap_std": 58.8353271484375, "eval_beta_dpo/loss_margin_mean": 35.37399673461914, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -2.762294054031372, "eval_logits/rejected": -2.7785770893096924, "eval_loss": 0.8583182096481323, "eval_runtime": 36.37, "eval_samples_per_second": 63.321, "eval_steps_per_second": 1.98, "step": 500 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49090656638145447, "beta_dpo/beta_margin_grad_std": 0.011450248770415783, "beta_dpo/beta_margin_mean": 0.036405812948942184, "beta_dpo/beta_margin_std": 0.04586649313569069, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5008817911148071, "beta_dpo/gap_mean": 42.739036560058594, "beta_dpo/gap_std": 56.7427978515625, "beta_dpo/loss_margin_mean": 36.40580749511719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7573696145124716, "grad_norm": 3.569932460784912, "learning_rate": 8.528784436016878e-08, "logits/chosen": -2.7037782669067383, "logits/rejected": -2.695845603942871, "loss": 1.3542, "step": 501 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49141430854797363, "beta_dpo/beta_margin_grad_std": 0.011822287924587727, "beta_dpo/beta_margin_mean": 0.034368664026260376, "beta_dpo/beta_margin_std": 0.04734347015619278, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.616624116897583, "beta_dpo/gap_mean": 41.67316436767578, "beta_dpo/gap_std": 55.443443298339844, "beta_dpo/loss_margin_mean": 34.36865997314453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7588813303099018, "grad_norm": 4.4559831619262695, "learning_rate": 8.4295479559726e-08, "logits/chosen": -2.7137112617492676, "logits/rejected": -2.738381862640381, "loss": 1.3572, "step": 502 }, { "beta_dpo/beta": 0.3707210421562195, "beta_dpo/beta_margin_grad_mean": -0.3491879105567932, "beta_dpo/beta_margin_grad_std": 0.3027651011943817, "beta_dpo/beta_margin_mean": 15.77136516571045, "beta_dpo/beta_margin_std": 40.02242660522461, "beta_dpo/beta_used": 0.3707210421562195, "beta_dpo/beta_used_raw": 0.2508103847503662, "beta_dpo/gap_mean": 40.90766143798828, "beta_dpo/gap_std": 55.69822311401367, "beta_dpo/loss_margin_mean": 38.29641342163086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7603930461073318, "grad_norm": 1319.376220703125, "learning_rate": 8.330774987092712e-08, "logits/chosen": -2.710824966430664, "logits/rejected": -2.6981759071350098, "loss": 2.776, "step": 503 }, { "beta_dpo/beta": 0.28216904401779175, "beta_dpo/beta_margin_grad_mean": -0.3438568115234375, "beta_dpo/beta_margin_grad_std": 0.30565929412841797, "beta_dpo/beta_margin_mean": 15.786298751831055, "beta_dpo/beta_margin_std": 28.352420806884766, "beta_dpo/beta_used": 0.28216904401779175, "beta_dpo/beta_used_raw": -0.08858853578567505, "beta_dpo/gap_mean": 40.74700164794922, "beta_dpo/gap_std": 55.91231155395508, "beta_dpo/loss_margin_mean": 45.25965881347656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7619047619047619, "grad_norm": 789.7346801757812, "learning_rate": 8.232468292269479e-08, "logits/chosen": -2.6995792388916016, "logits/rejected": -2.6963398456573486, "loss": 2.6669, "step": 504 }, { "beta_dpo/beta": 0.22853413224220276, "beta_dpo/beta_margin_grad_mean": -0.3785220682621002, "beta_dpo/beta_margin_grad_std": 0.31999051570892334, "beta_dpo/beta_margin_mean": 10.497669219970703, "beta_dpo/beta_margin_std": 23.13285255432129, "beta_dpo/beta_used": 0.22853413224220276, "beta_dpo/beta_used_raw": -0.23188593983650208, "beta_dpo/gap_mean": 40.928749084472656, "beta_dpo/gap_std": 57.20260238647461, "beta_dpo/loss_margin_mean": 33.34831619262695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.763416477702192, "grad_norm": 902.9845581054688, "learning_rate": 8.134630621352483e-08, "logits/chosen": -2.7248096466064453, "logits/rejected": -2.7551441192626953, "loss": 2.0222, "step": 505 }, { "beta_dpo/beta": 0.24293901026248932, "beta_dpo/beta_margin_grad_mean": -0.28122228384017944, "beta_dpo/beta_margin_grad_std": 0.3899361491203308, "beta_dpo/beta_margin_mean": 8.826823234558105, "beta_dpo/beta_margin_std": 19.38666343688965, "beta_dpo/beta_used": 0.24293901026248932, "beta_dpo/beta_used_raw": 0.24293901026248932, "beta_dpo/gap_mean": 39.355960845947266, "beta_dpo/gap_std": 58.168418884277344, "beta_dpo/loss_margin_mean": 36.38151931762695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.764928193499622, "grad_norm": 1123.488525390625, "learning_rate": 8.037264711071698e-08, "logits/chosen": -2.7153756618499756, "logits/rejected": -2.7147321701049805, "loss": 3.9304, "step": 506 }, { "beta_dpo/beta": 0.26356324553489685, "beta_dpo/beta_margin_grad_mean": -0.3092266321182251, "beta_dpo/beta_margin_grad_std": 0.29034730792045593, "beta_dpo/beta_margin_mean": 12.295702934265137, "beta_dpo/beta_margin_std": 26.49407958984375, "beta_dpo/beta_used": 0.26356324553489685, "beta_dpo/beta_used_raw": 0.16478615999221802, "beta_dpo/gap_mean": 39.833335876464844, "beta_dpo/gap_std": 59.442283630371094, "beta_dpo/loss_margin_mean": 44.284969329833984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7664399092970522, "grad_norm": 531.67529296875, "learning_rate": 7.940373284960933e-08, "logits/chosen": -2.705458641052246, "logits/rejected": -2.7373507022857666, "loss": 1.0875, "step": 507 }, { "beta_dpo/beta": 0.5739637017250061, "beta_dpo/beta_margin_grad_mean": -0.21459561586380005, "beta_dpo/beta_margin_grad_std": 0.3837727904319763, "beta_dpo/beta_margin_mean": 24.81824493408203, "beta_dpo/beta_margin_std": 49.48651885986328, "beta_dpo/beta_used": 0.5739637017250061, "beta_dpo/beta_used_raw": 0.5739637017250061, "beta_dpo/gap_mean": 40.61821746826172, "beta_dpo/gap_std": 59.360069274902344, "beta_dpo/loss_margin_mean": 43.16511535644531, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7679516250944822, "grad_norm": 2711.739501953125, "learning_rate": 7.843959053281663e-08, "logits/chosen": -2.720541000366211, "logits/rejected": -2.79325532913208, "loss": 5.5206, "step": 508 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49008607864379883, "beta_dpo/beta_margin_grad_std": 0.012960444204509258, "beta_dpo/beta_margin_mean": 0.03969065845012665, "beta_dpo/beta_margin_std": 0.0518970787525177, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.379862904548645, "beta_dpo/gap_mean": 40.70125198364258, "beta_dpo/gap_std": 58.98310089111328, "beta_dpo/loss_margin_mean": 39.69065856933594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7694633408919124, "grad_norm": 4.5289483070373535, "learning_rate": 7.748024712947204e-08, "logits/chosen": -2.7247955799102783, "logits/rejected": -2.742762804031372, "loss": 1.3543, "step": 509 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48754072189331055, "beta_dpo/beta_margin_grad_std": 0.015329192392528057, "beta_dpo/beta_margin_mean": 0.04989998787641525, "beta_dpo/beta_margin_std": 0.061411548405885696, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.46870729327201843, "beta_dpo/gap_mean": 42.109127044677734, "beta_dpo/gap_std": 59.28163146972656, "beta_dpo/loss_margin_mean": 49.899986267089844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7709750566893424, "grad_norm": 4.867334365844727, "learning_rate": 7.652572947447272e-08, "logits/chosen": -2.674614429473877, "logits/rejected": -2.7252471446990967, "loss": 1.3545, "step": 510 }, { "beta_dpo/beta": 0.20752891898155212, "beta_dpo/beta_margin_grad_mean": -0.20747746527194977, "beta_dpo/beta_margin_grad_std": 0.35087651014328003, "beta_dpo/beta_margin_mean": 10.76735782623291, "beta_dpo/beta_margin_std": 14.685410499572754, "beta_dpo/beta_used": 0.20752891898155212, "beta_dpo/beta_used_raw": 0.20752891898155212, "beta_dpo/gap_mean": 43.691673278808594, "beta_dpo/gap_std": 60.531578063964844, "beta_dpo/loss_margin_mean": 54.25608444213867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7724867724867724, "grad_norm": 521.0980224609375, "learning_rate": 7.557606426772961e-08, "logits/chosen": -2.6949658393859863, "logits/rejected": -2.7207558155059814, "loss": 2.0411, "step": 511 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48948413133621216, "beta_dpo/beta_margin_grad_std": 0.014411956071853638, "beta_dpo/beta_margin_mean": 0.042109742760658264, "beta_dpo/beta_margin_std": 0.057718053460121155, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5245345830917358, "beta_dpo/gap_mean": 44.23836135864258, "beta_dpo/gap_std": 60.7104606628418, "beta_dpo/loss_margin_mean": 42.1097412109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7739984882842026, "grad_norm": 4.707949161529541, "learning_rate": 7.463127807341966e-08, "logits/chosen": -2.730802059173584, "logits/rejected": -2.729092597961426, "loss": 1.3534, "step": 512 }, { "beta_dpo/beta": 0.18585793673992157, "beta_dpo/beta_margin_grad_mean": -0.23562981188297272, "beta_dpo/beta_margin_grad_std": 0.29042237997055054, "beta_dpo/beta_margin_mean": 9.61783504486084, "beta_dpo/beta_margin_std": 16.55315589904785, "beta_dpo/beta_used": 0.18585793673992157, "beta_dpo/beta_used_raw": 0.18585793673992157, "beta_dpo/gap_mean": 44.56898498535156, "beta_dpo/gap_std": 60.356040954589844, "beta_dpo/loss_margin_mean": 44.24302291870117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7755102040816326, "grad_norm": 579.6964721679688, "learning_rate": 7.369139731924401e-08, "logits/chosen": -2.6522293090820312, "logits/rejected": -2.6841511726379395, "loss": 0.7497, "step": 513 }, { "beta_dpo/beta": 0.6994319558143616, "beta_dpo/beta_margin_grad_mean": -0.15793399512767792, "beta_dpo/beta_margin_grad_std": 0.34477755427360535, "beta_dpo/beta_margin_mean": 38.30527114868164, "beta_dpo/beta_margin_std": 44.85448455810547, "beta_dpo/beta_used": 0.6994319558143616, "beta_dpo/beta_used_raw": 0.6994319558143616, "beta_dpo/gap_mean": 45.162689208984375, "beta_dpo/gap_std": 59.23829650878906, "beta_dpo/loss_margin_mean": 53.46204376220703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7770219198790628, "grad_norm": 1937.4132080078125, "learning_rate": 7.275644829568747e-08, "logits/chosen": -2.7020692825317383, "logits/rejected": -2.7054526805877686, "loss": 1.9092, "step": 514 }, { "beta_dpo/beta": 0.009622273966670036, "beta_dpo/beta_margin_grad_mean": -0.4048810601234436, "beta_dpo/beta_margin_grad_std": 0.1544850915670395, "beta_dpo/beta_margin_mean": 0.4853326976299286, "beta_dpo/beta_margin_std": 0.8304917812347412, "beta_dpo/beta_used": 0.009622273966670036, "beta_dpo/beta_used_raw": -0.013449749909341335, "beta_dpo/gap_mean": 45.89698028564453, "beta_dpo/gap_std": 59.16020965576172, "beta_dpo/loss_margin_mean": 47.00020980834961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7785336356764928, "grad_norm": 38.7457160949707, "learning_rate": 7.182645715528435e-08, "logits/chosen": -2.658329963684082, "logits/rejected": -2.6947593688964844, "loss": 1.1149, "step": 515 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4907180666923523, "beta_dpo/beta_margin_grad_std": 0.014946096576750278, "beta_dpo/beta_margin_mean": 0.03717074170708656, "beta_dpo/beta_margin_std": 0.05986913666129112, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3845486044883728, "beta_dpo/gap_mean": 44.72285842895508, "beta_dpo/gap_std": 59.19635772705078, "beta_dpo/loss_margin_mean": 37.170738220214844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.780045351473923, "grad_norm": 3.5393691062927246, "learning_rate": 7.090144991188568e-08, "logits/chosen": -2.6673450469970703, "logits/rejected": -2.685701847076416, "loss": 1.3505, "step": 516 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4910920560359955, "beta_dpo/beta_margin_grad_std": 0.014533808454871178, "beta_dpo/beta_margin_mean": 0.035668447613716125, "beta_dpo/beta_margin_std": 0.058203116059303284, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4118385910987854, "beta_dpo/gap_mean": 43.163978576660156, "beta_dpo/gap_std": 59.21691131591797, "beta_dpo/loss_margin_mean": 35.6684455871582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.781557067271353, "grad_norm": 4.225611209869385, "learning_rate": 6.998145243993284e-08, "logits/chosen": -2.6880226135253906, "logits/rejected": -2.6727945804595947, "loss": 1.3527, "step": 517 }, { "beta_dpo/beta": 0.18050672113895416, "beta_dpo/beta_margin_grad_mean": -0.37036359310150146, "beta_dpo/beta_margin_grad_std": 0.30285516381263733, "beta_dpo/beta_margin_mean": 9.467116355895996, "beta_dpo/beta_margin_std": 17.349821090698242, "beta_dpo/beta_used": 0.18050672113895416, "beta_dpo/beta_used_raw": -0.2277805060148239, "beta_dpo/gap_mean": 42.35028076171875, "beta_dpo/gap_std": 58.71092224121094, "beta_dpo/loss_margin_mean": 42.672080993652344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.783068783068783, "grad_norm": 1116.5013427734375, "learning_rate": 6.906649047373245e-08, "logits/chosen": -2.704665422439575, "logits/rejected": -2.7393040657043457, "loss": 1.9926, "step": 518 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49066850543022156, "beta_dpo/beta_margin_grad_std": 0.016804302111268044, "beta_dpo/beta_margin_mean": 0.0373702310025692, "beta_dpo/beta_margin_std": 0.06730356067419052, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3206685781478882, "beta_dpo/gap_mean": 41.75722885131836, "beta_dpo/gap_std": 60.208457946777344, "beta_dpo/loss_margin_mean": 37.3702278137207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7845804988662132, "grad_norm": 4.537266731262207, "learning_rate": 6.815658960673781e-08, "logits/chosen": -2.676978588104248, "logits/rejected": -2.6939690113067627, "loss": 1.3527, "step": 519 }, { "beta_dpo/beta": 0.20510099828243256, "beta_dpo/beta_margin_grad_mean": -0.3535204231739044, "beta_dpo/beta_margin_grad_std": 0.3087931275367737, "beta_dpo/beta_margin_mean": 8.220118522644043, "beta_dpo/beta_margin_std": 18.738656997680664, "beta_dpo/beta_used": 0.20510099828243256, "beta_dpo/beta_used_raw": 0.11168855428695679, "beta_dpo/gap_mean": 41.049041748046875, "beta_dpo/gap_std": 59.8758430480957, "beta_dpo/loss_margin_mean": 34.11420822143555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7860922146636432, "grad_norm": 505.3785400390625, "learning_rate": 6.725177529083209e-08, "logits/chosen": -2.6468472480773926, "logits/rejected": -2.6863021850585938, "loss": 1.5582, "step": 520 }, { "beta_dpo/beta": 0.3797077238559723, "beta_dpo/beta_margin_grad_mean": -0.3206307291984558, "beta_dpo/beta_margin_grad_std": 0.2996864318847656, "beta_dpo/beta_margin_mean": 23.054044723510742, "beta_dpo/beta_margin_std": 38.42852020263672, "beta_dpo/beta_used": 0.3797077238559723, "beta_dpo/beta_used_raw": 0.33471980690956116, "beta_dpo/gap_mean": 40.93505859375, "beta_dpo/gap_std": 59.90309143066406, "beta_dpo/loss_margin_mean": 48.176727294921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7876039304610734, "grad_norm": 1331.1934814453125, "learning_rate": 6.63520728356167e-08, "logits/chosen": -2.725958824157715, "logits/rejected": -2.7716081142425537, "loss": 4.2967, "step": 521 }, { "beta_dpo/beta": 0.3732588291168213, "beta_dpo/beta_margin_grad_mean": -0.20111659169197083, "beta_dpo/beta_margin_grad_std": 0.35144945979118347, "beta_dpo/beta_margin_mean": 17.039627075195312, "beta_dpo/beta_margin_std": 22.248046875, "beta_dpo/beta_used": 0.3732588291168213, "beta_dpo/beta_used_raw": 0.3732588291168213, "beta_dpo/gap_mean": 42.58722686767578, "beta_dpo/gap_std": 59.817386627197266, "beta_dpo/loss_margin_mean": 45.58156204223633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7891156462585034, "grad_norm": 870.8029174804688, "learning_rate": 6.545750740770336e-08, "logits/chosen": -2.685265302658081, "logits/rejected": -2.6789164543151855, "loss": 2.3226, "step": 522 }, { "beta_dpo/beta": 0.4820902943611145, "beta_dpo/beta_margin_grad_mean": -0.22910968959331512, "beta_dpo/beta_margin_grad_std": 0.4086511433124542, "beta_dpo/beta_margin_mean": 21.961559295654297, "beta_dpo/beta_margin_std": 31.645612716674805, "beta_dpo/beta_used": 0.4820902943611145, "beta_dpo/beta_used_raw": 0.4820902943611145, "beta_dpo/gap_mean": 43.25143051147461, "beta_dpo/gap_std": 60.66827392578125, "beta_dpo/loss_margin_mean": 44.9202995300293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7906273620559335, "grad_norm": 1632.2755126953125, "learning_rate": 6.456810403001012e-08, "logits/chosen": -2.7161006927490234, "logits/rejected": -2.7626240253448486, "loss": 4.9571, "step": 523 }, { "beta_dpo/beta": 0.10415734350681305, "beta_dpo/beta_margin_grad_mean": -0.36675214767456055, "beta_dpo/beta_margin_grad_std": 0.2674957513809204, "beta_dpo/beta_margin_mean": 4.917224884033203, "beta_dpo/beta_margin_std": 9.322827339172363, "beta_dpo/beta_used": 0.10415734350681305, "beta_dpo/beta_used_raw": -0.0208590030670166, "beta_dpo/gap_mean": 42.01835632324219, "beta_dpo/gap_std": 60.799781799316406, "beta_dpo/loss_margin_mean": 38.786258697509766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7921390778533636, "grad_norm": 278.8160095214844, "learning_rate": 6.368388758106134e-08, "logits/chosen": -2.7579870223999023, "logits/rejected": -2.7684619426727295, "loss": 1.4521, "step": 524 }, { "beta_dpo/beta": 0.1328941434621811, "beta_dpo/beta_margin_grad_mean": -0.3332993686199188, "beta_dpo/beta_margin_grad_std": 0.26771458983421326, "beta_dpo/beta_margin_mean": 5.369054794311523, "beta_dpo/beta_margin_std": 11.418255805969238, "beta_dpo/beta_used": 0.1328941434621811, "beta_dpo/beta_used_raw": -0.1355496495962143, "beta_dpo/gap_mean": 40.92710876464844, "beta_dpo/gap_std": 61.039215087890625, "beta_dpo/loss_margin_mean": 34.65338134765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7936507936507936, "grad_norm": 420.6562805175781, "learning_rate": 6.280488279429185e-08, "logits/chosen": -2.7576797008514404, "logits/rejected": -2.757020950317383, "loss": 1.1612, "step": 525 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4936671257019043, "beta_dpo/beta_margin_grad_std": 0.013691714033484459, "beta_dpo/beta_margin_mean": 0.02535291761159897, "beta_dpo/beta_margin_std": 0.054825231432914734, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6394113898277283, "beta_dpo/gap_mean": 39.1685676574707, "beta_dpo/gap_std": 59.67204284667969, "beta_dpo/loss_margin_mean": 25.352916717529297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7951625094482238, "grad_norm": 4.625575542449951, "learning_rate": 6.193111425735515e-08, "logits/chosen": -2.695737600326538, "logits/rejected": -2.7326161861419678, "loss": 1.3601, "step": 526 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49305132031440735, "beta_dpo/beta_margin_grad_std": 0.014519465155899525, "beta_dpo/beta_margin_mean": 0.02782551757991314, "beta_dpo/beta_margin_std": 0.058140479028224945, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17464077472686768, "beta_dpo/gap_mean": 36.03892517089844, "beta_dpo/gap_std": 59.28942108154297, "beta_dpo/loss_margin_mean": 27.825515747070312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7966742252456538, "grad_norm": 4.227888584136963, "learning_rate": 6.106260641143546e-08, "logits/chosen": -2.652071237564087, "logits/rejected": -2.6945571899414062, "loss": 1.3557, "step": 527 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4925900101661682, "beta_dpo/beta_margin_grad_std": 0.014309762977063656, "beta_dpo/beta_margin_mean": 0.02966652624309063, "beta_dpo/beta_margin_std": 0.05729580298066139, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5310070514678955, "beta_dpo/gap_mean": 35.63081359863281, "beta_dpo/gap_std": 59.07182312011719, "beta_dpo/loss_margin_mean": 29.66652488708496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7981859410430839, "grad_norm": 3.7201552391052246, "learning_rate": 6.019938355056422e-08, "logits/chosen": -2.6412606239318848, "logits/rejected": -2.6760683059692383, "loss": 1.3618, "step": 528 }, { "beta_dpo/beta": 1.4114124774932861, "beta_dpo/beta_margin_grad_mean": -0.17690998315811157, "beta_dpo/beta_margin_grad_std": 0.3721368908882141, "beta_dpo/beta_margin_mean": 94.305419921875, "beta_dpo/beta_margin_std": 109.623291015625, "beta_dpo/beta_used": 1.4114124774932861, "beta_dpo/beta_used_raw": 1.4114124774932861, "beta_dpo/gap_mean": 39.14246368408203, "beta_dpo/gap_std": 59.363304138183594, "beta_dpo/loss_margin_mean": 62.783653259277344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.799697656840514, "grad_norm": 3297.61962890625, "learning_rate": 5.934146982094049e-08, "logits/chosen": -2.6939330101013184, "logits/rejected": -2.718538284301758, "loss": 5.9051, "step": 529 }, { "beta_dpo/beta": 0.17238160967826843, "beta_dpo/beta_margin_grad_mean": -0.22959469258785248, "beta_dpo/beta_margin_grad_std": 0.3280448913574219, "beta_dpo/beta_margin_mean": 8.542181968688965, "beta_dpo/beta_margin_std": 10.939250946044922, "beta_dpo/beta_used": 0.17238160967826843, "beta_dpo/beta_used_raw": 0.17238160967826843, "beta_dpo/gap_mean": 41.31953430175781, "beta_dpo/gap_std": 59.120887756347656, "beta_dpo/loss_margin_mean": 49.01192092895508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8012093726379441, "grad_norm": 402.5897521972656, "learning_rate": 5.848888922025552e-08, "logits/chosen": -2.6831681728363037, "logits/rejected": -2.6938679218292236, "loss": 1.0653, "step": 530 }, { "beta_dpo/beta": 0.20381604135036469, "beta_dpo/beta_margin_grad_mean": -0.35280048847198486, "beta_dpo/beta_margin_grad_std": 0.27263426780700684, "beta_dpo/beta_margin_mean": 9.475677490234375, "beta_dpo/beta_margin_std": 18.85011100769043, "beta_dpo/beta_used": 0.20381604135036469, "beta_dpo/beta_used_raw": 0.05179119110107422, "beta_dpo/gap_mean": 40.65738296508789, "beta_dpo/gap_std": 60.33680725097656, "beta_dpo/loss_margin_mean": 37.609867095947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8027210884353742, "grad_norm": 721.6512451171875, "learning_rate": 5.7641665597021435e-08, "logits/chosen": -2.6561365127563477, "logits/rejected": -2.7007155418395996, "loss": 1.0704, "step": 531 }, { "beta_dpo/beta": 0.5565502643585205, "beta_dpo/beta_margin_grad_mean": -0.17941914498806, "beta_dpo/beta_margin_grad_std": 0.3398556709289551, "beta_dpo/beta_margin_mean": 33.13414764404297, "beta_dpo/beta_margin_std": 49.94964599609375, "beta_dpo/beta_used": 0.5565502643585205, "beta_dpo/beta_used_raw": 0.5565502643585205, "beta_dpo/gap_mean": 42.603416442871094, "beta_dpo/gap_std": 60.625701904296875, "beta_dpo/loss_margin_mean": 54.4295539855957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8042328042328042, "grad_norm": 2332.498046875, "learning_rate": 5.679982264990424e-08, "logits/chosen": -2.671722888946533, "logits/rejected": -2.690765380859375, "loss": 4.9389, "step": 532 }, { "beta_dpo/beta": 0.08064591884613037, "beta_dpo/beta_margin_grad_mean": -0.3793724775314331, "beta_dpo/beta_margin_grad_std": 0.30262112617492676, "beta_dpo/beta_margin_mean": 2.689401865005493, "beta_dpo/beta_margin_std": 7.671005725860596, "beta_dpo/beta_used": 0.08064591884613037, "beta_dpo/beta_used_raw": -0.5107542276382446, "beta_dpo/gap_mean": 41.580352783203125, "beta_dpo/gap_std": 59.28820037841797, "beta_dpo/loss_margin_mean": 30.574068069458008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8057445200302343, "grad_norm": 319.44219970703125, "learning_rate": 5.596338392706076e-08, "logits/chosen": -2.6407032012939453, "logits/rejected": -2.677487373352051, "loss": 1.1372, "step": 533 }, { "beta_dpo/beta": 0.28966397047042847, "beta_dpo/beta_margin_grad_mean": -0.26224929094314575, "beta_dpo/beta_margin_grad_std": 0.3490891456604004, "beta_dpo/beta_margin_mean": 15.298602104187012, "beta_dpo/beta_margin_std": 28.032827377319336, "beta_dpo/beta_used": 0.28966397047042847, "beta_dpo/beta_used_raw": 0.28966397047042847, "beta_dpo/gap_mean": 42.06398010253906, "beta_dpo/gap_std": 60.664085388183594, "beta_dpo/loss_margin_mean": 44.23617172241211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8072562358276644, "grad_norm": 1128.4276123046875, "learning_rate": 5.513237282548033e-08, "logits/chosen": -2.7209525108337402, "logits/rejected": -2.7407164573669434, "loss": 2.1069, "step": 534 }, { "beta_dpo/beta": 0.18617966771125793, "beta_dpo/beta_margin_grad_mean": -0.3503064513206482, "beta_dpo/beta_margin_grad_std": 0.2936100959777832, "beta_dpo/beta_margin_mean": 10.054865837097168, "beta_dpo/beta_margin_std": 21.22418785095215, "beta_dpo/beta_used": 0.18617966771125793, "beta_dpo/beta_used_raw": -0.012800291180610657, "beta_dpo/gap_mean": 42.36915588378906, "beta_dpo/gap_std": 62.56495666503906, "beta_dpo/loss_margin_mean": 48.45128631591797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8087679516250945, "grad_norm": 501.5768127441406, "learning_rate": 5.430681259032957e-08, "logits/chosen": -2.7039687633514404, "logits/rejected": -2.7297236919403076, "loss": 1.6171, "step": 535 }, { "beta_dpo/beta": 0.16233938932418823, "beta_dpo/beta_margin_grad_mean": -0.2145552784204483, "beta_dpo/beta_margin_grad_std": 0.2856879234313965, "beta_dpo/beta_margin_mean": 8.161446571350098, "beta_dpo/beta_margin_std": 12.291303634643555, "beta_dpo/beta_used": 0.16233938932418823, "beta_dpo/beta_used_raw": 0.16233938932418823, "beta_dpo/gap_mean": 44.00806427001953, "beta_dpo/gap_std": 62.28424072265625, "beta_dpo/loss_margin_mean": 49.89377212524414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8102796674225246, "grad_norm": 491.6341857910156, "learning_rate": 5.3486726314303175e-08, "logits/chosen": -2.6543660163879395, "logits/rejected": -2.6955974102020264, "loss": 1.154, "step": 536 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4869405925273895, "beta_dpo/beta_margin_grad_std": 0.015572650358080864, "beta_dpo/beta_margin_mean": 0.05230266600847244, "beta_dpo/beta_margin_std": 0.06237604841589928, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19751229882240295, "beta_dpo/gap_mean": 45.57786560058594, "beta_dpo/gap_std": 62.4625244140625, "beta_dpo/loss_margin_mean": 52.30266189575195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8117913832199547, "grad_norm": 3.94612455368042, "learning_rate": 5.267213693697695e-08, "logits/chosen": -2.6674160957336426, "logits/rejected": -2.7232227325439453, "loss": 1.3469, "step": 537 }, { "beta_dpo/beta": 0.281332790851593, "beta_dpo/beta_margin_grad_mean": -0.1457832306623459, "beta_dpo/beta_margin_grad_std": 0.3034435212612152, "beta_dpo/beta_margin_mean": 15.984519958496094, "beta_dpo/beta_margin_std": 18.725618362426758, "beta_dpo/beta_used": 0.281332790851593, "beta_dpo/beta_used_raw": 0.281332790851593, "beta_dpo/gap_mean": 46.69129180908203, "beta_dpo/gap_std": 62.50321960449219, "beta_dpo/loss_margin_mean": 54.41521453857422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8133030990173847, "grad_norm": 404.3365783691406, "learning_rate": 5.1863067244167144e-08, "logits/chosen": -2.685150623321533, "logits/rejected": -2.69342041015625, "loss": 1.124, "step": 538 }, { "beta_dpo/beta": 0.003658185014501214, "beta_dpo/beta_margin_grad_mean": -0.4645317792892456, "beta_dpo/beta_margin_grad_std": 0.0686301663517952, "beta_dpo/beta_margin_mean": 0.14727774262428284, "beta_dpo/beta_margin_std": 0.28779175877571106, "beta_dpo/beta_used": 0.003658185014501214, "beta_dpo/beta_used_raw": -0.4632405936717987, "beta_dpo/gap_mean": 46.15460205078125, "beta_dpo/gap_std": 61.67210388183594, "beta_dpo/loss_margin_mean": 36.68147659301758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8148148148148148, "grad_norm": 14.169596672058105, "learning_rate": 5.105953986729195e-08, "logits/chosen": -2.695448398590088, "logits/rejected": -2.7702274322509766, "loss": 1.2546, "step": 539 }, { "beta_dpo/beta": 0.3899621069431305, "beta_dpo/beta_margin_grad_mean": -0.3473030626773834, "beta_dpo/beta_margin_grad_std": 0.3095802962779999, "beta_dpo/beta_margin_mean": 23.450132369995117, "beta_dpo/beta_margin_std": 43.04521560668945, "beta_dpo/beta_used": 0.3899621069431305, "beta_dpo/beta_used_raw": 0.24834245443344116, "beta_dpo/gap_mean": 45.328651428222656, "beta_dpo/gap_std": 61.643280029296875, "beta_dpo/loss_margin_mean": 48.5355224609375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8163265306122449, "grad_norm": 1687.021240234375, "learning_rate": 5.026157728273966e-08, "logits/chosen": -2.661731243133545, "logits/rejected": -2.721250534057617, "loss": 5.0481, "step": 540 }, { "beta_dpo/beta": 0.4383842349052429, "beta_dpo/beta_margin_grad_mean": -0.3423454463481903, "beta_dpo/beta_margin_grad_std": 0.3038384020328522, "beta_dpo/beta_margin_mean": 20.71125602722168, "beta_dpo/beta_margin_std": 44.29029083251953, "beta_dpo/beta_used": 0.4383842349052429, "beta_dpo/beta_used_raw": 0.11720219254493713, "beta_dpo/gap_mean": 45.792572021484375, "beta_dpo/gap_std": 62.074562072753906, "beta_dpo/loss_margin_mean": 42.424739837646484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.817838246409675, "grad_norm": 1500.9615478515625, "learning_rate": 4.9469201811239035e-08, "logits/chosen": -2.6188478469848633, "logits/rejected": -2.602254867553711, "loss": 1.5266, "step": 541 }, { "beta_dpo/beta": 0.6556390523910522, "beta_dpo/beta_margin_grad_mean": -0.20644766092300415, "beta_dpo/beta_margin_grad_std": 0.38132748007774353, "beta_dpo/beta_margin_mean": 31.148107528686523, "beta_dpo/beta_margin_std": 52.01056671142578, "beta_dpo/beta_used": 0.6556390523910522, "beta_dpo/beta_used_raw": 0.6556390523910522, "beta_dpo/gap_mean": 46.190338134765625, "beta_dpo/gap_std": 62.900184631347656, "beta_dpo/loss_margin_mean": 49.45319747924805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8193499622071051, "grad_norm": 1859.2935791015625, "learning_rate": 4.868243561723534e-08, "logits/chosen": -2.661344528198242, "logits/rejected": -2.68660569190979, "loss": 2.7462, "step": 542 }, { "beta_dpo/beta": 0.4987204074859619, "beta_dpo/beta_margin_grad_mean": -0.14901922643184662, "beta_dpo/beta_margin_grad_std": 0.2977834939956665, "beta_dpo/beta_margin_mean": 30.138824462890625, "beta_dpo/beta_margin_std": 42.48214340209961, "beta_dpo/beta_used": 0.4987204074859619, "beta_dpo/beta_used_raw": 0.4987204074859619, "beta_dpo/gap_mean": 46.941497802734375, "beta_dpo/gap_std": 62.01499938964844, "beta_dpo/loss_margin_mean": 54.9544677734375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8208616780045351, "grad_norm": 863.5515747070312, "learning_rate": 4.790130070827028e-08, "logits/chosen": -2.646941661834717, "logits/rejected": -2.7055516242980957, "loss": 1.5678, "step": 543 }, { "beta_dpo/beta": 0.1872519850730896, "beta_dpo/beta_margin_grad_mean": -0.31010645627975464, "beta_dpo/beta_margin_grad_std": 0.27586114406585693, "beta_dpo/beta_margin_mean": 10.743023872375488, "beta_dpo/beta_margin_std": 18.55499839782715, "beta_dpo/beta_used": 0.1872519850730896, "beta_dpo/beta_used_raw": 0.021043449640274048, "beta_dpo/gap_mean": 48.91101837158203, "beta_dpo/gap_std": 61.6202392578125, "beta_dpo/loss_margin_mean": 55.6162109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8223733938019653, "grad_norm": 474.7829895019531, "learning_rate": 4.7125818934366454e-08, "logits/chosen": -2.670353889465332, "logits/rejected": -2.7138702869415283, "loss": 0.8989, "step": 544 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48910388350486755, "beta_dpo/beta_margin_grad_std": 0.013874729163944721, "beta_dpo/beta_margin_mean": 0.043630167841911316, "beta_dpo/beta_margin_std": 0.05556848645210266, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1730966717004776, "beta_dpo/gap_mean": 48.88160705566406, "beta_dpo/gap_std": 60.836570739746094, "beta_dpo/loss_margin_mean": 43.630165100097656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8238851095993953, "grad_norm": 3.9678640365600586, "learning_rate": 4.635601198741607e-08, "logits/chosen": -2.6898350715637207, "logits/rejected": -2.7201786041259766, "loss": 1.3431, "step": 545 }, { "beta_dpo/beta": 0.3082360625267029, "beta_dpo/beta_margin_grad_mean": -0.3328179121017456, "beta_dpo/beta_margin_grad_std": 0.2885495126247406, "beta_dpo/beta_margin_mean": 15.173134803771973, "beta_dpo/beta_margin_std": 26.40501594543457, "beta_dpo/beta_used": 0.3082360625267029, "beta_dpo/beta_used_raw": 0.026352345943450928, "beta_dpo/gap_mean": 47.82192611694336, "beta_dpo/gap_std": 59.33293914794922, "beta_dpo/loss_margin_mean": 44.03695297241211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8253968253968254, "grad_norm": 988.4298706054688, "learning_rate": 4.559190140057428e-08, "logits/chosen": -2.604769706726074, "logits/rejected": -2.61057710647583, "loss": 1.3104, "step": 546 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48737552762031555, "beta_dpo/beta_margin_grad_std": 0.015205537900328636, "beta_dpo/beta_margin_mean": 0.05055927485227585, "beta_dpo/beta_margin_std": 0.060925960540771484, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.32710736989974976, "beta_dpo/gap_mean": 48.19280242919922, "beta_dpo/gap_std": 59.7764892578125, "beta_dpo/loss_margin_mean": 50.55927276611328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8269085411942555, "grad_norm": 3.9216151237487793, "learning_rate": 4.483350854765672e-08, "logits/chosen": -2.702695608139038, "logits/rejected": -2.7354962825775146, "loss": 1.3463, "step": 547 }, { "beta_dpo/beta": 0.08089688420295715, "beta_dpo/beta_margin_grad_mean": -0.34109532833099365, "beta_dpo/beta_margin_grad_std": 0.26831504702568054, "beta_dpo/beta_margin_mean": 3.4130594730377197, "beta_dpo/beta_margin_std": 7.480922222137451, "beta_dpo/beta_used": 0.08089688420295715, "beta_dpo/beta_used_raw": -0.3346138894557953, "beta_dpo/gap_mean": 45.551795959472656, "beta_dpo/gap_std": 58.918609619140625, "beta_dpo/loss_margin_mean": 34.3262825012207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8284202569916855, "grad_norm": 117.26342010498047, "learning_rate": 4.4080854642541826e-08, "logits/chosen": -2.7186174392700195, "logits/rejected": -2.7511534690856934, "loss": 0.9831, "step": 548 }, { "beta_dpo/beta": 0.16079770028591156, "beta_dpo/beta_margin_grad_mean": -0.3676168918609619, "beta_dpo/beta_margin_grad_std": 0.2950216233730316, "beta_dpo/beta_margin_mean": 6.911593914031982, "beta_dpo/beta_margin_std": 16.279003143310547, "beta_dpo/beta_used": 0.16079770028591156, "beta_dpo/beta_used_raw": -0.14775623381137848, "beta_dpo/gap_mean": 43.952392578125, "beta_dpo/gap_std": 58.409446716308594, "beta_dpo/loss_margin_mean": 37.30677795410156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8299319727891157, "grad_norm": 667.4703369140625, "learning_rate": 4.333396073857723e-08, "logits/chosen": -2.665083169937134, "logits/rejected": -2.7149558067321777, "loss": 1.735, "step": 549 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4928406774997711, "beta_dpo/beta_margin_grad_std": 0.01840727962553501, "beta_dpo/beta_margin_mean": 0.028669806197285652, "beta_dpo/beta_margin_std": 0.0737244263291359, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4056813418865204, "beta_dpo/gap_mean": 42.487064361572266, "beta_dpo/gap_std": 60.82746887207031, "beta_dpo/loss_margin_mean": 28.6698055267334, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8314436885865457, "grad_norm": 4.606810092926025, "learning_rate": 4.259284772799099e-08, "logits/chosen": -2.658696413040161, "logits/rejected": -2.670809507369995, "loss": 1.3534, "step": 550 }, { "beta_dpo/beta": 0.22617414593696594, "beta_dpo/beta_margin_grad_mean": -0.3524710536003113, "beta_dpo/beta_margin_grad_std": 0.30751875042915344, "beta_dpo/beta_margin_mean": 8.762946128845215, "beta_dpo/beta_margin_std": 21.531352996826172, "beta_dpo/beta_used": 0.22617414593696594, "beta_dpo/beta_used_raw": 0.04135200381278992, "beta_dpo/gap_mean": 40.189510345458984, "beta_dpo/gap_std": 60.546417236328125, "beta_dpo/loss_margin_mean": 36.95454025268555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8329554043839759, "grad_norm": 493.1956787109375, "learning_rate": 4.1857536341307176e-08, "logits/chosen": -2.6752843856811523, "logits/rejected": -2.7005228996276855, "loss": 1.6817, "step": 551 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49255049228668213, "beta_dpo/beta_margin_grad_std": 0.014759018085896969, "beta_dpo/beta_margin_mean": 0.029821420088410378, "beta_dpo/beta_margin_std": 0.05910492688417435, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3311285376548767, "beta_dpo/gap_mean": 38.61370849609375, "beta_dpo/gap_std": 60.83926773071289, "beta_dpo/loss_margin_mean": 29.82141876220703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8344671201814059, "grad_norm": 3.8606088161468506, "learning_rate": 4.112804714676593e-08, "logits/chosen": -2.6869289875030518, "logits/rejected": -2.716911792755127, "loss": 1.3556, "step": 552 }, { "beta_dpo/beta": 0.3258926272392273, "beta_dpo/beta_margin_grad_mean": -0.23501437902450562, "beta_dpo/beta_margin_grad_std": 0.3916976749897003, "beta_dpo/beta_margin_mean": 16.936145782470703, "beta_dpo/beta_margin_std": 22.796802520751953, "beta_dpo/beta_used": 0.3258926272392273, "beta_dpo/beta_used_raw": 0.3258926272392273, "beta_dpo/gap_mean": 40.338661193847656, "beta_dpo/gap_std": 60.826988220214844, "beta_dpo/loss_margin_mean": 52.69847106933594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8359788359788359, "grad_norm": 1291.6273193359375, "learning_rate": 4.0404400549748144e-08, "logits/chosen": -2.6371967792510986, "logits/rejected": -2.6834166049957275, "loss": 2.2743, "step": 553 }, { "beta_dpo/beta": 0.15449394285678864, "beta_dpo/beta_margin_grad_mean": -0.3572808802127838, "beta_dpo/beta_margin_grad_std": 0.28501877188682556, "beta_dpo/beta_margin_mean": 6.621963977813721, "beta_dpo/beta_margin_std": 14.800515174865723, "beta_dpo/beta_used": 0.15449394285678864, "beta_dpo/beta_used_raw": -0.3769558072090149, "beta_dpo/gap_mean": 41.05055236816406, "beta_dpo/gap_std": 60.93577575683594, "beta_dpo/loss_margin_mean": 41.461669921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8374905517762661, "grad_norm": 770.6651611328125, "learning_rate": 3.968661679220467e-08, "logits/chosen": -2.7024216651916504, "logits/rejected": -2.714076042175293, "loss": 1.4889, "step": 554 }, { "beta_dpo/beta": 0.7318522334098816, "beta_dpo/beta_margin_grad_mean": -0.34280702471733093, "beta_dpo/beta_margin_grad_std": 0.30473846197128296, "beta_dpo/beta_margin_mean": 42.75510025024414, "beta_dpo/beta_margin_std": 77.02257537841797, "beta_dpo/beta_used": 0.7318522334098816, "beta_dpo/beta_used_raw": 0.5461255311965942, "beta_dpo/gap_mean": 40.325279235839844, "beta_dpo/gap_std": 61.363433837890625, "beta_dpo/loss_margin_mean": 41.5892333984375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8390022675736961, "grad_norm": 1357.3326416015625, "learning_rate": 3.89747159520904e-08, "logits/chosen": -2.640911102294922, "logits/rejected": -2.6422338485717773, "loss": 2.0458, "step": 555 }, { "beta_dpo/beta": 0.12523804605007172, "beta_dpo/beta_margin_grad_mean": -0.34922054409980774, "beta_dpo/beta_margin_grad_std": 0.2761671245098114, "beta_dpo/beta_margin_mean": 5.768284797668457, "beta_dpo/beta_margin_std": 11.772993087768555, "beta_dpo/beta_used": 0.12523804605007172, "beta_dpo/beta_used_raw": -0.3097414970397949, "beta_dpo/gap_mean": 41.716514587402344, "beta_dpo/gap_std": 61.88613510131836, "beta_dpo/loss_margin_mean": 43.49522018432617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8405139833711263, "grad_norm": 824.6611328125, "learning_rate": 3.826871794280192e-08, "logits/chosen": -2.606398820877075, "logits/rejected": -2.626181125640869, "loss": 2.4349, "step": 556 }, { "beta_dpo/beta": 0.475053071975708, "beta_dpo/beta_margin_grad_mean": -0.338980108499527, "beta_dpo/beta_margin_grad_std": 0.3049916923046112, "beta_dpo/beta_margin_mean": 31.90798568725586, "beta_dpo/beta_margin_std": 49.599910736083984, "beta_dpo/beta_used": 0.475053071975708, "beta_dpo/beta_used_raw": 0.0741761326789856, "beta_dpo/gap_mean": 43.92063903808594, "beta_dpo/gap_std": 61.88815689086914, "beta_dpo/loss_margin_mean": 52.65357971191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8420256991685563, "grad_norm": 2362.4228515625, "learning_rate": 3.756864251262143e-08, "logits/chosen": -2.6281399726867676, "logits/rejected": -2.681450366973877, "loss": 1.4266, "step": 557 }, { "beta_dpo/beta": 0.06146103888750076, "beta_dpo/beta_margin_grad_mean": -0.3510986864566803, "beta_dpo/beta_margin_grad_std": 0.2637402415275574, "beta_dpo/beta_margin_mean": 3.4399425983428955, "beta_dpo/beta_margin_std": 6.441949844360352, "beta_dpo/beta_used": 0.06146103888750076, "beta_dpo/beta_used_raw": -0.19650453329086304, "beta_dpo/gap_mean": 44.777374267578125, "beta_dpo/gap_std": 61.062469482421875, "beta_dpo/loss_margin_mean": 52.67274856567383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8435374149659864, "grad_norm": 230.94940185546875, "learning_rate": 3.687450924416341e-08, "logits/chosen": -2.6728081703186035, "logits/rejected": -2.6919565200805664, "loss": 1.0793, "step": 558 }, { "beta_dpo/beta": 0.38971662521362305, "beta_dpo/beta_margin_grad_mean": -0.3183648884296417, "beta_dpo/beta_margin_grad_std": 0.29581478238105774, "beta_dpo/beta_margin_mean": 23.02225685119629, "beta_dpo/beta_margin_std": 37.03925323486328, "beta_dpo/beta_used": 0.38971662521362305, "beta_dpo/beta_used_raw": 0.26015961170196533, "beta_dpo/gap_mean": 46.305084228515625, "beta_dpo/gap_std": 61.043853759765625, "beta_dpo/loss_margin_mean": 48.54761505126953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8450491307634165, "grad_norm": 868.8684692382812, "learning_rate": 3.6186337553827743e-08, "logits/chosen": -2.674811363220215, "logits/rejected": -2.719515323638916, "loss": 2.4704, "step": 559 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4892803728580475, "beta_dpo/beta_margin_grad_std": 0.015276779420673847, "beta_dpo/beta_margin_mean": 0.042931199073791504, "beta_dpo/beta_margin_std": 0.06118900701403618, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5526399612426758, "beta_dpo/gap_mean": 45.3603401184082, "beta_dpo/gap_std": 61.738525390625, "beta_dpo/loss_margin_mean": 42.93119812011719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8465608465608465, "grad_norm": 3.9332680702209473, "learning_rate": 3.550414669125573e-08, "logits/chosen": -2.687546730041504, "logits/rejected": -2.7129406929016113, "loss": 1.3528, "step": 560 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4891974925994873, "beta_dpo/beta_margin_grad_std": 0.015051293186843395, "beta_dpo/beta_margin_mean": 0.043262675404548645, "beta_dpo/beta_margin_std": 0.06029163673520088, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.47285085916519165, "beta_dpo/gap_mean": 45.07012939453125, "beta_dpo/gap_std": 61.351409912109375, "beta_dpo/loss_margin_mean": 43.26267623901367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8480725623582767, "grad_norm": 5.872406005859375, "learning_rate": 3.482795573879241e-08, "logits/chosen": -2.710063934326172, "logits/rejected": -2.717824935913086, "loss": 1.3517, "step": 561 }, { "beta_dpo/beta": 0.09827530384063721, "beta_dpo/beta_margin_grad_mean": -0.3188078701496124, "beta_dpo/beta_margin_grad_std": 0.24310383200645447, "beta_dpo/beta_margin_mean": 5.576371192932129, "beta_dpo/beta_margin_std": 9.145209312438965, "beta_dpo/beta_used": 0.09827530384063721, "beta_dpo/beta_used_raw": -0.6985861659049988, "beta_dpo/gap_mean": 45.44583511352539, "beta_dpo/gap_std": 60.678016662597656, "beta_dpo/loss_margin_mean": 44.74995040893555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8495842781557067, "grad_norm": 384.5655517578125, "learning_rate": 3.415778361095226e-08, "logits/chosen": -2.6769933700561523, "logits/rejected": -2.707970380783081, "loss": 1.037, "step": 562 }, { "beta_dpo/beta": 0.5065726637840271, "beta_dpo/beta_margin_grad_mean": -0.19759216904640198, "beta_dpo/beta_margin_grad_std": 0.3802008330821991, "beta_dpo/beta_margin_mean": 26.610563278198242, "beta_dpo/beta_margin_std": 34.39125442504883, "beta_dpo/beta_used": 0.5065726637840271, "beta_dpo/beta_used_raw": 0.5065726637840271, "beta_dpo/gap_mean": 45.60718536376953, "beta_dpo/gap_std": 61.100364685058594, "beta_dpo/loss_margin_mean": 51.522247314453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8510959939531368, "grad_norm": 1682.5528564453125, "learning_rate": 3.349364905389032e-08, "logits/chosen": -2.6109561920166016, "logits/rejected": -2.63857364654541, "loss": 2.5953, "step": 563 }, { "beta_dpo/beta": 0.33434048295021057, "beta_dpo/beta_margin_grad_mean": -0.3251274526119232, "beta_dpo/beta_margin_grad_std": 0.29672136902809143, "beta_dpo/beta_margin_mean": 17.791189193725586, "beta_dpo/beta_margin_std": 37.987796783447266, "beta_dpo/beta_used": 0.33434048295021057, "beta_dpo/beta_used_raw": 0.3103909194469452, "beta_dpo/gap_mean": 46.764015197753906, "beta_dpo/gap_std": 61.399383544921875, "beta_dpo/loss_margin_mean": 51.34377670288086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8526077097505669, "grad_norm": 323.5574951171875, "learning_rate": 3.283557064487785e-08, "logits/chosen": -2.7021117210388184, "logits/rejected": -2.728177547454834, "loss": 0.9074, "step": 564 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4917549788951874, "beta_dpo/beta_margin_grad_std": 0.014812292531132698, "beta_dpo/beta_margin_mean": 0.03301194682717323, "beta_dpo/beta_margin_std": 0.05931118503212929, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7162899971008301, "beta_dpo/gap_mean": 44.52302932739258, "beta_dpo/gap_std": 61.823211669921875, "beta_dpo/loss_margin_mean": 33.01194381713867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.854119425547997, "grad_norm": 3.8082151412963867, "learning_rate": 3.218356679178252e-08, "logits/chosen": -2.66007137298584, "logits/rejected": -2.680148124694824, "loss": 1.3563, "step": 565 }, { "beta_dpo/beta": 0.6846121549606323, "beta_dpo/beta_margin_grad_mean": -0.2204248458147049, "beta_dpo/beta_margin_grad_std": 0.3947845995426178, "beta_dpo/beta_margin_mean": 41.83022689819336, "beta_dpo/beta_margin_std": 55.214500427246094, "beta_dpo/beta_used": 0.6846121549606323, "beta_dpo/beta_used_raw": 0.6846121549606323, "beta_dpo/gap_mean": 45.49637985229492, "beta_dpo/gap_std": 61.635169982910156, "beta_dpo/loss_margin_mean": 55.270782470703125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8556311413454271, "grad_norm": 3381.755126953125, "learning_rate": 3.1537655732553764e-08, "logits/chosen": -2.678783655166626, "logits/rejected": -2.68731951713562, "loss": 3.5014, "step": 566 }, { "beta_dpo/beta": 0.11898145079612732, "beta_dpo/beta_margin_grad_mean": -0.2830747365951538, "beta_dpo/beta_margin_grad_std": 0.2540491223335266, "beta_dpo/beta_margin_mean": 7.840543270111084, "beta_dpo/beta_margin_std": 11.777994155883789, "beta_dpo/beta_used": 0.11898145079612732, "beta_dpo/beta_used_raw": -0.3021107614040375, "beta_dpo/gap_mean": 47.88221740722656, "beta_dpo/gap_std": 60.788387298583984, "beta_dpo/loss_margin_mean": 49.669471740722656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8571428571428571, "grad_norm": 287.8054504394531, "learning_rate": 3.089785553471233e-08, "logits/chosen": -2.6855921745300293, "logits/rejected": -2.72359037399292, "loss": 0.9714, "step": 567 }, { "beta_dpo/beta": 0.1126384437084198, "beta_dpo/beta_margin_grad_mean": -0.33147335052490234, "beta_dpo/beta_margin_grad_std": 0.2764538824558258, "beta_dpo/beta_margin_mean": 6.823496341705322, "beta_dpo/beta_margin_std": 11.87881088256836, "beta_dpo/beta_used": 0.1126384437084198, "beta_dpo/beta_used_raw": -0.024167485535144806, "beta_dpo/gap_mean": 46.8142204284668, "beta_dpo/gap_std": 59.064796447753906, "beta_dpo/loss_margin_mean": 48.74604415893555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8586545729402872, "grad_norm": 370.638427734375, "learning_rate": 3.026418409484513e-08, "logits/chosen": -2.6945395469665527, "logits/rejected": -2.7361483573913574, "loss": 1.1515, "step": 568 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49009791016578674, "beta_dpo/beta_margin_grad_std": 0.014846453443169594, "beta_dpo/beta_margin_mean": 0.03965507820248604, "beta_dpo/beta_margin_std": 0.05946631357073784, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4391339421272278, "beta_dpo/gap_mean": 46.97361755371094, "beta_dpo/gap_std": 59.10280227661133, "beta_dpo/loss_margin_mean": 39.65507507324219, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8601662887377173, "grad_norm": 4.464386940002441, "learning_rate": 2.963665913810451e-08, "logits/chosen": -2.668788433074951, "logits/rejected": -2.679075241088867, "loss": 1.3493, "step": 569 }, { "beta_dpo/beta": 0.9029349684715271, "beta_dpo/beta_margin_grad_mean": -0.3065827488899231, "beta_dpo/beta_margin_grad_std": 0.29563117027282715, "beta_dpo/beta_margin_mean": 70.92729187011719, "beta_dpo/beta_margin_std": 114.28070068359375, "beta_dpo/beta_used": 0.9029349684715271, "beta_dpo/beta_used_raw": 0.6830695271492004, "beta_dpo/gap_mean": 47.96229934692383, "beta_dpo/gap_std": 59.62773895263672, "beta_dpo/loss_margin_mean": 64.41072845458984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8616780045351474, "grad_norm": 781.4393920898438, "learning_rate": 2.9015298217712453e-08, "logits/chosen": -2.6695735454559326, "logits/rejected": -2.6966354846954346, "loss": 1.4145, "step": 570 }, { "beta_dpo/beta": 0.14786839485168457, "beta_dpo/beta_margin_grad_mean": -0.35911333560943604, "beta_dpo/beta_margin_grad_std": 0.2933298647403717, "beta_dpo/beta_margin_mean": 6.836848258972168, "beta_dpo/beta_margin_std": 13.719381332397461, "beta_dpo/beta_used": 0.14786839485168457, "beta_dpo/beta_used_raw": -0.18180185556411743, "beta_dpo/gap_mean": 47.217105865478516, "beta_dpo/gap_std": 60.4058723449707, "beta_dpo/loss_margin_mean": 36.39863967895508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8631897203325775, "grad_norm": 588.5823974609375, "learning_rate": 2.840011871446962e-08, "logits/chosen": -2.660149097442627, "logits/rejected": -2.665757417678833, "loss": 1.6662, "step": 571 }, { "beta_dpo/beta": 0.3506244122982025, "beta_dpo/beta_margin_grad_mean": -0.34980422258377075, "beta_dpo/beta_margin_grad_std": 0.30930382013320923, "beta_dpo/beta_margin_mean": 16.844135284423828, "beta_dpo/beta_margin_std": 33.19049835205078, "beta_dpo/beta_used": 0.3506244122982025, "beta_dpo/beta_used_raw": 0.3069079518318176, "beta_dpo/gap_mean": 46.96336364746094, "beta_dpo/gap_std": 59.50727844238281, "beta_dpo/loss_margin_mean": 44.716285705566406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8647014361300076, "grad_norm": 698.576171875, "learning_rate": 2.7791137836269158e-08, "logits/chosen": -2.72560977935791, "logits/rejected": -2.6912665367126465, "loss": 1.6549, "step": 572 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48835763335227966, "beta_dpo/beta_margin_grad_std": 0.017284568399190903, "beta_dpo/beta_margin_mean": 0.046639442443847656, "beta_dpo/beta_margin_std": 0.0692763701081276, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.31975650787353516, "beta_dpo/gap_mean": 46.835479736328125, "beta_dpo/gap_std": 60.77733612060547, "beta_dpo/loss_margin_mean": 46.63943862915039, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8662131519274376, "grad_norm": 4.514516830444336, "learning_rate": 2.718837261761528e-08, "logits/chosen": -2.689098834991455, "logits/rejected": -2.7141900062561035, "loss": 1.3473, "step": 573 }, { "beta_dpo/beta": 0.5320430994033813, "beta_dpo/beta_margin_grad_mean": -0.2053694725036621, "beta_dpo/beta_margin_grad_std": 0.3366325795650482, "beta_dpo/beta_margin_mean": 33.57503890991211, "beta_dpo/beta_margin_std": 48.08414840698242, "beta_dpo/beta_used": 0.5320430994033813, "beta_dpo/beta_used_raw": 0.5320430994033813, "beta_dpo/gap_mean": 46.90580749511719, "beta_dpo/gap_std": 60.166412353515625, "beta_dpo/loss_margin_mean": 53.00767517089844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8677248677248677, "grad_norm": 1298.696044921875, "learning_rate": 2.659183991914696e-08, "logits/chosen": -2.641983985900879, "logits/rejected": -2.6648993492126465, "loss": 1.0964, "step": 574 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4908713698387146, "beta_dpo/beta_margin_grad_std": 0.01577117294073105, "beta_dpo/beta_margin_mean": 0.03656105324625969, "beta_dpo/beta_margin_std": 0.06318365782499313, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8852093815803528, "beta_dpo/gap_mean": 46.991451263427734, "beta_dpo/gap_std": 60.7982292175293, "beta_dpo/loss_margin_mean": 36.56105041503906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8692365835222978, "grad_norm": 4.405168056488037, "learning_rate": 2.600155642716606e-08, "logits/chosen": -2.6793715953826904, "logits/rejected": -2.735421895980835, "loss": 1.3564, "step": 575 }, { "beta_dpo/beta": 0.49797600507736206, "beta_dpo/beta_margin_grad_mean": -0.30947670340538025, "beta_dpo/beta_margin_grad_std": 0.2893446385860443, "beta_dpo/beta_margin_mean": 30.291072845458984, "beta_dpo/beta_margin_std": 55.82719421386719, "beta_dpo/beta_used": 0.49797600507736206, "beta_dpo/beta_used_raw": 0.11030000448226929, "beta_dpo/gap_mean": 47.00640869140625, "beta_dpo/gap_std": 61.392356872558594, "beta_dpo/loss_margin_mean": 52.25484085083008, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8707482993197279, "grad_norm": 710.93017578125, "learning_rate": 2.5417538653170754e-08, "logits/chosen": -2.652390480041504, "logits/rejected": -2.703554153442383, "loss": 0.9258, "step": 576 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49140772223472595, "beta_dpo/beta_margin_grad_std": 0.015221024863421917, "beta_dpo/beta_margin_mean": 0.03440757095813751, "beta_dpo/beta_margin_std": 0.060963064432144165, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5801578164100647, "beta_dpo/gap_mean": 44.7833137512207, "beta_dpo/gap_std": 61.34014892578125, "beta_dpo/loss_margin_mean": 34.407569885253906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.872260015117158, "grad_norm": 3.743579149246216, "learning_rate": 2.4839802933393607e-08, "logits/chosen": -2.6809592247009277, "logits/rejected": -2.6941497325897217, "loss": 1.3538, "step": 577 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.490521103143692, "beta_dpo/beta_margin_grad_std": 0.013443054631352425, "beta_dpo/beta_margin_mean": 0.03795962780714035, "beta_dpo/beta_margin_std": 0.05385306850075722, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.27034834027290344, "beta_dpo/gap_mean": 43.665016174316406, "beta_dpo/gap_std": 60.30692672729492, "beta_dpo/loss_margin_mean": 37.959625244140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.873771730914588, "grad_norm": 3.6446995735168457, "learning_rate": 2.4268365428344733e-08, "logits/chosen": -2.629558801651001, "logits/rejected": -2.6443064212799072, "loss": 1.3497, "step": 578 }, { "beta_dpo/beta": 0.4658081829547882, "beta_dpo/beta_margin_grad_mean": -0.3299787640571594, "beta_dpo/beta_margin_grad_std": 0.29749563336372375, "beta_dpo/beta_margin_mean": 27.888103485107422, "beta_dpo/beta_margin_std": 50.06583023071289, "beta_dpo/beta_used": 0.4658081829547882, "beta_dpo/beta_used_raw": 0.3958699703216553, "beta_dpo/gap_mean": 45.86872100830078, "beta_dpo/gap_std": 60.440391540527344, "beta_dpo/loss_margin_mean": 60.93756866455078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8752834467120182, "grad_norm": 744.2311401367188, "learning_rate": 2.3703242122359357e-08, "logits/chosen": -2.675605297088623, "logits/rejected": -2.6713008880615234, "loss": 1.8259, "step": 579 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49142566323280334, "beta_dpo/beta_margin_grad_std": 0.015153970569372177, "beta_dpo/beta_margin_mean": 0.034334223717451096, "beta_dpo/beta_margin_std": 0.06069787219166756, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7968156337738037, "beta_dpo/gap_mean": 44.41089630126953, "beta_dpo/gap_std": 60.85872268676758, "beta_dpo/loss_margin_mean": 34.33422088623047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8767951625094482, "grad_norm": 3.7005584239959717, "learning_rate": 2.3144448823151392e-08, "logits/chosen": -2.689199447631836, "logits/rejected": -2.719898223876953, "loss": 1.3575, "step": 580 }, { "beta_dpo/beta": 0.7862246036529541, "beta_dpo/beta_margin_grad_mean": -0.16341674327850342, "beta_dpo/beta_margin_grad_std": 0.3616049289703369, "beta_dpo/beta_margin_mean": 39.6046028137207, "beta_dpo/beta_margin_std": 50.12466049194336, "beta_dpo/beta_used": 0.7862246036529541, "beta_dpo/beta_used_raw": 0.7862246036529541, "beta_dpo/gap_mean": 45.356910705566406, "beta_dpo/gap_std": 60.91173553466797, "beta_dpo/loss_margin_mean": 49.52141571044922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8783068783068783, "grad_norm": 2152.57861328125, "learning_rate": 2.259200116137039e-08, "logits/chosen": -2.6786694526672363, "logits/rejected": -2.711623430252075, "loss": 4.1808, "step": 581 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48708194494247437, "beta_dpo/beta_margin_grad_std": 0.016725635156035423, "beta_dpo/beta_margin_mean": 0.05174446851015091, "beta_dpo/beta_margin_std": 0.0670098289847374, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.32603561878204346, "beta_dpo/gap_mean": 46.38182067871094, "beta_dpo/gap_std": 61.84091567993164, "beta_dpo/loss_margin_mean": 51.74446487426758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8798185941043084, "grad_norm": 4.562225341796875, "learning_rate": 2.204591459016525e-08, "logits/chosen": -2.6497602462768555, "logits/rejected": -2.6249165534973145, "loss": 1.3481, "step": 582 }, { "beta_dpo/beta": 0.026739204302430153, "beta_dpo/beta_margin_grad_mean": -0.38849934935569763, "beta_dpo/beta_margin_grad_std": 0.24935057759284973, "beta_dpo/beta_margin_mean": 1.3460389375686646, "beta_dpo/beta_margin_std": 2.851071357727051, "beta_dpo/beta_used": 0.026739204302430153, "beta_dpo/beta_used_raw": -0.28397732973098755, "beta_dpo/gap_mean": 46.57801055908203, "beta_dpo/gap_std": 63.39801025390625, "beta_dpo/loss_margin_mean": 45.611568450927734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8813303099017384, "grad_norm": 87.90982055664062, "learning_rate": 2.1506204384751064e-08, "logits/chosen": -2.648636817932129, "logits/rejected": -2.7286105155944824, "loss": 1.0609, "step": 583 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4901319742202759, "beta_dpo/beta_margin_grad_std": 0.014690570533275604, "beta_dpo/beta_margin_mean": 0.03951896354556084, "beta_dpo/beta_margin_std": 0.05884556472301483, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20548459887504578, "beta_dpo/gap_mean": 45.45520782470703, "beta_dpo/gap_std": 63.303741455078125, "beta_dpo/loss_margin_mean": 39.518959045410156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8828420256991686, "grad_norm": 4.558004379272461, "learning_rate": 2.09728856419826e-08, "logits/chosen": -2.587111711502075, "logits/rejected": -2.6333353519439697, "loss": 1.3471, "step": 584 }, { "beta_dpo/beta": 0.43229246139526367, "beta_dpo/beta_margin_grad_mean": -0.34557801485061646, "beta_dpo/beta_margin_grad_std": 0.30445998907089233, "beta_dpo/beta_margin_mean": 21.776687622070312, "beta_dpo/beta_margin_std": 50.94138717651367, "beta_dpo/beta_used": 0.43229246139526367, "beta_dpo/beta_used_raw": 0.049025118350982666, "beta_dpo/gap_mean": 44.17786407470703, "beta_dpo/gap_std": 62.77634048461914, "beta_dpo/loss_margin_mean": 42.17787170410156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8843537414965986, "grad_norm": 347.8465881347656, "learning_rate": 2.044597327993153e-08, "logits/chosen": -2.7210912704467773, "logits/rejected": -2.743596076965332, "loss": 0.9183, "step": 585 }, { "beta_dpo/beta": 0.5478598475456238, "beta_dpo/beta_margin_grad_mean": -0.13486941158771515, "beta_dpo/beta_margin_grad_std": 0.30780330300331116, "beta_dpo/beta_margin_mean": 28.558656692504883, "beta_dpo/beta_margin_std": 34.09098815917969, "beta_dpo/beta_used": 0.5478598475456238, "beta_dpo/beta_used_raw": 0.5478598475456238, "beta_dpo/gap_mean": 45.71546936035156, "beta_dpo/gap_std": 62.273223876953125, "beta_dpo/loss_margin_mean": 52.50735855102539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8858654572940288, "grad_norm": 594.8992309570312, "learning_rate": 1.9925482037469187e-08, "logits/chosen": -2.641079902648926, "logits/rejected": -2.650176525115967, "loss": 0.6024, "step": 586 }, { "beta_dpo/beta": 0.6760488748550415, "beta_dpo/beta_margin_grad_mean": -0.24379543960094452, "beta_dpo/beta_margin_grad_std": 0.40585577487945557, "beta_dpo/beta_margin_mean": 36.56929397583008, "beta_dpo/beta_margin_std": 50.73309326171875, "beta_dpo/beta_used": 0.6760488748550415, "beta_dpo/beta_used_raw": 0.6760488748550415, "beta_dpo/gap_mean": 47.27666091918945, "beta_dpo/gap_std": 63.39990997314453, "beta_dpo/loss_margin_mean": 51.81309127807617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8873771730914588, "grad_norm": 2910.705810546875, "learning_rate": 1.9411426473854687e-08, "logits/chosen": -2.6720352172851562, "logits/rejected": -2.664752244949341, "loss": 3.7419, "step": 587 }, { "beta_dpo/beta": 0.5917388200759888, "beta_dpo/beta_margin_grad_mean": -0.16118545830249786, "beta_dpo/beta_margin_grad_std": 0.3344132602214813, "beta_dpo/beta_margin_mean": 30.505475997924805, "beta_dpo/beta_margin_std": 43.3480110168457, "beta_dpo/beta_used": 0.5917388200759888, "beta_dpo/beta_used_raw": 0.5917388200759888, "beta_dpo/gap_mean": 47.58997344970703, "beta_dpo/gap_std": 63.348236083984375, "beta_dpo/loss_margin_mean": 49.69615173339844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8888888888888888, "grad_norm": 1808.064453125, "learning_rate": 1.890382096832699e-08, "logits/chosen": -2.671614170074463, "logits/rejected": -2.7003884315490723, "loss": 4.4247, "step": 588 }, { "beta_dpo/beta": 0.421763151884079, "beta_dpo/beta_margin_grad_mean": -0.17055396735668182, "beta_dpo/beta_margin_grad_std": 0.3382036089897156, "beta_dpo/beta_margin_mean": 24.8367919921875, "beta_dpo/beta_margin_std": 33.23596954345703, "beta_dpo/beta_used": 0.421763151884079, "beta_dpo/beta_used_raw": 0.421763151884079, "beta_dpo/gap_mean": 47.91116714477539, "beta_dpo/gap_std": 62.123374938964844, "beta_dpo/loss_margin_mean": 53.15658950805664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.890400604686319, "grad_norm": 956.9037475585938, "learning_rate": 1.840267971970344e-08, "logits/chosen": -2.6805222034454346, "logits/rejected": -2.696932315826416, "loss": 1.6051, "step": 589 }, { "beta_dpo/beta": 0.31593888998031616, "beta_dpo/beta_margin_grad_mean": -0.23733378946781158, "beta_dpo/beta_margin_grad_std": 0.34482425451278687, "beta_dpo/beta_margin_mean": 16.363954544067383, "beta_dpo/beta_margin_std": 33.442989349365234, "beta_dpo/beta_used": 0.31593888998031616, "beta_dpo/beta_used_raw": 0.31593888998031616, "beta_dpo/gap_mean": 49.48387908935547, "beta_dpo/gap_std": 62.66719055175781, "beta_dpo/loss_margin_mean": 53.68710708618164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.891912320483749, "grad_norm": 1125.697998046875, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -2.671731472015381, "logits/rejected": -2.695211887359619, "loss": 1.4225, "step": 590 }, { "beta_dpo/beta": 0.3611750602722168, "beta_dpo/beta_margin_grad_mean": -0.36769920587539673, "beta_dpo/beta_margin_grad_std": 0.31189650297164917, "beta_dpo/beta_margin_mean": 23.30474281311035, "beta_dpo/beta_margin_std": 38.24365234375, "beta_dpo/beta_used": 0.3611750602722168, "beta_dpo/beta_used_raw": -0.048559755086898804, "beta_dpo/gap_mean": 50.10858917236328, "beta_dpo/gap_std": 63.43110275268555, "beta_dpo/loss_margin_mean": 48.033172607421875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8934240362811792, "grad_norm": 1010.1416625976562, "learning_rate": 1.7419845883949098e-08, "logits/chosen": -2.662808895111084, "logits/rejected": -2.7068371772766113, "loss": 2.6003, "step": 591 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4882203936576843, "beta_dpo/beta_margin_grad_std": 0.014940670691430569, "beta_dpo/beta_margin_mean": 0.0471792109310627, "beta_dpo/beta_margin_std": 0.05986550450325012, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9182393550872803, "beta_dpo/gap_mean": 49.40886688232422, "beta_dpo/gap_std": 63.50605010986328, "beta_dpo/loss_margin_mean": 47.17920684814453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8949357520786092, "grad_norm": 4.937822341918945, "learning_rate": 1.6938180788793556e-08, "logits/chosen": -2.6216418743133545, "logits/rejected": -2.681025981903076, "loss": 1.3546, "step": 592 }, { "beta_dpo/beta": 0.10008588433265686, "beta_dpo/beta_margin_grad_mean": -0.34281057119369507, "beta_dpo/beta_margin_grad_std": 0.2690125107765198, "beta_dpo/beta_margin_mean": 4.608383655548096, "beta_dpo/beta_margin_std": 9.81347942352295, "beta_dpo/beta_used": 0.10008588433265686, "beta_dpo/beta_used_raw": -0.28076955676078796, "beta_dpo/gap_mean": 48.20735549926758, "beta_dpo/gap_std": 62.75986099243164, "beta_dpo/loss_margin_mean": 45.53506088256836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8964474678760394, "grad_norm": 453.401611328125, "learning_rate": 1.6463034933723336e-08, "logits/chosen": -2.620387077331543, "logits/rejected": -2.6645336151123047, "loss": 1.3823, "step": 593 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4902368187904358, "beta_dpo/beta_margin_grad_std": 0.01430630125105381, "beta_dpo/beta_margin_mean": 0.03909473866224289, "beta_dpo/beta_margin_std": 0.05729776993393898, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.563217282295227, "beta_dpo/gap_mean": 46.71723937988281, "beta_dpo/gap_std": 61.984397888183594, "beta_dpo/loss_margin_mean": 39.0947380065918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8979591836734694, "grad_norm": 4.723624229431152, "learning_rate": 1.5994421609589385e-08, "logits/chosen": -2.684835910797119, "logits/rejected": -2.688138484954834, "loss": 1.3514, "step": 594 }, { "beta_dpo/beta": 0.7683069705963135, "beta_dpo/beta_margin_grad_mean": -0.16124965250492096, "beta_dpo/beta_margin_grad_std": 0.3314729332923889, "beta_dpo/beta_margin_mean": 46.35795593261719, "beta_dpo/beta_margin_std": 64.7205810546875, "beta_dpo/beta_used": 0.7683069705963135, "beta_dpo/beta_used_raw": 0.7683069705963135, "beta_dpo/gap_mean": 47.41358184814453, "beta_dpo/gap_std": 62.83539962768555, "beta_dpo/loss_margin_mean": 56.00600814819336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8994708994708994, "grad_norm": 5064.8017578125, "learning_rate": 1.553235392451377e-08, "logits/chosen": -2.6298999786376953, "logits/rejected": -2.6741812229156494, "loss": 2.9971, "step": 595 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49240824580192566, "beta_dpo/beta_margin_grad_std": 0.016294801607728004, "beta_dpo/beta_margin_mean": 0.03041478991508484, "beta_dpo/beta_margin_std": 0.06527598202228546, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.981214702129364, "beta_dpo/gap_mean": 45.94303512573242, "beta_dpo/gap_std": 63.294654846191406, "beta_dpo/loss_margin_mean": 30.4147891998291, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9009826152683296, "grad_norm": 3.551957368850708, "learning_rate": 1.507684480352292e-08, "logits/chosen": -2.719653606414795, "logits/rejected": -2.707707405090332, "loss": 1.3593, "step": 596 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48790696263313293, "beta_dpo/beta_margin_grad_std": 0.015724794939160347, "beta_dpo/beta_margin_mean": 0.04844098910689354, "beta_dpo/beta_margin_std": 0.06300117075443268, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3929826617240906, "beta_dpo/gap_mean": 45.600364685058594, "beta_dpo/gap_std": 63.82142639160156, "beta_dpo/loss_margin_mean": 48.44098663330078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9024943310657596, "grad_norm": 3.981309413909912, "learning_rate": 1.4627906988186111e-08, "logits/chosen": -2.629239320755005, "logits/rejected": -2.6321868896484375, "loss": 1.35, "step": 597 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4906459450721741, "beta_dpo/beta_margin_grad_std": 0.016122223809361458, "beta_dpo/beta_margin_mean": 0.03746494650840759, "beta_dpo/beta_margin_std": 0.06457507610321045, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.32325291633605957, "beta_dpo/gap_mean": 44.20635223388672, "beta_dpo/gap_std": 63.54792022705078, "beta_dpo/loss_margin_mean": 37.464942932128906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9040060468631897, "grad_norm": 3.6207661628723145, "learning_rate": 1.4185553036259095e-08, "logits/chosen": -2.6546905040740967, "logits/rejected": -2.6918768882751465, "loss": 1.3501, "step": 598 }, { "beta_dpo/beta": 0.11703047156333923, "beta_dpo/beta_margin_grad_mean": -0.3772951662540436, "beta_dpo/beta_margin_grad_std": 0.2943384647369385, "beta_dpo/beta_margin_mean": 4.737819671630859, "beta_dpo/beta_margin_std": 12.399438858032227, "beta_dpo/beta_used": 0.11703047156333923, "beta_dpo/beta_used_raw": -0.01774664968252182, "beta_dpo/gap_mean": 42.406776428222656, "beta_dpo/gap_std": 65.09292602539062, "beta_dpo/loss_margin_mean": 35.07368087768555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9055177626606198, "grad_norm": 222.8874969482422, "learning_rate": 1.3749795321332885e-08, "logits/chosen": -2.6827566623687744, "logits/rejected": -2.701910972595215, "loss": 1.0998, "step": 599 }, { "beta_dpo/beta": 0.31094202399253845, "beta_dpo/beta_margin_grad_mean": -0.33834904432296753, "beta_dpo/beta_margin_grad_std": 0.28769829869270325, "beta_dpo/beta_margin_mean": 19.670934677124023, "beta_dpo/beta_margin_std": 35.01140594482422, "beta_dpo/beta_used": 0.31094202399253845, "beta_dpo/beta_used_raw": 0.25774458050727844, "beta_dpo/gap_mean": 42.59223175048828, "beta_dpo/gap_std": 65.00656127929688, "beta_dpo/loss_margin_mean": 48.77783966064453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9070294784580499, "grad_norm": 1108.4788818359375, "learning_rate": 1.3320646032487393e-08, "logits/chosen": -2.6578688621520996, "logits/rejected": -2.695185422897339, "loss": 2.3016, "step": 600 }, { "epoch": 0.9070294784580499, "eval_beta_dpo/beta": 0.16006897389888763, "eval_beta_dpo/beta_margin_grad_mean": -0.382272869348526, "eval_beta_dpo/beta_margin_grad_std": 0.16653159260749817, "eval_beta_dpo/beta_margin_mean": 8.13154411315918, "eval_beta_dpo/beta_margin_std": 10.172322273254395, "eval_beta_dpo/beta_used": 0.16006897389888763, "eval_beta_dpo/beta_used_raw": -0.1685781031847, "eval_beta_dpo/gap_mean": 43.68259048461914, "eval_beta_dpo/gap_std": 65.11566162109375, "eval_beta_dpo/loss_margin_mean": 39.20629119873047, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -2.739290475845337, "eval_logits/rejected": -2.7541418075561523, "eval_loss": 1.292609453201294, "eval_runtime": 36.3199, "eval_samples_per_second": 63.409, "eval_steps_per_second": 1.982, "step": 600 }, { "beta_dpo/beta": 0.16864097118377686, "beta_dpo/beta_margin_grad_mean": -0.257927268743515, "beta_dpo/beta_margin_grad_std": 0.3531711995601654, "beta_dpo/beta_margin_mean": 8.603113174438477, "beta_dpo/beta_margin_std": 12.788561820983887, "beta_dpo/beta_used": 0.16864097118377686, "beta_dpo/beta_used_raw": 0.16864097118377686, "beta_dpo/gap_mean": 44.62376403808594, "beta_dpo/gap_std": 65.33360290527344, "beta_dpo/loss_margin_mean": 47.54133224487305, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.90854119425548, "grad_norm": 535.887451171875, "learning_rate": 1.2898117173950868e-08, "logits/chosen": -2.6898365020751953, "logits/rejected": -2.736027240753174, "loss": 1.576, "step": 601 }, { "beta_dpo/beta": 0.07933580130338669, "beta_dpo/beta_margin_grad_mean": -0.32244589924812317, "beta_dpo/beta_margin_grad_std": 0.2566680610179901, "beta_dpo/beta_margin_mean": 4.081573486328125, "beta_dpo/beta_margin_std": 7.243903636932373, "beta_dpo/beta_used": 0.07933580130338669, "beta_dpo/beta_used_raw": 0.05116073787212372, "beta_dpo/gap_mean": 44.761383056640625, "beta_dpo/gap_std": 64.28461456298828, "beta_dpo/loss_margin_mean": 48.29131317138672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.91005291005291, "grad_norm": 439.05218505859375, "learning_rate": 1.2482220564763667e-08, "logits/chosen": -2.6923227310180664, "logits/rejected": -2.697751045227051, "loss": 1.1586, "step": 602 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4896172881126404, "beta_dpo/beta_margin_grad_std": 0.016847671940922737, "beta_dpo/beta_margin_mean": 0.04159487411379814, "beta_dpo/beta_margin_std": 0.06750661134719849, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.42653343081474304, "beta_dpo/gap_mean": 44.85320281982422, "beta_dpo/gap_std": 64.16732788085938, "beta_dpo/loss_margin_mean": 41.594871520996094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9115646258503401, "grad_norm": 4.901760578155518, "learning_rate": 1.2072967838448051e-08, "logits/chosen": -2.668689250946045, "logits/rejected": -2.7003610134124756, "loss": 1.3513, "step": 603 }, { "beta_dpo/beta": 0.3185281753540039, "beta_dpo/beta_margin_grad_mean": -0.34246817231178284, "beta_dpo/beta_margin_grad_std": 0.29718223214149475, "beta_dpo/beta_margin_mean": 14.821910858154297, "beta_dpo/beta_margin_std": 35.994895935058594, "beta_dpo/beta_used": 0.3185281753540039, "beta_dpo/beta_used_raw": -0.35138705372810364, "beta_dpo/gap_mean": 43.115814208984375, "beta_dpo/gap_std": 65.3883056640625, "beta_dpo/loss_margin_mean": 38.19163131713867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9130763416477702, "grad_norm": 1009.60888671875, "learning_rate": 1.1670370442682459e-08, "logits/chosen": -2.695526123046875, "logits/rejected": -2.6826400756835938, "loss": 2.4601, "step": 604 }, { "beta_dpo/beta": 0.19613006711006165, "beta_dpo/beta_margin_grad_mean": -0.33283427357673645, "beta_dpo/beta_margin_grad_std": 0.2794617712497711, "beta_dpo/beta_margin_mean": 8.730825424194336, "beta_dpo/beta_margin_std": 19.20995330810547, "beta_dpo/beta_used": 0.19613006711006165, "beta_dpo/beta_used_raw": -0.11835774779319763, "beta_dpo/gap_mean": 43.39642333984375, "beta_dpo/gap_std": 65.54288482666016, "beta_dpo/loss_margin_mean": 44.109275817871094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9145880574452003, "grad_norm": 454.4317932128906, "learning_rate": 1.1274439638981532e-08, "logits/chosen": -2.6736767292022705, "logits/rejected": -2.698230028152466, "loss": 1.1924, "step": 605 }, { "beta_dpo/beta": 0.15788358449935913, "beta_dpo/beta_margin_grad_mean": -0.33934134244918823, "beta_dpo/beta_margin_grad_std": 0.29730185866355896, "beta_dpo/beta_margin_mean": 7.944419860839844, "beta_dpo/beta_margin_std": 16.363065719604492, "beta_dpo/beta_used": 0.15788358449935913, "beta_dpo/beta_used_raw": -0.1490476429462433, "beta_dpo/gap_mean": 43.95667266845703, "beta_dpo/gap_std": 65.00943756103516, "beta_dpo/loss_margin_mean": 47.72287368774414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9160997732426304, "grad_norm": 456.1502380371094, "learning_rate": 1.0885186502381016e-08, "logits/chosen": -2.68801212310791, "logits/rejected": -2.725857734680176, "loss": 1.1833, "step": 606 }, { "beta_dpo/beta": 0.058823633939027786, "beta_dpo/beta_margin_grad_mean": -0.34928181767463684, "beta_dpo/beta_margin_grad_std": 0.2856190800666809, "beta_dpo/beta_margin_mean": 3.743802547454834, "beta_dpo/beta_margin_std": 6.441350936889648, "beta_dpo/beta_used": 0.058823633939027786, "beta_dpo/beta_used_raw": -0.2072100192308426, "beta_dpo/gap_mean": 46.15118408203125, "beta_dpo/gap_std": 64.77732849121094, "beta_dpo/loss_margin_mean": 54.09783935546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9176114890400605, "grad_norm": 391.70599365234375, "learning_rate": 1.0502621921127774e-08, "logits/chosen": -2.6742727756500244, "logits/rejected": -2.6755614280700684, "loss": 1.544, "step": 607 }, { "beta_dpo/beta": 0.09076043963432312, "beta_dpo/beta_margin_grad_mean": -0.37318694591522217, "beta_dpo/beta_margin_grad_std": 0.29319775104522705, "beta_dpo/beta_margin_mean": 3.9786016941070557, "beta_dpo/beta_margin_std": 9.074618339538574, "beta_dpo/beta_used": 0.09076043963432312, "beta_dpo/beta_used_raw": -0.24247410893440247, "beta_dpo/gap_mean": 43.72791290283203, "beta_dpo/gap_std": 63.787208557128906, "beta_dpo/loss_margin_mean": 33.294742584228516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9191232048374905, "grad_norm": 533.9596557617188, "learning_rate": 1.0126756596375685e-08, "logits/chosen": -2.6911814212799072, "logits/rejected": -2.73805570602417, "loss": 2.0222, "step": 608 }, { "beta_dpo/beta": 0.04691994562745094, "beta_dpo/beta_margin_grad_mean": -0.3009467124938965, "beta_dpo/beta_margin_grad_std": 0.21611282229423523, "beta_dpo/beta_margin_mean": 2.724210739135742, "beta_dpo/beta_margin_std": 4.51793909072876, "beta_dpo/beta_used": 0.04691994562745094, "beta_dpo/beta_used_raw": 0.025028718635439873, "beta_dpo/gap_mean": 45.130619049072266, "beta_dpo/gap_std": 62.89480209350586, "beta_dpo/loss_margin_mean": 51.13569641113281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9206349206349206, "grad_norm": 53.00703811645508, "learning_rate": 9.757601041885694e-09, "logits/chosen": -2.6229584217071533, "logits/rejected": -2.637420177459717, "loss": 0.8375, "step": 609 }, { "beta_dpo/beta": 0.09450700134038925, "beta_dpo/beta_margin_grad_mean": -0.3247065842151642, "beta_dpo/beta_margin_grad_std": 0.2635004222393036, "beta_dpo/beta_margin_mean": 5.4679741859436035, "beta_dpo/beta_margin_std": 10.656018257141113, "beta_dpo/beta_used": 0.09450700134038925, "beta_dpo/beta_used_raw": -0.16219983994960785, "beta_dpo/gap_mean": 45.62958526611328, "beta_dpo/gap_std": 63.55073547363281, "beta_dpo/loss_margin_mean": 51.04203414916992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9221466364323507, "grad_norm": 231.1525421142578, "learning_rate": 9.395165583732379e-09, "logits/chosen": -2.659266710281372, "logits/rejected": -2.67087459564209, "loss": 1.1302, "step": 610 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4915010929107666, "beta_dpo/beta_margin_grad_std": 0.012929055839776993, "beta_dpo/beta_margin_mean": 0.03402576968073845, "beta_dpo/beta_margin_std": 0.0517716147005558, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.49436575174331665, "beta_dpo/gap_mean": 44.93272399902344, "beta_dpo/gap_std": 62.146820068359375, "beta_dpo/loss_margin_mean": 34.0257682800293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9236583522297808, "grad_norm": 3.9169530868530273, "learning_rate": 9.03946036001449e-09, "logits/chosen": -2.686603307723999, "logits/rejected": -2.7164177894592285, "loss": 1.3521, "step": 611 }, { "beta_dpo/beta": 0.5432992577552795, "beta_dpo/beta_margin_grad_mean": -0.15547636151313782, "beta_dpo/beta_margin_grad_std": 0.30775806307792664, "beta_dpo/beta_margin_mean": 31.741825103759766, "beta_dpo/beta_margin_std": 52.140472412109375, "beta_dpo/beta_used": 0.5432992577552795, "beta_dpo/beta_used_raw": 0.5432992577552795, "beta_dpo/gap_mean": 44.699771881103516, "beta_dpo/gap_std": 63.031394958496094, "beta_dpo/loss_margin_mean": 52.14598846435547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9251700680272109, "grad_norm": 903.3458251953125, "learning_rate": 8.690495320571839e-09, "logits/chosen": -2.7321419715881348, "logits/rejected": -2.7761948108673096, "loss": 1.0432, "step": 612 }, { "beta_dpo/beta": 0.6847690939903259, "beta_dpo/beta_margin_grad_mean": -0.15985107421875, "beta_dpo/beta_margin_grad_std": 0.342172235250473, "beta_dpo/beta_margin_mean": 40.27997970581055, "beta_dpo/beta_margin_std": 41.0804557800293, "beta_dpo/beta_used": 0.6847690939903259, "beta_dpo/beta_used_raw": 0.6847690939903259, "beta_dpo/gap_mean": 47.31453323364258, "beta_dpo/gap_std": 62.68354797363281, "beta_dpo/loss_margin_mean": 58.733970642089844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.926681783824641, "grad_norm": 962.0908203125, "learning_rate": 8.348280226706722e-09, "logits/chosen": -2.6662611961364746, "logits/rejected": -2.666443347930908, "loss": 1.5508, "step": 613 }, { "beta_dpo/beta": 0.45406830310821533, "beta_dpo/beta_margin_grad_mean": -0.14712685346603394, "beta_dpo/beta_margin_grad_std": 0.3178809881210327, "beta_dpo/beta_margin_mean": 26.492536544799805, "beta_dpo/beta_margin_std": 27.315082550048828, "beta_dpo/beta_used": 0.45406830310821533, "beta_dpo/beta_used_raw": 0.45406830310821533, "beta_dpo/gap_mean": 49.33307647705078, "beta_dpo/gap_std": 62.564170837402344, "beta_dpo/loss_margin_mean": 58.573570251464844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9281934996220711, "grad_norm": 1251.2257080078125, "learning_rate": 8.012824650910937e-09, "logits/chosen": -2.6480016708374023, "logits/rejected": -2.650477409362793, "loss": 3.7381, "step": 614 }, { "beta_dpo/beta": 0.2266254723072052, "beta_dpo/beta_margin_grad_mean": -0.33106034994125366, "beta_dpo/beta_margin_grad_std": 0.29621782898902893, "beta_dpo/beta_margin_mean": 16.26293182373047, "beta_dpo/beta_margin_std": 29.231475830078125, "beta_dpo/beta_used": 0.2266254723072052, "beta_dpo/beta_used_raw": -0.08037641644477844, "beta_dpo/gap_mean": 51.66609191894531, "beta_dpo/gap_std": 63.533966064453125, "beta_dpo/loss_margin_mean": 55.990055084228516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9297052154195011, "grad_norm": 1692.2303466796875, "learning_rate": 7.684137976598088e-09, "logits/chosen": -2.7013931274414062, "logits/rejected": -2.7283122539520264, "loss": 4.8218, "step": 615 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48794546723365784, "beta_dpo/beta_margin_grad_std": 0.01570320688188076, "beta_dpo/beta_margin_mean": 0.04827674850821495, "beta_dpo/beta_margin_std": 0.06289937347173691, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3228529393672943, "beta_dpo/gap_mean": 50.67670440673828, "beta_dpo/gap_std": 63.63441848754883, "beta_dpo/loss_margin_mean": 48.2767448425293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9312169312169312, "grad_norm": 4.385542869567871, "learning_rate": 7.36222939784098e-09, "logits/chosen": -2.641757011413574, "logits/rejected": -2.667628288269043, "loss": 1.3437, "step": 616 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48837581276893616, "beta_dpo/beta_margin_grad_std": 0.01476855855435133, "beta_dpo/beta_margin_mean": 0.04654671624302864, "beta_dpo/beta_margin_std": 0.05915853753685951, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8279430866241455, "beta_dpo/gap_mean": 50.42936706542969, "beta_dpo/gap_std": 62.97998046875, "beta_dpo/loss_margin_mean": 46.546714782714844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9327286470143613, "grad_norm": 4.693226337432861, "learning_rate": 7.047107919114586e-09, "logits/chosen": -2.6448731422424316, "logits/rejected": -2.678791046142578, "loss": 1.3522, "step": 617 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48894065618515015, "beta_dpo/beta_margin_grad_std": 0.014615356922149658, "beta_dpo/beta_margin_mean": 0.044289905577898026, "beta_dpo/beta_margin_std": 0.0585482232272625, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20385049283504486, "beta_dpo/gap_mean": 49.48298645019531, "beta_dpo/gap_std": 61.756935119628906, "beta_dpo/loss_margin_mean": 44.2899055480957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9342403628117913, "grad_norm": 4.098353862762451, "learning_rate": 6.738782355044048e-09, "logits/chosen": -2.6989409923553467, "logits/rejected": -2.742736339569092, "loss": 1.343, "step": 618 }, { "beta_dpo/beta": 0.05981595069169998, "beta_dpo/beta_margin_grad_mean": -0.3577544689178467, "beta_dpo/beta_margin_grad_std": 0.2613312900066376, "beta_dpo/beta_margin_mean": 3.053403854370117, "beta_dpo/beta_margin_std": 5.86320161819458, "beta_dpo/beta_used": 0.05981595069169998, "beta_dpo/beta_used_raw": -0.0736929327249527, "beta_dpo/gap_mean": 48.345306396484375, "beta_dpo/gap_std": 61.484928131103516, "beta_dpo/loss_margin_mean": 45.6826286315918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9357520786092215, "grad_norm": 231.64077758789062, "learning_rate": 6.437261330158206e-09, "logits/chosen": -2.621891498565674, "logits/rejected": -2.662619113922119, "loss": 1.0265, "step": 619 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4897708594799042, "beta_dpo/beta_margin_grad_std": 0.016919763758778572, "beta_dpo/beta_margin_mean": 0.04097270593047142, "beta_dpo/beta_margin_std": 0.06777238100767136, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.27026185393333435, "beta_dpo/gap_mean": 46.54864501953125, "beta_dpo/gap_std": 62.34131622314453, "beta_dpo/loss_margin_mean": 40.97270584106445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9372637944066515, "grad_norm": 4.460054397583008, "learning_rate": 6.142553278648238e-09, "logits/chosen": -2.6672420501708984, "logits/rejected": -2.6600871086120605, "loss": 1.3471, "step": 620 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49017781019210815, "beta_dpo/beta_margin_grad_std": 0.016016369685530663, "beta_dpo/beta_margin_mean": 0.03934101015329361, "beta_dpo/beta_margin_std": 0.06416355073451996, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2695315480232239, "beta_dpo/gap_mean": 44.97822570800781, "beta_dpo/gap_std": 62.6904296875, "beta_dpo/loss_margin_mean": 39.341007232666016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9387755102040817, "grad_norm": 5.045653343200684, "learning_rate": 5.854666444131934e-09, "logits/chosen": -2.657578468322754, "logits/rejected": -2.72951078414917, "loss": 1.3485, "step": 621 }, { "beta_dpo/beta": 0.04698815196752548, "beta_dpo/beta_margin_grad_mean": -0.36041608452796936, "beta_dpo/beta_margin_grad_std": 0.2401597797870636, "beta_dpo/beta_margin_mean": 2.0823357105255127, "beta_dpo/beta_margin_std": 4.529184341430664, "beta_dpo/beta_used": 0.04698815196752548, "beta_dpo/beta_used_raw": 0.003569558262825012, "beta_dpo/gap_mean": 44.71732711791992, "beta_dpo/gap_std": 62.19575881958008, "beta_dpo/loss_margin_mean": 41.447021484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9402872260015117, "grad_norm": 154.41818237304688, "learning_rate": 5.573608879422875e-09, "logits/chosen": -2.7066431045532227, "logits/rejected": -2.7310023307800293, "loss": 1.0009, "step": 622 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4911818206310272, "beta_dpo/beta_margin_grad_std": 0.014753853902220726, "beta_dpo/beta_margin_mean": 0.0353122353553772, "beta_dpo/beta_margin_std": 0.059086430817842484, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.30502232909202576, "beta_dpo/gap_mean": 43.35747528076172, "beta_dpo/gap_std": 61.82522964477539, "beta_dpo/loss_margin_mean": 35.312232971191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9417989417989417, "grad_norm": 4.888934135437012, "learning_rate": 5.299388446305342e-09, "logits/chosen": -2.701735496520996, "logits/rejected": -2.7184062004089355, "loss": 1.3507, "step": 623 }, { "beta_dpo/beta": 0.24348057806491852, "beta_dpo/beta_margin_grad_mean": -0.3350781798362732, "beta_dpo/beta_margin_grad_std": 0.29120299220085144, "beta_dpo/beta_margin_mean": 14.595932960510254, "beta_dpo/beta_margin_std": 29.82645034790039, "beta_dpo/beta_used": 0.24348057806491852, "beta_dpo/beta_used_raw": 0.03254944086074829, "beta_dpo/gap_mean": 45.11775207519531, "beta_dpo/gap_std": 63.17668914794922, "beta_dpo/loss_margin_mean": 57.301719665527344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9433106575963719, "grad_norm": 992.426025390625, "learning_rate": 5.03201281531429e-09, "logits/chosen": -2.650886297225952, "logits/rejected": -2.7026054859161377, "loss": 3.3259, "step": 624 }, { "beta_dpo/beta": 0.17052163183689117, "beta_dpo/beta_margin_grad_mean": -0.3552238941192627, "beta_dpo/beta_margin_grad_std": 0.3150428533554077, "beta_dpo/beta_margin_mean": 8.42929458618164, "beta_dpo/beta_margin_std": 17.509658813476562, "beta_dpo/beta_used": 0.17052163183689117, "beta_dpo/beta_used_raw": 0.011962205171585083, "beta_dpo/gap_mean": 44.10435485839844, "beta_dpo/gap_std": 63.476158142089844, "beta_dpo/loss_margin_mean": 38.564598083496094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9448223733938019, "grad_norm": 605.406982421875, "learning_rate": 4.7714894655209174e-09, "logits/chosen": -2.6164121627807617, "logits/rejected": -2.6647305488586426, "loss": 2.2167, "step": 625 }, { "beta_dpo/beta": 0.5995941162109375, "beta_dpo/beta_margin_grad_mean": -0.37211719155311584, "beta_dpo/beta_margin_grad_std": 0.32004252076148987, "beta_dpo/beta_margin_mean": 34.19970703125, "beta_dpo/beta_margin_std": 66.08871459960938, "beta_dpo/beta_used": 0.5995941162109375, "beta_dpo/beta_used_raw": 0.41719281673431396, "beta_dpo/gap_mean": 45.84862518310547, "beta_dpo/gap_std": 64.30546569824219, "beta_dpo/loss_margin_mean": 53.29762649536133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9463340891912321, "grad_norm": 1210.412109375, "learning_rate": 4.517825684323323e-09, "logits/chosen": -2.609795093536377, "logits/rejected": -2.6795785427093506, "loss": 1.7134, "step": 626 }, { "beta_dpo/beta": 0.5332940816879272, "beta_dpo/beta_margin_grad_mean": -0.17192615568637848, "beta_dpo/beta_margin_grad_std": 0.35103702545166016, "beta_dpo/beta_margin_mean": 29.522315979003906, "beta_dpo/beta_margin_std": 42.06474304199219, "beta_dpo/beta_used": 0.5332940816879272, "beta_dpo/beta_used_raw": 0.5332940816879272, "beta_dpo/gap_mean": 47.599143981933594, "beta_dpo/gap_std": 65.24098205566406, "beta_dpo/loss_margin_mean": 59.54181671142578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9478458049886621, "grad_norm": 1807.4373779296875, "learning_rate": 4.271028567242818e-09, "logits/chosen": -2.6838269233703613, "logits/rejected": -2.7577645778656006, "loss": 2.0301, "step": 627 }, { "beta_dpo/beta": 0.9057918190956116, "beta_dpo/beta_margin_grad_mean": -0.1649731993675232, "beta_dpo/beta_margin_grad_std": 0.3386947214603424, "beta_dpo/beta_margin_mean": 59.12750244140625, "beta_dpo/beta_margin_std": 72.5323486328125, "beta_dpo/beta_used": 0.9057918190956116, "beta_dpo/beta_used_raw": 0.9057918190956116, "beta_dpo/gap_mean": 48.67937088012695, "beta_dpo/gap_std": 66.37429809570312, "beta_dpo/loss_margin_mean": 54.8211669921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9493575207860923, "grad_norm": 1693.4759521484375, "learning_rate": 4.0311050177251895e-09, "logits/chosen": -2.6874046325683594, "logits/rejected": -2.7123756408691406, "loss": 3.691, "step": 628 }, { "beta_dpo/beta": 0.23058763146400452, "beta_dpo/beta_margin_grad_mean": -0.22378967702388763, "beta_dpo/beta_margin_grad_std": 0.3259342908859253, "beta_dpo/beta_margin_mean": 10.928329467773438, "beta_dpo/beta_margin_std": 18.11894989013672, "beta_dpo/beta_used": 0.23058763146400452, "beta_dpo/beta_used_raw": 0.23058763146400452, "beta_dpo/gap_mean": 48.796653747558594, "beta_dpo/gap_std": 63.12718200683594, "beta_dpo/loss_margin_mean": 43.14870834350586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9508692365835223, "grad_norm": 795.4987182617188, "learning_rate": 3.798061746947995e-09, "logits/chosen": -2.6901187896728516, "logits/rejected": -2.703768014907837, "loss": 1.3562, "step": 629 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4876892566680908, "beta_dpo/beta_margin_grad_std": 0.015770576894283295, "beta_dpo/beta_margin_mean": 0.04931268468499184, "beta_dpo/beta_margin_std": 0.06319800019264221, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21424424648284912, "beta_dpo/gap_mean": 48.620269775390625, "beta_dpo/gap_std": 62.675270080566406, "beta_dpo/loss_margin_mean": 49.31268310546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9523809523809523, "grad_norm": 3.732545852661133, "learning_rate": 3.5719052736323806e-09, "logits/chosen": -2.7009708881378174, "logits/rejected": -2.7310919761657715, "loss": 1.3441, "step": 630 }, { "beta_dpo/beta": 0.13656026124954224, "beta_dpo/beta_margin_grad_mean": -0.3304164409637451, "beta_dpo/beta_margin_grad_std": 0.28156745433807373, "beta_dpo/beta_margin_mean": 9.255078315734863, "beta_dpo/beta_margin_std": 15.206001281738281, "beta_dpo/beta_used": 0.13656026124954224, "beta_dpo/beta_used_raw": -0.24415385723114014, "beta_dpo/gap_mean": 50.703880310058594, "beta_dpo/gap_std": 63.21595764160156, "beta_dpo/loss_margin_mean": 57.89152908325195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9538926681783825, "grad_norm": 520.2734985351562, "learning_rate": 3.352641923861144e-09, "logits/chosen": -2.6408612728118896, "logits/rejected": -2.7099227905273438, "loss": 1.6559, "step": 631 }, { "beta_dpo/beta": 0.35730719566345215, "beta_dpo/beta_margin_grad_mean": -0.33872702717781067, "beta_dpo/beta_margin_grad_std": 0.3022365868091583, "beta_dpo/beta_margin_mean": 20.990575790405273, "beta_dpo/beta_margin_std": 34.35315704345703, "beta_dpo/beta_used": 0.35730719566345215, "beta_dpo/beta_used_raw": 0.27769792079925537, "beta_dpo/gap_mean": 51.40171813964844, "beta_dpo/gap_std": 62.56011962890625, "beta_dpo/loss_margin_mean": 55.30367660522461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9554043839758125, "grad_norm": 1589.6190185546875, "learning_rate": 3.140277830901428e-09, "logits/chosen": -2.6545939445495605, "logits/rejected": -2.657032012939453, "loss": 2.6254, "step": 632 }, { "beta_dpo/beta": 0.004144558683037758, "beta_dpo/beta_margin_grad_mean": -0.45725518465042114, "beta_dpo/beta_margin_grad_std": 0.08634334057569504, "beta_dpo/beta_margin_mean": 0.1822691112756729, "beta_dpo/beta_margin_std": 0.3737095594406128, "beta_dpo/beta_used": 0.004144558683037758, "beta_dpo/beta_used_raw": -0.6120012402534485, "beta_dpo/gap_mean": 48.40888214111328, "beta_dpo/gap_std": 62.7435302734375, "beta_dpo/loss_margin_mean": 34.49918746948242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9569160997732427, "grad_norm": 14.633933067321777, "learning_rate": 2.9348189350335007e-09, "logits/chosen": -2.6546261310577393, "logits/rejected": -2.6814990043640137, "loss": 1.2401, "step": 633 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4926002025604248, "beta_dpo/beta_margin_grad_std": 0.014817976392805576, "beta_dpo/beta_margin_mean": 0.029641717672348022, "beta_dpo/beta_margin_std": 0.059361252933740616, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0031733512878418, "beta_dpo/gap_mean": 45.527591705322266, "beta_dpo/gap_std": 62.52671432495117, "beta_dpo/loss_margin_mean": 29.64171600341797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9584278155706727, "grad_norm": 3.942791700363159, "learning_rate": 2.736270983384276e-09, "logits/chosen": -2.637674331665039, "logits/rejected": -2.6252646446228027, "loss": 1.3601, "step": 634 }, { "beta_dpo/beta": 0.02143486775457859, "beta_dpo/beta_margin_grad_mean": -0.4092518091201782, "beta_dpo/beta_margin_grad_std": 0.24451853334903717, "beta_dpo/beta_margin_mean": 0.7495732307434082, "beta_dpo/beta_margin_std": 2.171191692352295, "beta_dpo/beta_used": 0.02143486775457859, "beta_dpo/beta_used_raw": -0.07430359721183777, "beta_dpo/gap_mean": 43.78990173339844, "beta_dpo/gap_std": 63.080482482910156, "beta_dpo/loss_margin_mean": 39.18009567260742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9599395313681028, "grad_norm": 128.81625366210938, "learning_rate": 2.5446395297668287e-09, "logits/chosen": -2.723635673522949, "logits/rejected": -2.7572028636932373, "loss": 1.1247, "step": 635 }, { "beta_dpo/beta": 0.10870007425546646, "beta_dpo/beta_margin_grad_mean": -0.3280467092990875, "beta_dpo/beta_margin_grad_std": 0.26943886280059814, "beta_dpo/beta_margin_mean": 5.701779842376709, "beta_dpo/beta_margin_std": 10.197867393493652, "beta_dpo/beta_used": 0.10870007425546646, "beta_dpo/beta_used_raw": 0.04050559550523758, "beta_dpo/gap_mean": 44.87833786010742, "beta_dpo/gap_std": 62.010650634765625, "beta_dpo/loss_margin_mean": 50.85565948486328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9614512471655329, "grad_norm": 454.9655456542969, "learning_rate": 2.359929934524829e-09, "logits/chosen": -2.6658878326416016, "logits/rejected": -2.726693630218506, "loss": 1.3966, "step": 636 }, { "beta_dpo/beta": 0.02533043548464775, "beta_dpo/beta_margin_grad_mean": -0.3730124533176422, "beta_dpo/beta_margin_grad_std": 0.24714794754981995, "beta_dpo/beta_margin_mean": 1.1759974956512451, "beta_dpo/beta_margin_std": 2.531526565551758, "beta_dpo/beta_used": 0.02533043548464775, "beta_dpo/beta_used_raw": -0.0743693932890892, "beta_dpo/gap_mean": 45.429656982421875, "beta_dpo/gap_std": 61.94932556152344, "beta_dpo/loss_margin_mean": 47.75582504272461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9629629629629629, "grad_norm": 112.1050033569336, "learning_rate": 2.1821473643827137e-09, "logits/chosen": -2.692873001098633, "logits/rejected": -2.7226829528808594, "loss": 1.0988, "step": 637 }, { "beta_dpo/beta": 0.392764687538147, "beta_dpo/beta_margin_grad_mean": -0.19437937438488007, "beta_dpo/beta_margin_grad_std": 0.3297037184238434, "beta_dpo/beta_margin_mean": 20.477251052856445, "beta_dpo/beta_margin_std": 27.82265281677246, "beta_dpo/beta_used": 0.392764687538147, "beta_dpo/beta_used_raw": 0.392764687538147, "beta_dpo/gap_mean": 46.582801818847656, "beta_dpo/gap_std": 62.45842361450195, "beta_dpo/loss_margin_mean": 51.22517013549805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9644746787603931, "grad_norm": 1120.524658203125, "learning_rate": 2.0112967923011646e-09, "logits/chosen": -2.6447203159332275, "logits/rejected": -2.671504020690918, "loss": 1.4728, "step": 638 }, { "beta_dpo/beta": 0.34388256072998047, "beta_dpo/beta_margin_grad_mean": -0.3614674210548401, "beta_dpo/beta_margin_grad_std": 0.3070048689842224, "beta_dpo/beta_margin_mean": 17.764925003051758, "beta_dpo/beta_margin_std": 32.02020263671875, "beta_dpo/beta_used": 0.34388256072998047, "beta_dpo/beta_used_raw": -0.06688737869262695, "beta_dpo/gap_mean": 46.94060516357422, "beta_dpo/gap_std": 61.81489562988281, "beta_dpo/loss_margin_mean": 47.23959732055664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9659863945578231, "grad_norm": 1660.4727783203125, "learning_rate": 1.847382997337943e-09, "logits/chosen": -2.6560726165771484, "logits/rejected": -2.696206569671631, "loss": 1.579, "step": 639 }, { "beta_dpo/beta": 0.7060970067977905, "beta_dpo/beta_margin_grad_mean": -0.1816413551568985, "beta_dpo/beta_margin_grad_std": 0.37319278717041016, "beta_dpo/beta_margin_mean": 36.43185043334961, "beta_dpo/beta_margin_std": 47.868900299072266, "beta_dpo/beta_used": 0.7060970067977905, "beta_dpo/beta_used_raw": 0.7060970067977905, "beta_dpo/gap_mean": 47.429561614990234, "beta_dpo/gap_std": 62.916141510009766, "beta_dpo/loss_margin_mean": 51.59613037109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9674981103552532, "grad_norm": 2258.9169921875, "learning_rate": 1.690410564514244e-09, "logits/chosen": -2.6873271465301514, "logits/rejected": -2.727292537689209, "loss": 5.4355, "step": 640 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49105432629585266, "beta_dpo/beta_margin_grad_std": 0.0135034816339612, "beta_dpo/beta_margin_mean": 0.03582516312599182, "beta_dpo/beta_margin_std": 0.05409466102719307, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8339321613311768, "beta_dpo/gap_mean": 45.91423416137695, "beta_dpo/gap_std": 62.17848205566406, "beta_dpo/loss_margin_mean": 35.82516098022461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9690098261526833, "grad_norm": 4.1548380851745605, "learning_rate": 1.5403838846864692e-09, "logits/chosen": -2.6482882499694824, "logits/rejected": -2.662318468093872, "loss": 1.3567, "step": 641 }, { "beta_dpo/beta": 0.23349756002426147, "beta_dpo/beta_margin_grad_mean": -0.3614916503429413, "beta_dpo/beta_margin_grad_std": 0.29659217596054077, "beta_dpo/beta_margin_mean": 11.944047927856445, "beta_dpo/beta_margin_std": 25.50196075439453, "beta_dpo/beta_used": 0.23349756002426147, "beta_dpo/beta_used_raw": -0.3991941511631012, "beta_dpo/gap_mean": 45.326568603515625, "beta_dpo/gap_std": 62.26630401611328, "beta_dpo/loss_margin_mean": 42.00018310546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9705215419501134, "grad_norm": 867.032958984375, "learning_rate": 1.3973071544233218e-09, "logits/chosen": -2.685100793838501, "logits/rejected": -2.67539381980896, "loss": 2.268, "step": 642 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49193012714385986, "beta_dpo/beta_margin_grad_std": 0.014358686283230782, "beta_dpo/beta_margin_mean": 0.03231479600071907, "beta_dpo/beta_margin_std": 0.05750858411192894, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6026580333709717, "beta_dpo/gap_mean": 43.12543869018555, "beta_dpo/gap_std": 61.69993591308594, "beta_dpo/loss_margin_mean": 32.31479263305664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9720332577475435, "grad_norm": 4.907909870147705, "learning_rate": 1.261184375888541e-09, "logits/chosen": -2.6672868728637695, "logits/rejected": -2.7138211727142334, "loss": 1.3558, "step": 643 }, { "beta_dpo/beta": 0.6081154942512512, "beta_dpo/beta_margin_grad_mean": -0.34746262431144714, "beta_dpo/beta_margin_grad_std": 0.31283101439476013, "beta_dpo/beta_margin_mean": 30.244873046875, "beta_dpo/beta_margin_std": 66.24939727783203, "beta_dpo/beta_used": 0.6081154942512512, "beta_dpo/beta_used_raw": 0.6020084023475647, "beta_dpo/gap_mean": 42.613712310791016, "beta_dpo/gap_std": 62.06809616088867, "beta_dpo/loss_margin_mean": 45.869632720947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9735449735449735, "grad_norm": 1124.218017578125, "learning_rate": 1.1320193567288527e-09, "logits/chosen": -2.6450648307800293, "logits/rejected": -2.667994976043701, "loss": 2.2666, "step": 644 }, { "beta_dpo/beta": 0.4541359841823578, "beta_dpo/beta_margin_grad_mean": -0.27940618991851807, "beta_dpo/beta_margin_grad_std": 0.2722318768501282, "beta_dpo/beta_margin_mean": 26.9298152923584, "beta_dpo/beta_margin_std": 44.21894836425781, "beta_dpo/beta_used": 0.4541359841823578, "beta_dpo/beta_used_raw": 0.40730053186416626, "beta_dpo/gap_mean": 44.1930046081543, "beta_dpo/gap_std": 61.355472564697266, "beta_dpo/loss_margin_mean": 47.67912673950195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9750566893424036, "grad_norm": 470.2606201171875, "learning_rate": 1.0098157099674987e-09, "logits/chosen": -2.657165050506592, "logits/rejected": -2.6617069244384766, "loss": 1.38, "step": 645 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4881736934185028, "beta_dpo/beta_margin_grad_std": 0.01566481776535511, "beta_dpo/beta_margin_mean": 0.04736267775297165, "beta_dpo/beta_margin_std": 0.06274493038654327, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4280562400817871, "beta_dpo/gap_mean": 44.21896743774414, "beta_dpo/gap_std": 61.35779571533203, "beta_dpo/loss_margin_mean": 47.362674713134766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9765684051398337, "grad_norm": 4.742650032043457, "learning_rate": 8.945768539031783e-10, "logits/chosen": -2.672578811645508, "logits/rejected": -2.6946420669555664, "loss": 1.3519, "step": 646 }, { "beta_dpo/beta": 0.47655943036079407, "beta_dpo/beta_margin_grad_mean": -0.32146018743515015, "beta_dpo/beta_margin_grad_std": 0.2852969765663147, "beta_dpo/beta_margin_mean": 30.615407943725586, "beta_dpo/beta_margin_std": 52.7960319519043, "beta_dpo/beta_used": 0.47655943036079407, "beta_dpo/beta_used_raw": 0.3814311623573303, "beta_dpo/gap_mean": 46.42333984375, "beta_dpo/gap_std": 61.456687927246094, "beta_dpo/loss_margin_mean": 59.471378326416016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9780801209372638, "grad_norm": 1220.833984375, "learning_rate": 7.863060120144316e-10, "logits/chosen": -2.688619613647461, "logits/rejected": -2.7265939712524414, "loss": 1.394, "step": 647 }, { "beta_dpo/beta": 0.5712614059448242, "beta_dpo/beta_margin_grad_mean": -0.35060861706733704, "beta_dpo/beta_margin_grad_std": 0.3148672878742218, "beta_dpo/beta_margin_mean": 32.07784652709961, "beta_dpo/beta_margin_std": 68.7822036743164, "beta_dpo/beta_used": 0.5712614059448242, "beta_dpo/beta_used_raw": 0.37917831540107727, "beta_dpo/gap_mean": 47.899803161621094, "beta_dpo/gap_std": 62.45370864868164, "beta_dpo/loss_margin_mean": 48.78077697753906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9795918367346939, "grad_norm": 1129.4110107421875, "learning_rate": 6.850062128694045e-10, "logits/chosen": -2.653449535369873, "logits/rejected": -2.67905330657959, "loss": 1.9834, "step": 648 }, { "beta_dpo/beta": 0.4049026668071747, "beta_dpo/beta_margin_grad_mean": -0.20922957360744476, "beta_dpo/beta_margin_grad_std": 0.3691186010837555, "beta_dpo/beta_margin_mean": 20.532917022705078, "beta_dpo/beta_margin_std": 29.904996871948242, "beta_dpo/beta_used": 0.4049026668071747, "beta_dpo/beta_used_raw": 0.4049026668071747, "beta_dpo/gap_mean": 47.93426513671875, "beta_dpo/gap_std": 62.62583923339844, "beta_dpo/loss_margin_mean": 47.987239837646484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.981103552532124, "grad_norm": 1016.6148681640625, "learning_rate": 5.906802900412788e-10, "logits/chosen": -2.611091136932373, "logits/rejected": -2.6330745220184326, "loss": 2.0432, "step": 649 }, { "beta_dpo/beta": 0.1802101582288742, "beta_dpo/beta_margin_grad_mean": -0.21415099501609802, "beta_dpo/beta_margin_grad_std": 0.3358995318412781, "beta_dpo/beta_margin_mean": 9.91841983795166, "beta_dpo/beta_margin_std": 12.939188957214355, "beta_dpo/beta_used": 0.1802101582288742, "beta_dpo/beta_used_raw": 0.1802101582288742, "beta_dpo/gap_mean": 48.672733306884766, "beta_dpo/gap_std": 63.85776138305664, "beta_dpo/loss_margin_mean": 55.11127853393555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.982615268329554, "grad_norm": 547.158935546875, "learning_rate": 5.033308820289184e-10, "logits/chosen": -2.678342819213867, "logits/rejected": -2.7141497135162354, "loss": 1.4694, "step": 650 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.490220844745636, "beta_dpo/beta_margin_grad_std": 0.014912915416061878, "beta_dpo/beta_margin_mean": 0.039162568747997284, "beta_dpo/beta_margin_std": 0.05972345918416977, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6605195999145508, "beta_dpo/gap_mean": 47.09050750732422, "beta_dpo/gap_std": 63.09800720214844, "beta_dpo/loss_margin_mean": 39.162567138671875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9841269841269841, "grad_norm": 3.8208343982696533, "learning_rate": 4.2296043218295606e-10, "logits/chosen": -2.668421506881714, "logits/rejected": -2.7090401649475098, "loss": 1.3531, "step": 651 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4899626076221466, "beta_dpo/beta_margin_grad_std": 0.015524974092841148, "beta_dpo/beta_margin_mean": 0.040194738656282425, "beta_dpo/beta_margin_std": 0.062190212309360504, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5771675109863281, "beta_dpo/gap_mean": 46.139862060546875, "beta_dpo/gap_std": 62.530296325683594, "beta_dpo/loss_margin_mean": 40.19473648071289, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9856386999244142, "grad_norm": 4.492720603942871, "learning_rate": 3.4957118863768176e-10, "logits/chosen": -2.669163227081299, "logits/rejected": -2.6763458251953125, "loss": 1.3523, "step": 652 }, { "beta_dpo/beta": 0.08541844040155411, "beta_dpo/beta_margin_grad_mean": -0.36232131719589233, "beta_dpo/beta_margin_grad_std": 0.269138902425766, "beta_dpo/beta_margin_mean": 4.381135940551758, "beta_dpo/beta_margin_std": 8.300312995910645, "beta_dpo/beta_used": 0.08541844040155411, "beta_dpo/beta_used_raw": -0.15151172876358032, "beta_dpo/gap_mean": 46.68292999267578, "beta_dpo/gap_std": 63.072364807128906, "beta_dpo/loss_margin_mean": 51.156280517578125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9871504157218443, "grad_norm": 235.69155883789062, "learning_rate": 2.831652042480093e-10, "logits/chosen": -2.698141098022461, "logits/rejected": -2.714226722717285, "loss": 1.165, "step": 653 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4916025996208191, "beta_dpo/beta_margin_grad_std": 0.014544263482093811, "beta_dpo/beta_margin_mean": 0.033634938299655914, "beta_dpo/beta_margin_std": 0.05826953798532486, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8502531051635742, "beta_dpo/gap_mean": 45.716835021972656, "beta_dpo/gap_std": 62.543975830078125, "beta_dpo/loss_margin_mean": 33.63493728637695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9886621315192744, "grad_norm": 4.136869430541992, "learning_rate": 2.2374433653205016e-10, "logits/chosen": -2.6694984436035156, "logits/rejected": -2.724447727203369, "loss": 1.3572, "step": 654 }, { "beta_dpo/beta": 0.005714002996683121, "beta_dpo/beta_margin_grad_mean": -0.42852216958999634, "beta_dpo/beta_margin_grad_std": 0.10619087517261505, "beta_dpo/beta_margin_mean": 0.31733012199401855, "beta_dpo/beta_margin_std": 0.49290141463279724, "beta_dpo/beta_used": 0.005714002996683121, "beta_dpo/beta_used_raw": 0.0007717101834714413, "beta_dpo/gap_mean": 44.994659423828125, "beta_dpo/gap_std": 61.005882263183594, "beta_dpo/loss_margin_mean": 45.900482177734375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9901738473167044, "grad_norm": 22.22044563293457, "learning_rate": 1.7131024761923852e-10, "logits/chosen": -2.679999589920044, "logits/rejected": -2.732828140258789, "loss": 1.1879, "step": 655 }, { "beta_dpo/beta": 0.6009721159934998, "beta_dpo/beta_margin_grad_mean": -0.3375764787197113, "beta_dpo/beta_margin_grad_std": 0.31433162093162537, "beta_dpo/beta_margin_mean": 33.633056640625, "beta_dpo/beta_margin_std": 62.72954177856445, "beta_dpo/beta_used": 0.6009721159934998, "beta_dpo/beta_used_raw": 0.2453356385231018, "beta_dpo/gap_mean": 45.758460998535156, "beta_dpo/gap_std": 61.461029052734375, "beta_dpo/loss_margin_mean": 52.54164123535156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9916855631141346, "grad_norm": 1435.6373291015625, "learning_rate": 1.2586440420372934e-10, "logits/chosen": -2.749436378479004, "logits/rejected": -2.761258125305176, "loss": 2.4934, "step": 656 }, { "beta_dpo/beta": 0.5924053192138672, "beta_dpo/beta_margin_grad_mean": -0.10536504536867142, "beta_dpo/beta_margin_grad_std": 0.27900680899620056, "beta_dpo/beta_margin_mean": 37.769229888916016, "beta_dpo/beta_margin_std": 38.588802337646484, "beta_dpo/beta_used": 0.5924053192138672, "beta_dpo/beta_used_raw": 0.5924053192138672, "beta_dpo/gap_mean": 48.455291748046875, "beta_dpo/gap_std": 62.5181884765625, "beta_dpo/loss_margin_mean": 64.0851058959961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9931972789115646, "grad_norm": 901.1080932617188, "learning_rate": 8.740807750345913e-11, "logits/chosen": -2.6236462593078613, "logits/rejected": -2.6605048179626465, "loss": 0.882, "step": 657 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4894210994243622, "beta_dpo/beta_margin_grad_std": 0.01632015034556389, "beta_dpo/beta_margin_mean": 0.04237865284085274, "beta_dpo/beta_margin_std": 0.06539247184991837, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7204711437225342, "beta_dpo/gap_mean": 48.004180908203125, "beta_dpo/gap_std": 62.23754119873047, "beta_dpo/loss_margin_mean": 42.3786506652832, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9947089947089947, "grad_norm": 3.7113711833953857, "learning_rate": 5.594234322453539e-11, "logits/chosen": -2.6681385040283203, "logits/rejected": -2.682823419570923, "loss": 1.353, "step": 658 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49211886525154114, "beta_dpo/beta_margin_grad_std": 0.015781141817569733, "beta_dpo/beta_margin_mean": 0.03156294301152229, "beta_dpo/beta_margin_std": 0.06321074068546295, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6728357076644897, "beta_dpo/gap_mean": 46.467201232910156, "beta_dpo/gap_std": 62.880088806152344, "beta_dpo/loss_margin_mean": 31.562942504882812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9962207105064248, "grad_norm": 3.7238643169403076, "learning_rate": 3.146808153123293e-11, "logits/chosen": -2.6363942623138428, "logits/rejected": -2.659372568130493, "loss": 1.3537, "step": 659 }, { "beta_dpo/beta": 0.9708003997802734, "beta_dpo/beta_margin_grad_mean": -0.17687278985977173, "beta_dpo/beta_margin_grad_std": 0.3737434148788452, "beta_dpo/beta_margin_mean": 49.68276596069336, "beta_dpo/beta_margin_std": 61.30149459838867, "beta_dpo/beta_used": 0.9708003997802734, "beta_dpo/beta_used_raw": 0.9708003997802734, "beta_dpo/gap_mean": 45.81683349609375, "beta_dpo/gap_std": 62.97257995605469, "beta_dpo/loss_margin_mean": 51.25537872314453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9977324263038548, "grad_norm": 2283.9326171875, "learning_rate": 1.3985977021235829e-11, "logits/chosen": -2.642078399658203, "logits/rejected": -2.6817831993103027, "loss": 3.5476, "step": 660 }, { "beta_dpo/beta": 0.07907932996749878, "beta_dpo/beta_margin_grad_mean": -0.3486056625843048, "beta_dpo/beta_margin_grad_std": 0.2862682342529297, "beta_dpo/beta_margin_mean": 3.647887945175171, "beta_dpo/beta_margin_std": 8.705880165100098, "beta_dpo/beta_used": 0.07907932996749878, "beta_dpo/beta_used_raw": -0.3208945393562317, "beta_dpo/gap_mean": 43.73834991455078, "beta_dpo/gap_std": 63.08509063720703, "beta_dpo/loss_margin_mean": 33.91815185546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.999244142101285, "grad_norm": 502.38873291015625, "learning_rate": 3.4965187065971735e-12, "logits/chosen": -2.689065933227539, "logits/rejected": -2.731311559677124, "loss": 1.2566, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.3336656033181207, "train_runtime": 3770.6222, "train_samples_per_second": 11.228, "train_steps_per_second": 0.175 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }