{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 681, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta_used": 0.09912768006324768, "beta_dpo/beta_used_raw": 0.09912768006324768, "beta_dpo/gap_mean": 0.00946818944066763, "beta_dpo/gap_std": 0.06761293858289719, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.0014684287812041115, "grad_norm": 36.03247833251953, "learning_rate": 0.0, "logits/chosen": -0.1428939700126648, "logits/rejected": 0.2641817033290863, "loss": 1.3877, "step": 1 }, { "beta_dpo/beta_used": 0.0943765640258789, "beta_dpo/beta_used_raw": 0.0943765640258789, "beta_dpo/gap_mean": -0.008595498278737068, "beta_dpo/gap_std": 0.1328437626361847, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.002936857562408223, "grad_norm": 28.348440170288086, "learning_rate": 7.246376811594203e-09, "logits/chosen": 0.512961745262146, "logits/rejected": 0.6081655621528625, "loss": 1.3958, "step": 2 }, { "beta_dpo/beta_used": 0.09973011910915375, "beta_dpo/beta_used_raw": 0.09973011910915375, "beta_dpo/gap_mean": -0.020052069798111916, "beta_dpo/gap_std": 0.20296388864517212, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.004405286343612335, "grad_norm": 25.432849884033203, "learning_rate": 1.4492753623188406e-08, "logits/chosen": 0.19606800377368927, "logits/rejected": 0.3750133812427521, "loss": 1.3898, "step": 3 }, { "beta_dpo/beta_used": 0.10318515449762344, "beta_dpo/beta_used_raw": 0.10318515449762344, "beta_dpo/gap_mean": -0.015820063650608063, "beta_dpo/gap_std": 0.24965426325798035, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.005873715124816446, "grad_norm": 29.762346267700195, "learning_rate": 2.1739130434782606e-08, "logits/chosen": 0.8724163770675659, "logits/rejected": 0.9871234893798828, "loss": 1.3829, "step": 4 }, { "beta_dpo/beta_used": 0.10503459721803665, "beta_dpo/beta_used_raw": 0.10503459721803665, "beta_dpo/gap_mean": -0.011851204559206963, "beta_dpo/gap_std": 0.29855671525001526, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.007342143906020558, "grad_norm": 37.113651275634766, "learning_rate": 2.898550724637681e-08, "logits/chosen": 0.7447645664215088, "logits/rejected": 1.042862892150879, "loss": 1.3789, "step": 5 }, { "beta_dpo/beta_used": 0.10265050828456879, "beta_dpo/beta_used_raw": 0.10265050828456879, "beta_dpo/gap_mean": 0.0011657942086458206, "beta_dpo/gap_std": 0.3148193359375, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.00881057268722467, "grad_norm": 39.14086151123047, "learning_rate": 3.6231884057971014e-08, "logits/chosen": 0.802085816860199, "logits/rejected": 1.1166476011276245, "loss": 1.3817, "step": 6 }, { "beta_dpo/beta_used": 0.10336506366729736, "beta_dpo/beta_used_raw": 0.10336506366729736, "beta_dpo/gap_mean": 0.012128479778766632, "beta_dpo/gap_std": 0.33621037006378174, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.010279001468428781, "grad_norm": 38.720542907714844, "learning_rate": 4.347826086956521e-08, "logits/chosen": 0.27170026302337646, "logits/rejected": 0.16746661067008972, "loss": 1.3791, "step": 7 }, { "beta_dpo/beta_used": 0.09534113109111786, "beta_dpo/beta_used_raw": 0.09534113109111786, "beta_dpo/gap_mean": 0.004293666686862707, "beta_dpo/gap_std": 0.348634272813797, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.011747430249632892, "grad_norm": 34.6677360534668, "learning_rate": 5.0724637681159424e-08, "logits/chosen": 0.33636578917503357, "logits/rejected": 0.6127815842628479, "loss": 1.3939, "step": 8 }, { "beta_dpo/beta_used": 0.10228224098682404, "beta_dpo/beta_used_raw": 0.10228224098682404, "beta_dpo/gap_mean": 0.006271847989410162, "beta_dpo/gap_std": 0.365943044424057, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.013215859030837005, "grad_norm": 39.09025573730469, "learning_rate": 5.797101449275362e-08, "logits/chosen": 0.983077883720398, "logits/rejected": 1.1696516275405884, "loss": 1.3814, "step": 9 }, { "beta_dpo/beta_used": 0.09608335793018341, "beta_dpo/beta_used_raw": 0.09608335793018341, "beta_dpo/gap_mean": -0.009593424387276173, "beta_dpo/gap_std": 0.37053757905960083, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.014684287812041116, "grad_norm": 27.551111221313477, "learning_rate": 6.521739130434782e-08, "logits/chosen": 0.4329628348350525, "logits/rejected": 0.5324227809906006, "loss": 1.3938, "step": 10 }, { "beta_dpo/beta_used": 0.10036857426166534, "beta_dpo/beta_used_raw": 0.10036857426166534, "beta_dpo/gap_mean": -0.008158953860402107, "beta_dpo/gap_std": 0.38943251967430115, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.016152716593245228, "grad_norm": 36.65106201171875, "learning_rate": 7.246376811594203e-08, "logits/chosen": 0.7284325361251831, "logits/rejected": 0.9985450506210327, "loss": 1.3881, "step": 11 }, { "beta_dpo/beta_used": 0.10221745073795319, "beta_dpo/beta_used_raw": 0.10221745073795319, "beta_dpo/gap_mean": 0.002900504507124424, "beta_dpo/gap_std": 0.4011257290840149, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.01762114537444934, "grad_norm": 41.69594955444336, "learning_rate": 7.971014492753623e-08, "logits/chosen": -0.2577040195465088, "logits/rejected": -0.08773193508386612, "loss": 1.3808, "step": 12 }, { "beta_dpo/beta_used": 0.09779460728168488, "beta_dpo/beta_used_raw": 0.09779460728168488, "beta_dpo/gap_mean": 0.010516807436943054, "beta_dpo/gap_std": 0.4214455485343933, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.01908957415565345, "grad_norm": 37.253273010253906, "learning_rate": 8.695652173913042e-08, "logits/chosen": 0.40396368503570557, "logits/rejected": 0.7425417900085449, "loss": 1.3896, "step": 13 }, { "beta_dpo/beta_used": 0.09969929605722427, "beta_dpo/beta_used_raw": 0.09969929605722427, "beta_dpo/gap_mean": 0.0034803529269993305, "beta_dpo/gap_std": 0.42071378231048584, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.020558002936857563, "grad_norm": 35.80532455444336, "learning_rate": 9.420289855072464e-08, "logits/chosen": 0.5135482549667358, "logits/rejected": 0.63726806640625, "loss": 1.3853, "step": 14 }, { "beta_dpo/beta_used": 0.09926551580429077, "beta_dpo/beta_used_raw": 0.09926551580429077, "beta_dpo/gap_mean": 0.00040535128209739923, "beta_dpo/gap_std": 0.437721848487854, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.022026431718061675, "grad_norm": 33.884681701660156, "learning_rate": 1.0144927536231885e-07, "logits/chosen": 0.34413981437683105, "logits/rejected": 0.5353966951370239, "loss": 1.3869, "step": 15 }, { "beta_dpo/beta_used": 0.10161672532558441, "beta_dpo/beta_used_raw": 0.10161672532558441, "beta_dpo/gap_mean": 0.005474040750414133, "beta_dpo/gap_std": 0.44184213876724243, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.023494860499265784, "grad_norm": 34.41832733154297, "learning_rate": 1.0869565217391303e-07, "logits/chosen": 0.02781546115875244, "logits/rejected": 0.3703967332839966, "loss": 1.3835, "step": 16 }, { "beta_dpo/beta_used": 0.10158823430538177, "beta_dpo/beta_used_raw": 0.10158823430538177, "beta_dpo/gap_mean": 0.020230602473020554, "beta_dpo/gap_std": 0.4481740891933441, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.024963289280469897, "grad_norm": 38.5697021484375, "learning_rate": 1.1594202898550725e-07, "logits/chosen": 0.17520441114902496, "logits/rejected": 0.4109325110912323, "loss": 1.381, "step": 17 }, { "beta_dpo/beta_used": 0.09817768633365631, "beta_dpo/beta_used_raw": 0.09817768633365631, "beta_dpo/gap_mean": 0.022835951298475266, "beta_dpo/gap_std": 0.45730656385421753, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.02643171806167401, "grad_norm": 31.3992862701416, "learning_rate": 1.2318840579710146e-07, "logits/chosen": -0.11811737716197968, "logits/rejected": 0.37715792655944824, "loss": 1.3868, "step": 18 }, { "beta_dpo/beta_used": 0.10017681121826172, "beta_dpo/beta_used_raw": 0.10017681121826172, "beta_dpo/gap_mean": 0.021170007064938545, "beta_dpo/gap_std": 0.45264753699302673, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.027900146842878122, "grad_norm": 32.43757247924805, "learning_rate": 1.3043478260869563e-07, "logits/chosen": 0.14852826297283173, "logits/rejected": 0.4215266704559326, "loss": 1.3843, "step": 19 }, { "beta_dpo/beta_used": 0.10059511661529541, "beta_dpo/beta_used_raw": 0.10059511661529541, "beta_dpo/gap_mean": 0.024478904902935028, "beta_dpo/gap_std": 0.4402172565460205, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.02936857562408223, "grad_norm": 32.24197006225586, "learning_rate": 1.3768115942028986e-07, "logits/chosen": 0.02626686543226242, "logits/rejected": 0.23224107921123505, "loss": 1.3813, "step": 20 }, { "beta_dpo/beta_used": 0.10132542252540588, "beta_dpo/beta_used_raw": 0.10132542252540588, "beta_dpo/gap_mean": 0.03433241322636604, "beta_dpo/gap_std": 0.4314417243003845, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.030837004405286344, "grad_norm": 32.54502487182617, "learning_rate": 1.4492753623188405e-07, "logits/chosen": 0.8274613618850708, "logits/rejected": 1.209240436553955, "loss": 1.3807, "step": 21 }, { "beta_dpo/beta_used": 0.09770512580871582, "beta_dpo/beta_used_raw": 0.09770512580871582, "beta_dpo/gap_mean": 0.047481901943683624, "beta_dpo/gap_std": 0.4284280240535736, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.032305433186490456, "grad_norm": 44.07844161987305, "learning_rate": 1.5217391304347825e-07, "logits/chosen": 0.7793359160423279, "logits/rejected": 0.6676016449928284, "loss": 1.3895, "step": 22 }, { "beta_dpo/beta_used": 0.09836747497320175, "beta_dpo/beta_used_raw": 0.09836747497320175, "beta_dpo/gap_mean": 0.04389035701751709, "beta_dpo/gap_std": 0.4313211441040039, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.033773861967694566, "grad_norm": 34.02887725830078, "learning_rate": 1.5942028985507245e-07, "logits/chosen": 0.6295610070228577, "logits/rejected": 0.6386342644691467, "loss": 1.3847, "step": 23 }, { "beta_dpo/beta_used": 0.10079745948314667, "beta_dpo/beta_used_raw": 0.10079745948314667, "beta_dpo/gap_mean": 0.03594818338751793, "beta_dpo/gap_std": 0.4390791654586792, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.03524229074889868, "grad_norm": 45.06439971923828, "learning_rate": 1.6666666666666665e-07, "logits/chosen": 0.6740798354148865, "logits/rejected": 0.7973790168762207, "loss": 1.3825, "step": 24 }, { "beta_dpo/beta_used": 0.10290396213531494, "beta_dpo/beta_used_raw": 0.10290396213531494, "beta_dpo/gap_mean": 0.033481329679489136, "beta_dpo/gap_std": 0.444084495306015, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.03671071953010279, "grad_norm": 39.44239807128906, "learning_rate": 1.7391304347826085e-07, "logits/chosen": 0.8647956848144531, "logits/rejected": 0.9377778768539429, "loss": 1.3783, "step": 25 }, { "beta_dpo/beta_used": 0.096158966422081, "beta_dpo/beta_used_raw": 0.096158966422081, "beta_dpo/gap_mean": 0.03490423411130905, "beta_dpo/gap_std": 0.44243302941322327, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.0381791483113069, "grad_norm": 42.32288360595703, "learning_rate": 1.8115942028985507e-07, "logits/chosen": 0.4755552411079407, "logits/rejected": 0.6985275745391846, "loss": 1.3868, "step": 26 }, { "beta_dpo/beta_used": 0.10245074331760406, "beta_dpo/beta_used_raw": 0.10245074331760406, "beta_dpo/gap_mean": 0.04747733473777771, "beta_dpo/gap_std": 0.444235622882843, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.039647577092511016, "grad_norm": 41.91151809692383, "learning_rate": 1.8840579710144927e-07, "logits/chosen": -0.21072596311569214, "logits/rejected": 0.08524161577224731, "loss": 1.378, "step": 27 }, { "beta_dpo/beta_used": 0.10306209325790405, "beta_dpo/beta_used_raw": 0.10306209325790405, "beta_dpo/gap_mean": 0.058281153440475464, "beta_dpo/gap_std": 0.44807279109954834, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.041116005873715125, "grad_norm": 37.31840896606445, "learning_rate": 1.9565217391304347e-07, "logits/chosen": 0.6112695932388306, "logits/rejected": 0.714950680732727, "loss": 1.3738, "step": 28 }, { "beta_dpo/beta_used": 0.09879401326179504, "beta_dpo/beta_used_raw": 0.09879401326179504, "beta_dpo/gap_mean": 0.07269902527332306, "beta_dpo/gap_std": 0.44637590646743774, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.042584434654919234, "grad_norm": 47.05006790161133, "learning_rate": 2.028985507246377e-07, "logits/chosen": 0.7314038276672363, "logits/rejected": 0.8412085771560669, "loss": 1.381, "step": 29 }, { "beta_dpo/beta_used": 0.1020648330450058, "beta_dpo/beta_used_raw": 0.1020648330450058, "beta_dpo/gap_mean": 0.07155513763427734, "beta_dpo/gap_std": 0.44477319717407227, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.04405286343612335, "grad_norm": 44.45933151245117, "learning_rate": 2.1014492753623187e-07, "logits/chosen": 1.1050382852554321, "logits/rejected": 1.16668701171875, "loss": 1.3764, "step": 30 }, { "beta_dpo/beta_used": 0.0970248281955719, "beta_dpo/beta_used_raw": 0.0970248281955719, "beta_dpo/gap_mean": 0.06911972165107727, "beta_dpo/gap_std": 0.4422132968902588, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.04552129221732746, "grad_norm": 33.403358459472656, "learning_rate": 2.1739130434782607e-07, "logits/chosen": 0.8101310133934021, "logits/rejected": 1.0011759996414185, "loss": 1.3836, "step": 31 }, { "beta_dpo/beta_used": 0.09790567308664322, "beta_dpo/beta_used_raw": 0.09790567308664322, "beta_dpo/gap_mean": 0.07079443335533142, "beta_dpo/gap_std": 0.4461364150047302, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.04698972099853157, "grad_norm": 33.10820007324219, "learning_rate": 2.2463768115942027e-07, "logits/chosen": 0.8439233303070068, "logits/rejected": 0.950434684753418, "loss": 1.384, "step": 32 }, { "beta_dpo/beta_used": 0.10266172885894775, "beta_dpo/beta_used_raw": 0.10266172885894775, "beta_dpo/gap_mean": 0.06464250385761261, "beta_dpo/gap_std": 0.44462600350379944, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.048458149779735685, "grad_norm": 34.716880798339844, "learning_rate": 2.318840579710145e-07, "logits/chosen": 0.6662931442260742, "logits/rejected": 0.8440811634063721, "loss": 1.3753, "step": 33 }, { "beta_dpo/beta_used": 0.10402781516313553, "beta_dpo/beta_used_raw": 0.10402781516313553, "beta_dpo/gap_mean": 0.09583105146884918, "beta_dpo/gap_std": 0.46839314699172974, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.049926578560939794, "grad_norm": 36.31764602661133, "learning_rate": 2.391304347826087e-07, "logits/chosen": 0.3044871687889099, "logits/rejected": 0.7843359112739563, "loss": 1.37, "step": 34 }, { "beta_dpo/beta_used": 0.10199436545372009, "beta_dpo/beta_used_raw": 0.10199436545372009, "beta_dpo/gap_mean": 0.11586057394742966, "beta_dpo/gap_std": 0.4675544500350952, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.0513950073421439, "grad_norm": 39.71388626098633, "learning_rate": 2.463768115942029e-07, "logits/chosen": 0.5839816331863403, "logits/rejected": 0.8258933424949646, "loss": 1.371, "step": 35 }, { "beta_dpo/beta_used": 0.09709247946739197, "beta_dpo/beta_used_raw": 0.09709247946739197, "beta_dpo/gap_mean": 0.12648111581802368, "beta_dpo/gap_std": 0.4649723768234253, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.05286343612334802, "grad_norm": 29.94068145751953, "learning_rate": 2.536231884057971e-07, "logits/chosen": 0.40314602851867676, "logits/rejected": 0.5354802012443542, "loss": 1.378, "step": 36 }, { "beta_dpo/beta_used": 0.10307516157627106, "beta_dpo/beta_used_raw": 0.10307516157627106, "beta_dpo/gap_mean": 0.1381409764289856, "beta_dpo/gap_std": 0.4927595257759094, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.05433186490455213, "grad_norm": 31.40448570251465, "learning_rate": 2.6086956521739126e-07, "logits/chosen": 0.35831230878829956, "logits/rejected": 0.6434404850006104, "loss": 1.3676, "step": 37 }, { "beta_dpo/beta_used": 0.09714777767658234, "beta_dpo/beta_used_raw": 0.09714777767658234, "beta_dpo/gap_mean": 0.156931072473526, "beta_dpo/gap_std": 0.5091784000396729, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.055800293685756244, "grad_norm": 27.932357788085938, "learning_rate": 2.681159420289855e-07, "logits/chosen": 0.563934326171875, "logits/rejected": 0.6664830446243286, "loss": 1.3781, "step": 38 }, { "beta_dpo/beta_used": 0.10663290321826935, "beta_dpo/beta_used_raw": 0.10663290321826935, "beta_dpo/gap_mean": 0.1700209379196167, "beta_dpo/gap_std": 0.5124276876449585, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.05726872246696035, "grad_norm": 39.61822509765625, "learning_rate": 2.753623188405797e-07, "logits/chosen": 0.48427289724349976, "logits/rejected": 0.8242242336273193, "loss": 1.3578, "step": 39 }, { "beta_dpo/beta_used": 0.10216629505157471, "beta_dpo/beta_used_raw": 0.10216629505157471, "beta_dpo/gap_mean": 0.17161959409713745, "beta_dpo/gap_std": 0.5042372941970825, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.05873715124816446, "grad_norm": 32.475460052490234, "learning_rate": 2.8260869565217386e-07, "logits/chosen": 0.920052170753479, "logits/rejected": 0.9896578788757324, "loss": 1.3658, "step": 40 }, { "beta_dpo/beta_used": 0.09696967899799347, "beta_dpo/beta_used_raw": 0.09696967899799347, "beta_dpo/gap_mean": 0.18380120396614075, "beta_dpo/gap_std": 0.5123995542526245, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.06020558002936858, "grad_norm": 30.475223541259766, "learning_rate": 2.898550724637681e-07, "logits/chosen": 1.0419857501983643, "logits/rejected": 1.2172199487686157, "loss": 1.3738, "step": 41 }, { "beta_dpo/beta_used": 0.1069054901599884, "beta_dpo/beta_used_raw": 0.1069054901599884, "beta_dpo/gap_mean": 0.20398783683776855, "beta_dpo/gap_std": 0.5257683992385864, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.06167400881057269, "grad_norm": 43.8969841003418, "learning_rate": 2.971014492753623e-07, "logits/chosen": 0.42500317096710205, "logits/rejected": 0.5260858535766602, "loss": 1.3524, "step": 42 }, { "beta_dpo/beta_used": 0.10209308564662933, "beta_dpo/beta_used_raw": 0.10209308564662933, "beta_dpo/gap_mean": 0.24691221117973328, "beta_dpo/gap_std": 0.5559054017066956, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.0631424375917768, "grad_norm": 32.977210998535156, "learning_rate": 3.043478260869565e-07, "logits/chosen": 0.5705319046974182, "logits/rejected": 0.6937817931175232, "loss": 1.3579, "step": 43 }, { "beta_dpo/beta_used": 0.10147799551486969, "beta_dpo/beta_used_raw": 0.10147799551486969, "beta_dpo/gap_mean": 0.2685306966304779, "beta_dpo/gap_std": 0.5704429149627686, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.06461086637298091, "grad_norm": 34.141815185546875, "learning_rate": 3.115942028985507e-07, "logits/chosen": 0.5468135476112366, "logits/rejected": 0.7769373059272766, "loss": 1.3587, "step": 44 }, { "beta_dpo/beta_used": 0.0953749343752861, "beta_dpo/beta_used_raw": 0.0953749343752861, "beta_dpo/gap_mean": 0.2547299265861511, "beta_dpo/gap_std": 0.5621392726898193, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.06607929515418502, "grad_norm": 26.028532028198242, "learning_rate": 3.188405797101449e-07, "logits/chosen": 0.6253921985626221, "logits/rejected": 0.7837856411933899, "loss": 1.3699, "step": 45 }, { "beta_dpo/beta_used": 0.10346020013093948, "beta_dpo/beta_used_raw": 0.10346020013093948, "beta_dpo/gap_mean": 0.27350008487701416, "beta_dpo/gap_std": 0.5653533339500427, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.06754772393538913, "grad_norm": 33.697540283203125, "learning_rate": 3.260869565217391e-07, "logits/chosen": 0.9563829898834229, "logits/rejected": 1.276297926902771, "loss": 1.3537, "step": 46 }, { "beta_dpo/beta_used": 0.09652406722307205, "beta_dpo/beta_used_raw": 0.09652406722307205, "beta_dpo/gap_mean": 0.28601908683776855, "beta_dpo/gap_std": 0.5872968435287476, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.06901615271659324, "grad_norm": 33.535667419433594, "learning_rate": 3.333333333333333e-07, "logits/chosen": 0.003040153533220291, "logits/rejected": 0.20534364879131317, "loss": 1.3657, "step": 47 }, { "beta_dpo/beta_used": 0.103759765625, "beta_dpo/beta_used_raw": 0.103759765625, "beta_dpo/gap_mean": 0.3253113031387329, "beta_dpo/gap_std": 0.6272794604301453, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.07048458149779736, "grad_norm": 36.597084045410156, "learning_rate": 3.4057971014492755e-07, "logits/chosen": 0.16515415906906128, "logits/rejected": 0.25872254371643066, "loss": 1.3473, "step": 48 }, { "beta_dpo/beta_used": 0.10399220883846283, "beta_dpo/beta_used_raw": 0.10399220883846283, "beta_dpo/gap_mean": 0.37170833349227905, "beta_dpo/gap_std": 0.6765430569648743, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.07195301027900147, "grad_norm": 35.3515625, "learning_rate": 3.478260869565217e-07, "logits/chosen": -0.1696886569261551, "logits/rejected": 0.33062222599983215, "loss": 1.3397, "step": 49 }, { "beta_dpo/beta_used": 0.09910602867603302, "beta_dpo/beta_used_raw": 0.09910602867603302, "beta_dpo/gap_mean": 0.4253373444080353, "beta_dpo/gap_std": 0.7102055549621582, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.07342143906020558, "grad_norm": 31.9883975982666, "learning_rate": 3.5507246376811595e-07, "logits/chosen": 0.8714113235473633, "logits/rejected": 1.1603641510009766, "loss": 1.3456, "step": 50 }, { "beta_dpo/beta_used": 0.10119134932756424, "beta_dpo/beta_used_raw": 0.10119134932756424, "beta_dpo/gap_mean": 0.4460796117782593, "beta_dpo/gap_std": 0.7467154264450073, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.07488986784140969, "grad_norm": 29.27351951599121, "learning_rate": 3.6231884057971015e-07, "logits/chosen": 0.7564854621887207, "logits/rejected": 1.1092755794525146, "loss": 1.3414, "step": 51 }, { "beta_dpo/beta_used": 0.10360611975193024, "beta_dpo/beta_used_raw": 0.10360611975193024, "beta_dpo/gap_mean": 0.5001685619354248, "beta_dpo/gap_std": 0.7689269185066223, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.0763582966226138, "grad_norm": 31.185443878173828, "learning_rate": 3.695652173913043e-07, "logits/chosen": 0.1388968527317047, "logits/rejected": 0.3441314697265625, "loss": 1.3309, "step": 52 }, { "beta_dpo/beta_used": 0.10029203444719315, "beta_dpo/beta_used_raw": 0.10029203444719315, "beta_dpo/gap_mean": 0.5589120388031006, "beta_dpo/gap_std": 0.8447322845458984, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.07782672540381791, "grad_norm": 31.37520980834961, "learning_rate": 3.7681159420289855e-07, "logits/chosen": 0.6295239329338074, "logits/rejected": 0.7122503519058228, "loss": 1.3325, "step": 53 }, { "beta_dpo/beta_used": 0.10459037125110626, "beta_dpo/beta_used_raw": 0.10459037125110626, "beta_dpo/gap_mean": 0.6512259840965271, "beta_dpo/gap_std": 0.8958290815353394, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.07929515418502203, "grad_norm": 35.64163589477539, "learning_rate": 3.8405797101449274e-07, "logits/chosen": 0.9176524877548218, "logits/rejected": 1.011580228805542, "loss": 1.3147, "step": 54 }, { "beta_dpo/beta_used": 0.1026381105184555, "beta_dpo/beta_used_raw": 0.1026381105184555, "beta_dpo/gap_mean": 0.7022398710250854, "beta_dpo/gap_std": 0.9507501125335693, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.08076358296622614, "grad_norm": 35.24003601074219, "learning_rate": 3.9130434782608694e-07, "logits/chosen": -0.29505473375320435, "logits/rejected": -0.09624499082565308, "loss": 1.3113, "step": 55 }, { "beta_dpo/beta_used": 0.1019178181886673, "beta_dpo/beta_used_raw": 0.1019178181886673, "beta_dpo/gap_mean": 0.772992730140686, "beta_dpo/gap_std": 1.0654406547546387, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.08223201174743025, "grad_norm": 32.721805572509766, "learning_rate": 3.9855072463768114e-07, "logits/chosen": -0.0909002423286438, "logits/rejected": 0.28411364555358887, "loss": 1.3044, "step": 56 }, { "beta_dpo/beta_used": 0.09452690184116364, "beta_dpo/beta_used_raw": 0.09452690184116364, "beta_dpo/gap_mean": 0.7895393371582031, "beta_dpo/gap_std": 1.1319793462753296, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.08370044052863436, "grad_norm": 31.025205612182617, "learning_rate": 4.057971014492754e-07, "logits/chosen": 0.24088265001773834, "logits/rejected": 0.3191375136375427, "loss": 1.3245, "step": 57 }, { "beta_dpo/beta_used": 0.10477735102176666, "beta_dpo/beta_used_raw": 0.10477735102176666, "beta_dpo/gap_mean": 0.8548910021781921, "beta_dpo/gap_std": 1.1864020824432373, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.08516886930983847, "grad_norm": 35.668453216552734, "learning_rate": 4.1304347826086954e-07, "logits/chosen": -0.08373896777629852, "logits/rejected": 0.29527297616004944, "loss": 1.2914, "step": 58 }, { "beta_dpo/beta_used": 0.10405892133712769, "beta_dpo/beta_used_raw": 0.10405892133712769, "beta_dpo/gap_mean": 0.922788143157959, "beta_dpo/gap_std": 1.215921401977539, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.08663729809104258, "grad_norm": 33.82415008544922, "learning_rate": 4.2028985507246374e-07, "logits/chosen": 0.3770354986190796, "logits/rejected": 0.5427916646003723, "loss": 1.2938, "step": 59 }, { "beta_dpo/beta_used": 0.08976569026708603, "beta_dpo/beta_used_raw": 0.08976569026708603, "beta_dpo/gap_mean": 0.9623857736587524, "beta_dpo/gap_std": 1.2300928831100464, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.0881057268722467, "grad_norm": 25.01860809326172, "learning_rate": 4.2753623188405794e-07, "logits/chosen": 0.3236808180809021, "logits/rejected": 0.7505677938461304, "loss": 1.3151, "step": 60 }, { "beta_dpo/beta_used": 0.07914106547832489, "beta_dpo/beta_used_raw": 0.07914106547832489, "beta_dpo/gap_mean": 0.9610786437988281, "beta_dpo/gap_std": 1.3071849346160889, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.08957415565345081, "grad_norm": 22.11328887939453, "learning_rate": 4.3478260869565214e-07, "logits/chosen": -0.2516426146030426, "logits/rejected": -0.02317236363887787, "loss": 1.3396, "step": 61 }, { "beta_dpo/beta_used": 0.09530578553676605, "beta_dpo/beta_used_raw": 0.09530578553676605, "beta_dpo/gap_mean": 0.9768849611282349, "beta_dpo/gap_std": 1.3115894794464111, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.09104258443465492, "grad_norm": 26.365983963012695, "learning_rate": 4.420289855072464e-07, "logits/chosen": 0.27271533012390137, "logits/rejected": 0.268252432346344, "loss": 1.3037, "step": 62 }, { "beta_dpo/beta_used": 0.10470438003540039, "beta_dpo/beta_used_raw": 0.10470438003540039, "beta_dpo/gap_mean": 1.0516126155853271, "beta_dpo/gap_std": 1.3722937107086182, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.09251101321585903, "grad_norm": 35.525062561035156, "learning_rate": 4.4927536231884053e-07, "logits/chosen": 0.27358633279800415, "logits/rejected": 0.5209922790527344, "loss": 1.2743, "step": 63 }, { "beta_dpo/beta_used": 0.09943975508213043, "beta_dpo/beta_used_raw": 0.09943975508213043, "beta_dpo/gap_mean": 1.142209529876709, "beta_dpo/gap_std": 1.4562242031097412, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.09397944199706314, "grad_norm": 31.30100440979004, "learning_rate": 4.5652173913043473e-07, "logits/chosen": 0.8951408863067627, "logits/rejected": 1.089853048324585, "loss": 1.2742, "step": 64 }, { "beta_dpo/beta_used": 0.09851166605949402, "beta_dpo/beta_used_raw": 0.09851166605949402, "beta_dpo/gap_mean": 1.2123044729232788, "beta_dpo/gap_std": 1.492063045501709, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.09544787077826726, "grad_norm": 34.030433654785156, "learning_rate": 4.63768115942029e-07, "logits/chosen": 0.39446431398391724, "logits/rejected": 0.29857978224754333, "loss": 1.2767, "step": 65 }, { "beta_dpo/beta_used": 0.08195464313030243, "beta_dpo/beta_used_raw": 0.08195464313030243, "beta_dpo/gap_mean": 1.2385344505310059, "beta_dpo/gap_std": 1.5351271629333496, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.09691629955947137, "grad_norm": 21.924297332763672, "learning_rate": 4.7101449275362313e-07, "logits/chosen": 0.015095788985490799, "logits/rejected": 0.257466584444046, "loss": 1.3196, "step": 66 }, { "beta_dpo/beta_used": 0.07061232626438141, "beta_dpo/beta_used_raw": 0.07061232626438141, "beta_dpo/gap_mean": 1.1743882894515991, "beta_dpo/gap_std": 1.622124195098877, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.09838472834067548, "grad_norm": 17.05328941345215, "learning_rate": 4.782608695652174e-07, "logits/chosen": 0.6905490159988403, "logits/rejected": 0.8179426193237305, "loss": 1.3397, "step": 67 }, { "beta_dpo/beta_used": 0.09138956665992737, "beta_dpo/beta_used_raw": 0.09138956665992737, "beta_dpo/gap_mean": 1.2332521677017212, "beta_dpo/gap_std": 1.7263941764831543, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.09985315712187959, "grad_norm": 24.639341354370117, "learning_rate": 4.855072463768116e-07, "logits/chosen": -0.910900354385376, "logits/rejected": -0.7187647223472595, "loss": 1.2867, "step": 68 }, { "beta_dpo/beta_used": 0.09712530672550201, "beta_dpo/beta_used_raw": 0.09712530672550201, "beta_dpo/gap_mean": 1.3337668180465698, "beta_dpo/gap_std": 1.8452472686767578, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1013215859030837, "grad_norm": 30.226844787597656, "learning_rate": 4.927536231884058e-07, "logits/chosen": 0.2648603320121765, "logits/rejected": 0.3073993921279907, "loss": 1.268, "step": 69 }, { "beta_dpo/beta_used": 0.08421066403388977, "beta_dpo/beta_used_raw": 0.08421066403388977, "beta_dpo/gap_mean": 1.459987998008728, "beta_dpo/gap_std": 1.982649564743042, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1027900146842878, "grad_norm": 22.558631896972656, "learning_rate": 5e-07, "logits/chosen": 0.3340024948120117, "logits/rejected": 0.5476035475730896, "loss": 1.286, "step": 70 }, { "beta_dpo/beta_used": 0.10257969796657562, "beta_dpo/beta_used_raw": 0.10257969796657562, "beta_dpo/gap_mean": 1.7209677696228027, "beta_dpo/gap_std": 2.2336602210998535, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.10425844346549193, "grad_norm": 32.90518569946289, "learning_rate": 4.999967061337492e-07, "logits/chosen": 0.2438286542892456, "logits/rejected": 0.43359801173210144, "loss": 1.2169, "step": 71 }, { "beta_dpo/beta_used": 0.08484944701194763, "beta_dpo/beta_used_raw": 0.08484944701194763, "beta_dpo/gap_mean": 1.8638193607330322, "beta_dpo/gap_std": 2.3361806869506836, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.10572687224669604, "grad_norm": 24.293167114257812, "learning_rate": 4.999868246217933e-07, "logits/chosen": 0.16392625868320465, "logits/rejected": 0.36707645654678345, "loss": 1.2618, "step": 72 }, { "beta_dpo/beta_used": 0.06621776521205902, "beta_dpo/beta_used_raw": 0.06621776521205902, "beta_dpo/gap_mean": 1.9632892608642578, "beta_dpo/gap_std": 2.580928087234497, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.10719530102790015, "grad_norm": 20.27967643737793, "learning_rate": 4.999703557245192e-07, "logits/chosen": -0.8935944437980652, "logits/rejected": -0.34290611743927, "loss": 1.3011, "step": 73 }, { "beta_dpo/beta_used": 0.09418769180774689, "beta_dpo/beta_used_raw": 0.09418769180774689, "beta_dpo/gap_mean": 2.124460220336914, "beta_dpo/gap_std": 2.7994680404663086, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.10866372980910426, "grad_norm": 30.242183685302734, "learning_rate": 4.999472998758977e-07, "logits/chosen": -0.3257332146167755, "logits/rejected": -0.3129286468029022, "loss": 1.2112, "step": 74 }, { "beta_dpo/beta_used": 0.11877734959125519, "beta_dpo/beta_used_raw": 0.11877734959125519, "beta_dpo/gap_mean": 2.335268974304199, "beta_dpo/gap_std": 3.031240940093994, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.11013215859030837, "grad_norm": 39.98731231689453, "learning_rate": 4.999176576834721e-07, "logits/chosen": -0.41363754868507385, "logits/rejected": 0.28612393140792847, "loss": 1.109, "step": 75 }, { "beta_dpo/beta_used": 0.08970046043395996, "beta_dpo/beta_used_raw": 0.08970046043395996, "beta_dpo/gap_mean": 2.4628543853759766, "beta_dpo/gap_std": 3.2532970905303955, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.11160058737151249, "grad_norm": 25.231731414794922, "learning_rate": 4.998814299283415e-07, "logits/chosen": -1.2684142589569092, "logits/rejected": -0.9033623933792114, "loss": 1.1912, "step": 76 }, { "beta_dpo/beta_used": 0.09582079946994781, "beta_dpo/beta_used_raw": 0.09582079946994781, "beta_dpo/gap_mean": 2.5265262126922607, "beta_dpo/gap_std": 3.3844351768493652, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.1130690161527166, "grad_norm": 29.32447624206543, "learning_rate": 4.998386175651409e-07, "logits/chosen": -0.20256809890270233, "logits/rejected": -0.08170560747385025, "loss": 1.1905, "step": 77 }, { "beta_dpo/beta_used": 0.07637983560562134, "beta_dpo/beta_used_raw": 0.07637983560562134, "beta_dpo/gap_mean": 2.600193977355957, "beta_dpo/gap_std": 3.4132637977600098, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1145374449339207, "grad_norm": 20.592315673828125, "learning_rate": 4.997892217220159e-07, "logits/chosen": 0.5006381273269653, "logits/rejected": 0.6229469180107117, "loss": 1.2294, "step": 78 }, { "beta_dpo/beta_used": 0.12261506170034409, "beta_dpo/beta_used_raw": 0.12261506170034409, "beta_dpo/gap_mean": 2.8109922409057617, "beta_dpo/gap_std": 3.607632637023926, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.11600587371512482, "grad_norm": 36.98799514770508, "learning_rate": 4.997332437005931e-07, "logits/chosen": -0.14069265127182007, "logits/rejected": 0.381592333316803, "loss": 1.0555, "step": 79 }, { "beta_dpo/beta_used": 0.05763671547174454, "beta_dpo/beta_used_raw": 0.05763671547174454, "beta_dpo/gap_mean": 2.924854278564453, "beta_dpo/gap_std": 3.7584455013275146, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.11747430249632893, "grad_norm": 18.50529670715332, "learning_rate": 4.996706849759452e-07, "logits/chosen": -0.5511586666107178, "logits/rejected": 0.2598066031932831, "loss": 1.263, "step": 80 }, { "beta_dpo/beta_used": 0.0756058320403099, "beta_dpo/beta_used_raw": 0.0756058320403099, "beta_dpo/gap_mean": 3.1903529167175293, "beta_dpo/gap_std": 4.072197914123535, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.11894273127753303, "grad_norm": 22.593578338623047, "learning_rate": 4.996015471965529e-07, "logits/chosen": -0.4839993119239807, "logits/rejected": 0.11238844692707062, "loss": 1.2039, "step": 81 }, { "beta_dpo/beta_used": 0.06392714381217957, "beta_dpo/beta_used_raw": 0.06392714381217957, "beta_dpo/gap_mean": 3.1580824851989746, "beta_dpo/gap_std": 4.085663795471191, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.12041116005873716, "grad_norm": 21.1937313079834, "learning_rate": 4.995258321842611e-07, "logits/chosen": -0.18834875524044037, "logits/rejected": 0.010252445936203003, "loss": 1.2371, "step": 82 }, { "beta_dpo/beta_used": 0.051595039665699005, "beta_dpo/beta_used_raw": 0.051595039665699005, "beta_dpo/gap_mean": 3.2494096755981445, "beta_dpo/gap_std": 4.285775661468506, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.12187958883994127, "grad_norm": 16.829927444458008, "learning_rate": 4.994435419342304e-07, "logits/chosen": -0.4440305233001709, "logits/rejected": -0.3782370090484619, "loss": 1.279, "step": 83 }, { "beta_dpo/beta_used": 0.035387977957725525, "beta_dpo/beta_used_raw": 0.026219218969345093, "beta_dpo/gap_mean": 3.168778896331787, "beta_dpo/gap_std": 4.352312088012695, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.12334801762114538, "grad_norm": 14.960714340209961, "learning_rate": 4.993546786148857e-07, "logits/chosen": -0.734527587890625, "logits/rejected": -0.41321492195129395, "loss": 1.3021, "step": 84 }, { "beta_dpo/beta_used": 0.09027501940727234, "beta_dpo/beta_used_raw": 0.09027501940727234, "beta_dpo/gap_mean": 3.0220541954040527, "beta_dpo/gap_std": 4.437371253967285, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.12481644640234948, "grad_norm": 26.920080184936523, "learning_rate": 4.992592445678582e-07, "logits/chosen": 0.3732537031173706, "logits/rejected": 0.6761988997459412, "loss": 1.1866, "step": 85 }, { "beta_dpo/beta_used": 0.04152470454573631, "beta_dpo/beta_used_raw": 0.04152470454573631, "beta_dpo/gap_mean": 3.152754783630371, "beta_dpo/gap_std": 4.687079429626465, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1262848751835536, "grad_norm": 12.763246536254883, "learning_rate": 4.991572423079235e-07, "logits/chosen": -0.7253550887107849, "logits/rejected": -0.5855756998062134, "loss": 1.2988, "step": 86 }, { "beta_dpo/beta_used": 0.06223129481077194, "beta_dpo/beta_used_raw": 0.06223129481077194, "beta_dpo/gap_mean": 3.322826862335205, "beta_dpo/gap_std": 4.999612808227539, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1277533039647577, "grad_norm": 19.533201217651367, "learning_rate": 4.990486745229364e-07, "logits/chosen": -0.8896793723106384, "logits/rejected": -0.609255313873291, "loss": 1.2483, "step": 87 }, { "beta_dpo/beta_used": 0.10454927384853363, "beta_dpo/beta_used_raw": 0.10454927384853363, "beta_dpo/gap_mean": 3.3977251052856445, "beta_dpo/gap_std": 5.225910186767578, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.12922173274596183, "grad_norm": 28.452533721923828, "learning_rate": 4.989335440737586e-07, "logits/chosen": -0.383707731962204, "logits/rejected": -0.38696473836898804, "loss": 1.1191, "step": 88 }, { "beta_dpo/beta_used": 0.03293079510331154, "beta_dpo/beta_used_raw": 0.03293079510331154, "beta_dpo/gap_mean": 3.3997325897216797, "beta_dpo/gap_std": 5.348217010498047, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.13069016152716592, "grad_norm": 11.382292747497559, "learning_rate": 4.988118539941847e-07, "logits/chosen": -0.12446750700473785, "logits/rejected": 0.03424917906522751, "loss": 1.3119, "step": 89 }, { "beta_dpo/beta_used": 0.11191559582948685, "beta_dpo/beta_used_raw": 0.11191559582948685, "beta_dpo/gap_mean": 3.538516044616699, "beta_dpo/gap_std": 5.529797554016113, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.13215859030837004, "grad_norm": 32.30577850341797, "learning_rate": 4.986836074908615e-07, "logits/chosen": -0.3827494978904724, "logits/rejected": -0.2031489610671997, "loss": 1.063, "step": 90 }, { "beta_dpo/beta_used": 0.0587504506111145, "beta_dpo/beta_used_raw": 0.0587504506111145, "beta_dpo/gap_mean": 3.693659782409668, "beta_dpo/gap_std": 5.714102745056152, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.13362701908957417, "grad_norm": 19.282800674438477, "learning_rate": 4.985488079432037e-07, "logits/chosen": -0.5610724687576294, "logits/rejected": -0.07184645533561707, "loss": 1.2361, "step": 91 }, { "beta_dpo/beta_used": 0.021044503897428513, "beta_dpo/beta_used_raw": 0.021044503897428513, "beta_dpo/gap_mean": 3.6730222702026367, "beta_dpo/gap_std": 5.738746166229248, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.13509544787077826, "grad_norm": 9.117363929748535, "learning_rate": 4.984074589033043e-07, "logits/chosen": -1.3292481899261475, "logits/rejected": -1.0745368003845215, "loss": 1.3362, "step": 92 }, { "beta_dpo/beta_used": 0.06564544886350632, "beta_dpo/beta_used_raw": 0.06564544886350632, "beta_dpo/gap_mean": 3.622422933578491, "beta_dpo/gap_std": 5.81143856048584, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.13656387665198239, "grad_norm": 18.065797805786133, "learning_rate": 4.982595640958425e-07, "logits/chosen": -1.129691481590271, "logits/rejected": -0.4519658386707306, "loss": 1.2159, "step": 93 }, { "beta_dpo/beta_used": 0.07418715953826904, "beta_dpo/beta_used_raw": 0.07418715953826904, "beta_dpo/gap_mean": 3.922881603240967, "beta_dpo/gap_std": 5.974148273468018, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.13803230543318648, "grad_norm": 30.44569206237793, "learning_rate": 4.98105127417984e-07, "logits/chosen": -0.7543771266937256, "logits/rejected": -0.3432539701461792, "loss": 1.1855, "step": 94 }, { "beta_dpo/beta_used": 0.016762804239988327, "beta_dpo/beta_used_raw": 0.016762804239988327, "beta_dpo/gap_mean": 3.914681911468506, "beta_dpo/gap_std": 6.0327043533325195, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1395007342143906, "grad_norm": 4.844404220581055, "learning_rate": 4.979441529392784e-07, "logits/chosen": -1.3417716026306152, "logits/rejected": -0.5555290579795837, "loss": 1.3438, "step": 95 }, { "beta_dpo/beta_used": 0.07483043521642685, "beta_dpo/beta_used_raw": 0.07483043521642685, "beta_dpo/gap_mean": 4.16600227355957, "beta_dpo/gap_std": 6.302978515625, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.14096916299559473, "grad_norm": 20.949954986572266, "learning_rate": 4.977766449015534e-07, "logits/chosen": -0.09304308891296387, "logits/rejected": 0.17778439819812775, "loss": 1.1501, "step": 96 }, { "beta_dpo/beta_used": 0.08849300444126129, "beta_dpo/beta_used_raw": 0.08849300444126129, "beta_dpo/gap_mean": 4.1845598220825195, "beta_dpo/gap_std": 6.200246810913086, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.14243759177679882, "grad_norm": 24.92792320251465, "learning_rate": 4.976026077188012e-07, "logits/chosen": -0.6613257527351379, "logits/rejected": -0.10425081849098206, "loss": 1.0978, "step": 97 }, { "beta_dpo/beta_used": 0.11061571538448334, "beta_dpo/beta_used_raw": 0.11061571538448334, "beta_dpo/gap_mean": 4.468649387359619, "beta_dpo/gap_std": 6.226131439208984, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.14390602055800295, "grad_norm": 31.26581573486328, "learning_rate": 4.974220459770639e-07, "logits/chosen": -0.2658594846725464, "logits/rejected": -0.22771313786506653, "loss": 1.0047, "step": 98 }, { "beta_dpo/beta_used": 0.06567872315645218, "beta_dpo/beta_used_raw": 0.06567872315645218, "beta_dpo/gap_mean": 4.65333366394043, "beta_dpo/gap_std": 6.376982688903809, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.14537444933920704, "grad_norm": 21.47028160095215, "learning_rate": 4.972349644343108e-07, "logits/chosen": -0.632530152797699, "logits/rejected": -0.23820585012435913, "loss": 1.16, "step": 99 }, { "beta_dpo/beta_used": 0.07161588966846466, "beta_dpo/beta_used_raw": 0.07161588966846466, "beta_dpo/gap_mean": 4.682595252990723, "beta_dpo/gap_std": 6.322968482971191, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.14684287812041116, "grad_norm": 20.208816528320312, "learning_rate": 4.970413680203148e-07, "logits/chosen": -0.9340729117393494, "logits/rejected": -0.8069697618484497, "loss": 1.1246, "step": 100 }, { "epoch": 0.14684287812041116, "eval_beta_dpo/beta_used": 0.029263369739055634, "eval_beta_dpo/beta_used_raw": -4.359763352113077e-06, "eval_beta_dpo/gap_mean": 4.617349624633789, "eval_beta_dpo/gap_std": 6.235292911529541, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.1879054754972458, "eval_logits/rejected": 0.048689987510442734, "eval_loss": 0.6499497890472412, "eval_runtime": 44.1735, "eval_samples_per_second": 52.95, "eval_steps_per_second": 1.675, "step": 100 }, { "beta_dpo/beta_used": 0.05792199447751045, "beta_dpo/beta_used_raw": 0.011043012142181396, "beta_dpo/gap_mean": 4.556900501251221, "beta_dpo/gap_std": 6.3267927169799805, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.14831130690161526, "grad_norm": 22.336591720581055, "learning_rate": 4.968412618365215e-07, "logits/chosen": -0.3307875096797943, "logits/rejected": -0.38723018765449524, "loss": 1.1745, "step": 101 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0021508424542844296, "beta_dpo/gap_mean": 4.187932014465332, "beta_dpo/gap_std": 6.494045257568359, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.14977973568281938, "grad_norm": 0.3191300332546234, "learning_rate": 4.966346511559149e-07, "logits/chosen": -0.49200695753097534, "logits/rejected": 0.015693657100200653, "loss": 1.3837, "step": 102 }, { "beta_dpo/beta_used": 0.12325669825077057, "beta_dpo/beta_used_raw": 0.12325669825077057, "beta_dpo/gap_mean": 4.446552276611328, "beta_dpo/gap_std": 6.728653907775879, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.1512481644640235, "grad_norm": 30.04520034790039, "learning_rate": 4.964215414228785e-07, "logits/chosen": -0.1893901824951172, "logits/rejected": 0.20488853752613068, "loss": 0.9733, "step": 103 }, { "beta_dpo/beta_used": 0.10145142674446106, "beta_dpo/beta_used_raw": 0.10145142674446106, "beta_dpo/gap_mean": 4.730528831481934, "beta_dpo/gap_std": 7.018924713134766, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.1527165932452276, "grad_norm": 25.41728973388672, "learning_rate": 4.96201938253052e-07, "logits/chosen": -1.5134764909744263, "logits/rejected": -0.8099765181541443, "loss": 1.0458, "step": 104 }, { "beta_dpo/beta_used": 0.07333097606897354, "beta_dpo/beta_used_raw": 0.07333097606897354, "beta_dpo/gap_mean": 4.928286552429199, "beta_dpo/gap_std": 7.368670463562012, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.15418502202643172, "grad_norm": 21.229785919189453, "learning_rate": 4.959758474331832e-07, "logits/chosen": -1.5502166748046875, "logits/rejected": -0.854112982749939, "loss": 1.1229, "step": 105 }, { "beta_dpo/beta_used": 0.04561164975166321, "beta_dpo/beta_used_raw": 0.04561164975166321, "beta_dpo/gap_mean": 5.1268205642700195, "beta_dpo/gap_std": 7.609879970550537, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.15565345080763582, "grad_norm": 13.381973266601562, "learning_rate": 4.957432749209755e-07, "logits/chosen": -0.42794278264045715, "logits/rejected": 0.10008341073989868, "loss": 1.2137, "step": 106 }, { "beta_dpo/beta_used": 0.1029866486787796, "beta_dpo/beta_used_raw": 0.1029866486787796, "beta_dpo/gap_mean": 5.44158935546875, "beta_dpo/gap_std": 8.028169631958008, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.15712187958883994, "grad_norm": 28.917911529541016, "learning_rate": 4.955042268449307e-07, "logits/chosen": -0.9266539812088013, "logits/rejected": -0.49631673097610474, "loss": 0.9888, "step": 107 }, { "beta_dpo/beta_used": 0.06229037046432495, "beta_dpo/beta_used_raw": 0.06229037046432495, "beta_dpo/gap_mean": 5.726339340209961, "beta_dpo/gap_std": 8.711938858032227, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.15859030837004406, "grad_norm": 19.129323959350586, "learning_rate": 4.952587095041881e-07, "logits/chosen": -0.9584572911262512, "logits/rejected": -0.5016314387321472, "loss": 1.1273, "step": 108 }, { "beta_dpo/beta_used": 0.06188575178384781, "beta_dpo/beta_used_raw": 0.06188575178384781, "beta_dpo/gap_mean": 6.139488697052002, "beta_dpo/gap_std": 9.01210880279541, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.16005873715124816, "grad_norm": 18.17817497253418, "learning_rate": 4.95006729368358e-07, "logits/chosen": -1.1628775596618652, "logits/rejected": -1.2133209705352783, "loss": 1.1233, "step": 109 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.051077503710985184, "beta_dpo/gap_mean": 6.065304756164551, "beta_dpo/gap_std": 9.025213241577148, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.16152716593245228, "grad_norm": 0.3541998863220215, "learning_rate": 4.947482930773511e-07, "logits/chosen": -0.3318154215812683, "logits/rejected": 0.049862414598464966, "loss": 1.3828, "step": 110 }, { "beta_dpo/beta_used": 0.21433338522911072, "beta_dpo/beta_used_raw": 0.21433338522911072, "beta_dpo/gap_mean": 6.521720886230469, "beta_dpo/gap_std": 9.535682678222656, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.16299559471365638, "grad_norm": 52.75183868408203, "learning_rate": 4.944834074412042e-07, "logits/chosen": -1.2158098220825195, "logits/rejected": -1.3288953304290771, "loss": 0.6861, "step": 111 }, { "beta_dpo/beta_used": 0.0375223234295845, "beta_dpo/beta_used_raw": 0.0375223234295845, "beta_dpo/gap_mean": 6.324298858642578, "beta_dpo/gap_std": 9.393738746643066, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1644640234948605, "grad_norm": 13.388900756835938, "learning_rate": 4.942120794399002e-07, "logits/chosen": -0.3608340322971344, "logits/rejected": -0.2805202603340149, "loss": 1.2109, "step": 112 }, { "beta_dpo/beta_used": 0.004732728470116854, "beta_dpo/beta_used_raw": -0.012436026707291603, "beta_dpo/gap_mean": 6.02249002456665, "beta_dpo/gap_std": 9.307682037353516, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.16593245227606462, "grad_norm": 2.6726489067077637, "learning_rate": 4.939343162231841e-07, "logits/chosen": -0.6618878841400146, "logits/rejected": -0.6378419995307922, "loss": 1.3654, "step": 113 }, { "beta_dpo/beta_used": 0.10487943142652512, "beta_dpo/beta_used_raw": 0.10487943142652512, "beta_dpo/gap_mean": 6.201924800872803, "beta_dpo/gap_std": 9.722326278686523, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.16740088105726872, "grad_norm": 37.85430145263672, "learning_rate": 4.936501251103751e-07, "logits/chosen": -2.106161117553711, "logits/rejected": -1.1193244457244873, "loss": 1.0121, "step": 114 }, { "beta_dpo/beta_used": 0.07024595141410828, "beta_dpo/beta_used_raw": 0.07024595141410828, "beta_dpo/gap_mean": 6.369258880615234, "beta_dpo/gap_std": 9.897127151489258, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.16886930983847284, "grad_norm": 22.83355712890625, "learning_rate": 4.933595135901732e-07, "logits/chosen": -0.8814918994903564, "logits/rejected": -0.22126063704490662, "loss": 1.0974, "step": 115 }, { "beta_dpo/beta_used": 0.03589708358049393, "beta_dpo/beta_used_raw": 0.016239957883954048, "beta_dpo/gap_mean": 6.360749244689941, "beta_dpo/gap_std": 9.90849781036377, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.17033773861967694, "grad_norm": 15.455899238586426, "learning_rate": 4.930624893204624e-07, "logits/chosen": -0.8801502585411072, "logits/rejected": -0.5614099502563477, "loss": 1.2206, "step": 116 }, { "beta_dpo/beta_used": 0.06246402487158775, "beta_dpo/beta_used_raw": 0.06246402487158775, "beta_dpo/gap_mean": 6.227132797241211, "beta_dpo/gap_std": 10.034835815429688, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.17180616740088106, "grad_norm": 17.241731643676758, "learning_rate": 4.927590601281083e-07, "logits/chosen": -1.1716945171356201, "logits/rejected": -0.6936039328575134, "loss": 1.1196, "step": 117 }, { "beta_dpo/beta_used": 0.11437574028968811, "beta_dpo/beta_used_raw": 0.11437574028968811, "beta_dpo/gap_mean": 6.186464786529541, "beta_dpo/gap_std": 9.922000885009766, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.17327459618208516, "grad_norm": 26.410181045532227, "learning_rate": 4.924492340087524e-07, "logits/chosen": -0.24005727469921112, "logits/rejected": 0.17718710005283356, "loss": 0.9042, "step": 118 }, { "beta_dpo/beta_used": 0.15982091426849365, "beta_dpo/beta_used_raw": 0.15982091426849365, "beta_dpo/gap_mean": 6.43756103515625, "beta_dpo/gap_std": 10.469891548156738, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.17474302496328928, "grad_norm": 35.94211959838867, "learning_rate": 4.92133019126601e-07, "logits/chosen": -1.271965742111206, "logits/rejected": -0.8017282485961914, "loss": 0.782, "step": 119 }, { "beta_dpo/beta_used": 0.18478921055793762, "beta_dpo/beta_used_raw": 0.18478921055793762, "beta_dpo/gap_mean": 6.951420783996582, "beta_dpo/gap_std": 11.378963470458984, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1762114537444934, "grad_norm": 71.97966766357422, "learning_rate": 4.918104238142103e-07, "logits/chosen": -1.8507401943206787, "logits/rejected": -0.5078636407852173, "loss": 0.8735, "step": 120 }, { "beta_dpo/beta_used": 0.08799508213996887, "beta_dpo/beta_used_raw": 0.08799508213996887, "beta_dpo/gap_mean": 7.4571428298950195, "beta_dpo/gap_std": 11.627532958984375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1776798825256975, "grad_norm": 25.373376846313477, "learning_rate": 4.91481456572267e-07, "logits/chosen": -0.416792631149292, "logits/rejected": -0.1818535476922989, "loss": 0.936, "step": 121 }, { "beta_dpo/beta_used": 0.17342130839824677, "beta_dpo/beta_used_raw": 0.17342130839824677, "beta_dpo/gap_mean": 7.865861892700195, "beta_dpo/gap_std": 11.58388900756836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.17914831130690162, "grad_norm": 48.199588775634766, "learning_rate": 4.911461260693638e-07, "logits/chosen": -0.6681898832321167, "logits/rejected": -0.8930027484893799, "loss": 0.7657, "step": 122 }, { "beta_dpo/beta_used": 0.025086402893066406, "beta_dpo/beta_used_raw": 0.008279215544462204, "beta_dpo/gap_mean": 8.13044548034668, "beta_dpo/gap_std": 11.621488571166992, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.18061674008810572, "grad_norm": 11.817289352416992, "learning_rate": 4.908044411417711e-07, "logits/chosen": -0.7230386734008789, "logits/rejected": -0.658951997756958, "loss": 1.2325, "step": 123 }, { "beta_dpo/beta_used": 0.03426877409219742, "beta_dpo/beta_used_raw": 0.0250830240547657, "beta_dpo/gap_mean": 8.08623218536377, "beta_dpo/gap_std": 12.203933715820312, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.18208516886930984, "grad_norm": 15.88585376739502, "learning_rate": 4.904564107932048e-07, "logits/chosen": -1.3795387744903564, "logits/rejected": -1.25779390335083, "loss": 1.2045, "step": 124 }, { "beta_dpo/beta_used": 0.02784820832312107, "beta_dpo/beta_used_raw": -0.018910221755504608, "beta_dpo/gap_mean": 7.851171493530273, "beta_dpo/gap_std": 12.244317054748535, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.18355359765051396, "grad_norm": 11.108583450317383, "learning_rate": 4.90102044194588e-07, "logits/chosen": -1.035954236984253, "logits/rejected": -0.5925918221473694, "loss": 1.2259, "step": 125 }, { "beta_dpo/beta_used": 0.13587123155593872, "beta_dpo/beta_used_raw": 0.13587123155593872, "beta_dpo/gap_mean": 7.984950542449951, "beta_dpo/gap_std": 12.292469024658203, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.18502202643171806, "grad_norm": 18.8892822265625, "learning_rate": 4.897413506838102e-07, "logits/chosen": -1.1455111503601074, "logits/rejected": -0.735314130783081, "loss": 0.8214, "step": 126 }, { "beta_dpo/beta_used": 0.015097999945282936, "beta_dpo/beta_used_raw": -0.08129874616861343, "beta_dpo/gap_mean": 7.8060221672058105, "beta_dpo/gap_std": 12.259511947631836, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.18649045521292218, "grad_norm": 7.639008045196533, "learning_rate": 4.89374339765481e-07, "logits/chosen": -1.6038010120391846, "logits/rejected": -1.1748343706130981, "loss": 1.3005, "step": 127 }, { "beta_dpo/beta_used": 0.02616513893008232, "beta_dpo/beta_used_raw": -0.017380883917212486, "beta_dpo/gap_mean": 7.4818620681762695, "beta_dpo/gap_std": 12.500299453735352, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.18795888399412627, "grad_norm": 9.376649856567383, "learning_rate": 4.890010211106795e-07, "logits/chosen": -1.3974875211715698, "logits/rejected": -1.2336883544921875, "loss": 1.241, "step": 128 }, { "beta_dpo/beta_used": 0.02920507825911045, "beta_dpo/beta_used_raw": 0.016296565532684326, "beta_dpo/gap_mean": 7.44722843170166, "beta_dpo/gap_std": 12.719097137451172, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1894273127753304, "grad_norm": 11.695999145507812, "learning_rate": 4.88621404556699e-07, "logits/chosen": -0.9010772705078125, "logits/rejected": -0.5811547040939331, "loss": 1.255, "step": 129 }, { "beta_dpo/beta_used": 0.10024239122867584, "beta_dpo/beta_used_raw": 0.058206088840961456, "beta_dpo/gap_mean": 7.697457313537598, "beta_dpo/gap_std": 12.858654975891113, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.19089574155653452, "grad_norm": 35.886478424072266, "learning_rate": 4.882355001067891e-07, "logits/chosen": -1.766028642654419, "logits/rejected": -1.4478832483291626, "loss": 1.0962, "step": 130 }, { "beta_dpo/beta_used": 0.051814038306474686, "beta_dpo/beta_used_raw": -0.005639418959617615, "beta_dpo/gap_mean": 8.031866073608398, "beta_dpo/gap_std": 13.036602020263672, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.19236417033773862, "grad_norm": 16.280431747436523, "learning_rate": 4.878433179298909e-07, "logits/chosen": -0.9288041591644287, "logits/rejected": -0.586998462677002, "loss": 1.1125, "step": 131 }, { "beta_dpo/beta_used": 0.12162086367607117, "beta_dpo/beta_used_raw": 0.0700206607580185, "beta_dpo/gap_mean": 8.335851669311523, "beta_dpo/gap_std": 13.116096496582031, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.19383259911894274, "grad_norm": 74.64128875732422, "learning_rate": 4.874448683603694e-07, "logits/chosen": -1.410784363746643, "logits/rejected": -0.9624686241149902, "loss": 1.11, "step": 132 }, { "beta_dpo/beta_used": 0.02543148770928383, "beta_dpo/beta_used_raw": -0.024122050032019615, "beta_dpo/gap_mean": 8.31583309173584, "beta_dpo/gap_std": 13.013181686401367, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.19530102790014683, "grad_norm": 13.60718822479248, "learning_rate": 4.870401618977415e-07, "logits/chosen": -1.5543633699417114, "logits/rejected": -1.2189191579818726, "loss": 1.2251, "step": 133 }, { "beta_dpo/beta_used": 0.14605940878391266, "beta_dpo/beta_used_raw": 0.11427221447229385, "beta_dpo/gap_mean": 8.396821975708008, "beta_dpo/gap_std": 12.921862602233887, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.19676945668135096, "grad_norm": 37.15864944458008, "learning_rate": 4.866292092063986e-07, "logits/chosen": -1.2449548244476318, "logits/rejected": -1.1385276317596436, "loss": 0.9317, "step": 134 }, { "beta_dpo/beta_used": 0.045718513429164886, "beta_dpo/beta_used_raw": 0.045718513429164886, "beta_dpo/gap_mean": 8.817639350891113, "beta_dpo/gap_std": 12.690017700195312, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.19823788546255505, "grad_norm": 17.508895874023438, "learning_rate": 4.862120211153265e-07, "logits/chosen": 0.02831496298313141, "logits/rejected": -0.29007458686828613, "loss": 1.0905, "step": 135 }, { "beta_dpo/beta_used": 0.02267739549279213, "beta_dpo/beta_used_raw": -0.015174014493823051, "beta_dpo/gap_mean": 8.903334617614746, "beta_dpo/gap_std": 12.98226547241211, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.19970631424375918, "grad_norm": 11.94852352142334, "learning_rate": 4.857886086178193e-07, "logits/chosen": -0.7013956308364868, "logits/rejected": -0.5501904487609863, "loss": 1.2525, "step": 136 }, { "beta_dpo/beta_used": 0.1529083102941513, "beta_dpo/beta_used_raw": 0.10866523534059525, "beta_dpo/gap_mean": 9.902145385742188, "beta_dpo/gap_std": 13.492377281188965, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2011747430249633, "grad_norm": 13.686877250671387, "learning_rate": 4.853589828711902e-07, "logits/chosen": -1.8978835344314575, "logits/rejected": -1.6756528615951538, "loss": 0.786, "step": 137 }, { "beta_dpo/beta_used": 0.05722189322113991, "beta_dpo/beta_used_raw": 0.026043172925710678, "beta_dpo/gap_mean": 9.929509162902832, "beta_dpo/gap_std": 13.629077911376953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2026431718061674, "grad_norm": 19.724102020263672, "learning_rate": 4.849231551964771e-07, "logits/chosen": -1.0326536893844604, "logits/rejected": -0.7594943046569824, "loss": 1.0938, "step": 138 }, { "beta_dpo/beta_used": 0.12640082836151123, "beta_dpo/beta_used_raw": 0.12640082836151123, "beta_dpo/gap_mean": 9.884401321411133, "beta_dpo/gap_std": 13.6463623046875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20411160058737152, "grad_norm": 26.593109130859375, "learning_rate": 4.844811370781446e-07, "logits/chosen": -1.64597749710083, "logits/rejected": -0.9838266372680664, "loss": 0.6854, "step": 139 }, { "beta_dpo/beta_used": 0.056642085313797, "beta_dpo/beta_used_raw": 0.056642085313797, "beta_dpo/gap_mean": 9.712495803833008, "beta_dpo/gap_std": 13.483675003051758, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2055800293685756, "grad_norm": 18.55207061767578, "learning_rate": 4.840329401637809e-07, "logits/chosen": -1.6746652126312256, "logits/rejected": -1.4344180822372437, "loss": 1.0512, "step": 140 }, { "beta_dpo/beta_used": 0.0755089595913887, "beta_dpo/beta_used_raw": 0.0755089595913887, "beta_dpo/gap_mean": 9.770435333251953, "beta_dpo/gap_std": 13.59737491607666, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.20704845814977973, "grad_norm": 21.781705856323242, "learning_rate": 4.83578576263792e-07, "logits/chosen": -1.4660749435424805, "logits/rejected": -0.8644838333129883, "loss": 0.9227, "step": 141 }, { "beta_dpo/beta_used": 0.01150353904813528, "beta_dpo/beta_used_raw": -0.020570263266563416, "beta_dpo/gap_mean": 9.913363456726074, "beta_dpo/gap_std": 13.940789222717285, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.20851688693098386, "grad_norm": 6.346410751342773, "learning_rate": 4.83118057351089e-07, "logits/chosen": -1.5786668062210083, "logits/rejected": -1.476420283317566, "loss": 1.2976, "step": 142 }, { "beta_dpo/beta_used": 0.027784820646047592, "beta_dpo/beta_used_raw": -0.006260888651013374, "beta_dpo/gap_mean": 9.732027053833008, "beta_dpo/gap_std": 13.709982872009277, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20998531571218795, "grad_norm": 11.604023933410645, "learning_rate": 4.826513955607734e-07, "logits/chosen": -1.2161595821380615, "logits/rejected": -0.7919315099716187, "loss": 1.2025, "step": 143 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.05543350800871849, "beta_dpo/gap_mean": 9.147777557373047, "beta_dpo/gap_std": 13.664955139160156, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.21145374449339208, "grad_norm": 0.4413388669490814, "learning_rate": 4.821786031898176e-07, "logits/chosen": -1.390363335609436, "logits/rejected": -0.8810880184173584, "loss": 1.3798, "step": 144 }, { "beta_dpo/beta_used": 0.06164686381816864, "beta_dpo/beta_used_raw": 0.05399645119905472, "beta_dpo/gap_mean": 9.196908950805664, "beta_dpo/gap_std": 13.344491004943848, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.21292217327459617, "grad_norm": 26.998960494995117, "learning_rate": 4.816996926967401e-07, "logits/chosen": -0.8819482922554016, "logits/rejected": -0.13721227645874023, "loss": 1.0788, "step": 145 }, { "beta_dpo/beta_used": 0.07673842459917068, "beta_dpo/beta_used_raw": 0.07673842459917068, "beta_dpo/gap_mean": 9.43843936920166, "beta_dpo/gap_std": 13.427677154541016, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2143906020558003, "grad_norm": 26.07987403869629, "learning_rate": 4.812146767012779e-07, "logits/chosen": -0.7555097341537476, "logits/rejected": -0.3993244469165802, "loss": 0.9748, "step": 146 }, { "beta_dpo/beta_used": 0.0317380353808403, "beta_dpo/beta_used_raw": 0.0317380353808403, "beta_dpo/gap_mean": 9.393177032470703, "beta_dpo/gap_std": 13.246931076049805, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.21585903083700442, "grad_norm": 13.280474662780762, "learning_rate": 4.807235679840536e-07, "logits/chosen": -0.7165126800537109, "logits/rejected": -0.09713305532932281, "loss": 1.1652, "step": 147 }, { "beta_dpo/beta_used": 0.08999797701835632, "beta_dpo/beta_used_raw": 0.08999797701835632, "beta_dpo/gap_mean": 9.365718841552734, "beta_dpo/gap_std": 12.914957046508789, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2173274596182085, "grad_norm": 43.10494613647461, "learning_rate": 4.802263794862384e-07, "logits/chosen": -1.666337251663208, "logits/rejected": -0.916774332523346, "loss": 1.0254, "step": 148 }, { "beta_dpo/beta_used": 0.07181931287050247, "beta_dpo/beta_used_raw": 0.05286760255694389, "beta_dpo/gap_mean": 9.781652450561523, "beta_dpo/gap_std": 13.695876121520996, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.21879588839941264, "grad_norm": 26.33489990234375, "learning_rate": 4.797231243092118e-07, "logits/chosen": -1.176733374595642, "logits/rejected": -1.0434083938598633, "loss": 1.0403, "step": 149 }, { "beta_dpo/beta_used": 0.06182215362787247, "beta_dpo/beta_used_raw": 0.04589571803808212, "beta_dpo/gap_mean": 9.728391647338867, "beta_dpo/gap_std": 13.834672927856445, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.22026431718061673, "grad_norm": 17.09866714477539, "learning_rate": 4.792138157142157e-07, "logits/chosen": -1.348958134651184, "logits/rejected": -0.8628441095352173, "loss": 0.9952, "step": 150 }, { "beta_dpo/beta_used": 0.03461529687047005, "beta_dpo/beta_used_raw": 0.03461529687047005, "beta_dpo/gap_mean": 9.682779312133789, "beta_dpo/gap_std": 14.172306060791016, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.22173274596182085, "grad_norm": 13.76240348815918, "learning_rate": 4.786984671220053e-07, "logits/chosen": -2.906869888305664, "logits/rejected": -2.313046932220459, "loss": 1.1467, "step": 151 }, { "beta_dpo/beta_used": 0.05210987105965614, "beta_dpo/beta_used_raw": 0.04498470202088356, "beta_dpo/gap_mean": 10.006989479064941, "beta_dpo/gap_std": 14.11435317993164, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.22320117474302498, "grad_norm": 17.71702766418457, "learning_rate": 4.78177092112495e-07, "logits/chosen": -1.2832828760147095, "logits/rejected": -0.9308356642723083, "loss": 1.0583, "step": 152 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09433462470769882, "beta_dpo/gap_mean": 9.721057891845703, "beta_dpo/gap_std": 14.40190315246582, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.22466960352422907, "grad_norm": 0.4966456890106201, "learning_rate": 4.776497044244016e-07, "logits/chosen": -2.0196692943573, "logits/rejected": -1.5572824478149414, "loss": 1.38, "step": 153 }, { "beta_dpo/beta_used": 0.09308835864067078, "beta_dpo/beta_used_raw": 0.09089169651269913, "beta_dpo/gap_mean": 10.133020401000977, "beta_dpo/gap_std": 14.461688995361328, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2261380323054332, "grad_norm": 49.517601013183594, "learning_rate": 4.771163179548808e-07, "logits/chosen": -1.7237651348114014, "logits/rejected": -1.6149145364761353, "loss": 0.9692, "step": 154 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04272199422121048, "beta_dpo/gap_mean": 10.489069938659668, "beta_dpo/gap_std": 14.97435188293457, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.2276064610866373, "grad_norm": 0.4489584267139435, "learning_rate": 4.7657694675916247e-07, "logits/chosen": -1.3847413063049316, "logits/rejected": -1.009522557258606, "loss": 1.3783, "step": 155 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18804168701171875, "beta_dpo/gap_mean": 10.080753326416016, "beta_dpo/gap_std": 15.23552417755127, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2290748898678414, "grad_norm": 0.4157707393169403, "learning_rate": 4.7603160505017893e-07, "logits/chosen": -1.9495174884796143, "logits/rejected": -1.3704191446304321, "loss": 1.3807, "step": 156 }, { "beta_dpo/beta_used": 0.23349730670452118, "beta_dpo/beta_used_raw": 0.23349730670452118, "beta_dpo/gap_mean": 10.294825553894043, "beta_dpo/gap_std": 14.933460235595703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2305433186490455, "grad_norm": 45.34365463256836, "learning_rate": 4.7548030719819154e-07, "logits/chosen": -2.053683042526245, "logits/rejected": -1.90477454662323, "loss": 0.4983, "step": 157 }, { "beta_dpo/beta_used": 0.19772231578826904, "beta_dpo/beta_used_raw": 0.19772231578826904, "beta_dpo/gap_mean": 10.810285568237305, "beta_dpo/gap_std": 15.137372970581055, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.23201174743024963, "grad_norm": 62.011653900146484, "learning_rate": 4.7492306773041136e-07, "logits/chosen": -1.6941977739334106, "logits/rejected": -1.2474297285079956, "loss": 0.5737, "step": 158 }, { "beta_dpo/beta_used": 0.04134593904018402, "beta_dpo/beta_used_raw": 0.04134593904018402, "beta_dpo/gap_mean": 11.02712631225586, "beta_dpo/gap_std": 15.286413192749023, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.23348017621145375, "grad_norm": 14.630475044250488, "learning_rate": 4.743599013306165e-07, "logits/chosen": -0.7188162803649902, "logits/rejected": -0.42425087094306946, "loss": 1.0856, "step": 159 }, { "beta_dpo/beta_used": 0.14309489727020264, "beta_dpo/beta_used_raw": 0.14309489727020264, "beta_dpo/gap_mean": 11.070915222167969, "beta_dpo/gap_std": 14.914669036865234, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.23494860499265785, "grad_norm": 41.04716491699219, "learning_rate": 4.737908228387656e-07, "logits/chosen": -1.9082109928131104, "logits/rejected": -1.252223253250122, "loss": 0.8484, "step": 160 }, { "beta_dpo/beta_used": 0.01924612559378147, "beta_dpo/beta_used_raw": 0.01886550523340702, "beta_dpo/gap_mean": 10.881231307983398, "beta_dpo/gap_std": 14.884664535522461, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.23641703377386197, "grad_norm": 9.691744804382324, "learning_rate": 4.7321584725060594e-07, "logits/chosen": -1.6370534896850586, "logits/rejected": -0.8996493816375732, "loss": 1.2307, "step": 161 }, { "beta_dpo/beta_used": 0.014870250597596169, "beta_dpo/beta_used_raw": -0.013937926851212978, "beta_dpo/gap_mean": 10.78388786315918, "beta_dpo/gap_std": 14.657320022583008, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.23788546255506607, "grad_norm": 8.438042640686035, "learning_rate": 4.7263498971727905e-07, "logits/chosen": -0.6985443830490112, "logits/rejected": -0.7340394854545593, "loss": 1.2619, "step": 162 }, { "beta_dpo/beta_used": 0.09499046206474304, "beta_dpo/beta_used_raw": 0.09499046206474304, "beta_dpo/gap_mean": 11.073570251464844, "beta_dpo/gap_std": 14.936013221740723, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.2393538913362702, "grad_norm": 23.195219039916992, "learning_rate": 4.720482655449212e-07, "logits/chosen": -1.755723476409912, "logits/rejected": -1.3146591186523438, "loss": 0.966, "step": 163 }, { "beta_dpo/beta_used": 0.048484496772289276, "beta_dpo/beta_used_raw": -0.054905518889427185, "beta_dpo/gap_mean": 10.88538932800293, "beta_dpo/gap_std": 14.663843154907227, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24082232011747431, "grad_norm": 25.961963653564453, "learning_rate": 4.714556901942599e-07, "logits/chosen": -1.142005443572998, "logits/rejected": -0.8273177146911621, "loss": 1.1191, "step": 164 }, { "beta_dpo/beta_used": 0.061039723455905914, "beta_dpo/beta_used_raw": 0.05095431208610535, "beta_dpo/gap_mean": 10.835360527038574, "beta_dpo/gap_std": 14.838174819946289, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.2422907488986784, "grad_norm": 21.01889419555664, "learning_rate": 4.708572792802069e-07, "logits/chosen": -1.440589427947998, "logits/rejected": -0.6986174583435059, "loss": 1.0417, "step": 165 }, { "beta_dpo/beta_used": 0.04411781206727028, "beta_dpo/beta_used_raw": 0.04411781206727028, "beta_dpo/gap_mean": 11.252758979797363, "beta_dpo/gap_std": 15.27828311920166, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24375917767988253, "grad_norm": 21.198007583618164, "learning_rate": 4.702530485714461e-07, "logits/chosen": -2.4728612899780273, "logits/rejected": -2.3846404552459717, "loss": 1.0621, "step": 166 }, { "beta_dpo/beta_used": 0.06072790175676346, "beta_dpo/beta_used_raw": 0.04923243448138237, "beta_dpo/gap_mean": 11.877561569213867, "beta_dpo/gap_std": 15.251140594482422, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.24522760646108663, "grad_norm": 15.156601905822754, "learning_rate": 4.6964301399001877e-07, "logits/chosen": -0.06632952392101288, "logits/rejected": 0.2907092869281769, "loss": 0.959, "step": 167 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.01268717646598816, "beta_dpo/gap_mean": 11.734265327453613, "beta_dpo/gap_std": 14.756448745727539, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.24669603524229075, "grad_norm": 0.5311835408210754, "learning_rate": 4.690271916109034e-07, "logits/chosen": -2.7944483757019043, "logits/rejected": -2.1463072299957275, "loss": 1.3762, "step": 168 }, { "beta_dpo/beta_used": 0.027791917324066162, "beta_dpo/beta_used_raw": -0.016583973541855812, "beta_dpo/gap_mean": 11.941884994506836, "beta_dpo/gap_std": 14.766561508178711, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24816446402349487, "grad_norm": 10.78883171081543, "learning_rate": 4.6840559766159235e-07, "logits/chosen": -2.4098339080810547, "logits/rejected": -2.322842597961426, "loss": 1.1529, "step": 169 }, { "beta_dpo/beta_used": 0.12186012417078018, "beta_dpo/beta_used_raw": 0.09154824912548065, "beta_dpo/gap_mean": 11.933835983276367, "beta_dpo/gap_std": 14.100811958312988, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24963289280469897, "grad_norm": 32.531742095947266, "learning_rate": 4.6777824852166437e-07, "logits/chosen": -1.3292378187179565, "logits/rejected": -1.3104033470153809, "loss": 0.8473, "step": 170 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.05536588281393051, "beta_dpo/gap_mean": 11.822772979736328, "beta_dpo/gap_std": 14.968297958374023, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2511013215859031, "grad_norm": 0.4719236493110657, "learning_rate": 4.6714516072235273e-07, "logits/chosen": -2.689007520675659, "logits/rejected": -2.0470364093780518, "loss": 1.3774, "step": 171 }, { "beta_dpo/beta_used": 0.0349096953868866, "beta_dpo/beta_used_raw": 0.0349096953868866, "beta_dpo/gap_mean": 11.722180366516113, "beta_dpo/gap_std": 14.616718292236328, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2525697503671072, "grad_norm": 17.803831100463867, "learning_rate": 4.6650635094610966e-07, "logits/chosen": -1.0337367057800293, "logits/rejected": -0.6409567594528198, "loss": 1.0913, "step": 172 }, { "beta_dpo/beta_used": 0.025228869169950485, "beta_dpo/beta_used_raw": -0.06849551200866699, "beta_dpo/gap_mean": 11.226985931396484, "beta_dpo/gap_std": 14.002543449401855, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2540381791483113, "grad_norm": 10.361151695251465, "learning_rate": 4.6586183602616687e-07, "logits/chosen": -1.6432037353515625, "logits/rejected": -1.187882661819458, "loss": 1.1894, "step": 173 }, { "beta_dpo/beta_used": 0.12070997804403305, "beta_dpo/beta_used_raw": 0.12070997804403305, "beta_dpo/gap_mean": 11.388176918029785, "beta_dpo/gap_std": 14.080391883850098, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2555066079295154, "grad_norm": 42.93434524536133, "learning_rate": 4.652116329460919e-07, "logits/chosen": -1.564131498336792, "logits/rejected": -1.1296707391738892, "loss": 0.7887, "step": 174 }, { "beta_dpo/beta_used": 0.15403205156326294, "beta_dpo/beta_used_raw": 0.15403205156326294, "beta_dpo/gap_mean": 11.710367202758789, "beta_dpo/gap_std": 13.957894325256348, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.25697503671071953, "grad_norm": 50.55311584472656, "learning_rate": 4.645557588393406e-07, "logits/chosen": -0.962306022644043, "logits/rejected": -0.5951389074325562, "loss": 0.5825, "step": 175 }, { "beta_dpo/beta_used": 0.15750160813331604, "beta_dpo/beta_used_raw": 0.15750160813331604, "beta_dpo/gap_mean": 12.05875015258789, "beta_dpo/gap_std": 14.242860794067383, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.25844346549192365, "grad_norm": 31.742752075195312, "learning_rate": 4.638942309888058e-07, "logits/chosen": -0.5246734619140625, "logits/rejected": -0.361318975687027, "loss": 0.6294, "step": 176 }, { "beta_dpo/beta_used": 0.03919539228081703, "beta_dpo/beta_used_raw": 0.03919539228081703, "beta_dpo/gap_mean": 12.447943687438965, "beta_dpo/gap_std": 14.704492568969727, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2599118942731278, "grad_norm": 13.71200180053711, "learning_rate": 4.6322706682636137e-07, "logits/chosen": -1.8452959060668945, "logits/rejected": -1.9624505043029785, "loss": 1.0297, "step": 177 }, { "beta_dpo/beta_used": 0.14656506478786469, "beta_dpo/beta_used_raw": 0.14656506478786469, "beta_dpo/gap_mean": 13.48470687866211, "beta_dpo/gap_std": 15.340696334838867, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.26138032305433184, "grad_norm": 19.883230209350586, "learning_rate": 4.6255428393240354e-07, "logits/chosen": -1.0550949573516846, "logits/rejected": -1.1299716234207153, "loss": 0.6043, "step": 178 }, { "beta_dpo/beta_used": 0.06533389538526535, "beta_dpo/beta_used_raw": 0.004587773233652115, "beta_dpo/gap_mean": 13.46491813659668, "beta_dpo/gap_std": 15.624149322509766, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.26284875183553597, "grad_norm": 28.019865036010742, "learning_rate": 4.6187590003538724e-07, "logits/chosen": -1.2954455614089966, "logits/rejected": -1.0482146739959717, "loss": 1.0268, "step": 179 }, { "beta_dpo/beta_used": 0.09482339769601822, "beta_dpo/beta_used_raw": 0.06445710361003876, "beta_dpo/gap_mean": 13.542264938354492, "beta_dpo/gap_std": 15.617108345031738, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2643171806167401, "grad_norm": 25.957172393798828, "learning_rate": 4.611919330113591e-07, "logits/chosen": -1.9469971656799316, "logits/rejected": -1.342593789100647, "loss": 0.9187, "step": 180 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16363635659217834, "beta_dpo/gap_mean": 13.057550430297852, "beta_dpo/gap_std": 15.115577697753906, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.2657856093979442, "grad_norm": 0.4522143304347992, "learning_rate": 4.605024008834863e-07, "logits/chosen": -1.1184544563293457, "logits/rejected": -0.9509117603302002, "loss": 1.3772, "step": 181 }, { "beta_dpo/beta_used": 0.06607332825660706, "beta_dpo/beta_used_raw": -0.025189578533172607, "beta_dpo/gap_mean": 13.242197036743164, "beta_dpo/gap_std": 15.207517623901367, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.26725403817914833, "grad_norm": 18.858734130859375, "learning_rate": 4.598073218215817e-07, "logits/chosen": -0.5637632608413696, "logits/rejected": -0.6395530104637146, "loss": 0.9268, "step": 182 }, { "beta_dpo/beta_used": 0.04271284118294716, "beta_dpo/beta_used_raw": 0.001886218786239624, "beta_dpo/gap_mean": 13.31583309173584, "beta_dpo/gap_std": 15.089471817016602, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2687224669603524, "grad_norm": 15.126312255859375, "learning_rate": 4.5910671414162484e-07, "logits/chosen": -1.3318713903427124, "logits/rejected": -0.7215397357940674, "loss": 1.0227, "step": 183 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10297068953514099, "beta_dpo/gap_mean": 12.548648834228516, "beta_dpo/gap_std": 15.1453218460083, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2701908957415565, "grad_norm": 0.4771396219730377, "learning_rate": 4.5840059630527985e-07, "logits/chosen": -1.202876329421997, "logits/rejected": -1.0334120988845825, "loss": 1.3769, "step": 184 }, { "beta_dpo/beta_used": 0.02503103017807007, "beta_dpo/beta_used_raw": -0.03311272710561752, "beta_dpo/gap_mean": 11.986303329467773, "beta_dpo/gap_std": 15.116844177246094, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.27165932452276065, "grad_norm": 12.845839500427246, "learning_rate": 4.5768898691940836e-07, "logits/chosen": -1.604547142982483, "logits/rejected": -1.0696234703063965, "loss": 1.172, "step": 185 }, { "beta_dpo/beta_used": 0.07039283961057663, "beta_dpo/beta_used_raw": 0.037874944508075714, "beta_dpo/gap_mean": 12.084492683410645, "beta_dpo/gap_std": 15.036298751831055, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.27312775330396477, "grad_norm": 37.95589828491211, "learning_rate": 4.5697190473557947e-07, "logits/chosen": -1.762836217880249, "logits/rejected": -0.9561566710472107, "loss": 1.1006, "step": 186 }, { "beta_dpo/beta_used": 0.050373200327157974, "beta_dpo/beta_used_raw": 0.01781865581870079, "beta_dpo/gap_mean": 11.821205139160156, "beta_dpo/gap_std": 14.921106338500977, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2745961820851689, "grad_norm": 17.699548721313477, "learning_rate": 4.5624936864957555e-07, "logits/chosen": -1.5083730220794678, "logits/rejected": -1.079012155532837, "loss": 1.0635, "step": 187 }, { "beta_dpo/beta_used": 0.22188173234462738, "beta_dpo/beta_used_raw": 0.22188173234462738, "beta_dpo/gap_mean": 12.162216186523438, "beta_dpo/gap_std": 14.689672470092773, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.27606461086637296, "grad_norm": 35.80200958251953, "learning_rate": 4.5552139770089454e-07, "logits/chosen": -1.5861679315567017, "logits/rejected": -1.3824684619903564, "loss": 0.3456, "step": 188 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07328170537948608, "beta_dpo/gap_mean": 12.102740287780762, "beta_dpo/gap_std": 15.12980842590332, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2775330396475771, "grad_norm": 0.4252919554710388, "learning_rate": 4.5478801107224794e-07, "logits/chosen": -1.6726609468460083, "logits/rejected": -0.8588078618049622, "loss": 1.3771, "step": 189 }, { "beta_dpo/beta_used": 0.12008103728294373, "beta_dpo/beta_used_raw": 0.12008103728294373, "beta_dpo/gap_mean": 12.775008201599121, "beta_dpo/gap_std": 15.181267738342285, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2790014684287812, "grad_norm": 32.254600524902344, "learning_rate": 4.5404922808905543e-07, "logits/chosen": -1.151025652885437, "logits/rejected": -0.609367847442627, "loss": 0.6217, "step": 190 }, { "beta_dpo/beta_used": 0.18861740827560425, "beta_dpo/beta_used_raw": 0.18861740827560425, "beta_dpo/gap_mean": 13.542198181152344, "beta_dpo/gap_std": 15.320116996765137, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.28046989720998533, "grad_norm": 33.18883514404297, "learning_rate": 4.5330506821893565e-07, "logits/chosen": -1.625694751739502, "logits/rejected": -1.439784049987793, "loss": 0.5277, "step": 191 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.043134208768606186, "beta_dpo/gap_mean": 13.56833267211914, "beta_dpo/gap_std": 15.42152214050293, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.28193832599118945, "grad_norm": 0.5424801111221313, "learning_rate": 4.5255555107119336e-07, "logits/chosen": -0.8365378379821777, "logits/rejected": -0.426064670085907, "loss": 1.3751, "step": 192 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07547399401664734, "beta_dpo/gap_mean": 12.877976417541504, "beta_dpo/gap_std": 15.427734375, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2834067547723935, "grad_norm": 0.45949697494506836, "learning_rate": 4.5180069639630236e-07, "logits/chosen": -1.4489470720291138, "logits/rejected": -0.35072940587997437, "loss": 1.3764, "step": 193 }, { "beta_dpo/beta_used": 0.04862586036324501, "beta_dpo/beta_used_raw": 0.022517003118991852, "beta_dpo/gap_mean": 12.606361389160156, "beta_dpo/gap_std": 15.135665893554688, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.28487518355359764, "grad_norm": 14.401615142822266, "learning_rate": 4.510405240853854e-07, "logits/chosen": -2.855362892150879, "logits/rejected": -2.187931776046753, "loss": 1.0808, "step": 194 }, { "beta_dpo/beta_used": 0.15549784898757935, "beta_dpo/beta_used_raw": 0.15549784898757935, "beta_dpo/gap_mean": 12.886514663696289, "beta_dpo/gap_std": 15.310539245605469, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.28634361233480177, "grad_norm": 33.854034423828125, "learning_rate": 4.5027505416968985e-07, "logits/chosen": -1.4690104722976685, "logits/rejected": -1.1913936138153076, "loss": 0.6216, "step": 195 }, { "beta_dpo/beta_used": 0.09694637358188629, "beta_dpo/beta_used_raw": 0.09694637358188629, "beta_dpo/gap_mean": 13.215679168701172, "beta_dpo/gap_std": 15.15478515625, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2878120411160059, "grad_norm": 21.24295997619629, "learning_rate": 4.495043068200599e-07, "logits/chosen": -0.30434098839759827, "logits/rejected": -0.16999830305576324, "loss": 0.7612, "step": 196 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15686266124248505, "beta_dpo/gap_mean": 12.623498916625977, "beta_dpo/gap_std": 14.628103256225586, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.28928046989721, "grad_norm": 0.4905308485031128, "learning_rate": 4.4872830234640493e-07, "logits/chosen": -0.6161783933639526, "logits/rejected": -0.6048535704612732, "loss": 1.3783, "step": 197 }, { "beta_dpo/beta_used": 0.15224260091781616, "beta_dpo/beta_used_raw": 0.15224260091781616, "beta_dpo/gap_mean": 12.599102973937988, "beta_dpo/gap_std": 14.41894817352295, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2907488986784141, "grad_norm": 34.72080993652344, "learning_rate": 4.479470611971645e-07, "logits/chosen": -1.3998963832855225, "logits/rejected": -1.5008985996246338, "loss": 0.5797, "step": 198 }, { "beta_dpo/beta_used": 0.034465912729501724, "beta_dpo/beta_used_raw": -0.010611668229103088, "beta_dpo/gap_mean": 12.675536155700684, "beta_dpo/gap_std": 14.37156867980957, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2922173274596182, "grad_norm": 13.862502098083496, "learning_rate": 4.471606039587695e-07, "logits/chosen": -1.6405764818191528, "logits/rejected": -0.7938838005065918, "loss": 1.11, "step": 199 }, { "beta_dpo/beta_used": 0.3353341221809387, "beta_dpo/beta_used_raw": 0.3353341221809387, "beta_dpo/gap_mean": 13.030755043029785, "beta_dpo/gap_std": 14.559476852416992, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2936857562408223, "grad_norm": 83.0425796508789, "learning_rate": 4.4636895135509966e-07, "logits/chosen": -0.21876315772533417, "logits/rejected": -0.3272482454776764, "loss": 0.313, "step": 200 }, { "epoch": 0.2936857562408223, "eval_beta_dpo/beta_used": 0.022861473262310028, "eval_beta_dpo/beta_used_raw": -0.09986051917076111, "eval_beta_dpo/gap_mean": 12.980899810791016, "eval_beta_dpo/gap_std": 14.598077774047852, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -1.1707913875579834, "eval_logits/rejected": -0.8922103643417358, "eval_loss": 0.6345767974853516, "eval_runtime": 44.0978, "eval_samples_per_second": 53.041, "eval_steps_per_second": 1.678, "step": 200 }, { "beta_dpo/beta_used": 0.04337719827890396, "beta_dpo/beta_used_raw": 0.03660256415605545, "beta_dpo/gap_mean": 12.999302864074707, "beta_dpo/gap_std": 14.945844650268555, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.29515418502202645, "grad_norm": 16.888778686523438, "learning_rate": 4.455721242469372e-07, "logits/chosen": -1.4362549781799316, "logits/rejected": -0.8436669707298279, "loss": 1.0486, "step": 201 }, { "beta_dpo/beta_used": 0.005111072212457657, "beta_dpo/beta_used_raw": -0.022527314722537994, "beta_dpo/gap_mean": 12.948795318603516, "beta_dpo/gap_std": 15.3850679397583, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2966226138032305, "grad_norm": 2.7122955322265625, "learning_rate": 4.4477014363141755e-07, "logits/chosen": -1.2722792625427246, "logits/rejected": -1.0189919471740723, "loss": 1.3289, "step": 202 }, { "beta_dpo/beta_used": 0.12862522900104523, "beta_dpo/beta_used_raw": 0.12862522900104523, "beta_dpo/gap_mean": 13.006298065185547, "beta_dpo/gap_std": 15.015997886657715, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.29809104258443464, "grad_norm": 56.114437103271484, "learning_rate": 4.439630306414758e-07, "logits/chosen": -1.3585411310195923, "logits/rejected": -1.1620537042617798, "loss": 0.8579, "step": 203 }, { "beta_dpo/beta_used": 0.0850096344947815, "beta_dpo/beta_used_raw": 0.054740045219659805, "beta_dpo/gap_mean": 12.737507820129395, "beta_dpo/gap_std": 15.283018112182617, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.29955947136563876, "grad_norm": 21.43082618713379, "learning_rate": 4.431508065452897e-07, "logits/chosen": -1.5987844467163086, "logits/rejected": -1.1546014547348022, "loss": 0.8967, "step": 204 }, { "beta_dpo/beta_used": 0.0995965451002121, "beta_dpo/beta_used_raw": 0.0995965451002121, "beta_dpo/gap_mean": 13.173287391662598, "beta_dpo/gap_std": 14.941397666931152, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3010279001468429, "grad_norm": 23.453872680664062, "learning_rate": 4.4233349274571974e-07, "logits/chosen": -1.287585973739624, "logits/rejected": -0.8196157217025757, "loss": 0.6706, "step": 205 }, { "beta_dpo/beta_used": 0.13683286309242249, "beta_dpo/beta_used_raw": 0.13683286309242249, "beta_dpo/gap_mean": 13.55710220336914, "beta_dpo/gap_std": 14.449283599853516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.302496328928047, "grad_norm": 31.56971549987793, "learning_rate": 4.415111107797445e-07, "logits/chosen": -0.7237926125526428, "logits/rejected": -0.35238227248191833, "loss": 0.4603, "step": 206 }, { "beta_dpo/beta_used": 0.030014729127287865, "beta_dpo/beta_used_raw": -0.027833428233861923, "beta_dpo/gap_mean": 13.621185302734375, "beta_dpo/gap_std": 15.09548568725586, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3039647577092511, "grad_norm": 15.121429443359375, "learning_rate": 4.4068368231789365e-07, "logits/chosen": -2.298497438430786, "logits/rejected": -1.8928518295288086, "loss": 1.1526, "step": 207 }, { "beta_dpo/beta_used": 0.15946441888809204, "beta_dpo/beta_used_raw": 0.15946441888809204, "beta_dpo/gap_mean": 13.768835067749023, "beta_dpo/gap_std": 15.279106140136719, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3054331864904552, "grad_norm": 34.89619445800781, "learning_rate": 4.398512291636768e-07, "logits/chosen": -1.5797189474105835, "logits/rejected": -1.104495644569397, "loss": 0.4405, "step": 208 }, { "beta_dpo/beta_used": 0.02266796864569187, "beta_dpo/beta_used_raw": -0.026145994663238525, "beta_dpo/gap_mean": 13.984394073486328, "beta_dpo/gap_std": 15.439398765563965, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3069016152716593, "grad_norm": 10.798962593078613, "learning_rate": 4.3901377325300857e-07, "logits/chosen": -1.2947354316711426, "logits/rejected": -1.006791353225708, "loss": 1.1714, "step": 209 }, { "beta_dpo/beta_used": 0.04331028088927269, "beta_dpo/beta_used_raw": 0.04278234392404556, "beta_dpo/gap_mean": 13.823338508605957, "beta_dpo/gap_std": 15.4095458984375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.30837004405286345, "grad_norm": 16.52035140991211, "learning_rate": 4.381713366536311e-07, "logits/chosen": -1.9326417446136475, "logits/rejected": -1.2985832691192627, "loss": 1.0328, "step": 210 }, { "beta_dpo/beta_used": 0.04574752599000931, "beta_dpo/beta_used_raw": -0.038681887090206146, "beta_dpo/gap_mean": 13.721076965332031, "beta_dpo/gap_std": 15.460807800292969, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.30983847283406757, "grad_norm": 24.701913833618164, "learning_rate": 4.373239415645323e-07, "logits/chosen": -1.6576852798461914, "logits/rejected": -1.029742956161499, "loss": 1.0769, "step": 211 }, { "beta_dpo/beta_used": 0.2509711980819702, "beta_dpo/beta_used_raw": 0.2509711980819702, "beta_dpo/gap_mean": 14.656517028808594, "beta_dpo/gap_std": 15.86941909790039, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.31130690161527164, "grad_norm": 51.808109283447266, "learning_rate": 4.3647161031536086e-07, "logits/chosen": -1.8365099430084229, "logits/rejected": -1.1870900392532349, "loss": 0.3817, "step": 212 }, { "beta_dpo/beta_used": 0.04146008566021919, "beta_dpo/beta_used_raw": 0.007331036031246185, "beta_dpo/gap_mean": 14.903135299682617, "beta_dpo/gap_std": 15.852666854858398, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.31277533039647576, "grad_norm": 28.643192291259766, "learning_rate": 4.3561436536583774e-07, "logits/chosen": -1.5916590690612793, "logits/rejected": -1.4091522693634033, "loss": 1.0754, "step": 213 }, { "beta_dpo/beta_used": 0.015493694692850113, "beta_dpo/beta_used_raw": -0.1139422208070755, "beta_dpo/gap_mean": 14.58911418914795, "beta_dpo/gap_std": 16.172378540039062, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3142437591776799, "grad_norm": 9.1444673538208, "learning_rate": 4.3475222930516473e-07, "logits/chosen": -1.299335241317749, "logits/rejected": -0.966693639755249, "loss": 1.2192, "step": 214 }, { "beta_dpo/beta_used": 0.07507734000682831, "beta_dpo/beta_used_raw": 0.07507734000682831, "beta_dpo/gap_mean": 14.976900100708008, "beta_dpo/gap_std": 16.408676147460938, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.315712187958884, "grad_norm": 21.278892517089844, "learning_rate": 4.3388522485142885e-07, "logits/chosen": -1.205472707748413, "logits/rejected": -0.8757593035697937, "loss": 0.7435, "step": 215 }, { "beta_dpo/beta_used": 0.03626835718750954, "beta_dpo/beta_used_raw": -0.06867353618144989, "beta_dpo/gap_mean": 14.79420280456543, "beta_dpo/gap_std": 16.09663200378418, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.31718061674008813, "grad_norm": 13.734975814819336, "learning_rate": 4.330133748510036e-07, "logits/chosen": -1.366228699684143, "logits/rejected": -1.189439058303833, "loss": 1.0693, "step": 216 }, { "beta_dpo/beta_used": 0.04537259042263031, "beta_dpo/beta_used_raw": -0.07117318361997604, "beta_dpo/gap_mean": 14.434456825256348, "beta_dpo/gap_std": 15.98811149597168, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3186490455212922, "grad_norm": 25.16986083984375, "learning_rate": 4.3213670227794757e-07, "logits/chosen": -1.8399202823638916, "logits/rejected": -1.6952290534973145, "loss": 1.0775, "step": 217 }, { "beta_dpo/beta_used": 0.08779692649841309, "beta_dpo/beta_used_raw": 0.053243957459926605, "beta_dpo/gap_mean": 14.49212646484375, "beta_dpo/gap_std": 16.23577117919922, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3201174743024963, "grad_norm": 19.83509063720703, "learning_rate": 4.3125523023339815e-07, "logits/chosen": -1.025547981262207, "logits/rejected": -0.8068137764930725, "loss": 0.843, "step": 218 }, { "beta_dpo/beta_used": 0.038829490542411804, "beta_dpo/beta_used_raw": 0.03714650496840477, "beta_dpo/gap_mean": 14.216641426086426, "beta_dpo/gap_std": 16.70601463317871, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.32158590308370044, "grad_norm": 15.10183334350586, "learning_rate": 4.303689819449636e-07, "logits/chosen": -1.3892719745635986, "logits/rejected": -1.0883077383041382, "loss": 1.0343, "step": 219 }, { "beta_dpo/beta_used": 0.022933853790163994, "beta_dpo/beta_used_raw": 0.022933853790163994, "beta_dpo/gap_mean": 14.02775764465332, "beta_dpo/gap_std": 16.341564178466797, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.32305433186490456, "grad_norm": 10.92718505859375, "learning_rate": 4.2947798076611047e-07, "logits/chosen": -1.247340202331543, "logits/rejected": -1.210787296295166, "loss": 1.1295, "step": 220 }, { "beta_dpo/beta_used": 0.253589391708374, "beta_dpo/beta_used_raw": 0.253589391708374, "beta_dpo/gap_mean": 14.438077926635742, "beta_dpo/gap_std": 15.995098114013672, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3245227606461087, "grad_norm": 35.193336486816406, "learning_rate": 4.285822501755485e-07, "logits/chosen": -0.5756672024726868, "logits/rejected": -0.727837324142456, "loss": 0.3889, "step": 221 }, { "beta_dpo/beta_used": 0.1512635350227356, "beta_dpo/beta_used_raw": 0.1512635350227356, "beta_dpo/gap_mean": 14.80112361907959, "beta_dpo/gap_std": 16.69683837890625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.32599118942731276, "grad_norm": 48.97563552856445, "learning_rate": 4.276818137766118e-07, "logits/chosen": -1.5134377479553223, "logits/rejected": -1.0653544664382935, "loss": 0.5451, "step": 222 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.025954080745577812, "beta_dpo/gap_mean": 14.758966445922852, "beta_dpo/gap_std": 17.08310317993164, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3274596182085169, "grad_norm": 0.5194804072380066, "learning_rate": 4.2677669529663686e-07, "logits/chosen": -1.5161755084991455, "logits/rejected": -1.2484257221221924, "loss": 1.3738, "step": 223 }, { "beta_dpo/beta_used": 0.09249898791313171, "beta_dpo/beta_used_raw": -0.0033410415053367615, "beta_dpo/gap_mean": 14.644186019897461, "beta_dpo/gap_std": 17.055156707763672, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.328928046989721, "grad_norm": 45.95803451538086, "learning_rate": 4.2586691858633747e-07, "logits/chosen": -1.6961524486541748, "logits/rejected": -1.0969496965408325, "loss": 0.9883, "step": 224 }, { "beta_dpo/beta_used": 0.21849367022514343, "beta_dpo/beta_used_raw": 0.21849367022514343, "beta_dpo/gap_mean": 15.002262115478516, "beta_dpo/gap_std": 17.485044479370117, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3303964757709251, "grad_norm": 43.80759048461914, "learning_rate": 4.249525076191759e-07, "logits/chosen": -2.022803783416748, "logits/rejected": -1.1923706531524658, "loss": 0.4851, "step": 225 }, { "beta_dpo/beta_used": 0.07541501522064209, "beta_dpo/beta_used_raw": 0.06650637090206146, "beta_dpo/gap_mean": 15.346711158752441, "beta_dpo/gap_std": 17.219505310058594, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.33186490455212925, "grad_norm": 30.993896484375, "learning_rate": 4.2403348649073167e-07, "logits/chosen": -1.4473025798797607, "logits/rejected": -0.7017968893051147, "loss": 0.9455, "step": 226 }, { "beta_dpo/beta_used": 0.14083002507686615, "beta_dpo/beta_used_raw": 0.09833408892154694, "beta_dpo/gap_mean": 15.228629112243652, "beta_dpo/gap_std": 17.45088005065918, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3333333333333333, "grad_norm": 55.5523796081543, "learning_rate": 4.2310987941806615e-07, "logits/chosen": -0.8681109547615051, "logits/rejected": -0.7855240106582642, "loss": 1.1573, "step": 227 }, { "beta_dpo/beta_used": 0.046257004141807556, "beta_dpo/beta_used_raw": 0.046257004141807556, "beta_dpo/gap_mean": 14.790523529052734, "beta_dpo/gap_std": 16.98739242553711, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.33480176211453744, "grad_norm": 19.148212432861328, "learning_rate": 4.2218171073908463e-07, "logits/chosen": -1.7377731800079346, "logits/rejected": -1.401872158050537, "loss": 0.972, "step": 228 }, { "beta_dpo/beta_used": 0.10670603811740875, "beta_dpo/beta_used_raw": 0.10478197783231735, "beta_dpo/gap_mean": 15.433286666870117, "beta_dpo/gap_std": 17.089149475097656, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.33627019089574156, "grad_norm": 41.02910232543945, "learning_rate": 4.212490049118951e-07, "logits/chosen": -1.8957240581512451, "logits/rejected": -0.95273357629776, "loss": 0.8929, "step": 229 }, { "beta_dpo/beta_used": 0.23959508538246155, "beta_dpo/beta_used_raw": 0.23959508538246155, "beta_dpo/gap_mean": 16.245468139648438, "beta_dpo/gap_std": 17.074111938476562, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3377386196769457, "grad_norm": 47.157413482666016, "learning_rate": 4.203117865141635e-07, "logits/chosen": -2.0739693641662598, "logits/rejected": -1.6560773849487305, "loss": 0.3956, "step": 230 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10665209591388702, "beta_dpo/gap_mean": 16.15087127685547, "beta_dpo/gap_std": 16.743568420410156, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.3392070484581498, "grad_norm": 0.5642727613449097, "learning_rate": 4.1937008024246625e-07, "logits/chosen": -2.107431411743164, "logits/rejected": -1.350379467010498, "loss": 1.3735, "step": 231 }, { "beta_dpo/beta_used": 0.022975584492087364, "beta_dpo/beta_used_raw": -0.013602446764707565, "beta_dpo/gap_mean": 15.72067928314209, "beta_dpo/gap_std": 16.699817657470703, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3406754772393539, "grad_norm": 10.909036636352539, "learning_rate": 4.1842391091163933e-07, "logits/chosen": -0.8957525491714478, "logits/rejected": -0.3968271017074585, "loss": 1.1455, "step": 232 }, { "beta_dpo/beta_used": 0.19611503183841705, "beta_dpo/beta_used_raw": 0.19611503183841705, "beta_dpo/gap_mean": 16.100704193115234, "beta_dpo/gap_std": 16.955623626708984, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.342143906020558, "grad_norm": 47.163082122802734, "learning_rate": 4.174733034541245e-07, "logits/chosen": -0.7504737973213196, "logits/rejected": -0.7375265955924988, "loss": 0.5632, "step": 233 }, { "beta_dpo/beta_used": 0.11440300941467285, "beta_dpo/beta_used_raw": -0.004454091191291809, "beta_dpo/gap_mean": 17.05381965637207, "beta_dpo/gap_std": 17.666168212890625, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3436123348017621, "grad_norm": 43.946189880371094, "learning_rate": 4.165182829193126e-07, "logits/chosen": -1.0023349523544312, "logits/rejected": -0.7694397568702698, "loss": 0.9703, "step": 234 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08463907241821289, "beta_dpo/gap_mean": 16.602458953857422, "beta_dpo/gap_std": 17.500028610229492, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.34508076358296624, "grad_norm": 0.5770571827888489, "learning_rate": 4.1555887447288255e-07, "logits/chosen": -0.5492238402366638, "logits/rejected": -0.09408207982778549, "loss": 1.3728, "step": 235 }, { "beta_dpo/beta_used": 0.02071463130414486, "beta_dpo/beta_used_raw": -0.0335993617773056, "beta_dpo/gap_mean": 16.79233741760254, "beta_dpo/gap_std": 18.010557174682617, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3465491923641703, "grad_norm": 10.533134460449219, "learning_rate": 4.1459510339613946e-07, "logits/chosen": -1.1379200220108032, "logits/rejected": -1.1965206861495972, "loss": 1.1522, "step": 236 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1416703164577484, "beta_dpo/gap_mean": 16.931236267089844, "beta_dpo/gap_std": 19.234132766723633, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.34801762114537443, "grad_norm": 0.5797684788703918, "learning_rate": 4.136269950853473e-07, "logits/chosen": -1.082244873046875, "logits/rejected": -0.766608715057373, "loss": 1.3739, "step": 237 }, { "beta_dpo/beta_used": 0.043054625391960144, "beta_dpo/beta_used_raw": 0.043054625391960144, "beta_dpo/gap_mean": 16.68514060974121, "beta_dpo/gap_std": 19.14327049255371, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.34948604992657856, "grad_norm": 15.43837833404541, "learning_rate": 4.126545750510605e-07, "logits/chosen": -1.263676404953003, "logits/rejected": -1.2675501108169556, "loss": 0.9083, "step": 238 }, { "beta_dpo/beta_used": 0.10590211302042007, "beta_dpo/beta_used_raw": -0.05012969672679901, "beta_dpo/gap_mean": 16.43265151977539, "beta_dpo/gap_std": 18.66457748413086, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3509544787077827, "grad_norm": 35.777400970458984, "learning_rate": 4.116778689174514e-07, "logits/chosen": -1.411913275718689, "logits/rejected": -0.9554502964019775, "loss": 0.8536, "step": 239 }, { "beta_dpo/beta_used": 0.0013544057728722692, "beta_dpo/beta_used_raw": -0.018202736973762512, "beta_dpo/gap_mean": 15.894089698791504, "beta_dpo/gap_std": 18.250343322753906, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3524229074889868, "grad_norm": 0.7293118238449097, "learning_rate": 4.106969024216348e-07, "logits/chosen": -1.32597017288208, "logits/rejected": -1.1729243993759155, "loss": 1.3676, "step": 240 }, { "beta_dpo/beta_used": 0.260785311460495, "beta_dpo/beta_used_raw": 0.260785311460495, "beta_dpo/gap_mean": 16.478500366210938, "beta_dpo/gap_std": 18.057748794555664, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.35389133627019087, "grad_norm": 67.17617797851562, "learning_rate": 4.097117014129903e-07, "logits/chosen": -1.8056962490081787, "logits/rejected": -0.977647066116333, "loss": 0.4013, "step": 241 }, { "beta_dpo/beta_used": 0.14323639869689941, "beta_dpo/beta_used_raw": 0.13850098848342896, "beta_dpo/gap_mean": 16.862356185913086, "beta_dpo/gap_std": 18.165666580200195, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.355359765051395, "grad_norm": 49.057254791259766, "learning_rate": 4.087222918524807e-07, "logits/chosen": -1.4334087371826172, "logits/rejected": -1.16968834400177, "loss": 0.8685, "step": 242 }, { "beta_dpo/beta_used": 0.21399369835853577, "beta_dpo/beta_used_raw": 0.21399369835853577, "beta_dpo/gap_mean": 16.813247680664062, "beta_dpo/gap_std": 18.005495071411133, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3568281938325991, "grad_norm": 26.01089859008789, "learning_rate": 4.07728699811968e-07, "logits/chosen": -1.286488652229309, "logits/rejected": -0.5671318173408508, "loss": 0.4084, "step": 243 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07187733054161072, "beta_dpo/gap_mean": 17.121925354003906, "beta_dpo/gap_std": 18.0570068359375, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.35829662261380324, "grad_norm": 0.628965437412262, "learning_rate": 4.067309514735267e-07, "logits/chosen": -2.109622001647949, "logits/rejected": -1.9282748699188232, "loss": 1.3722, "step": 244 }, { "beta_dpo/beta_used": 0.06885449588298798, "beta_dpo/beta_used_raw": 0.008601933717727661, "beta_dpo/gap_mean": 17.39687728881836, "beta_dpo/gap_std": 18.113006591796875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.35976505139500736, "grad_norm": 23.871013641357422, "learning_rate": 4.057290731287531e-07, "logits/chosen": -0.9358984231948853, "logits/rejected": -0.6926010847091675, "loss": 0.8853, "step": 245 }, { "beta_dpo/beta_used": 0.02485613524913788, "beta_dpo/beta_used_raw": -0.1078411415219307, "beta_dpo/gap_mean": 17.159767150878906, "beta_dpo/gap_std": 17.97473907470703, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.36123348017621143, "grad_norm": 11.873096466064453, "learning_rate": 4.047230911780736e-07, "logits/chosen": -1.774381399154663, "logits/rejected": -1.4926104545593262, "loss": 1.0859, "step": 246 }, { "beta_dpo/beta_used": 0.09282705932855606, "beta_dpo/beta_used_raw": 0.05106119439005852, "beta_dpo/gap_mean": 17.497295379638672, "beta_dpo/gap_std": 18.711734771728516, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.36270190895741555, "grad_norm": 16.94463348388672, "learning_rate": 4.0371303213004814e-07, "logits/chosen": -2.484321117401123, "logits/rejected": -1.9094198942184448, "loss": 0.8472, "step": 247 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16252590715885162, "beta_dpo/gap_mean": 17.24135971069336, "beta_dpo/gap_std": 18.60338020324707, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3641703377386197, "grad_norm": 0.5134167075157166, "learning_rate": 4.0269892260067197e-07, "logits/chosen": -1.9478964805603027, "logits/rejected": -1.7901415824890137, "loss": 1.3736, "step": 248 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08990784734487534, "beta_dpo/gap_mean": 16.688066482543945, "beta_dpo/gap_std": 18.753692626953125, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3656387665198238, "grad_norm": 0.5049749612808228, "learning_rate": 4.0168078931267426e-07, "logits/chosen": -1.6443979740142822, "logits/rejected": -1.204066514968872, "loss": 1.3735, "step": 249 }, { "beta_dpo/beta_used": 0.12853886187076569, "beta_dpo/beta_used_raw": 0.11648933589458466, "beta_dpo/gap_mean": 16.183063507080078, "beta_dpo/gap_std": 18.758853912353516, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.3671071953010279, "grad_norm": 69.31681823730469, "learning_rate": 4.006586590948141e-07, "logits/chosen": -1.9143394231796265, "logits/rejected": -1.0378131866455078, "loss": 1.1315, "step": 250 }, { "beta_dpo/beta_used": 0.12471996992826462, "beta_dpo/beta_used_raw": -0.040085241198539734, "beta_dpo/gap_mean": 15.786233901977539, "beta_dpo/gap_std": 18.349742889404297, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.368575624082232, "grad_norm": 38.703819274902344, "learning_rate": 3.9963255888117325e-07, "logits/chosen": -1.8796600103378296, "logits/rejected": -0.8574713468551636, "loss": 0.9538, "step": 251 }, { "beta_dpo/beta_used": 0.03168496862053871, "beta_dpo/beta_used_raw": 0.030149439349770546, "beta_dpo/gap_mean": 15.278026580810547, "beta_dpo/gap_std": 17.97142219543457, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3700440528634361, "grad_norm": 16.276369094848633, "learning_rate": 3.9860251571044666e-07, "logits/chosen": -1.1928433179855347, "logits/rejected": -0.7398958802223206, "loss": 1.1108, "step": 252 }, { "beta_dpo/beta_used": 0.16947996616363525, "beta_dpo/beta_used_raw": 0.09505901485681534, "beta_dpo/gap_mean": 14.929329872131348, "beta_dpo/gap_std": 17.659282684326172, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.37151248164464024, "grad_norm": 44.42042541503906, "learning_rate": 3.9756855672522986e-07, "logits/chosen": -1.2876951694488525, "logits/rejected": -0.822074830532074, "loss": 0.8353, "step": 253 }, { "beta_dpo/beta_used": 0.07628422975540161, "beta_dpo/beta_used_raw": 0.0370587520301342, "beta_dpo/gap_mean": 15.008431434631348, "beta_dpo/gap_std": 17.31850814819336, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.37298091042584436, "grad_norm": 29.85235595703125, "learning_rate": 3.965307091713037e-07, "logits/chosen": -0.848065972328186, "logits/rejected": -0.8653386831283569, "loss": 0.9635, "step": 254 }, { "beta_dpo/beta_used": 0.2053917944431305, "beta_dpo/beta_used_raw": 0.2053917944431305, "beta_dpo/gap_mean": 14.993793487548828, "beta_dpo/gap_std": 17.258270263671875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3744493392070485, "grad_norm": 71.72965240478516, "learning_rate": 3.954890003969163e-07, "logits/chosen": -1.6960498094558716, "logits/rejected": -1.232972264289856, "loss": 0.6151, "step": 255 }, { "beta_dpo/beta_used": 0.24883843958377838, "beta_dpo/beta_used_raw": 0.24883843958377838, "beta_dpo/gap_mean": 15.53835391998291, "beta_dpo/gap_std": 17.70315170288086, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.37591776798825255, "grad_norm": 71.81951904296875, "learning_rate": 3.944434578520628e-07, "logits/chosen": -1.5790761709213257, "logits/rejected": -1.3537611961364746, "loss": 0.4976, "step": 256 }, { "beta_dpo/beta_used": 0.06511445343494415, "beta_dpo/beta_used_raw": 0.02169763669371605, "beta_dpo/gap_mean": 16.239253997802734, "beta_dpo/gap_std": 18.342966079711914, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.37738619676945667, "grad_norm": 42.911102294921875, "learning_rate": 3.933941090877615e-07, "logits/chosen": -1.5112509727478027, "logits/rejected": -0.863937497138977, "loss": 1.0018, "step": 257 }, { "beta_dpo/beta_used": 0.2228226512670517, "beta_dpo/beta_used_raw": 0.2228226512670517, "beta_dpo/gap_mean": 16.093191146850586, "beta_dpo/gap_std": 17.990446090698242, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3788546255506608, "grad_norm": 47.87943649291992, "learning_rate": 3.923409817553284e-07, "logits/chosen": -2.145803451538086, "logits/rejected": -1.6822357177734375, "loss": 0.4068, "step": 258 }, { "beta_dpo/beta_used": 0.016522977501153946, "beta_dpo/beta_used_raw": -0.009284183382987976, "beta_dpo/gap_mean": 16.06846046447754, "beta_dpo/gap_std": 18.156803131103516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3803230543318649, "grad_norm": 9.490212440490723, "learning_rate": 3.9128410360564793e-07, "logits/chosen": -0.6225954294204712, "logits/rejected": 0.12847939133644104, "loss": 1.1857, "step": 259 }, { "beta_dpo/beta_used": 0.05389215424656868, "beta_dpo/beta_used_raw": 0.05389215424656868, "beta_dpo/gap_mean": 15.638516426086426, "beta_dpo/gap_std": 18.49860382080078, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.38179148311306904, "grad_norm": 24.094758987426758, "learning_rate": 3.9022350248844246e-07, "logits/chosen": -0.7264130711555481, "logits/rejected": -0.6564480066299438, "loss": 0.9837, "step": 260 }, { "beta_dpo/beta_used": 0.07325537502765656, "beta_dpo/beta_used_raw": 0.06343812495470047, "beta_dpo/gap_mean": 15.851795196533203, "beta_dpo/gap_std": 18.720157623291016, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3832599118942731, "grad_norm": 28.612140655517578, "learning_rate": 3.891592063515376e-07, "logits/chosen": -2.4056403636932373, "logits/rejected": -2.0737316608428955, "loss": 0.958, "step": 261 }, { "beta_dpo/beta_used": 0.1220453754067421, "beta_dpo/beta_used_raw": 0.07019668817520142, "beta_dpo/gap_mean": 15.816585540771484, "beta_dpo/gap_std": 18.276844024658203, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.38472834067547723, "grad_norm": 30.337125778198242, "learning_rate": 3.880912432401264e-07, "logits/chosen": -1.315596580505371, "logits/rejected": -0.7939830422401428, "loss": 0.8197, "step": 262 }, { "beta_dpo/beta_used": 0.2112702876329422, "beta_dpo/beta_used_raw": 0.2112702876329422, "beta_dpo/gap_mean": 16.783252716064453, "beta_dpo/gap_std": 18.60260772705078, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.38619676945668135, "grad_norm": 81.57725524902344, "learning_rate": 3.870196412960302e-07, "logits/chosen": -2.479870080947876, "logits/rejected": -1.969857931137085, "loss": 0.6229, "step": 263 }, { "beta_dpo/beta_used": 0.14798879623413086, "beta_dpo/beta_used_raw": 0.14798879623413086, "beta_dpo/gap_mean": 17.149694442749023, "beta_dpo/gap_std": 18.860740661621094, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3876651982378855, "grad_norm": 41.35186767578125, "learning_rate": 3.8594442875695665e-07, "logits/chosen": -2.416067600250244, "logits/rejected": -1.8507721424102783, "loss": 0.5789, "step": 264 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15261715650558472, "beta_dpo/gap_mean": 17.3541259765625, "beta_dpo/gap_std": 19.03475570678711, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3891336270190896, "grad_norm": 0.48281610012054443, "learning_rate": 3.848656339557562e-07, "logits/chosen": -1.9840517044067383, "logits/rejected": -1.0997028350830078, "loss": 1.3732, "step": 265 }, { "beta_dpo/beta_used": 0.028951261192560196, "beta_dpo/beta_used_raw": 0.009278932586312294, "beta_dpo/gap_mean": 16.781330108642578, "beta_dpo/gap_std": 18.97824478149414, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.39060205580029367, "grad_norm": 14.177918434143066, "learning_rate": 3.8378328531967507e-07, "logits/chosen": -1.7325778007507324, "logits/rejected": -0.572989821434021, "loss": 1.1, "step": 266 }, { "beta_dpo/beta_used": 0.08866294473409653, "beta_dpo/beta_used_raw": 0.08866294473409653, "beta_dpo/gap_mean": 17.111412048339844, "beta_dpo/gap_std": 18.73262596130371, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.3920704845814978, "grad_norm": 28.271018981933594, "learning_rate": 3.8269741136960646e-07, "logits/chosen": -1.7354084253311157, "logits/rejected": -1.1105022430419922, "loss": 0.7697, "step": 267 }, { "beta_dpo/beta_used": 0.05076988786458969, "beta_dpo/beta_used_raw": 0.05076988786458969, "beta_dpo/gap_mean": 17.12268829345703, "beta_dpo/gap_std": 18.194721221923828, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3935389133627019, "grad_norm": 16.701509475708008, "learning_rate": 3.8160804071933894e-07, "logits/chosen": -0.7248194217681885, "logits/rejected": -0.3424651622772217, "loss": 0.8855, "step": 268 }, { "beta_dpo/beta_used": 0.10103872418403625, "beta_dpo/beta_used_raw": 0.10103872418403625, "beta_dpo/gap_mean": 17.397462844848633, "beta_dpo/gap_std": 18.34782600402832, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.39500734214390604, "grad_norm": 29.299842834472656, "learning_rate": 3.8051520207480204e-07, "logits/chosen": -2.2202186584472656, "logits/rejected": -1.583660364151001, "loss": 0.8166, "step": 269 }, { "beta_dpo/beta_used": 0.11783033609390259, "beta_dpo/beta_used_raw": 0.01920953392982483, "beta_dpo/gap_mean": 17.6357421875, "beta_dpo/gap_std": 18.888328552246094, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3964757709251101, "grad_norm": 49.17156982421875, "learning_rate": 3.794189242333106e-07, "logits/chosen": -2.5414323806762695, "logits/rejected": -1.5535459518432617, "loss": 0.9573, "step": 270 }, { "beta_dpo/beta_used": 0.2534657418727875, "beta_dpo/beta_used_raw": 0.2534657418727875, "beta_dpo/gap_mean": 17.942276000976562, "beta_dpo/gap_std": 19.22007179260254, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.39794419970631423, "grad_norm": 39.75117492675781, "learning_rate": 3.7831923608280514e-07, "logits/chosen": -1.6566705703735352, "logits/rejected": -0.9555039405822754, "loss": 0.2862, "step": 271 }, { "beta_dpo/beta_used": 0.05589652433991432, "beta_dpo/beta_used_raw": 0.03210698813199997, "beta_dpo/gap_mean": 18.281635284423828, "beta_dpo/gap_std": 19.472030639648438, "beta_dpo/mask_keep_frac": 0.5, "epoch": 0.39941262848751835, "grad_norm": 23.56406021118164, "learning_rate": 3.772161666010912e-07, "logits/chosen": -2.1835896968841553, "logits/rejected": -2.0108699798583984, "loss": 0.9745, "step": 272 }, { "beta_dpo/beta_used": 0.1337103396654129, "beta_dpo/beta_used_raw": 0.1337103396654129, "beta_dpo/gap_mean": 18.84831428527832, "beta_dpo/gap_std": 19.925540924072266, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4008810572687225, "grad_norm": 59.734561920166016, "learning_rate": 3.761097448550755e-07, "logits/chosen": -1.7125663757324219, "logits/rejected": -1.1651277542114258, "loss": 0.7957, "step": 273 }, { "beta_dpo/beta_used": 0.024842973798513412, "beta_dpo/beta_used_raw": -0.06161361187696457, "beta_dpo/gap_mean": 18.44357681274414, "beta_dpo/gap_std": 19.366474151611328, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4023494860499266, "grad_norm": 11.591614723205566, "learning_rate": 3.75e-07, "logits/chosen": -1.2841222286224365, "logits/rejected": -1.130070447921753, "loss": 1.0872, "step": 274 }, { "beta_dpo/beta_used": 0.18502850830554962, "beta_dpo/beta_used_raw": 0.13920262455940247, "beta_dpo/gap_mean": 17.91891860961914, "beta_dpo/gap_std": 18.584510803222656, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.40381791483113066, "grad_norm": 72.93920135498047, "learning_rate": 3.738869612786737e-07, "logits/chosen": -2.4037299156188965, "logits/rejected": -2.062166690826416, "loss": 0.832, "step": 275 }, { "beta_dpo/beta_used": 0.0947640910744667, "beta_dpo/beta_used_raw": -0.02612786740064621, "beta_dpo/gap_mean": 17.996925354003906, "beta_dpo/gap_std": 18.459285736083984, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4052863436123348, "grad_norm": 47.03031921386719, "learning_rate": 3.7277065802070204e-07, "logits/chosen": -1.2630318403244019, "logits/rejected": -1.0531865358352661, "loss": 0.9701, "step": 276 }, { "beta_dpo/beta_used": 0.12786605954170227, "beta_dpo/beta_used_raw": 0.12454156577587128, "beta_dpo/gap_mean": 17.348060607910156, "beta_dpo/gap_std": 17.54006576538086, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4067547723935389, "grad_norm": 19.697120666503906, "learning_rate": 3.71651119641714e-07, "logits/chosen": -1.6277648210525513, "logits/rejected": -1.226701021194458, "loss": 0.7939, "step": 277 }, { "beta_dpo/beta_used": 0.19851908087730408, "beta_dpo/beta_used_raw": 0.12097430229187012, "beta_dpo/gap_mean": 17.38713836669922, "beta_dpo/gap_std": 17.585594177246094, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.40822320117474303, "grad_norm": 76.3206787109375, "learning_rate": 3.705283756425872e-07, "logits/chosen": -2.005500316619873, "logits/rejected": -1.8199653625488281, "loss": 0.9127, "step": 278 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11197362095117569, "beta_dpo/gap_mean": 17.153640747070312, "beta_dpo/gap_std": 17.83950424194336, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.40969162995594716, "grad_norm": 0.5505157113075256, "learning_rate": 3.6940245560867e-07, "logits/chosen": -1.1522029638290405, "logits/rejected": -0.8978596329689026, "loss": 1.3723, "step": 279 }, { "beta_dpo/beta_used": 0.22522485256195068, "beta_dpo/beta_used_raw": 0.22522485256195068, "beta_dpo/gap_mean": 17.42764663696289, "beta_dpo/gap_std": 17.832962036132812, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.4111600587371512, "grad_norm": 60.16712188720703, "learning_rate": 3.6827338920900253e-07, "logits/chosen": -2.581055164337158, "logits/rejected": -2.898463487625122, "loss": 0.4076, "step": 280 }, { "beta_dpo/beta_used": 0.17530831694602966, "beta_dpo/beta_used_raw": 0.17530831694602966, "beta_dpo/gap_mean": 18.07257843017578, "beta_dpo/gap_std": 18.202064514160156, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.41262848751835535, "grad_norm": 35.21115493774414, "learning_rate": 3.6714120619553435e-07, "logits/chosen": -1.638143539428711, "logits/rejected": -1.3213109970092773, "loss": 0.4081, "step": 281 }, { "beta_dpo/beta_used": 0.07618734985589981, "beta_dpo/beta_used_raw": 0.003689289093017578, "beta_dpo/gap_mean": 18.907608032226562, "beta_dpo/gap_std": 18.572853088378906, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.41409691629955947, "grad_norm": 33.21187973022461, "learning_rate": 3.660059364023408e-07, "logits/chosen": -1.9029979705810547, "logits/rejected": -1.3287708759307861, "loss": 0.8627, "step": 282 }, { "beta_dpo/beta_used": 0.030037278309464455, "beta_dpo/beta_used_raw": -0.004605751484632492, "beta_dpo/gap_mean": 18.53631591796875, "beta_dpo/gap_std": 18.78057861328125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4155653450807636, "grad_norm": 13.139922142028809, "learning_rate": 3.6486760974483685e-07, "logits/chosen": -2.0984127521514893, "logits/rejected": -1.6443045139312744, "loss": 1.0425, "step": 283 }, { "beta_dpo/beta_used": 0.06725043058395386, "beta_dpo/beta_used_raw": 0.03131512925028801, "beta_dpo/gap_mean": 19.09128189086914, "beta_dpo/gap_std": 19.074609756469727, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4170337738619677, "grad_norm": 14.876636505126953, "learning_rate": 3.6372625621898863e-07, "logits/chosen": -1.9047149419784546, "logits/rejected": -1.4838480949401855, "loss": 0.8712, "step": 284 }, { "beta_dpo/beta_used": 0.10819166898727417, "beta_dpo/beta_used_raw": 0.10372734069824219, "beta_dpo/gap_mean": 18.893922805786133, "beta_dpo/gap_std": 18.592544555664062, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.4185022026431718, "grad_norm": 15.111198425292969, "learning_rate": 3.625819059005228e-07, "logits/chosen": -1.871731162071228, "logits/rejected": -2.0776164531707764, "loss": 0.741, "step": 285 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.033716779202222824, "beta_dpo/gap_mean": 18.982357025146484, "beta_dpo/gap_std": 18.396522521972656, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.4199706314243759, "grad_norm": 0.6566715836524963, "learning_rate": 3.614345889441346e-07, "logits/chosen": -2.1930174827575684, "logits/rejected": -1.613207459449768, "loss": 1.37, "step": 286 }, { "beta_dpo/beta_used": 0.09730731695890427, "beta_dpo/beta_used_raw": -0.03323051333427429, "beta_dpo/gap_mean": 18.03030014038086, "beta_dpo/gap_std": 18.275131225585938, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42143906020558003, "grad_norm": 25.1693172454834, "learning_rate": 3.6028433558269275e-07, "logits/chosen": -1.2699886560440063, "logits/rejected": -0.5031905770301819, "loss": 0.859, "step": 287 }, { "beta_dpo/beta_used": 0.12815497815608978, "beta_dpo/beta_used_raw": 0.0743340253829956, "beta_dpo/gap_mean": 18.27791976928711, "beta_dpo/gap_std": 18.494709014892578, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42290748898678415, "grad_norm": 64.22969818115234, "learning_rate": 3.5913117612644327e-07, "logits/chosen": -1.7721920013427734, "logits/rejected": -1.6445941925048828, "loss": 0.9294, "step": 288 }, { "beta_dpo/beta_used": 0.041309650987386703, "beta_dpo/beta_used_raw": -0.15388831496238708, "beta_dpo/gap_mean": 18.72998046875, "beta_dpo/gap_std": 19.468055725097656, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4243759177679883, "grad_norm": 14.944448471069336, "learning_rate": 3.5797514096221024e-07, "logits/chosen": -1.7737936973571777, "logits/rejected": -1.3258979320526123, "loss": 1.0116, "step": 289 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19734390079975128, "beta_dpo/gap_mean": 19.377426147460938, "beta_dpo/gap_std": 20.422447204589844, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.42584434654919234, "grad_norm": 0.564625084400177, "learning_rate": 3.568162605525952e-07, "logits/chosen": -1.156833529472351, "logits/rejected": -1.211308479309082, "loss": 1.3719, "step": 290 }, { "beta_dpo/beta_used": 0.06650421768426895, "beta_dpo/beta_used_raw": 0.031241487711668015, "beta_dpo/gap_mean": 19.186107635498047, "beta_dpo/gap_std": 20.719282150268555, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.42731277533039647, "grad_norm": 20.972444534301758, "learning_rate": 3.5565456543517485e-07, "logits/chosen": -1.7543714046478271, "logits/rejected": -1.8473269939422607, "loss": 0.8817, "step": 291 }, { "beta_dpo/beta_used": 0.1455276906490326, "beta_dpo/beta_used_raw": 0.1455276906490326, "beta_dpo/gap_mean": 19.42520523071289, "beta_dpo/gap_std": 19.865537643432617, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4287812041116006, "grad_norm": 48.049530029296875, "learning_rate": 3.5449008622169583e-07, "logits/chosen": -1.8504797220230103, "logits/rejected": -1.318284511566162, "loss": 0.4908, "step": 292 }, { "beta_dpo/beta_used": 0.04769207909703255, "beta_dpo/beta_used_raw": -0.17275524139404297, "beta_dpo/gap_mean": 18.744335174560547, "beta_dpo/gap_std": 19.63604736328125, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4302496328928047, "grad_norm": 17.39205551147461, "learning_rate": 3.5332285359726846e-07, "logits/chosen": -1.9810543060302734, "logits/rejected": -2.0150904655456543, "loss": 1.0056, "step": 293 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19172877073287964, "beta_dpo/gap_mean": 18.067184448242188, "beta_dpo/gap_std": 19.677724838256836, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.43171806167400884, "grad_norm": 0.4995046854019165, "learning_rate": 3.5215289831955786e-07, "logits/chosen": -1.750572681427002, "logits/rejected": -1.3933688402175903, "loss": 1.3732, "step": 294 }, { "beta_dpo/beta_used": 0.07876399159431458, "beta_dpo/beta_used_raw": 0.015555135905742645, "beta_dpo/gap_mean": 17.91991424560547, "beta_dpo/gap_std": 19.704513549804688, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.4331864904552129, "grad_norm": 19.872262954711914, "learning_rate": 3.509802512179737e-07, "logits/chosen": -1.6054997444152832, "logits/rejected": -1.2405972480773926, "loss": 0.8559, "step": 295 }, { "beta_dpo/beta_used": 0.032191064208745956, "beta_dpo/beta_used_raw": 0.032191064208745956, "beta_dpo/gap_mean": 18.38714599609375, "beta_dpo/gap_std": 20.385866165161133, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.434654919236417, "grad_norm": 12.911087989807129, "learning_rate": 3.498049431928577e-07, "logits/chosen": -2.647209882736206, "logits/rejected": -2.0055761337280273, "loss": 0.9563, "step": 296 }, { "beta_dpo/beta_used": 0.08515140414237976, "beta_dpo/beta_used_raw": 0.07509875297546387, "beta_dpo/gap_mean": 18.416446685791016, "beta_dpo/gap_std": 20.195480346679688, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.43612334801762115, "grad_norm": 20.788816452026367, "learning_rate": 3.486270052146694e-07, "logits/chosen": -0.4445190131664276, "logits/rejected": -0.14910635352134705, "loss": 0.8408, "step": 297 }, { "beta_dpo/beta_used": 0.054413195699453354, "beta_dpo/beta_used_raw": 0.0014029070734977722, "beta_dpo/gap_mean": 18.70124626159668, "beta_dpo/gap_std": 20.339801788330078, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.43759177679882527, "grad_norm": 24.009151458740234, "learning_rate": 3.474464683231698e-07, "logits/chosen": -2.56980562210083, "logits/rejected": -2.0647857189178467, "loss": 1.0073, "step": 298 }, { "beta_dpo/beta_used": 0.15039290487766266, "beta_dpo/beta_used_raw": 0.13273771107196808, "beta_dpo/gap_mean": 19.192279815673828, "beta_dpo/gap_std": 20.42599105834961, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4390602055800294, "grad_norm": 46.7759895324707, "learning_rate": 3.462633636266041e-07, "logits/chosen": -1.3872895240783691, "logits/rejected": -1.2783632278442383, "loss": 0.884, "step": 299 }, { "beta_dpo/beta_used": 0.08607316762208939, "beta_dpo/beta_used_raw": 0.08607316762208939, "beta_dpo/gap_mean": 19.255733489990234, "beta_dpo/gap_std": 20.56812286376953, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.44052863436123346, "grad_norm": 28.43634796142578, "learning_rate": 3.4507772230088147e-07, "logits/chosen": -1.5185415744781494, "logits/rejected": -1.420533299446106, "loss": 0.6858, "step": 300 }, { "epoch": 0.44052863436123346, "eval_beta_dpo/beta_used": 0.014630923978984356, "eval_beta_dpo/beta_used_raw": -0.20379017293453217, "eval_beta_dpo/gap_mean": 19.238937377929688, "eval_beta_dpo/gap_std": 20.832778930664062, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -1.4133440256118774, "eval_logits/rejected": -1.1125527620315552, "eval_loss": 0.6556317806243896, "eval_runtime": 44.1723, "eval_samples_per_second": 52.952, "eval_steps_per_second": 1.675, "step": 300 }, { "beta_dpo/beta_used": 0.16168291866779327, "beta_dpo/beta_used_raw": 0.16168291866779327, "beta_dpo/gap_mean": 19.706905364990234, "beta_dpo/gap_std": 20.81890869140625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4419970631424376, "grad_norm": 40.82109832763672, "learning_rate": 3.4388957558875316e-07, "logits/chosen": -1.3794118165969849, "logits/rejected": -0.6710242033004761, "loss": 0.3912, "step": 301 }, { "beta_dpo/beta_used": 0.04608767479658127, "beta_dpo/beta_used_raw": -0.05097521096467972, "beta_dpo/gap_mean": 19.474781036376953, "beta_dpo/gap_std": 20.766199111938477, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4434654919236417, "grad_norm": 20.73455810546875, "learning_rate": 3.426989547989902e-07, "logits/chosen": -1.3489367961883545, "logits/rejected": -1.0147000551223755, "loss": 0.9427, "step": 302 }, { "beta_dpo/beta_used": 0.19274744391441345, "beta_dpo/beta_used_raw": 0.19274744391441345, "beta_dpo/gap_mean": 19.43286895751953, "beta_dpo/gap_std": 20.82029914855957, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.44493392070484583, "grad_norm": 67.36486053466797, "learning_rate": 3.4150589130555773e-07, "logits/chosen": -1.1536281108856201, "logits/rejected": -1.2038754224777222, "loss": 0.5521, "step": 303 }, { "beta_dpo/beta_used": 0.22337572276592255, "beta_dpo/beta_used_raw": 0.22337572276592255, "beta_dpo/gap_mean": 19.48892593383789, "beta_dpo/gap_std": 20.469839096069336, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.44640234948604995, "grad_norm": 40.49778366088867, "learning_rate": 3.403104165467883e-07, "logits/chosen": -1.9870970249176025, "logits/rejected": -1.7439109086990356, "loss": 0.4345, "step": 304 }, { "beta_dpo/beta_used": 0.23246397078037262, "beta_dpo/beta_used_raw": 0.0920034795999527, "beta_dpo/gap_mean": 19.861440658569336, "beta_dpo/gap_std": 20.17599105834961, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.447870778267254, "grad_norm": 46.85494613647461, "learning_rate": 3.391125620245535e-07, "logits/chosen": -1.7749977111816406, "logits/rejected": -0.8431670069694519, "loss": 0.8585, "step": 305 }, { "beta_dpo/beta_used": 0.19380076229572296, "beta_dpo/beta_used_raw": 0.19380076229572296, "beta_dpo/gap_mean": 20.399768829345703, "beta_dpo/gap_std": 19.314353942871094, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.44933920704845814, "grad_norm": 19.110017776489258, "learning_rate": 3.3791235930343417e-07, "logits/chosen": -1.5655086040496826, "logits/rejected": -1.0931357145309448, "loss": 0.1871, "step": 306 }, { "beta_dpo/beta_used": 0.08654443174600601, "beta_dpo/beta_used_raw": 0.08654443174600601, "beta_dpo/gap_mean": 20.135807037353516, "beta_dpo/gap_std": 19.544708251953125, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.45080763582966227, "grad_norm": 41.54376220703125, "learning_rate": 3.367098400098881e-07, "logits/chosen": -1.68919038772583, "logits/rejected": -1.4870936870574951, "loss": 0.7641, "step": 307 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.062202103435993195, "beta_dpo/gap_mean": 19.679759979248047, "beta_dpo/gap_std": 19.862483978271484, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4522760646108664, "grad_norm": 0.8234802484512329, "learning_rate": 3.355050358314172e-07, "logits/chosen": -1.5247477293014526, "logits/rejected": -1.2153220176696777, "loss": 1.3695, "step": 308 }, { "beta_dpo/beta_used": 0.10133226960897446, "beta_dpo/beta_used_raw": 0.10133226960897446, "beta_dpo/gap_mean": 19.294086456298828, "beta_dpo/gap_std": 19.56965446472168, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.45374449339207046, "grad_norm": 36.94253158569336, "learning_rate": 3.3429797851573183e-07, "logits/chosen": -1.8994240760803223, "logits/rejected": -1.6872971057891846, "loss": 0.7467, "step": 309 }, { "beta_dpo/beta_used": 0.07378049194812775, "beta_dpo/beta_used_raw": 0.0014588460326194763, "beta_dpo/gap_mean": 18.949199676513672, "beta_dpo/gap_std": 19.642480850219727, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4552129221732746, "grad_norm": 29.535703659057617, "learning_rate": 3.3308869986991487e-07, "logits/chosen": -1.4717403650283813, "logits/rejected": -1.187292218208313, "loss": 0.9784, "step": 310 }, { "beta_dpo/beta_used": 0.035632696002721786, "beta_dpo/beta_used_raw": -0.04914240911602974, "beta_dpo/gap_mean": 19.039033889770508, "beta_dpo/gap_std": 19.660785675048828, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4566813509544787, "grad_norm": 18.50994110107422, "learning_rate": 3.3187723175958346e-07, "logits/chosen": -1.74489426612854, "logits/rejected": -0.8437553644180298, "loss": 1.0601, "step": 311 }, { "beta_dpo/beta_used": 0.26365670561790466, "beta_dpo/beta_used_raw": 0.26365670561790466, "beta_dpo/gap_mean": 19.256107330322266, "beta_dpo/gap_std": 19.863792419433594, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.4581497797356828, "grad_norm": 57.05311965942383, "learning_rate": 3.306636061080487e-07, "logits/chosen": -1.6745738983154297, "logits/rejected": -1.0429904460906982, "loss": 0.4036, "step": 312 }, { "beta_dpo/beta_used": 0.027451997622847557, "beta_dpo/beta_used_raw": 0.0052652303129434586, "beta_dpo/gap_mean": 19.125635147094727, "beta_dpo/gap_std": 19.95581817626953, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.45961820851688695, "grad_norm": 18.35624885559082, "learning_rate": 3.2944785489547537e-07, "logits/chosen": -1.5230427980422974, "logits/rejected": -1.2524843215942383, "loss": 1.0697, "step": 313 }, { "beta_dpo/beta_used": 0.11080139130353928, "beta_dpo/beta_used_raw": 0.11080139130353928, "beta_dpo/gap_mean": 19.45569610595703, "beta_dpo/gap_std": 20.40787124633789, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.461086637298091, "grad_norm": 36.80620574951172, "learning_rate": 3.2823001015803857e-07, "logits/chosen": -3.1518020629882812, "logits/rejected": -2.7199316024780273, "loss": 0.7265, "step": 314 }, { "beta_dpo/beta_used": 0.10506674647331238, "beta_dpo/beta_used_raw": 0.033276014029979706, "beta_dpo/gap_mean": 19.23229217529297, "beta_dpo/gap_std": 20.247135162353516, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.46255506607929514, "grad_norm": 33.03825378417969, "learning_rate": 3.270101039870797e-07, "logits/chosen": -1.4115521907806396, "logits/rejected": -1.6197593212127686, "loss": 0.9081, "step": 315 }, { "beta_dpo/beta_used": 0.04140298068523407, "beta_dpo/beta_used_raw": 0.01636468805372715, "beta_dpo/gap_mean": 18.893306732177734, "beta_dpo/gap_std": 20.267620086669922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.46402349486049926, "grad_norm": 23.41454315185547, "learning_rate": 3.2578816852826086e-07, "logits/chosen": -1.0961880683898926, "logits/rejected": -1.3026573657989502, "loss": 1.0036, "step": 316 }, { "beta_dpo/beta_used": 0.19719788432121277, "beta_dpo/beta_used_raw": 0.19719788432121277, "beta_dpo/gap_mean": 20.042945861816406, "beta_dpo/gap_std": 20.478172302246094, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4654919236417034, "grad_norm": 48.166709899902344, "learning_rate": 3.2456423598071783e-07, "logits/chosen": -2.067380905151367, "logits/rejected": -1.5481832027435303, "loss": 0.6744, "step": 317 }, { "beta_dpo/beta_used": 0.09255687892436981, "beta_dpo/beta_used_raw": 0.09255687892436981, "beta_dpo/gap_mean": 20.607425689697266, "beta_dpo/gap_std": 20.436546325683594, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4669603524229075, "grad_norm": 22.07493019104004, "learning_rate": 3.233383385962115e-07, "logits/chosen": -1.4881207942962646, "logits/rejected": -1.3716187477111816, "loss": 0.5404, "step": 318 }, { "beta_dpo/beta_used": 0.04406355693936348, "beta_dpo/beta_used_raw": 0.030920254066586494, "beta_dpo/gap_mean": 20.835041046142578, "beta_dpo/gap_std": 20.766437530517578, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4684287812041116, "grad_norm": 16.44093132019043, "learning_rate": 3.2211050867827805e-07, "logits/chosen": -1.8453693389892578, "logits/rejected": -1.4117746353149414, "loss": 0.9557, "step": 319 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.02245442382991314, "beta_dpo/gap_mean": 20.911170959472656, "beta_dpo/gap_std": 20.606945037841797, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4698972099853157, "grad_norm": 0.6617114543914795, "learning_rate": 3.208807785813777e-07, "logits/chosen": -2.3459713459014893, "logits/rejected": -2.076826572418213, "loss": 1.3674, "step": 320 }, { "beta_dpo/beta_used": 0.20259536802768707, "beta_dpo/beta_used_raw": 0.17031897604465485, "beta_dpo/gap_mean": 21.156105041503906, "beta_dpo/gap_std": 20.89751625061035, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4713656387665198, "grad_norm": 83.28850555419922, "learning_rate": 3.1964918071004217e-07, "logits/chosen": -1.2465331554412842, "logits/rejected": -0.7227134704589844, "loss": 1.1559, "step": 321 }, { "beta_dpo/beta_used": 0.16320009529590607, "beta_dpo/beta_used_raw": 0.1414235532283783, "beta_dpo/gap_mean": 21.687091827392578, "beta_dpo/gap_std": 21.04585075378418, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.47283406754772395, "grad_norm": 54.43437576293945, "learning_rate": 3.184157475180207e-07, "logits/chosen": -1.6666946411132812, "logits/rejected": -1.5545620918273926, "loss": 0.894, "step": 322 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03169306740164757, "beta_dpo/gap_mean": 21.5245361328125, "beta_dpo/gap_std": 20.66463851928711, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.47430249632892807, "grad_norm": 0.6268118619918823, "learning_rate": 3.171805115074251e-07, "logits/chosen": -1.8980016708374023, "logits/rejected": -1.7553255558013916, "loss": 1.3672, "step": 323 }, { "beta_dpo/beta_used": 0.09208405017852783, "beta_dpo/beta_used_raw": 0.036237284541130066, "beta_dpo/gap_mean": 21.520673751831055, "beta_dpo/gap_std": 20.412513732910156, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.47577092511013214, "grad_norm": 9.876269340515137, "learning_rate": 3.1594350522787295e-07, "logits/chosen": -2.019019365310669, "logits/rejected": -1.5792837142944336, "loss": 0.7511, "step": 324 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.023323828354477882, "beta_dpo/gap_mean": 21.458913803100586, "beta_dpo/gap_std": 20.22439956665039, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.47723935389133626, "grad_norm": 0.5373139977455139, "learning_rate": 3.147047612756302e-07, "logits/chosen": -1.8162975311279297, "logits/rejected": -1.1010674238204956, "loss": 1.3667, "step": 325 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14518536627292633, "beta_dpo/gap_mean": 20.929393768310547, "beta_dpo/gap_std": 19.83502197265625, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.4787077826725404, "grad_norm": 0.5610789060592651, "learning_rate": 3.134643122927519e-07, "logits/chosen": -2.363149642944336, "logits/rejected": -1.5660017728805542, "loss": 1.3703, "step": 326 }, { "beta_dpo/beta_used": 0.08452271670103073, "beta_dpo/beta_used_raw": 0.07866226136684418, "beta_dpo/gap_mean": 21.300310134887695, "beta_dpo/gap_std": 20.118789672851562, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4801762114537445, "grad_norm": 44.85610580444336, "learning_rate": 3.1222219096622264e-07, "logits/chosen": -2.5472209453582764, "logits/rejected": -2.0613203048706055, "loss": 0.9766, "step": 327 }, { "beta_dpo/beta_used": 0.13179154694080353, "beta_dpo/beta_used_raw": 0.07303038239479065, "beta_dpo/gap_mean": 21.73531150817871, "beta_dpo/gap_std": 20.580921173095703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.48164464023494863, "grad_norm": 38.4698600769043, "learning_rate": 3.1097843002709427e-07, "logits/chosen": -0.9203963875770569, "logits/rejected": -0.6813238859176636, "loss": 0.8666, "step": 328 }, { "beta_dpo/beta_used": 0.1431805044412613, "beta_dpo/beta_used_raw": 0.1339118927717209, "beta_dpo/gap_mean": 22.007827758789062, "beta_dpo/gap_std": 20.543258666992188, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4831130690161527, "grad_norm": 55.4683952331543, "learning_rate": 3.0973306224962437e-07, "logits/chosen": -1.8243393898010254, "logits/rejected": -1.4740062952041626, "loss": 0.8328, "step": 329 }, { "beta_dpo/beta_used": 0.06974449753761292, "beta_dpo/beta_used_raw": 0.05155743658542633, "beta_dpo/gap_mean": 21.95925521850586, "beta_dpo/gap_std": 20.08019256591797, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4845814977973568, "grad_norm": 14.09010124206543, "learning_rate": 3.084861204504122e-07, "logits/chosen": -0.7235432267189026, "logits/rejected": -0.7327295541763306, "loss": 0.8165, "step": 330 }, { "beta_dpo/beta_used": 0.14360512793064117, "beta_dpo/beta_used_raw": 0.10686216503381729, "beta_dpo/gap_mean": 22.12273406982422, "beta_dpo/gap_std": 19.910371780395508, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.48604992657856094, "grad_norm": 49.20895767211914, "learning_rate": 3.072376374875335e-07, "logits/chosen": -1.7908895015716553, "logits/rejected": -1.774251937866211, "loss": 0.8817, "step": 331 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21645134687423706, "beta_dpo/gap_mean": 21.41480827331543, "beta_dpo/gap_std": 19.71676254272461, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.48751835535976507, "grad_norm": 0.5669689774513245, "learning_rate": 3.059876462596758e-07, "logits/chosen": -3.0013060569763184, "logits/rejected": -2.7014455795288086, "loss": 1.3702, "step": 332 }, { "beta_dpo/beta_used": 0.26773303747177124, "beta_dpo/beta_used_raw": 0.26773303747177124, "beta_dpo/gap_mean": 21.477094650268555, "beta_dpo/gap_std": 20.913185119628906, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4889867841409692, "grad_norm": 63.755741119384766, "learning_rate": 3.0473617970527015e-07, "logits/chosen": -2.101442575454712, "logits/rejected": -1.8676471710205078, "loss": 0.5863, "step": 333 }, { "beta_dpo/beta_used": 0.03854161128401756, "beta_dpo/beta_used_raw": -0.0196581669151783, "beta_dpo/gap_mean": 21.293357849121094, "beta_dpo/gap_std": 21.081459045410156, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.49045521292217326, "grad_norm": 15.362908363342285, "learning_rate": 3.034832708016243e-07, "logits/chosen": -1.1139665842056274, "logits/rejected": -0.9013174772262573, "loss": 0.9373, "step": 334 }, { "beta_dpo/beta_used": 0.00933966413140297, "beta_dpo/beta_used_raw": -0.07118765264749527, "beta_dpo/gap_mean": 20.568031311035156, "beta_dpo/gap_std": 20.87885093688965, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4919236417033774, "grad_norm": 6.62734842300415, "learning_rate": 3.022289525640531e-07, "logits/chosen": -2.6309964656829834, "logits/rejected": -1.9602861404418945, "loss": 1.2298, "step": 335 }, { "beta_dpo/beta_used": 0.09196795523166656, "beta_dpo/beta_used_raw": 0.046571940183639526, "beta_dpo/gap_mean": 20.350025177001953, "beta_dpo/gap_std": 21.055912017822266, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4933920704845815, "grad_norm": 31.968475341796875, "learning_rate": 3.009732580450086e-07, "logits/chosen": -1.0544962882995605, "logits/rejected": -1.6155023574829102, "loss": 0.8782, "step": 336 }, { "beta_dpo/beta_used": 0.039065878838300705, "beta_dpo/beta_used_raw": 0.039065878838300705, "beta_dpo/gap_mean": 20.489887237548828, "beta_dpo/gap_std": 21.286602020263672, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4948604992657856, "grad_norm": 17.768884658813477, "learning_rate": 2.9971622033320914e-07, "logits/chosen": -2.2330615520477295, "logits/rejected": -1.8793858289718628, "loss": 0.8887, "step": 337 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03587936982512474, "beta_dpo/gap_mean": 20.25599479675293, "beta_dpo/gap_std": 20.72097396850586, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.49632892804698975, "grad_norm": 0.6294784545898438, "learning_rate": 2.984578725527675e-07, "logits/chosen": -2.2754828929901123, "logits/rejected": -1.6043195724487305, "loss": 1.3685, "step": 338 }, { "beta_dpo/beta_used": 0.21255150437355042, "beta_dpo/beta_used_raw": 0.21255150437355042, "beta_dpo/gap_mean": 20.265918731689453, "beta_dpo/gap_std": 20.91079330444336, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.4977973568281938, "grad_norm": 38.84512710571289, "learning_rate": 2.9719824786231796e-07, "logits/chosen": -2.450939178466797, "logits/rejected": -1.6690856218338013, "loss": 0.3035, "step": 339 }, { "beta_dpo/beta_used": 0.0905815064907074, "beta_dpo/beta_used_raw": 0.05626142397522926, "beta_dpo/gap_mean": 20.259986877441406, "beta_dpo/gap_std": 20.784656524658203, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.49926578560939794, "grad_norm": 42.127742767333984, "learning_rate": 2.959373794541426e-07, "logits/chosen": -0.23517589271068573, "logits/rejected": -0.11417008936405182, "loss": 0.9428, "step": 340 }, { "beta_dpo/beta_used": 0.14896519482135773, "beta_dpo/beta_used_raw": 0.1087593138217926, "beta_dpo/gap_mean": 20.340232849121094, "beta_dpo/gap_std": 20.545101165771484, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5007342143906021, "grad_norm": 49.7312126159668, "learning_rate": 2.946753005532965e-07, "logits/chosen": -1.1599042415618896, "logits/rejected": -0.9632190465927124, "loss": 0.8317, "step": 341 }, { "beta_dpo/beta_used": 0.21445688605308533, "beta_dpo/beta_used_raw": 0.21445688605308533, "beta_dpo/gap_mean": 21.331052780151367, "beta_dpo/gap_std": 20.33488655090332, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5022026431718062, "grad_norm": 77.49415588378906, "learning_rate": 2.934120444167326e-07, "logits/chosen": -1.7246596813201904, "logits/rejected": -0.9319741129875183, "loss": 0.3666, "step": 342 }, { "beta_dpo/beta_used": 0.1534561663866043, "beta_dpo/beta_used_raw": 0.1534561663866043, "beta_dpo/gap_mean": 21.743297576904297, "beta_dpo/gap_std": 19.88051986694336, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5036710719530103, "grad_norm": 30.1379451751709, "learning_rate": 2.9214764433242476e-07, "logits/chosen": -2.0510997772216797, "logits/rejected": -1.7147619724273682, "loss": 0.6043, "step": 343 }, { "beta_dpo/beta_used": 0.14332126080989838, "beta_dpo/beta_used_raw": 0.07776373624801636, "beta_dpo/gap_mean": 22.24991226196289, "beta_dpo/gap_std": 20.18541145324707, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5051395007342144, "grad_norm": 17.113056182861328, "learning_rate": 2.9088213361849126e-07, "logits/chosen": -1.8417720794677734, "logits/rejected": -1.6827924251556396, "loss": 0.7821, "step": 344 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19198331236839294, "beta_dpo/gap_mean": 22.42481231689453, "beta_dpo/gap_std": 20.958477020263672, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5066079295154186, "grad_norm": 0.6480767726898193, "learning_rate": 2.896155456223163e-07, "logits/chosen": -1.9061219692230225, "logits/rejected": -1.5006232261657715, "loss": 1.3691, "step": 345 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06592794507741928, "beta_dpo/gap_mean": 21.798906326293945, "beta_dpo/gap_std": 21.57804298400879, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5080763582966226, "grad_norm": 0.620153546333313, "learning_rate": 2.883479137196714e-07, "logits/chosen": -1.9283161163330078, "logits/rejected": -1.6173841953277588, "loss": 1.367, "step": 346 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.30267396569252014, "beta_dpo/gap_mean": 21.181474685668945, "beta_dpo/gap_std": 21.699264526367188, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5095447870778267, "grad_norm": 0.632464587688446, "learning_rate": 2.8707927131383614e-07, "logits/chosen": -1.1111118793487549, "logits/rejected": -0.6248849034309387, "loss": 1.3714, "step": 347 }, { "beta_dpo/beta_used": 0.14382125437259674, "beta_dpo/beta_used_raw": 0.12307719141244888, "beta_dpo/gap_mean": 20.678237915039062, "beta_dpo/gap_std": 21.25815200805664, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5110132158590308, "grad_norm": 90.56324768066406, "learning_rate": 2.858096518347179e-07, "logits/chosen": -1.8461867570877075, "logits/rejected": -1.9853928089141846, "loss": 0.9309, "step": 348 }, { "beta_dpo/beta_used": 0.1115606427192688, "beta_dpo/beta_used_raw": 0.019670851528644562, "beta_dpo/gap_mean": 20.17742347717285, "beta_dpo/gap_std": 21.435304641723633, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5124816446402349, "grad_norm": 41.818668365478516, "learning_rate": 2.845390887379706e-07, "logits/chosen": -2.2712676525115967, "logits/rejected": -2.359731674194336, "loss": 0.887, "step": 349 }, { "beta_dpo/beta_used": 0.04465498775243759, "beta_dpo/beta_used_raw": -0.012502066791057587, "beta_dpo/gap_mean": 19.989093780517578, "beta_dpo/gap_std": 20.79399299621582, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5139500734214391, "grad_norm": 18.32737159729004, "learning_rate": 2.8326761550411346e-07, "logits/chosen": -1.5236661434173584, "logits/rejected": -1.6363747119903564, "loss": 0.9417, "step": 350 }, { "beta_dpo/beta_used": 0.238266259431839, "beta_dpo/beta_used_raw": 0.238266259431839, "beta_dpo/gap_mean": 20.557537078857422, "beta_dpo/gap_std": 21.024560928344727, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5154185022026432, "grad_norm": 72.1306381225586, "learning_rate": 2.819952656376487e-07, "logits/chosen": -1.2730485200881958, "logits/rejected": -1.1638447046279907, "loss": 0.5071, "step": 351 }, { "beta_dpo/beta_used": 0.032640982419252396, "beta_dpo/beta_used_raw": -0.014997676014900208, "beta_dpo/gap_mean": 20.560970306396484, "beta_dpo/gap_std": 21.061851501464844, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5168869309838473, "grad_norm": 12.820404052734375, "learning_rate": 2.8072207266617854e-07, "logits/chosen": -2.1646246910095215, "logits/rejected": -1.5866097211837769, "loss": 0.967, "step": 352 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03380546718835831, "beta_dpo/gap_mean": 19.41574478149414, "beta_dpo/gap_std": 20.40303611755371, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5183553597650514, "grad_norm": 0.5963950157165527, "learning_rate": 2.794480701395219e-07, "logits/chosen": -2.016444206237793, "logits/rejected": -1.8473145961761475, "loss": 1.3694, "step": 353 }, { "beta_dpo/beta_used": 0.01778414286673069, "beta_dpo/beta_used_raw": 0.0027513625100255013, "beta_dpo/gap_mean": 19.44772720336914, "beta_dpo/gap_std": 20.172094345092773, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5198237885462555, "grad_norm": 10.177046775817871, "learning_rate": 2.781732916288303e-07, "logits/chosen": -2.242934226989746, "logits/rejected": -1.9050970077514648, "loss": 1.1316, "step": 354 }, { "beta_dpo/beta_used": 0.04900088161230087, "beta_dpo/beta_used_raw": 0.02209782786667347, "beta_dpo/gap_mean": 20.053104400634766, "beta_dpo/gap_std": 20.282699584960938, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5212922173274597, "grad_norm": 18.826129913330078, "learning_rate": 2.7689777072570284e-07, "logits/chosen": -2.6832900047302246, "logits/rejected": -2.443674087524414, "loss": 1.0111, "step": 355 }, { "beta_dpo/beta_used": 0.011569945141673088, "beta_dpo/beta_used_raw": -0.09082677960395813, "beta_dpo/gap_mean": 19.223495483398438, "beta_dpo/gap_std": 20.17916488647461, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.5227606461086637, "grad_norm": 7.324426651000977, "learning_rate": 2.7562154104130176e-07, "logits/chosen": -1.615821361541748, "logits/rejected": -1.0635333061218262, "loss": 1.2255, "step": 356 }, { "beta_dpo/beta_used": 0.33572131395339966, "beta_dpo/beta_used_raw": 0.33572131395339966, "beta_dpo/gap_mean": 19.8756160736084, "beta_dpo/gap_std": 20.011764526367188, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5242290748898678, "grad_norm": 41.16965103149414, "learning_rate": 2.7434463620546594e-07, "logits/chosen": -1.359694242477417, "logits/rejected": -1.2512187957763672, "loss": 0.2297, "step": 357 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13100573420524597, "beta_dpo/gap_mean": 19.743938446044922, "beta_dpo/gap_std": 20.232032775878906, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5256975036710719, "grad_norm": 0.6093282103538513, "learning_rate": 2.730670898658255e-07, "logits/chosen": -1.9155205488204956, "logits/rejected": -1.704842209815979, "loss": 1.3711, "step": 358 }, { "beta_dpo/beta_used": 0.021278539672493935, "beta_dpo/beta_used_raw": -0.15173041820526123, "beta_dpo/gap_mean": 19.536911010742188, "beta_dpo/gap_std": 20.790668487548828, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.527165932452276, "grad_norm": 13.17918872833252, "learning_rate": 2.717889356869146e-07, "logits/chosen": -2.165461540222168, "logits/rejected": -0.9831835031509399, "loss": 1.1387, "step": 359 }, { "beta_dpo/beta_used": 0.20132240653038025, "beta_dpo/beta_used_raw": 0.20132240653038025, "beta_dpo/gap_mean": 18.98889923095703, "beta_dpo/gap_std": 20.544391632080078, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5286343612334802, "grad_norm": 29.0384578704834, "learning_rate": 2.7051020734928443e-07, "logits/chosen": -1.4908459186553955, "logits/rejected": -1.3189365863800049, "loss": 0.3159, "step": 360 }, { "beta_dpo/beta_used": 0.0302118007093668, "beta_dpo/beta_used_raw": -0.03345338627696037, "beta_dpo/gap_mean": 19.088459014892578, "beta_dpo/gap_std": 20.403377532958984, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5301027900146843, "grad_norm": 15.58735466003418, "learning_rate": 2.6923093854861593e-07, "logits/chosen": -1.501673936843872, "logits/rejected": -0.7541710138320923, "loss": 1.0874, "step": 361 }, { "beta_dpo/beta_used": 0.3873189687728882, "beta_dpo/beta_used_raw": 0.3873189687728882, "beta_dpo/gap_mean": 20.01319122314453, "beta_dpo/gap_std": 21.248180389404297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5315712187958884, "grad_norm": 54.79500198364258, "learning_rate": 2.679511629948319e-07, "logits/chosen": -1.0194931030273438, "logits/rejected": -0.6144936084747314, "loss": 0.3577, "step": 362 }, { "beta_dpo/beta_used": 0.24340449273586273, "beta_dpo/beta_used_raw": 0.24340449273586273, "beta_dpo/gap_mean": 20.988332748413086, "beta_dpo/gap_std": 21.32819938659668, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5330396475770925, "grad_norm": 74.15052032470703, "learning_rate": 2.6667091441120816e-07, "logits/chosen": -1.7819249629974365, "logits/rejected": -1.423370122909546, "loss": 0.5138, "step": 363 }, { "beta_dpo/beta_used": 0.2154492437839508, "beta_dpo/beta_used_raw": 0.04012212157249451, "beta_dpo/gap_mean": 20.825626373291016, "beta_dpo/gap_std": 20.912500381469727, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5345080763582967, "grad_norm": 32.2479248046875, "learning_rate": 2.6539022653348575e-07, "logits/chosen": -1.2419793605804443, "logits/rejected": -1.2207051515579224, "loss": 0.7728, "step": 364 }, { "beta_dpo/beta_used": 0.06870291382074356, "beta_dpo/beta_used_raw": 0.0367172546684742, "beta_dpo/gap_mean": 21.15119171142578, "beta_dpo/gap_std": 21.144515991210938, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5359765051395007, "grad_norm": 28.152324676513672, "learning_rate": 2.641091331089811e-07, "logits/chosen": -1.8173648118972778, "logits/rejected": -1.5551416873931885, "loss": 1.018, "step": 365 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12697885930538177, "beta_dpo/gap_mean": 21.803970336914062, "beta_dpo/gap_std": 22.21269989013672, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5374449339207048, "grad_norm": 0.6190149784088135, "learning_rate": 2.6282766789569736e-07, "logits/chosen": -1.9114937782287598, "logits/rejected": -1.516441822052002, "loss": 1.3686, "step": 366 }, { "beta_dpo/beta_used": 0.187638521194458, "beta_dpo/beta_used_raw": 0.187638521194458, "beta_dpo/gap_mean": 21.763540267944336, "beta_dpo/gap_std": 21.840267181396484, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5389133627019089, "grad_norm": 23.619152069091797, "learning_rate": 2.615458646614349e-07, "logits/chosen": -0.946923017501831, "logits/rejected": -0.6229996681213379, "loss": 0.1925, "step": 367 }, { "beta_dpo/beta_used": 0.21771633625030518, "beta_dpo/beta_used_raw": 0.21771633625030518, "beta_dpo/gap_mean": 21.924060821533203, "beta_dpo/gap_std": 21.678436279296875, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.540381791483113, "grad_norm": 55.797672271728516, "learning_rate": 2.6026375718290083e-07, "logits/chosen": -1.2728760242462158, "logits/rejected": -1.2920920848846436, "loss": 0.3787, "step": 368 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0573999285697937, "beta_dpo/gap_mean": 21.91362953186035, "beta_dpo/gap_std": 20.918893814086914, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5418502202643172, "grad_norm": 0.6155146360397339, "learning_rate": 2.589813792448196e-07, "logits/chosen": -1.5803459882736206, "logits/rejected": -1.3985533714294434, "loss": 1.3669, "step": 369 }, { "beta_dpo/beta_used": 0.05128272622823715, "beta_dpo/beta_used_raw": 0.05128272622823715, "beta_dpo/gap_mean": 21.337799072265625, "beta_dpo/gap_std": 20.956153869628906, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5433186490455213, "grad_norm": 20.048946380615234, "learning_rate": 2.5769876463904263e-07, "logits/chosen": -1.6505661010742188, "logits/rejected": -1.9167686700820923, "loss": 0.8251, "step": 370 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1542610377073288, "beta_dpo/gap_mean": 20.49684715270996, "beta_dpo/gap_std": 21.171977996826172, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5447870778267254, "grad_norm": 0.6783925294876099, "learning_rate": 2.5641594716365744e-07, "logits/chosen": -1.2043266296386719, "logits/rejected": -0.6036630868911743, "loss": 1.3694, "step": 371 }, { "beta_dpo/beta_used": 0.2940911650657654, "beta_dpo/beta_used_raw": 0.2940911650657654, "beta_dpo/gap_mean": 21.267375946044922, "beta_dpo/gap_std": 20.91968536376953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5462555066079295, "grad_norm": 36.68511962890625, "learning_rate": 2.551329606220976e-07, "logits/chosen": -1.1457383632659912, "logits/rejected": -0.5475665330886841, "loss": 0.2941, "step": 372 }, { "beta_dpo/beta_used": 0.09652945399284363, "beta_dpo/beta_used_raw": 0.07870151102542877, "beta_dpo/gap_mean": 21.611835479736328, "beta_dpo/gap_std": 20.99405288696289, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.5477239353891337, "grad_norm": 69.20957946777344, "learning_rate": 2.538498388222517e-07, "logits/chosen": -1.8481800556182861, "logits/rejected": -1.6795766353607178, "loss": 0.8334, "step": 373 }, { "beta_dpo/beta_used": 0.08411690592765808, "beta_dpo/beta_used_raw": 0.01571076363325119, "beta_dpo/gap_mean": 21.991518020629883, "beta_dpo/gap_std": 21.008726119995117, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5491923641703378, "grad_norm": 20.165863037109375, "learning_rate": 2.525666155755725e-07, "logits/chosen": -1.8438756465911865, "logits/rejected": -1.745306372642517, "loss": 0.8808, "step": 374 }, { "beta_dpo/beta_used": 0.24988201260566711, "beta_dpo/beta_used_raw": 0.24988201260566711, "beta_dpo/gap_mean": 22.35379409790039, "beta_dpo/gap_std": 21.22625732421875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5506607929515418, "grad_norm": 41.397769927978516, "learning_rate": 2.512833246961859e-07, "logits/chosen": -1.7292652130126953, "logits/rejected": -1.7314016819000244, "loss": 0.1809, "step": 375 }, { "beta_dpo/beta_used": 0.15126290917396545, "beta_dpo/beta_used_raw": 0.15126290917396545, "beta_dpo/gap_mean": 22.917600631713867, "beta_dpo/gap_std": 21.65682601928711, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5521292217327459, "grad_norm": 28.154109954833984, "learning_rate": 2.5e-07, "logits/chosen": -1.2041659355163574, "logits/rejected": -0.8368335962295532, "loss": 0.3891, "step": 376 }, { "beta_dpo/beta_used": 0.22889791429042816, "beta_dpo/beta_used_raw": 0.22889791429042816, "beta_dpo/gap_mean": 23.89520835876465, "beta_dpo/gap_std": 22.11446762084961, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.55359765051395, "grad_norm": 53.93498611450195, "learning_rate": 2.487166753038141e-07, "logits/chosen": -1.858637809753418, "logits/rejected": -1.1286814212799072, "loss": 0.3235, "step": 377 }, { "beta_dpo/beta_used": 0.12794490158557892, "beta_dpo/beta_used_raw": 0.08371762186288834, "beta_dpo/gap_mean": 23.858905792236328, "beta_dpo/gap_std": 22.27143096923828, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5550660792951542, "grad_norm": 32.8420295715332, "learning_rate": 2.4743338442442754e-07, "logits/chosen": -1.9058127403259277, "logits/rejected": -1.4075047969818115, "loss": 0.8802, "step": 378 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12115731835365295, "beta_dpo/gap_mean": 23.98584747314453, "beta_dpo/gap_std": 22.97738265991211, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5565345080763583, "grad_norm": 0.849181056022644, "learning_rate": 2.461501611777483e-07, "logits/chosen": -2.5830283164978027, "logits/rejected": -2.1841976642608643, "loss": 1.366, "step": 379 }, { "beta_dpo/beta_used": 0.0020707196090370417, "beta_dpo/beta_used_raw": -0.006974679417908192, "beta_dpo/gap_mean": 24.015705108642578, "beta_dpo/gap_std": 24.113147735595703, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5580029368575624, "grad_norm": 1.6928387880325317, "learning_rate": 2.4486703937790243e-07, "logits/chosen": -1.7757701873779297, "logits/rejected": -1.3707565069198608, "loss": 1.342, "step": 380 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14553116261959076, "beta_dpo/gap_mean": 23.626941680908203, "beta_dpo/gap_std": 24.207130432128906, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5594713656387665, "grad_norm": 0.5873382687568665, "learning_rate": 2.435840528363426e-07, "logits/chosen": -1.7703765630722046, "logits/rejected": -1.2845559120178223, "loss": 1.3669, "step": 381 }, { "beta_dpo/beta_used": 0.13747642934322357, "beta_dpo/beta_used_raw": 0.13747642934322357, "beta_dpo/gap_mean": 23.6456356048584, "beta_dpo/gap_std": 24.189830780029297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5609397944199707, "grad_norm": 47.54448699951172, "learning_rate": 2.4230123536095745e-07, "logits/chosen": -1.9117825031280518, "logits/rejected": -1.4356023073196411, "loss": 0.4738, "step": 382 }, { "beta_dpo/beta_used": 0.053510311990976334, "beta_dpo/beta_used_raw": -0.07105323672294617, "beta_dpo/gap_mean": 23.24592399597168, "beta_dpo/gap_std": 23.784160614013672, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5624082232011748, "grad_norm": 20.330209732055664, "learning_rate": 2.4101862075518037e-07, "logits/chosen": -2.630809783935547, "logits/rejected": -2.082855701446533, "loss": 0.93, "step": 383 }, { "beta_dpo/beta_used": 0.1253817230463028, "beta_dpo/beta_used_raw": 0.10570378601551056, "beta_dpo/gap_mean": 22.61440086364746, "beta_dpo/gap_std": 23.486896514892578, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5638766519823789, "grad_norm": 36.63646697998047, "learning_rate": 2.397362428170992e-07, "logits/chosen": -2.096801519393921, "logits/rejected": -2.136882781982422, "loss": 0.821, "step": 384 }, { "beta_dpo/beta_used": 0.1230027824640274, "beta_dpo/beta_used_raw": 0.0754595696926117, "beta_dpo/gap_mean": 23.04998207092285, "beta_dpo/gap_std": 23.0758056640625, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5653450807635829, "grad_norm": 72.39258575439453, "learning_rate": 2.3845413533856514e-07, "logits/chosen": -2.6184840202331543, "logits/rejected": -1.7057888507843018, "loss": 1.0586, "step": 385 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.005410731304436922, "beta_dpo/gap_mean": 22.54529571533203, "beta_dpo/gap_std": 22.520282745361328, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.566813509544787, "grad_norm": 0.6479215025901794, "learning_rate": 2.3717233210430254e-07, "logits/chosen": -1.6671037673950195, "logits/rejected": -1.5030872821807861, "loss": 1.3653, "step": 386 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.009181392379105091, "beta_dpo/gap_mean": 22.327953338623047, "beta_dpo/gap_std": 22.604331970214844, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5682819383259912, "grad_norm": 0.6571717262268066, "learning_rate": 2.3589086689101889e-07, "logits/chosen": -2.0239405632019043, "logits/rejected": -1.2882328033447266, "loss": 1.3652, "step": 387 }, { "beta_dpo/beta_used": 0.3050941526889801, "beta_dpo/beta_used_raw": 0.3050941526889801, "beta_dpo/gap_mean": 22.91834831237793, "beta_dpo/gap_std": 22.365230560302734, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5697503671071953, "grad_norm": 84.97122955322266, "learning_rate": 2.3460977346651428e-07, "logits/chosen": -1.9105610847473145, "logits/rejected": -1.5583992004394531, "loss": 0.4685, "step": 388 }, { "beta_dpo/beta_used": 0.061634305864572525, "beta_dpo/beta_used_raw": 0.050996627658605576, "beta_dpo/gap_mean": 22.8848819732666, "beta_dpo/gap_std": 22.791643142700195, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5712187958883994, "grad_norm": 22.67323875427246, "learning_rate": 2.3332908558879177e-07, "logits/chosen": -1.6809544563293457, "logits/rejected": -1.4034664630889893, "loss": 0.9438, "step": 389 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2398403286933899, "beta_dpo/gap_mean": 22.924835205078125, "beta_dpo/gap_std": 23.773258209228516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5726872246696035, "grad_norm": 0.605733335018158, "learning_rate": 2.320488370051681e-07, "logits/chosen": -1.8637104034423828, "logits/rejected": -1.792722225189209, "loss": 1.3691, "step": 390 }, { "beta_dpo/beta_used": 0.025480810552835464, "beta_dpo/beta_used_raw": 0.01907052844762802, "beta_dpo/gap_mean": 22.331531524658203, "beta_dpo/gap_std": 23.542163848876953, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5741556534508077, "grad_norm": 10.959379196166992, "learning_rate": 2.3076906145138405e-07, "logits/chosen": -2.0537145137786865, "logits/rejected": -1.7673195600509644, "loss": 1.0463, "step": 391 }, { "beta_dpo/beta_used": 0.30475738644599915, "beta_dpo/beta_used_raw": 0.267634779214859, "beta_dpo/gap_mean": 22.344764709472656, "beta_dpo/gap_std": 23.590797424316406, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5756240822320118, "grad_norm": 94.24765014648438, "learning_rate": 2.294897926507156e-07, "logits/chosen": -2.280344247817993, "logits/rejected": -1.6492552757263184, "loss": 0.7929, "step": 392 }, { "beta_dpo/beta_used": 0.10215222090482712, "beta_dpo/beta_used_raw": 0.015524506568908691, "beta_dpo/gap_mean": 22.918624877929688, "beta_dpo/gap_std": 23.211952209472656, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5770925110132159, "grad_norm": 46.94270706176758, "learning_rate": 2.2821106431308543e-07, "logits/chosen": -1.895100474357605, "logits/rejected": -1.7161359786987305, "loss": 0.8893, "step": 393 }, { "beta_dpo/beta_used": 0.09863930195569992, "beta_dpo/beta_used_raw": 0.09863930195569992, "beta_dpo/gap_mean": 23.259998321533203, "beta_dpo/gap_std": 22.927837371826172, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.57856093979442, "grad_norm": 41.98773193359375, "learning_rate": 2.2693291013417452e-07, "logits/chosen": -1.9885823726654053, "logits/rejected": -1.2636914253234863, "loss": 0.821, "step": 394 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06710291653871536, "beta_dpo/gap_mean": 23.46683120727539, "beta_dpo/gap_std": 23.799558639526367, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.580029368575624, "grad_norm": 0.6184390187263489, "learning_rate": 2.2565536379453404e-07, "logits/chosen": -2.3219876289367676, "logits/rejected": -1.794336199760437, "loss": 1.3662, "step": 395 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.26747581362724304, "beta_dpo/gap_mean": 22.49011993408203, "beta_dpo/gap_std": 23.999927520751953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5814977973568282, "grad_norm": 0.5957443714141846, "learning_rate": 2.2437845895869825e-07, "logits/chosen": -2.0833539962768555, "logits/rejected": -1.9447077512741089, "loss": 1.37, "step": 396 }, { "beta_dpo/beta_used": 0.08920581638813019, "beta_dpo/beta_used_raw": 0.08920581638813019, "beta_dpo/gap_mean": 23.13333511352539, "beta_dpo/gap_std": 24.469520568847656, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5829662261380323, "grad_norm": 43.53438186645508, "learning_rate": 2.2310222927429716e-07, "logits/chosen": -1.4863579273223877, "logits/rejected": -1.5815786123275757, "loss": 0.9259, "step": 397 }, { "beta_dpo/beta_used": 0.24258211255073547, "beta_dpo/beta_used_raw": 0.24258211255073547, "beta_dpo/gap_mean": 23.732725143432617, "beta_dpo/gap_std": 25.253231048583984, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5844346549192364, "grad_norm": 109.95307159423828, "learning_rate": 2.2182670837116972e-07, "logits/chosen": -1.8581675291061401, "logits/rejected": -1.4040751457214355, "loss": 0.8225, "step": 398 }, { "beta_dpo/beta_used": 0.23558039963245392, "beta_dpo/beta_used_raw": 0.23558039963245392, "beta_dpo/gap_mean": 24.469818115234375, "beta_dpo/gap_std": 24.760602951049805, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.5859030837004405, "grad_norm": 92.55284881591797, "learning_rate": 2.2055192986047804e-07, "logits/chosen": -3.1604866981506348, "logits/rejected": -1.5631942749023438, "loss": 0.6451, "step": 399 }, { "beta_dpo/beta_used": 0.10073019564151764, "beta_dpo/beta_used_raw": 0.10073019564151764, "beta_dpo/gap_mean": 25.53512954711914, "beta_dpo/gap_std": 25.147260665893555, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5873715124816447, "grad_norm": 42.9210205078125, "learning_rate": 2.192779273338215e-07, "logits/chosen": -2.9547784328460693, "logits/rejected": -2.0937581062316895, "loss": 0.7246, "step": 400 }, { "epoch": 0.5873715124816447, "eval_beta_dpo/beta_used": 0.009405079297721386, "eval_beta_dpo/beta_used_raw": -0.36535316705703735, "eval_beta_dpo/gap_mean": 25.619625091552734, "eval_beta_dpo/gap_std": 25.05843734741211, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -1.569831132888794, "eval_logits/rejected": -1.2495183944702148, "eval_loss": 0.6605032086372375, "eval_runtime": 44.1498, "eval_samples_per_second": 52.979, "eval_steps_per_second": 1.676, "step": 400 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3439871668815613, "beta_dpo/gap_mean": 25.12491226196289, "beta_dpo/gap_std": 25.90760040283203, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5888399412628488, "grad_norm": 0.6196178793907166, "learning_rate": 2.1800473436235136e-07, "logits/chosen": -1.1647582054138184, "logits/rejected": -0.9064052104949951, "loss": 1.3688, "step": 401 }, { "beta_dpo/beta_used": 0.273027241230011, "beta_dpo/beta_used_raw": 0.273027241230011, "beta_dpo/gap_mean": 25.792510986328125, "beta_dpo/gap_std": 25.570724487304688, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5903083700440529, "grad_norm": 32.324867248535156, "learning_rate": 2.1673238449588665e-07, "logits/chosen": -2.0468878746032715, "logits/rejected": -1.7435054779052734, "loss": 0.2296, "step": 402 }, { "beta_dpo/beta_used": 0.16802377998828888, "beta_dpo/beta_used_raw": 0.09610848873853683, "beta_dpo/gap_mean": 26.102928161621094, "beta_dpo/gap_std": 25.163429260253906, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.591776798825257, "grad_norm": 40.2495002746582, "learning_rate": 2.154609112620295e-07, "logits/chosen": -2.2095389366149902, "logits/rejected": -1.726819634437561, "loss": 0.7774, "step": 403 }, { "beta_dpo/beta_used": 0.06464260816574097, "beta_dpo/beta_used_raw": 0.03125058859586716, "beta_dpo/gap_mean": 26.120956420898438, "beta_dpo/gap_std": 25.64906883239746, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.593245227606461, "grad_norm": 20.02524185180664, "learning_rate": 2.1419034816528218e-07, "logits/chosen": -2.186288356781006, "logits/rejected": -1.8319518566131592, "loss": 0.8812, "step": 404 }, { "beta_dpo/beta_used": 0.08662373572587967, "beta_dpo/beta_used_raw": -0.009302124381065369, "beta_dpo/gap_mean": 25.158933639526367, "beta_dpo/gap_std": 24.917097091674805, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5947136563876652, "grad_norm": 49.19369888305664, "learning_rate": 2.129207286861638e-07, "logits/chosen": -1.164164662361145, "logits/rejected": -0.8499673008918762, "loss": 0.9047, "step": 405 }, { "beta_dpo/beta_used": 0.0923011526465416, "beta_dpo/beta_used_raw": 0.056421127170324326, "beta_dpo/gap_mean": 24.574819564819336, "beta_dpo/gap_std": 24.394176483154297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5961820851688693, "grad_norm": 12.233301162719727, "learning_rate": 2.1165208628032861e-07, "logits/chosen": -2.419933795928955, "logits/rejected": -2.014167547225952, "loss": 0.7806, "step": 406 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18378406763076782, "beta_dpo/gap_mean": 24.153892517089844, "beta_dpo/gap_std": 24.29613494873047, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.5976505139500734, "grad_norm": 0.6539500951766968, "learning_rate": 2.1038445437768375e-07, "logits/chosen": -2.9986298084259033, "logits/rejected": -2.2421011924743652, "loss": 1.3682, "step": 407 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1888156533241272, "beta_dpo/gap_mean": 23.59590721130371, "beta_dpo/gap_std": 24.68634605407715, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5991189427312775, "grad_norm": 0.710385799407959, "learning_rate": 2.0911786638150872e-07, "logits/chosen": -1.2492729425430298, "logits/rejected": -0.6833850145339966, "loss": 1.3671, "step": 408 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07780434191226959, "beta_dpo/gap_mean": 22.793479919433594, "beta_dpo/gap_std": 24.770523071289062, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6005873715124816, "grad_norm": 0.7162730097770691, "learning_rate": 2.0785235566757517e-07, "logits/chosen": -1.8896914720535278, "logits/rejected": -1.1642524003982544, "loss": 1.3662, "step": 409 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12291724234819412, "beta_dpo/gap_mean": 22.16793441772461, "beta_dpo/gap_std": 24.20937728881836, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6020558002936858, "grad_norm": 0.6539977192878723, "learning_rate": 2.065879555832674e-07, "logits/chosen": -1.9637231826782227, "logits/rejected": -1.8833253383636475, "loss": 1.3679, "step": 410 }, { "beta_dpo/beta_used": 0.09384816139936447, "beta_dpo/beta_used_raw": 0.05118773877620697, "beta_dpo/gap_mean": 22.44048309326172, "beta_dpo/gap_std": 24.338491439819336, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6035242290748899, "grad_norm": 57.7232666015625, "learning_rate": 2.0532469944670343e-07, "logits/chosen": -2.1327853202819824, "logits/rejected": -2.2029080390930176, "loss": 1.1001, "step": 411 }, { "beta_dpo/beta_used": 0.03725043311715126, "beta_dpo/beta_used_raw": -0.07972878217697144, "beta_dpo/gap_mean": 22.416568756103516, "beta_dpo/gap_std": 24.20757293701172, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.604992657856094, "grad_norm": 17.349088668823242, "learning_rate": 2.0406262054585738e-07, "logits/chosen": -1.886979103088379, "logits/rejected": -1.8534328937530518, "loss": 1.0068, "step": 412 }, { "beta_dpo/beta_used": 0.15613144636154175, "beta_dpo/beta_used_raw": 0.15613144636154175, "beta_dpo/gap_mean": 22.269824981689453, "beta_dpo/gap_std": 24.325923919677734, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6064610866372981, "grad_norm": 35.187564849853516, "learning_rate": 2.0280175213768205e-07, "logits/chosen": -0.7825952172279358, "logits/rejected": -0.36407551169395447, "loss": 0.3748, "step": 413 }, { "beta_dpo/beta_used": 0.08755208551883698, "beta_dpo/beta_used_raw": 0.008257351815700531, "beta_dpo/gap_mean": 22.121807098388672, "beta_dpo/gap_std": 24.83641815185547, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6079295154185022, "grad_norm": 45.86546325683594, "learning_rate": 2.0154212744723247e-07, "logits/chosen": -2.5157737731933594, "logits/rejected": -1.974806785583496, "loss": 0.8325, "step": 414 }, { "beta_dpo/beta_used": 0.06630411744117737, "beta_dpo/beta_used_raw": -0.01721137762069702, "beta_dpo/gap_mean": 22.35672378540039, "beta_dpo/gap_std": 25.50603485107422, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.6093979441997063, "grad_norm": 44.08209228515625, "learning_rate": 2.002837796667909e-07, "logits/chosen": -1.761643886566162, "logits/rejected": -1.2777553796768188, "loss": 1.1056, "step": 415 }, { "beta_dpo/beta_used": 0.3845203220844269, "beta_dpo/beta_used_raw": 0.3845203220844269, "beta_dpo/gap_mean": 22.946575164794922, "beta_dpo/gap_std": 25.25613021850586, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6108663729809104, "grad_norm": 92.45121002197266, "learning_rate": 1.990267419549914e-07, "logits/chosen": -2.2547457218170166, "logits/rejected": -1.6286828517913818, "loss": 0.5203, "step": 416 }, { "beta_dpo/beta_used": 0.37170472741127014, "beta_dpo/beta_used_raw": 0.37170472741127014, "beta_dpo/gap_mean": 23.813949584960938, "beta_dpo/gap_std": 24.991924285888672, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6123348017621145, "grad_norm": 55.1314811706543, "learning_rate": 1.9777104743594686e-07, "logits/chosen": -2.4560434818267822, "logits/rejected": -1.0010004043579102, "loss": 0.2669, "step": 417 }, { "beta_dpo/beta_used": 0.33069807291030884, "beta_dpo/beta_used_raw": 0.22797717154026031, "beta_dpo/gap_mean": 24.798458099365234, "beta_dpo/gap_std": 25.08316421508789, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6138032305433186, "grad_norm": 92.97559356689453, "learning_rate": 1.965167291983757e-07, "logits/chosen": -2.13330078125, "logits/rejected": -1.9335625171661377, "loss": 0.8709, "step": 418 }, { "beta_dpo/beta_used": 0.15325038135051727, "beta_dpo/beta_used_raw": 0.026439383625984192, "beta_dpo/gap_mean": 25.586071014404297, "beta_dpo/gap_std": 25.69367790222168, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6152716593245228, "grad_norm": 64.4251480102539, "learning_rate": 1.9526382029472988e-07, "logits/chosen": -1.5011570453643799, "logits/rejected": -1.302954912185669, "loss": 0.9016, "step": 419 }, { "beta_dpo/beta_used": 0.25911739468574524, "beta_dpo/beta_used_raw": 0.25911739468574524, "beta_dpo/gap_mean": 25.320785522460938, "beta_dpo/gap_std": 25.962156295776367, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6167400881057269, "grad_norm": 53.652259826660156, "learning_rate": 1.9401235374032425e-07, "logits/chosen": -2.263819694519043, "logits/rejected": -1.8596880435943604, "loss": 0.3069, "step": 420 }, { "beta_dpo/beta_used": 0.12224863469600677, "beta_dpo/beta_used_raw": 0.12224863469600677, "beta_dpo/gap_mean": 25.586990356445312, "beta_dpo/gap_std": 26.067787170410156, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.618208516886931, "grad_norm": 46.267398834228516, "learning_rate": 1.9276236251246653e-07, "logits/chosen": -1.5814555883407593, "logits/rejected": -1.524078607559204, "loss": 0.7272, "step": 421 }, { "beta_dpo/beta_used": 0.025601375848054886, "beta_dpo/beta_used_raw": -0.14375457167625427, "beta_dpo/gap_mean": 24.974611282348633, "beta_dpo/gap_std": 25.582592010498047, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6196769456681351, "grad_norm": 12.181634902954102, "learning_rate": 1.9151387954958792e-07, "logits/chosen": -2.5378003120422363, "logits/rejected": -2.0938880443573, "loss": 0.9912, "step": 422 }, { "beta_dpo/beta_used": 0.13401761651039124, "beta_dpo/beta_used_raw": 0.13401761651039124, "beta_dpo/gap_mean": 24.909393310546875, "beta_dpo/gap_std": 25.882190704345703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6211453744493393, "grad_norm": 47.32422637939453, "learning_rate": 1.902669377503756e-07, "logits/chosen": -1.5475959777832031, "logits/rejected": -1.6587059497833252, "loss": 0.4962, "step": 423 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.034625787287950516, "beta_dpo/gap_mean": 24.600170135498047, "beta_dpo/gap_std": 26.172245025634766, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6226138032305433, "grad_norm": 0.7027908563613892, "learning_rate": 1.890215699729057e-07, "logits/chosen": -1.8585600852966309, "logits/rejected": -1.1137242317199707, "loss": 1.3642, "step": 424 }, { "beta_dpo/beta_used": 0.05164389684796333, "beta_dpo/beta_used_raw": -0.03845195099711418, "beta_dpo/gap_mean": 23.97371482849121, "beta_dpo/gap_std": 26.5534610748291, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6240822320117474, "grad_norm": 26.48809051513672, "learning_rate": 1.8777780903377732e-07, "logits/chosen": -2.3856022357940674, "logits/rejected": -2.2705893516540527, "loss": 1.0747, "step": 425 }, { "beta_dpo/beta_used": 0.15089742839336395, "beta_dpo/beta_used_raw": 0.15089742839336395, "beta_dpo/gap_mean": 23.90325927734375, "beta_dpo/gap_std": 25.989383697509766, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6255506607929515, "grad_norm": 37.8873176574707, "learning_rate": 1.8653568770724803e-07, "logits/chosen": -2.274775505065918, "logits/rejected": -1.6316537857055664, "loss": 0.6657, "step": 426 }, { "beta_dpo/beta_used": 0.03543705865740776, "beta_dpo/beta_used_raw": 0.016834238544106483, "beta_dpo/gap_mean": 23.831756591796875, "beta_dpo/gap_std": 25.224937438964844, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6270190895741556, "grad_norm": 13.970566749572754, "learning_rate": 1.8529523872436977e-07, "logits/chosen": -1.8503241539001465, "logits/rejected": -1.3610749244689941, "loss": 0.9504, "step": 427 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1355256587266922, "beta_dpo/gap_mean": 23.303905487060547, "beta_dpo/gap_std": 25.156648635864258, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6284875183553598, "grad_norm": 0.6985291242599487, "learning_rate": 1.8405649477212697e-07, "logits/chosen": -1.3080241680145264, "logits/rejected": -1.3926000595092773, "loss": 1.3672, "step": 428 }, { "beta_dpo/beta_used": 0.20610594749450684, "beta_dpo/beta_used_raw": 0.20610594749450684, "beta_dpo/gap_mean": 22.89441680908203, "beta_dpo/gap_std": 25.40219497680664, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6299559471365639, "grad_norm": 65.67558288574219, "learning_rate": 1.828194884925749e-07, "logits/chosen": -2.0998034477233887, "logits/rejected": -1.4335576295852661, "loss": 0.5392, "step": 429 }, { "beta_dpo/beta_used": 0.1917858123779297, "beta_dpo/beta_used_raw": 0.1917858123779297, "beta_dpo/gap_mean": 23.49513816833496, "beta_dpo/gap_std": 25.55303382873535, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.631424375917768, "grad_norm": 49.91692352294922, "learning_rate": 1.8158425248197928e-07, "logits/chosen": -1.1634879112243652, "logits/rejected": -1.3685014247894287, "loss": 0.3402, "step": 430 }, { "beta_dpo/beta_used": 0.09054918587207794, "beta_dpo/beta_used_raw": 0.09054918587207794, "beta_dpo/gap_mean": 23.721134185791016, "beta_dpo/gap_std": 25.834186553955078, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6328928046989721, "grad_norm": 36.18718338012695, "learning_rate": 1.8035081928995788e-07, "logits/chosen": -2.5709052085876465, "logits/rejected": -2.3715858459472656, "loss": 0.6399, "step": 431 }, { "beta_dpo/beta_used": 0.1696254163980484, "beta_dpo/beta_used_raw": 0.1696254163980484, "beta_dpo/gap_mean": 23.902099609375, "beta_dpo/gap_std": 25.986339569091797, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6343612334801763, "grad_norm": 79.0599365234375, "learning_rate": 1.791192214186223e-07, "logits/chosen": -1.4236735105514526, "logits/rejected": -0.7997815608978271, "loss": 0.6583, "step": 432 }, { "beta_dpo/beta_used": 0.11669200658798218, "beta_dpo/beta_used_raw": 0.10000326484441757, "beta_dpo/gap_mean": 23.92194366455078, "beta_dpo/gap_std": 26.365129470825195, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6358296622613803, "grad_norm": 42.03449249267578, "learning_rate": 1.7788949132172193e-07, "logits/chosen": -1.4471304416656494, "logits/rejected": -0.8094350695610046, "loss": 0.846, "step": 433 }, { "beta_dpo/beta_used": 0.1338217407464981, "beta_dpo/beta_used_raw": 0.03278639167547226, "beta_dpo/gap_mean": 23.78887176513672, "beta_dpo/gap_std": 27.037960052490234, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6372980910425844, "grad_norm": 19.04947853088379, "learning_rate": 1.7666166140378853e-07, "logits/chosen": -2.431135654449463, "logits/rejected": -1.7750380039215088, "loss": 0.8154, "step": 434 }, { "beta_dpo/beta_used": 0.06533200293779373, "beta_dpo/beta_used_raw": 0.038825489580631256, "beta_dpo/gap_mean": 23.712087631225586, "beta_dpo/gap_std": 26.80076026916504, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6387665198237885, "grad_norm": 43.388587951660156, "learning_rate": 1.7543576401928218e-07, "logits/chosen": -2.072295665740967, "logits/rejected": -1.6574585437774658, "loss": 1.0371, "step": 435 }, { "beta_dpo/beta_used": 0.13881349563598633, "beta_dpo/beta_used_raw": 0.13881349563598633, "beta_dpo/gap_mean": 24.322784423828125, "beta_dpo/gap_std": 26.062278747558594, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6402349486049926, "grad_norm": 46.741336822509766, "learning_rate": 1.742118314717391e-07, "logits/chosen": -1.5973613262176514, "logits/rejected": -0.5990985631942749, "loss": 0.5509, "step": 436 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12126322090625763, "beta_dpo/gap_mean": 23.640636444091797, "beta_dpo/gap_std": 25.07185935974121, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6417033773861968, "grad_norm": 0.6110405325889587, "learning_rate": 1.7298989601292036e-07, "logits/chosen": -2.5639896392822266, "logits/rejected": -2.3195319175720215, "loss": 1.3667, "step": 437 }, { "beta_dpo/beta_used": 0.01529831625521183, "beta_dpo/beta_used_raw": -0.01920161023736, "beta_dpo/gap_mean": 23.29519271850586, "beta_dpo/gap_std": 24.44326400756836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6431718061674009, "grad_norm": 9.355488777160645, "learning_rate": 1.7176998984196144e-07, "logits/chosen": -2.1953420639038086, "logits/rejected": -1.6790311336517334, "loss": 1.1463, "step": 438 }, { "beta_dpo/beta_used": 0.05295996740460396, "beta_dpo/beta_used_raw": -0.004124250262975693, "beta_dpo/gap_mean": 23.02737045288086, "beta_dpo/gap_std": 24.17821502685547, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.644640234948605, "grad_norm": 25.170711517333984, "learning_rate": 1.7055214510452458e-07, "logits/chosen": -1.6655948162078857, "logits/rejected": -1.1508713960647583, "loss": 0.9209, "step": 439 }, { "beta_dpo/beta_used": 0.17924237251281738, "beta_dpo/beta_used_raw": 0.16911785304546356, "beta_dpo/gap_mean": 22.635499954223633, "beta_dpo/gap_std": 24.43799591064453, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6461086637298091, "grad_norm": 114.68986511230469, "learning_rate": 1.6933639389195134e-07, "logits/chosen": -2.1340644359588623, "logits/rejected": -1.0800987482070923, "loss": 1.441, "step": 440 }, { "beta_dpo/beta_used": 0.002155003370717168, "beta_dpo/beta_used_raw": -0.037166256457567215, "beta_dpo/gap_mean": 23.210651397705078, "beta_dpo/gap_std": 24.7412109375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6475770925110133, "grad_norm": 1.4207937717437744, "learning_rate": 1.681227682404166e-07, "logits/chosen": -1.8811532258987427, "logits/rejected": -0.946062445640564, "loss": 1.342, "step": 441 }, { "beta_dpo/beta_used": 0.07147623598575592, "beta_dpo/beta_used_raw": 0.07147623598575592, "beta_dpo/gap_mean": 23.945392608642578, "beta_dpo/gap_std": 24.70497703552246, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6490455212922174, "grad_norm": 25.162975311279297, "learning_rate": 1.669113001300851e-07, "logits/chosen": -0.78822261095047, "logits/rejected": -0.35674917697906494, "loss": 0.7944, "step": 442 }, { "beta_dpo/beta_used": 0.04567694664001465, "beta_dpo/beta_used_raw": 0.008691076189279556, "beta_dpo/gap_mean": 24.038724899291992, "beta_dpo/gap_std": 24.4070987701416, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6505139500734214, "grad_norm": 23.7645320892334, "learning_rate": 1.6570202148426815e-07, "logits/chosen": -1.9316351413726807, "logits/rejected": -1.5170958042144775, "loss": 0.9393, "step": 443 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14725573360919952, "beta_dpo/gap_mean": 23.436275482177734, "beta_dpo/gap_std": 24.264347076416016, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6519823788546255, "grad_norm": 0.6778625249862671, "learning_rate": 1.6449496416858282e-07, "logits/chosen": -2.8322625160217285, "logits/rejected": -2.147847890853882, "loss": 1.3673, "step": 444 }, { "beta_dpo/beta_used": 0.057776615023612976, "beta_dpo/beta_used_raw": 0.03618288040161133, "beta_dpo/gap_mean": 23.653621673583984, "beta_dpo/gap_std": 24.027339935302734, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6534508076358296, "grad_norm": 17.004131317138672, "learning_rate": 1.6329015999011182e-07, "logits/chosen": -2.289595127105713, "logits/rejected": -2.0982038974761963, "loss": 0.8241, "step": 445 }, { "beta_dpo/beta_used": 0.10398882627487183, "beta_dpo/beta_used_raw": 0.04328737035393715, "beta_dpo/gap_mean": 23.774471282958984, "beta_dpo/gap_std": 24.207866668701172, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6549192364170338, "grad_norm": 45.588523864746094, "learning_rate": 1.6208764069656578e-07, "logits/chosen": -0.8566497564315796, "logits/rejected": -0.7313945293426514, "loss": 0.9242, "step": 446 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21297773718833923, "beta_dpo/gap_mean": 23.687850952148438, "beta_dpo/gap_std": 25.157394409179688, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6563876651982379, "grad_norm": 0.664543628692627, "learning_rate": 1.608874379754465e-07, "logits/chosen": -1.9025514125823975, "logits/rejected": -1.9528214931488037, "loss": 1.3683, "step": 447 }, { "beta_dpo/beta_used": 0.11449765413999557, "beta_dpo/beta_used_raw": -0.042009443044662476, "beta_dpo/gap_mean": 23.503799438476562, "beta_dpo/gap_std": 24.869077682495117, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.657856093979442, "grad_norm": 61.825439453125, "learning_rate": 1.5968958345321177e-07, "logits/chosen": -1.6304960250854492, "logits/rejected": -1.7240123748779297, "loss": 0.9718, "step": 448 }, { "beta_dpo/beta_used": 0.12202105671167374, "beta_dpo/beta_used_raw": 0.07867221534252167, "beta_dpo/gap_mean": 24.079425811767578, "beta_dpo/gap_std": 25.076366424560547, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6593245227606461, "grad_norm": 31.599090576171875, "learning_rate": 1.584941086944423e-07, "logits/chosen": -1.6092393398284912, "logits/rejected": -1.0893751382827759, "loss": 0.9168, "step": 449 }, { "beta_dpo/beta_used": 0.3655173182487488, "beta_dpo/beta_used_raw": 0.3655173182487488, "beta_dpo/gap_mean": 24.724367141723633, "beta_dpo/gap_std": 24.448747634887695, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6607929515418502, "grad_norm": 71.5322036743164, "learning_rate": 1.573010452010098e-07, "logits/chosen": -2.9565317630767822, "logits/rejected": -2.4777119159698486, "loss": 0.1979, "step": 450 }, { "beta_dpo/beta_used": 0.10620734095573425, "beta_dpo/beta_used_raw": 0.10620734095573425, "beta_dpo/gap_mean": 24.306663513183594, "beta_dpo/gap_std": 24.566997528076172, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6622613803230544, "grad_norm": 23.241680145263672, "learning_rate": 1.5611042441124687e-07, "logits/chosen": -1.8549528121948242, "logits/rejected": -1.3230280876159668, "loss": 0.5711, "step": 451 }, { "beta_dpo/beta_used": 0.16152921319007874, "beta_dpo/beta_used_raw": 0.16152921319007874, "beta_dpo/gap_mean": 23.995746612548828, "beta_dpo/gap_std": 23.87276840209961, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.6637298091042585, "grad_norm": 49.24470901489258, "learning_rate": 1.549222776991186e-07, "logits/chosen": -2.2875595092773438, "logits/rejected": -2.118117094039917, "loss": 0.3483, "step": 452 }, { "beta_dpo/beta_used": 0.005642706993967295, "beta_dpo/beta_used_raw": -0.07630041986703873, "beta_dpo/gap_mean": 24.373931884765625, "beta_dpo/gap_std": 24.05362319946289, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6651982378854625, "grad_norm": 4.06926155090332, "learning_rate": 1.5373663637339584e-07, "logits/chosen": -1.8347233533859253, "logits/rejected": -1.5946714878082275, "loss": 1.2667, "step": 453 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19752493500709534, "beta_dpo/gap_mean": 24.122852325439453, "beta_dpo/gap_std": 24.29592514038086, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6666666666666666, "grad_norm": 0.657343327999115, "learning_rate": 1.5255353167683017e-07, "logits/chosen": -2.046905040740967, "logits/rejected": -1.7205897569656372, "loss": 1.3672, "step": 454 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1890154629945755, "beta_dpo/gap_mean": 24.46869659423828, "beta_dpo/gap_std": 24.696008682250977, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.6681350954478708, "grad_norm": 0.7729347348213196, "learning_rate": 1.5137299478533064e-07, "logits/chosen": -2.308173656463623, "logits/rejected": -2.5527238845825195, "loss": 1.3673, "step": 455 }, { "beta_dpo/beta_used": 0.10838553309440613, "beta_dpo/beta_used_raw": 0.03978702425956726, "beta_dpo/gap_mean": 24.512104034423828, "beta_dpo/gap_std": 24.691377639770508, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6696035242290749, "grad_norm": 42.247413635253906, "learning_rate": 1.5019505680714232e-07, "logits/chosen": -1.8087551593780518, "logits/rejected": -1.8514174222946167, "loss": 0.8742, "step": 456 }, { "beta_dpo/beta_used": 0.13772788643836975, "beta_dpo/beta_used_raw": -0.02056203782558441, "beta_dpo/gap_mean": 25.14911651611328, "beta_dpo/gap_std": 24.04446029663086, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.671071953010279, "grad_norm": 4.883652210235596, "learning_rate": 1.4901974878202627e-07, "logits/chosen": -1.543813943862915, "logits/rejected": -1.3994325399398804, "loss": 0.6975, "step": 457 }, { "beta_dpo/beta_used": 0.018343646079301834, "beta_dpo/beta_used_raw": -0.2884172797203064, "beta_dpo/gap_mean": 24.749656677246094, "beta_dpo/gap_std": 24.605478286743164, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6725403817914831, "grad_norm": 9.62954044342041, "learning_rate": 1.4784710168044212e-07, "logits/chosen": -1.2308340072631836, "logits/rejected": -0.8876813650131226, "loss": 1.0782, "step": 458 }, { "beta_dpo/beta_used": 0.13343806564807892, "beta_dpo/beta_used_raw": 0.0319179967045784, "beta_dpo/gap_mean": 24.73650360107422, "beta_dpo/gap_std": 24.42650604248047, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6740088105726872, "grad_norm": 31.03327751159668, "learning_rate": 1.466771464027316e-07, "logits/chosen": -2.679572105407715, "logits/rejected": -2.30106520652771, "loss": 0.7906, "step": 459 }, { "beta_dpo/beta_used": 0.014424387365579605, "beta_dpo/beta_used_raw": -0.03031376376748085, "beta_dpo/gap_mean": 23.957611083984375, "beta_dpo/gap_std": 23.70931625366211, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6754772393538914, "grad_norm": 9.91299057006836, "learning_rate": 1.4550991377830423e-07, "logits/chosen": -1.9293622970581055, "logits/rejected": -1.8261902332305908, "loss": 1.1246, "step": 460 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.28378790616989136, "beta_dpo/gap_mean": 22.883316040039062, "beta_dpo/gap_std": 23.486953735351562, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6769456681350955, "grad_norm": 0.6803312301635742, "learning_rate": 1.4434543456482518e-07, "logits/chosen": -1.3033266067504883, "logits/rejected": -1.0037317276000977, "loss": 1.3695, "step": 461 }, { "beta_dpo/beta_used": 0.1492975503206253, "beta_dpo/beta_used_raw": 0.1492975503206253, "beta_dpo/gap_mean": 22.511737823486328, "beta_dpo/gap_std": 22.973257064819336, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6784140969162996, "grad_norm": 38.22011947631836, "learning_rate": 1.4318373944740484e-07, "logits/chosen": -2.354351282119751, "logits/rejected": -2.0672004222869873, "loss": 0.662, "step": 462 }, { "beta_dpo/beta_used": 0.3572339117527008, "beta_dpo/beta_used_raw": 0.3572339117527008, "beta_dpo/gap_mean": 23.360401153564453, "beta_dpo/gap_std": 23.381099700927734, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6798825256975036, "grad_norm": 74.08802032470703, "learning_rate": 1.4202485903778976e-07, "logits/chosen": -2.4058332443237305, "logits/rejected": -2.0344924926757812, "loss": 0.4366, "step": 463 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06975067406892776, "beta_dpo/gap_mean": 23.911937713623047, "beta_dpo/gap_std": 23.886293411254883, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.6813509544787077, "grad_norm": 0.8541564345359802, "learning_rate": 1.4086882387355658e-07, "logits/chosen": -1.4215198755264282, "logits/rejected": -1.7960498332977295, "loss": 1.3652, "step": 464 }, { "beta_dpo/beta_used": 0.028192659839987755, "beta_dpo/beta_used_raw": 0.019971748813986778, "beta_dpo/gap_mean": 24.532211303710938, "beta_dpo/gap_std": 23.919170379638672, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6828193832599119, "grad_norm": 16.16191864013672, "learning_rate": 1.3971566441730714e-07, "logits/chosen": -1.4708728790283203, "logits/rejected": -1.1633594036102295, "loss": 1.0196, "step": 465 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04159821569919586, "beta_dpo/gap_mean": 25.499624252319336, "beta_dpo/gap_std": 24.219432830810547, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.684287812041116, "grad_norm": 0.6992933750152588, "learning_rate": 1.3856541105586545e-07, "logits/chosen": -1.7571964263916016, "logits/rejected": -0.9470900893211365, "loss": 1.3635, "step": 466 }, { "beta_dpo/beta_used": 0.22183255851268768, "beta_dpo/beta_used_raw": 0.22183255851268768, "beta_dpo/gap_mean": 26.462804794311523, "beta_dpo/gap_std": 25.145883560180664, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6857562408223201, "grad_norm": 96.3606185913086, "learning_rate": 1.3741809409947729e-07, "logits/chosen": -1.2724277973175049, "logits/rejected": -0.8243029117584229, "loss": 0.7054, "step": 467 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17463745176792145, "beta_dpo/gap_mean": 25.78664779663086, "beta_dpo/gap_std": 25.59890365600586, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6872246696035242, "grad_norm": 0.6921370625495911, "learning_rate": 1.362737437810114e-07, "logits/chosen": -1.6112632751464844, "logits/rejected": -1.4802091121673584, "loss": 1.3656, "step": 468 }, { "beta_dpo/beta_used": 0.047440025955438614, "beta_dpo/beta_used_raw": -0.01731177791953087, "beta_dpo/gap_mean": 25.817581176757812, "beta_dpo/gap_std": 25.734947204589844, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6886930983847284, "grad_norm": 25.594966888427734, "learning_rate": 1.351323902551631e-07, "logits/chosen": -1.0756289958953857, "logits/rejected": -1.1045548915863037, "loss": 1.0234, "step": 469 }, { "beta_dpo/beta_used": 0.021085133776068687, "beta_dpo/beta_used_raw": -0.1629122942686081, "beta_dpo/gap_mean": 25.85068702697754, "beta_dpo/gap_std": 25.390472412109375, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6901615271659325, "grad_norm": 9.844660758972168, "learning_rate": 1.339940635976592e-07, "logits/chosen": -1.3108842372894287, "logits/rejected": -0.6206603050231934, "loss": 1.0523, "step": 470 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1044345274567604, "beta_dpo/gap_mean": 25.482086181640625, "beta_dpo/gap_std": 24.27267837524414, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6916299559471366, "grad_norm": 0.6610987186431885, "learning_rate": 1.3285879380446563e-07, "logits/chosen": -1.423391342163086, "logits/rejected": -1.448016881942749, "loss": 1.3646, "step": 471 }, { "beta_dpo/beta_used": 0.2060602307319641, "beta_dpo/beta_used_raw": 0.20053143799304962, "beta_dpo/gap_mean": 25.83689308166504, "beta_dpo/gap_std": 24.579055786132812, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6930983847283406, "grad_norm": 42.54352951049805, "learning_rate": 1.317266107909975e-07, "logits/chosen": -1.8569872379302979, "logits/rejected": -0.8077524900436401, "loss": 0.7033, "step": 472 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18200881779193878, "beta_dpo/gap_mean": 25.466594696044922, "beta_dpo/gap_std": 24.39486312866211, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6945668135095447, "grad_norm": 0.6892096996307373, "learning_rate": 1.3059754439133002e-07, "logits/chosen": -1.5234148502349854, "logits/rejected": -1.2367827892303467, "loss": 1.366, "step": 473 }, { "beta_dpo/beta_used": 0.2482704520225525, "beta_dpo/beta_used_raw": 0.041917115449905396, "beta_dpo/gap_mean": 24.003257751464844, "beta_dpo/gap_std": 24.40390396118164, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6960352422907489, "grad_norm": 64.44613647460938, "learning_rate": 1.2947162435741277e-07, "logits/chosen": -1.6847041845321655, "logits/rejected": -1.9953196048736572, "loss": 0.8605, "step": 474 }, { "beta_dpo/beta_used": 0.08866570144891739, "beta_dpo/beta_used_raw": -0.021716512739658356, "beta_dpo/gap_mean": 23.967273712158203, "beta_dpo/gap_std": 25.094600677490234, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.697503671071953, "grad_norm": 64.81230926513672, "learning_rate": 1.2834888035828596e-07, "logits/chosen": -2.347712993621826, "logits/rejected": -2.292513847351074, "loss": 1.0405, "step": 475 }, { "beta_dpo/beta_used": 0.09056875109672546, "beta_dpo/beta_used_raw": 0.06551661342382431, "beta_dpo/gap_mean": 24.103946685791016, "beta_dpo/gap_std": 25.448429107666016, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6989720998531571, "grad_norm": 40.68457794189453, "learning_rate": 1.2722934197929802e-07, "logits/chosen": -3.05594539642334, "logits/rejected": -2.536451816558838, "loss": 1.0054, "step": 476 }, { "beta_dpo/beta_used": 0.1769174486398697, "beta_dpo/beta_used_raw": 0.12678027153015137, "beta_dpo/gap_mean": 24.300308227539062, "beta_dpo/gap_std": 24.919261932373047, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7004405286343612, "grad_norm": 85.09745025634766, "learning_rate": 1.2611303872132631e-07, "logits/chosen": -3.4631779193878174, "logits/rejected": -2.302935838699341, "loss": 1.0475, "step": 477 }, { "beta_dpo/beta_used": 0.07639861106872559, "beta_dpo/beta_used_raw": 0.06331229954957962, "beta_dpo/gap_mean": 24.570049285888672, "beta_dpo/gap_std": 24.76816177368164, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7019089574155654, "grad_norm": 9.503323554992676, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -1.6086479425430298, "logits/rejected": -1.4580334424972534, "loss": 0.7357, "step": 478 }, { "beta_dpo/beta_used": 0.027390312403440475, "beta_dpo/beta_used_raw": -0.0354989692568779, "beta_dpo/gap_mean": 23.97896385192871, "beta_dpo/gap_std": 24.565216064453125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7033773861967695, "grad_norm": 17.492374420166016, "learning_rate": 1.2389025514492456e-07, "logits/chosen": -1.1278550624847412, "logits/rejected": -1.251664400100708, "loss": 1.0451, "step": 479 }, { "beta_dpo/beta_used": 0.14048559963703156, "beta_dpo/beta_used_raw": 0.14048559963703156, "beta_dpo/gap_mean": 24.32904624938965, "beta_dpo/gap_std": 24.614551544189453, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7048458149779736, "grad_norm": 37.384212493896484, "learning_rate": 1.227838333989088e-07, "logits/chosen": -0.9816099405288696, "logits/rejected": -0.38579627871513367, "loss": 0.5926, "step": 480 }, { "beta_dpo/beta_used": 0.2901894152164459, "beta_dpo/beta_used_raw": 0.24488958716392517, "beta_dpo/gap_mean": 24.815683364868164, "beta_dpo/gap_std": 24.725587844848633, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7063142437591777, "grad_norm": 69.83039855957031, "learning_rate": 1.2168076391719489e-07, "logits/chosen": -1.7151495218276978, "logits/rejected": -1.237488031387329, "loss": 0.754, "step": 481 }, { "beta_dpo/beta_used": 0.2500970959663391, "beta_dpo/beta_used_raw": 0.2500970959663391, "beta_dpo/gap_mean": 24.80794334411621, "beta_dpo/gap_std": 24.68585777282715, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7077826725403817, "grad_norm": 72.19092559814453, "learning_rate": 1.2058107576668938e-07, "logits/chosen": -1.7398509979248047, "logits/rejected": -1.5857124328613281, "loss": 0.3995, "step": 482 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.05012429505586624, "beta_dpo/gap_mean": 24.991384506225586, "beta_dpo/gap_std": 24.81667137145996, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7092511013215859, "grad_norm": 0.6664114594459534, "learning_rate": 1.194847979251979e-07, "logits/chosen": -2.6922659873962402, "logits/rejected": -2.0293326377868652, "loss": 1.3641, "step": 483 }, { "beta_dpo/beta_used": 0.05779772624373436, "beta_dpo/beta_used_raw": -0.043017227202653885, "beta_dpo/gap_mean": 25.618431091308594, "beta_dpo/gap_std": 24.99521255493164, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.71071953010279, "grad_norm": 24.220054626464844, "learning_rate": 1.1839195928066101e-07, "logits/chosen": -2.480839729309082, "logits/rejected": -1.7270921468734741, "loss": 1.0274, "step": 484 }, { "beta_dpo/beta_used": 0.4556177258491516, "beta_dpo/beta_used_raw": 0.4556177258491516, "beta_dpo/gap_mean": 26.87214469909668, "beta_dpo/gap_std": 25.37643051147461, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7121879588839941, "grad_norm": 204.2612762451172, "learning_rate": 1.1730258863039347e-07, "logits/chosen": -1.7958972454071045, "logits/rejected": -1.4648010730743408, "loss": 0.5049, "step": 485 }, { "beta_dpo/beta_used": 0.10207835584878922, "beta_dpo/beta_used_raw": -0.0033923983573913574, "beta_dpo/gap_mean": 27.25442886352539, "beta_dpo/gap_std": 25.22077178955078, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7136563876651982, "grad_norm": 54.36297607421875, "learning_rate": 1.1621671468032493e-07, "logits/chosen": -1.51706862449646, "logits/rejected": -0.514898419380188, "loss": 1.0155, "step": 486 }, { "beta_dpo/beta_used": 0.04191367328166962, "beta_dpo/beta_used_raw": -0.09983708709478378, "beta_dpo/gap_mean": 27.748754501342773, "beta_dpo/gap_std": 25.296875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7151248164464024, "grad_norm": 15.441408157348633, "learning_rate": 1.1513436604424378e-07, "logits/chosen": -2.518860340118408, "logits/rejected": -2.168724536895752, "loss": 0.8498, "step": 487 }, { "beta_dpo/beta_used": 0.04545029625296593, "beta_dpo/beta_used_raw": -0.12155961990356445, "beta_dpo/gap_mean": 27.471683502197266, "beta_dpo/gap_std": 25.476192474365234, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7165932452276065, "grad_norm": 15.30657958984375, "learning_rate": 1.1405557124304335e-07, "logits/chosen": -1.4583563804626465, "logits/rejected": -1.155217170715332, "loss": 0.8823, "step": 488 }, { "beta_dpo/beta_used": 0.06218419224023819, "beta_dpo/beta_used_raw": -0.1422080397605896, "beta_dpo/gap_mean": 26.843271255493164, "beta_dpo/gap_std": 25.29082489013672, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7180616740088106, "grad_norm": 16.644136428833008, "learning_rate": 1.1298035870396985e-07, "logits/chosen": -1.4313712120056152, "logits/rejected": -0.8691326379776001, "loss": 0.8221, "step": 489 }, { "beta_dpo/beta_used": 0.20237208902835846, "beta_dpo/beta_used_raw": 0.18236497044563293, "beta_dpo/gap_mean": 26.245960235595703, "beta_dpo/gap_std": 25.95622444152832, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7195301027900147, "grad_norm": 53.396732330322266, "learning_rate": 1.1190875675987355e-07, "logits/chosen": -3.046250104904175, "logits/rejected": -3.4327287673950195, "loss": 0.7618, "step": 490 }, { "beta_dpo/beta_used": 0.1410069763660431, "beta_dpo/beta_used_raw": 0.11552975326776505, "beta_dpo/gap_mean": 26.14499855041504, "beta_dpo/gap_std": 26.140344619750977, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7209985315712188, "grad_norm": 84.63758087158203, "learning_rate": 1.1084079364846241e-07, "logits/chosen": -2.0656046867370605, "logits/rejected": -1.3837230205535889, "loss": 1.1114, "step": 491 }, { "beta_dpo/beta_used": 0.14474520087242126, "beta_dpo/beta_used_raw": 0.14474520087242126, "beta_dpo/gap_mean": 25.744234085083008, "beta_dpo/gap_std": 26.764545440673828, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7224669603524229, "grad_norm": 48.452571868896484, "learning_rate": 1.097764975115576e-07, "logits/chosen": -2.1017768383026123, "logits/rejected": -1.7172629833221436, "loss": 0.8304, "step": 492 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0884954184293747, "beta_dpo/gap_mean": 25.308929443359375, "beta_dpo/gap_std": 25.760934829711914, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.723935389133627, "grad_norm": 0.626876950263977, "learning_rate": 1.0871589639435203e-07, "logits/chosen": -1.3586655855178833, "logits/rejected": -1.147123098373413, "loss": 1.3647, "step": 493 }, { "beta_dpo/beta_used": 0.32471179962158203, "beta_dpo/beta_used_raw": 0.3110436797142029, "beta_dpo/gap_mean": 25.867393493652344, "beta_dpo/gap_std": 25.818172454833984, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7254038179148311, "grad_norm": 94.02973175048828, "learning_rate": 1.0765901824467166e-07, "logits/chosen": -0.9558006525039673, "logits/rejected": -0.8172507286071777, "loss": 0.9824, "step": 494 }, { "beta_dpo/beta_used": 0.07395792752504349, "beta_dpo/beta_used_raw": -0.03144054859876633, "beta_dpo/gap_mean": 25.145401000976562, "beta_dpo/gap_std": 26.142412185668945, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.7268722466960352, "grad_norm": 42.371150970458984, "learning_rate": 1.0660589091223854e-07, "logits/chosen": -2.23215651512146, "logits/rejected": -1.574699878692627, "loss": 0.9694, "step": 495 }, { "beta_dpo/beta_used": 0.2092335820198059, "beta_dpo/beta_used_raw": 0.2092335820198059, "beta_dpo/gap_mean": 24.547943115234375, "beta_dpo/gap_std": 25.825014114379883, "beta_dpo/mask_keep_frac": 0.4375, "epoch": 0.7283406754772394, "grad_norm": 45.06632995605469, "learning_rate": 1.0555654214793722e-07, "logits/chosen": -2.4776511192321777, "logits/rejected": -2.3953630924224854, "loss": 0.2468, "step": 496 }, { "beta_dpo/beta_used": 0.021729838103055954, "beta_dpo/beta_used_raw": -0.23946470022201538, "beta_dpo/gap_mean": 24.095136642456055, "beta_dpo/gap_std": 25.3642578125, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7298091042584435, "grad_norm": 10.57982063293457, "learning_rate": 1.0451099960308374e-07, "logits/chosen": -1.4335005283355713, "logits/rejected": -0.9387121200561523, "loss": 1.0408, "step": 497 }, { "beta_dpo/beta_used": 0.09472735226154327, "beta_dpo/beta_used_raw": 0.03154543787240982, "beta_dpo/gap_mean": 23.83123779296875, "beta_dpo/gap_std": 25.33839225769043, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7312775330396476, "grad_norm": 36.32901382446289, "learning_rate": 1.0346929082869641e-07, "logits/chosen": -1.6869301795959473, "logits/rejected": -0.898349940776825, "loss": 0.9679, "step": 498 }, { "beta_dpo/beta_used": 0.16567786037921906, "beta_dpo/beta_used_raw": 0.12113827466964722, "beta_dpo/gap_mean": 24.075727462768555, "beta_dpo/gap_std": 25.831310272216797, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7327459618208517, "grad_norm": 64.64530181884766, "learning_rate": 1.0243144327477013e-07, "logits/chosen": -1.4007351398468018, "logits/rejected": -1.5927693843841553, "loss": 1.0755, "step": 499 }, { "beta_dpo/beta_used": 0.03878691792488098, "beta_dpo/beta_used_raw": -0.2836288809776306, "beta_dpo/gap_mean": 24.272212982177734, "beta_dpo/gap_std": 26.225910186767578, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7342143906020558, "grad_norm": 19.999631881713867, "learning_rate": 1.0139748428955333e-07, "logits/chosen": -2.7180049419403076, "logits/rejected": -2.44711971282959, "loss": 0.9705, "step": 500 }, { "epoch": 0.7342143906020558, "eval_beta_dpo/beta_used": 0.03219933807849884, "eval_beta_dpo/beta_used_raw": -0.17833033204078674, "eval_beta_dpo/gap_mean": 24.03917694091797, "eval_beta_dpo/gap_std": 26.52628517150879, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -1.7312169075012207, "eval_logits/rejected": -1.4078279733657837, "eval_loss": 0.6340479254722595, "eval_runtime": 44.171, "eval_samples_per_second": 52.953, "eval_steps_per_second": 1.675, "step": 500 }, { "beta_dpo/beta_used": 0.10995148867368698, "beta_dpo/beta_used_raw": 0.013247430324554443, "beta_dpo/gap_mean": 24.187057495117188, "beta_dpo/gap_std": 27.05206298828125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.73568281938326, "grad_norm": 90.26338195800781, "learning_rate": 1.0036744111882672e-07, "logits/chosen": -2.1896305084228516, "logits/rejected": -1.6922519207000732, "loss": 1.1053, "step": 501 }, { "beta_dpo/beta_used": 0.11361445486545563, "beta_dpo/beta_used_raw": -0.03644367307424545, "beta_dpo/gap_mean": 23.625625610351562, "beta_dpo/gap_std": 26.485179901123047, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.737151248164464, "grad_norm": 44.8062858581543, "learning_rate": 9.934134090518592e-08, "logits/chosen": -1.3716320991516113, "logits/rejected": -0.4437238276004791, "loss": 0.9276, "step": 502 }, { "beta_dpo/beta_used": 0.12637387216091156, "beta_dpo/beta_used_raw": 0.05120537430047989, "beta_dpo/gap_mean": 23.886451721191406, "beta_dpo/gap_std": 26.337364196777344, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7386196769456681, "grad_norm": 52.749027252197266, "learning_rate": 9.831921068732571e-08, "logits/chosen": -1.1856811046600342, "logits/rejected": -0.5425550937652588, "loss": 0.9624, "step": 503 }, { "beta_dpo/beta_used": 0.14221353828907013, "beta_dpo/beta_used_raw": 0.14221353828907013, "beta_dpo/gap_mean": 24.439533233642578, "beta_dpo/gap_std": 26.03683853149414, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7400881057268722, "grad_norm": 22.412988662719727, "learning_rate": 9.730107739932805e-08, "logits/chosen": -2.424509048461914, "logits/rejected": -1.730525255203247, "loss": 0.4629, "step": 504 }, { "beta_dpo/beta_used": 0.01897226832807064, "beta_dpo/beta_used_raw": -0.029433485120534897, "beta_dpo/gap_mean": 24.782123565673828, "beta_dpo/gap_std": 25.870738983154297, "beta_dpo/mask_keep_frac": 0.5, "epoch": 0.7415565345080763, "grad_norm": 16.965612411499023, "learning_rate": 9.628696786995188e-08, "logits/chosen": -2.425438404083252, "logits/rejected": -1.7937641143798828, "loss": 1.1082, "step": 505 }, { "beta_dpo/beta_used": 0.061722710728645325, "beta_dpo/beta_used_raw": -0.059955500066280365, "beta_dpo/gap_mean": 24.745590209960938, "beta_dpo/gap_std": 25.26523780822754, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7430249632892805, "grad_norm": 33.604942321777344, "learning_rate": 9.527690882192635e-08, "logits/chosen": -2.8098220825195312, "logits/rejected": -2.106682300567627, "loss": 0.8848, "step": 506 }, { "beta_dpo/beta_used": 0.19439199566841125, "beta_dpo/beta_used_raw": 0.043975263833999634, "beta_dpo/gap_mean": 24.21612548828125, "beta_dpo/gap_std": 25.324092864990234, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7444933920704846, "grad_norm": 68.09627532958984, "learning_rate": 9.427092687124691e-08, "logits/chosen": -2.1560895442962646, "logits/rejected": -1.386178731918335, "loss": 0.7663, "step": 507 }, { "beta_dpo/beta_used": 0.25187134742736816, "beta_dpo/beta_used_raw": 0.14820647239685059, "beta_dpo/gap_mean": 24.648590087890625, "beta_dpo/gap_std": 26.04003143310547, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.7459618208516887, "grad_norm": 182.68023681640625, "learning_rate": 9.326904852647344e-08, "logits/chosen": -1.7161022424697876, "logits/rejected": -1.4046952724456787, "loss": 2.9516, "step": 508 }, { "beta_dpo/beta_used": 0.09821511805057526, "beta_dpo/beta_used_raw": -0.038649074733257294, "beta_dpo/gap_mean": 24.13115119934082, "beta_dpo/gap_std": 26.21490478515625, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7474302496328928, "grad_norm": 18.679121017456055, "learning_rate": 9.227130018803195e-08, "logits/chosen": -1.3101774454116821, "logits/rejected": -0.8527284264564514, "loss": 0.8712, "step": 509 }, { "beta_dpo/beta_used": 0.30942094326019287, "beta_dpo/beta_used_raw": 0.30942094326019287, "beta_dpo/gap_mean": 24.463417053222656, "beta_dpo/gap_std": 26.568939208984375, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.748898678414097, "grad_norm": 79.88699340820312, "learning_rate": 9.127770814751932e-08, "logits/chosen": -2.267930030822754, "logits/rejected": -1.8385182619094849, "loss": 0.6115, "step": 510 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04102417826652527, "beta_dpo/gap_mean": 24.398059844970703, "beta_dpo/gap_std": 26.453224182128906, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.750367107195301, "grad_norm": 0.6812777519226074, "learning_rate": 9.028829858700973e-08, "logits/chosen": -2.034882068634033, "logits/rejected": -1.846768856048584, "loss": 1.3643, "step": 511 }, { "beta_dpo/beta_used": 0.07764732837677002, "beta_dpo/beta_used_raw": 0.03792363032698631, "beta_dpo/gap_mean": 24.210956573486328, "beta_dpo/gap_std": 26.983474731445312, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7518355359765051, "grad_norm": 31.43082046508789, "learning_rate": 8.930309757836516e-08, "logits/chosen": -2.611680030822754, "logits/rejected": -2.0863518714904785, "loss": 0.9018, "step": 512 }, { "beta_dpo/beta_used": 0.1845654845237732, "beta_dpo/beta_used_raw": 0.03322139382362366, "beta_dpo/gap_mean": 24.388751983642578, "beta_dpo/gap_std": 26.420513153076172, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7533039647577092, "grad_norm": 24.38973617553711, "learning_rate": 8.832213108254863e-08, "logits/chosen": -2.388493537902832, "logits/rejected": -2.022644281387329, "loss": 0.8024, "step": 513 }, { "beta_dpo/beta_used": 0.1264495700597763, "beta_dpo/beta_used_raw": 0.06263618916273117, "beta_dpo/gap_mean": 23.975135803222656, "beta_dpo/gap_std": 26.291915893554688, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7547723935389133, "grad_norm": 78.80061340332031, "learning_rate": 8.734542494893954e-08, "logits/chosen": -2.2603392601013184, "logits/rejected": -1.7184593677520752, "loss": 1.4285, "step": 514 }, { "beta_dpo/beta_used": 0.041680097579956055, "beta_dpo/beta_used_raw": -0.0024248547852039337, "beta_dpo/gap_mean": 23.374832153320312, "beta_dpo/gap_std": 26.226722717285156, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7562408223201175, "grad_norm": 18.761693954467773, "learning_rate": 8.637300491465272e-08, "logits/chosen": -1.198162317276001, "logits/rejected": -1.1071486473083496, "loss": 0.9247, "step": 515 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16223004460334778, "beta_dpo/gap_mean": 23.667404174804688, "beta_dpo/gap_std": 27.000638961791992, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7577092511013216, "grad_norm": 0.6352587938308716, "learning_rate": 8.540489660386064e-08, "logits/chosen": -2.1877317428588867, "logits/rejected": -1.726811170578003, "loss": 1.3689, "step": 516 }, { "beta_dpo/beta_used": 0.19271814823150635, "beta_dpo/beta_used_raw": 0.19271814823150635, "beta_dpo/gap_mean": 24.49957847595215, "beta_dpo/gap_std": 26.765764236450195, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7591776798825257, "grad_norm": 37.68376922607422, "learning_rate": 8.444112552711752e-08, "logits/chosen": -1.6701611280441284, "logits/rejected": -1.4171370267868042, "loss": 0.6856, "step": 517 }, { "beta_dpo/beta_used": 0.0481136217713356, "beta_dpo/beta_used_raw": -0.09706801176071167, "beta_dpo/gap_mean": 23.66852569580078, "beta_dpo/gap_std": 26.297685623168945, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7606461086637298, "grad_norm": 23.860557556152344, "learning_rate": 8.348171708068747e-08, "logits/chosen": -1.3623682260513306, "logits/rejected": -1.3445403575897217, "loss": 1.0054, "step": 518 }, { "beta_dpo/beta_used": 0.08863373100757599, "beta_dpo/beta_used_raw": 0.08863373100757599, "beta_dpo/gap_mean": 24.184040069580078, "beta_dpo/gap_std": 25.983379364013672, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.762114537444934, "grad_norm": 24.790510177612305, "learning_rate": 8.25266965458755e-08, "logits/chosen": -2.043759822845459, "logits/rejected": -1.825181007385254, "loss": 0.5269, "step": 519 }, { "beta_dpo/beta_used": 0.194649800658226, "beta_dpo/beta_used_raw": 0.194649800658226, "beta_dpo/gap_mean": 24.660749435424805, "beta_dpo/gap_std": 25.033594131469727, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7635829662261381, "grad_norm": 31.6447696685791, "learning_rate": 8.15760890883607e-08, "logits/chosen": -1.6810235977172852, "logits/rejected": -1.055724024772644, "loss": 0.3677, "step": 520 }, { "beta_dpo/beta_used": 0.1723722517490387, "beta_dpo/beta_used_raw": 0.14304295182228088, "beta_dpo/gap_mean": 24.762470245361328, "beta_dpo/gap_std": 24.43991470336914, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7650513950073421, "grad_norm": 50.71248245239258, "learning_rate": 8.062991975753378e-08, "logits/chosen": -1.3611412048339844, "logits/rejected": -1.1393729448318481, "loss": 0.8888, "step": 521 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20311886072158813, "beta_dpo/gap_mean": 24.528409957885742, "beta_dpo/gap_std": 24.356666564941406, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7665198237885462, "grad_norm": 0.6016008853912354, "learning_rate": 7.968821348583643e-08, "logits/chosen": -1.0033719539642334, "logits/rejected": -1.315284013748169, "loss": 1.3679, "step": 522 }, { "beta_dpo/beta_used": 0.03243253007531166, "beta_dpo/beta_used_raw": 0.017287597060203552, "beta_dpo/gap_mean": 23.78322982788086, "beta_dpo/gap_std": 24.862255096435547, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7679882525697503, "grad_norm": 17.586872100830078, "learning_rate": 7.875099508810484e-08, "logits/chosen": -2.4233005046844482, "logits/rejected": -0.5649760365486145, "loss": 1.0367, "step": 523 }, { "beta_dpo/beta_used": 0.09405438601970673, "beta_dpo/beta_used_raw": -0.04172598570585251, "beta_dpo/gap_mean": 23.840919494628906, "beta_dpo/gap_std": 24.95424461364746, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7694566813509545, "grad_norm": 44.90016555786133, "learning_rate": 7.781828926091535e-08, "logits/chosen": -1.8031442165374756, "logits/rejected": -1.5159375667572021, "loss": 0.9214, "step": 524 }, { "beta_dpo/beta_used": 0.20258314907550812, "beta_dpo/beta_used_raw": 0.056729406118392944, "beta_dpo/gap_mean": 24.42901611328125, "beta_dpo/gap_std": 25.306596755981445, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7709251101321586, "grad_norm": 90.99578857421875, "learning_rate": 7.689012058193384e-08, "logits/chosen": -2.179680824279785, "logits/rejected": -1.9391980171203613, "loss": 0.8666, "step": 525 }, { "beta_dpo/beta_used": 0.3182232677936554, "beta_dpo/beta_used_raw": 0.3182232677936554, "beta_dpo/gap_mean": 24.774818420410156, "beta_dpo/gap_std": 25.899059295654297, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7723935389133627, "grad_norm": 102.18709564208984, "learning_rate": 7.596651350926836e-08, "logits/chosen": -1.1254323720932007, "logits/rejected": -0.37631484866142273, "loss": 0.5365, "step": 526 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0945284515619278, "beta_dpo/gap_mean": 25.259456634521484, "beta_dpo/gap_std": 25.716175079345703, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7738619676945668, "grad_norm": 0.6562523245811462, "learning_rate": 7.504749238082414e-08, "logits/chosen": -2.5148041248321533, "logits/rejected": -1.8032597303390503, "loss": 1.3654, "step": 527 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04346314072608948, "beta_dpo/gap_mean": 25.307907104492188, "beta_dpo/gap_std": 25.934850692749023, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.775330396475771, "grad_norm": 0.6787688732147217, "learning_rate": 7.413308141366254e-08, "logits/chosen": -2.6292572021484375, "logits/rejected": -2.262312650680542, "loss": 1.3635, "step": 528 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.24182364344596863, "beta_dpo/gap_mean": 25.056306838989258, "beta_dpo/gap_std": 25.85653305053711, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7767988252569751, "grad_norm": 0.6740339398384094, "learning_rate": 7.322330470336313e-08, "logits/chosen": -1.7039151191711426, "logits/rejected": -1.4595433473587036, "loss": 1.3671, "step": 529 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14634296298027039, "beta_dpo/gap_mean": 24.878009796142578, "beta_dpo/gap_std": 25.102537155151367, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7782672540381792, "grad_norm": 0.6975300312042236, "learning_rate": 7.231818622338822e-08, "logits/chosen": -1.078080177307129, "logits/rejected": -0.6516724228858948, "loss": 1.3653, "step": 530 }, { "beta_dpo/beta_used": 0.08173258602619171, "beta_dpo/beta_used_raw": 0.08126630634069443, "beta_dpo/gap_mean": 25.204334259033203, "beta_dpo/gap_std": 24.35788345336914, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.7797356828193832, "grad_norm": 42.624412536621094, "learning_rate": 7.141774982445147e-08, "logits/chosen": -2.033020496368408, "logits/rejected": -1.5002036094665527, "loss": 1.1236, "step": 531 }, { "beta_dpo/beta_used": 0.3339642286300659, "beta_dpo/beta_used_raw": 0.3339642286300659, "beta_dpo/gap_mean": 26.063257217407227, "beta_dpo/gap_std": 24.355146408081055, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7812041116005873, "grad_norm": 191.6692352294922, "learning_rate": 7.052201923388953e-08, "logits/chosen": -0.7689719796180725, "logits/rejected": -0.2834465205669403, "loss": 0.6366, "step": 532 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2281951606273651, "beta_dpo/gap_mean": 25.080419540405273, "beta_dpo/gap_std": 24.1630916595459, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7826725403817915, "grad_norm": 0.6387060284614563, "learning_rate": 6.963101805503646e-08, "logits/chosen": -2.5465145111083984, "logits/rejected": -2.6926231384277344, "loss": 1.3668, "step": 533 }, { "beta_dpo/beta_used": 0.13291440904140472, "beta_dpo/beta_used_raw": 0.13291440904140472, "beta_dpo/gap_mean": 24.893821716308594, "beta_dpo/gap_std": 24.348995208740234, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7841409691629956, "grad_norm": 64.63507080078125, "learning_rate": 6.874476976660184e-08, "logits/chosen": -1.547204852104187, "logits/rejected": -1.0614492893218994, "loss": 0.7573, "step": 534 }, { "beta_dpo/beta_used": 0.06851141154766083, "beta_dpo/beta_used_raw": 0.06851141154766083, "beta_dpo/gap_mean": 25.35269546508789, "beta_dpo/gap_std": 24.81476593017578, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7856093979441997, "grad_norm": 24.18308448791504, "learning_rate": 6.786329772205246e-08, "logits/chosen": -1.1582674980163574, "logits/rejected": -1.0309771299362183, "loss": 0.9132, "step": 535 }, { "beta_dpo/beta_used": 0.1708642691373825, "beta_dpo/beta_used_raw": -0.009656161069869995, "beta_dpo/gap_mean": 25.8299503326416, "beta_dpo/gap_std": 24.747276306152344, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7870778267254038, "grad_norm": 56.29839324951172, "learning_rate": 6.698662514899638e-08, "logits/chosen": -2.369691848754883, "logits/rejected": -1.7605574131011963, "loss": 0.818, "step": 536 }, { "beta_dpo/beta_used": 0.13988648355007172, "beta_dpo/beta_used_raw": 0.12796461582183838, "beta_dpo/gap_mean": 26.769397735595703, "beta_dpo/gap_std": 24.93299674987793, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.788546255506608, "grad_norm": 5.829554557800293, "learning_rate": 6.611477514857114e-08, "logits/chosen": -1.4654922485351562, "logits/rejected": -1.1734015941619873, "loss": 0.6961, "step": 537 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.29702022671699524, "beta_dpo/gap_mean": 26.403671264648438, "beta_dpo/gap_std": 24.248538970947266, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.7900146842878121, "grad_norm": 0.6783150434494019, "learning_rate": 6.524777069483525e-08, "logits/chosen": -2.1116223335266113, "logits/rejected": -1.5885636806488037, "loss": 1.3666, "step": 538 }, { "beta_dpo/beta_used": 0.15629830956459045, "beta_dpo/beta_used_raw": 0.15629830956459045, "beta_dpo/gap_mean": 26.40357780456543, "beta_dpo/gap_std": 24.325096130371094, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7914831130690162, "grad_norm": 66.54961395263672, "learning_rate": 6.438563463416221e-08, "logits/chosen": -1.9622362852096558, "logits/rejected": -1.5275343656539917, "loss": 0.5391, "step": 539 }, { "beta_dpo/beta_used": 0.10224727541208267, "beta_dpo/beta_used_raw": 0.10224727541208267, "beta_dpo/gap_mean": 26.399925231933594, "beta_dpo/gap_std": 24.970712661743164, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7929515418502202, "grad_norm": 25.27249526977539, "learning_rate": 6.352838968463919e-08, "logits/chosen": -1.668944001197815, "logits/rejected": -1.3243017196655273, "loss": 0.4662, "step": 540 }, { "beta_dpo/beta_used": 0.00875120796263218, "beta_dpo/beta_used_raw": -0.10428391396999359, "beta_dpo/gap_mean": 26.720138549804688, "beta_dpo/gap_std": 25.313186645507812, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7944199706314243, "grad_norm": 6.300972938537598, "learning_rate": 6.267605843546767e-08, "logits/chosen": -1.533596396446228, "logits/rejected": -0.9114401340484619, "loss": 1.1952, "step": 541 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12573042511940002, "beta_dpo/gap_mean": 26.617843627929688, "beta_dpo/gap_std": 25.30525779724121, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7958883994126285, "grad_norm": 0.7291933298110962, "learning_rate": 6.182866334636888e-08, "logits/chosen": -2.6588997840881348, "logits/rejected": -2.492736339569092, "loss": 1.3627, "step": 542 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2791219651699066, "beta_dpo/gap_mean": 25.565059661865234, "beta_dpo/gap_std": 25.342849731445312, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.7973568281938326, "grad_norm": 0.7055125832557678, "learning_rate": 6.098622674699147e-08, "logits/chosen": -1.1182262897491455, "logits/rejected": -0.9715873003005981, "loss": 1.3674, "step": 543 }, { "beta_dpo/beta_used": 0.0719255730509758, "beta_dpo/beta_used_raw": -0.025115452706813812, "beta_dpo/gap_mean": 25.20094108581543, "beta_dpo/gap_std": 25.864303588867188, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7988252569750367, "grad_norm": 28.51535415649414, "learning_rate": 6.01487708363232e-08, "logits/chosen": -1.7860791683197021, "logits/rejected": -1.5388264656066895, "loss": 0.8885, "step": 544 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.042080581188201904, "beta_dpo/gap_mean": 25.147363662719727, "beta_dpo/gap_std": 25.81969451904297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8002936857562408, "grad_norm": 0.7087168097496033, "learning_rate": 5.9316317682106294e-08, "logits/chosen": -1.6923884153366089, "logits/rejected": -1.492473840713501, "loss": 1.3638, "step": 545 }, { "beta_dpo/beta_used": 0.09297899156808853, "beta_dpo/beta_used_raw": 0.09297899156808853, "beta_dpo/gap_mean": 25.392616271972656, "beta_dpo/gap_std": 25.999805450439453, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.801762114537445, "grad_norm": 31.122329711914062, "learning_rate": 5.848888922025552e-08, "logits/chosen": -1.4724502563476562, "logits/rejected": -1.0641326904296875, "loss": 0.4907, "step": 546 }, { "beta_dpo/beta_used": 0.24060708284378052, "beta_dpo/beta_used_raw": 0.24060708284378052, "beta_dpo/gap_mean": 25.35540771484375, "beta_dpo/gap_std": 25.47772216796875, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8032305433186491, "grad_norm": 57.88156509399414, "learning_rate": 5.7666507254280265e-08, "logits/chosen": -1.461564302444458, "logits/rejected": -1.031999945640564, "loss": 0.4733, "step": 547 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22666704654693604, "beta_dpo/gap_mean": 24.725261688232422, "beta_dpo/gap_std": 25.350818634033203, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8046989720998532, "grad_norm": 0.6792864799499512, "learning_rate": 5.684919345471029e-08, "logits/chosen": -2.3294479846954346, "logits/rejected": -2.422576904296875, "loss": 1.3682, "step": 548 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20774686336517334, "beta_dpo/gap_mean": 24.332277297973633, "beta_dpo/gap_std": 25.893722534179688, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8061674008810573, "grad_norm": 0.6158374547958374, "learning_rate": 5.603696935852426e-08, "logits/chosen": -1.8410158157348633, "logits/rejected": -0.6334822177886963, "loss": 1.3676, "step": 549 }, { "beta_dpo/beta_used": 0.1715359389781952, "beta_dpo/beta_used_raw": 0.1715359389781952, "beta_dpo/gap_mean": 24.272951126098633, "beta_dpo/gap_std": 25.14063835144043, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8076358296622613, "grad_norm": 53.38132858276367, "learning_rate": 5.5229856368582376e-08, "logits/chosen": -0.1244838610291481, "logits/rejected": -0.11963202804327011, "loss": 0.4099, "step": 550 }, { "beta_dpo/beta_used": 0.14299984276294708, "beta_dpo/beta_used_raw": 0.07115853577852249, "beta_dpo/gap_mean": 25.380428314208984, "beta_dpo/gap_std": 24.791709899902344, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8091042584434655, "grad_norm": 56.76998519897461, "learning_rate": 5.4427875753062734e-08, "logits/chosen": -1.888406753540039, "logits/rejected": -1.1206146478652954, "loss": 0.8416, "step": 551 }, { "beta_dpo/beta_used": 0.12425128370523453, "beta_dpo/beta_used_raw": 0.07331615686416626, "beta_dpo/gap_mean": 26.72222328186035, "beta_dpo/gap_std": 25.7940731048584, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8105726872246696, "grad_norm": 81.13810729980469, "learning_rate": 5.363104864490034e-08, "logits/chosen": -2.0671606063842773, "logits/rejected": -1.740760326385498, "loss": 1.1724, "step": 552 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1840367615222931, "beta_dpo/gap_mean": 26.55316162109375, "beta_dpo/gap_std": 26.07613754272461, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8120411160058737, "grad_norm": 0.6811166405677795, "learning_rate": 5.2839396041230415e-08, "logits/chosen": -1.305177927017212, "logits/rejected": -1.4849331378936768, "loss": 1.3655, "step": 553 }, { "beta_dpo/beta_used": 0.03134859725832939, "beta_dpo/beta_used_raw": -0.07137447595596313, "beta_dpo/gap_mean": 26.297405242919922, "beta_dpo/gap_std": 26.220775604248047, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8135095447870778, "grad_norm": 16.067913055419922, "learning_rate": 5.205293880283551e-08, "logits/chosen": -1.3502602577209473, "logits/rejected": -0.5793172121047974, "loss": 1.0013, "step": 554 }, { "beta_dpo/beta_used": 0.08660194277763367, "beta_dpo/beta_used_raw": 0.030508212745189667, "beta_dpo/gap_mean": 26.256061553955078, "beta_dpo/gap_std": 26.373807907104492, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8149779735682819, "grad_norm": 36.38199234008789, "learning_rate": 5.127169765359515e-08, "logits/chosen": -1.3001048564910889, "logits/rejected": -1.1895201206207275, "loss": 0.8685, "step": 555 }, { "beta_dpo/beta_used": 0.13554587960243225, "beta_dpo/beta_used_raw": 0.13554587960243225, "beta_dpo/gap_mean": 26.637409210205078, "beta_dpo/gap_std": 26.593616485595703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8164464023494861, "grad_norm": 39.49778747558594, "learning_rate": 5.049569317994012e-08, "logits/chosen": -2.0038812160491943, "logits/rejected": -1.3363038301467896, "loss": 0.417, "step": 556 }, { "beta_dpo/beta_used": 0.13636960089206696, "beta_dpo/beta_used_raw": -0.021815553307533264, "beta_dpo/gap_mean": 26.767135620117188, "beta_dpo/gap_std": 26.989856719970703, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8179148311306902, "grad_norm": 46.596839904785156, "learning_rate": 4.9724945830310144e-08, "logits/chosen": -2.1644203662872314, "logits/rejected": -2.26560115814209, "loss": 0.7871, "step": 557 }, { "beta_dpo/beta_used": 0.25266367197036743, "beta_dpo/beta_used_raw": 0.10354121029376984, "beta_dpo/gap_mean": 27.236129760742188, "beta_dpo/gap_std": 27.027782440185547, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8193832599118943, "grad_norm": 2.480834484100342, "learning_rate": 4.8959475914614554e-08, "logits/chosen": -2.784749984741211, "logits/rejected": -2.2754716873168945, "loss": 0.6847, "step": 558 }, { "beta_dpo/beta_used": 0.1398455649614334, "beta_dpo/beta_used_raw": 0.05602721869945526, "beta_dpo/gap_mean": 27.292774200439453, "beta_dpo/gap_std": 27.778480529785156, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8208516886930984, "grad_norm": 56.5252799987793, "learning_rate": 4.8199303603697614e-08, "logits/chosen": -2.1940698623657227, "logits/rejected": -1.6307884454727173, "loss": 0.97, "step": 559 }, { "beta_dpo/beta_used": 0.16665856540203094, "beta_dpo/beta_used_raw": 0.16665856540203094, "beta_dpo/gap_mean": 27.126052856445312, "beta_dpo/gap_std": 27.71031951904297, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8223201174743024, "grad_norm": 70.78883361816406, "learning_rate": 4.7444448928806615e-08, "logits/chosen": -0.21908852458000183, "logits/rejected": -0.17472022771835327, "loss": 0.6115, "step": 560 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20892953872680664, "beta_dpo/gap_mean": 26.475391387939453, "beta_dpo/gap_std": 27.537622451782227, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8237885462555066, "grad_norm": 0.7130993604660034, "learning_rate": 4.669493178106432e-08, "logits/chosen": -1.5300698280334473, "logits/rejected": -0.9870960712432861, "loss": 1.3652, "step": 561 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2608240246772766, "beta_dpo/gap_mean": 26.278995513916016, "beta_dpo/gap_std": 27.876379013061523, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8252569750367107, "grad_norm": 0.7420074939727783, "learning_rate": 4.5950771910944596e-08, "logits/chosen": -1.4309711456298828, "logits/rejected": -1.5285253524780273, "loss": 1.3653, "step": 562 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19262418150901794, "beta_dpo/gap_mean": 25.140338897705078, "beta_dpo/gap_std": 27.699390411376953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8267254038179148, "grad_norm": 0.7049899697303772, "learning_rate": 4.521198892775202e-08, "logits/chosen": -1.4333343505859375, "logits/rejected": -0.8749819993972778, "loss": 1.3663, "step": 563 }, { "beta_dpo/beta_used": 0.11520747095346451, "beta_dpo/beta_used_raw": -0.007625162601470947, "beta_dpo/gap_mean": 24.54560089111328, "beta_dpo/gap_std": 26.745800018310547, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8281938325991189, "grad_norm": 25.564876556396484, "learning_rate": 4.447860229910544e-08, "logits/chosen": -2.1329026222229004, "logits/rejected": -1.4126560688018799, "loss": 0.7624, "step": 564 }, { "beta_dpo/beta_used": 0.3736911714076996, "beta_dpo/beta_used_raw": 0.3736911714076996, "beta_dpo/gap_mean": 25.43872833251953, "beta_dpo/gap_std": 26.266254425048828, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8296622613803231, "grad_norm": 145.1612091064453, "learning_rate": 4.375063135042445e-08, "logits/chosen": -1.8364756107330322, "logits/rejected": -0.5215842723846436, "loss": 0.6621, "step": 565 }, { "beta_dpo/beta_used": 0.17025381326675415, "beta_dpo/beta_used_raw": 0.17025381326675415, "beta_dpo/gap_mean": 25.916160583496094, "beta_dpo/gap_std": 26.456092834472656, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8311306901615272, "grad_norm": 47.683231353759766, "learning_rate": 4.3028095264420525e-08, "logits/chosen": -2.213182210922241, "logits/rejected": -1.3068116903305054, "loss": 0.4773, "step": 566 }, { "beta_dpo/beta_used": 0.0031331873033195734, "beta_dpo/beta_used_raw": -0.02852408029139042, "beta_dpo/gap_mean": 25.87657356262207, "beta_dpo/gap_std": 26.403339385986328, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8325991189427313, "grad_norm": 2.5650317668914795, "learning_rate": 4.231101308059165e-08, "logits/chosen": -1.532239556312561, "logits/rejected": -1.5073673725128174, "loss": 1.321, "step": 567 }, { "beta_dpo/beta_used": 0.2132313847541809, "beta_dpo/beta_used_raw": 0.15188802778720856, "beta_dpo/gap_mean": 26.08514404296875, "beta_dpo/gap_std": 26.061296463012695, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8340675477239354, "grad_norm": 10.647150039672852, "learning_rate": 4.1599403694720145e-08, "logits/chosen": -2.2890617847442627, "logits/rejected": -2.017902374267578, "loss": 0.6939, "step": 568 }, { "beta_dpo/beta_used": 0.11874410510063171, "beta_dpo/beta_used_raw": 0.11874410510063171, "beta_dpo/gap_mean": 25.48981475830078, "beta_dpo/gap_std": 26.66592025756836, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8355359765051396, "grad_norm": 41.747718811035156, "learning_rate": 4.089328585837512e-08, "logits/chosen": -1.2407170534133911, "logits/rejected": -0.9986321926116943, "loss": 0.5822, "step": 569 }, { "beta_dpo/beta_used": 0.025809142738580704, "beta_dpo/beta_used_raw": 0.005916915833950043, "beta_dpo/gap_mean": 25.438308715820312, "beta_dpo/gap_std": 25.799644470214844, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8370044052863436, "grad_norm": 14.593240737915039, "learning_rate": 4.019267817841834e-08, "logits/chosen": -2.2040014266967773, "logits/rejected": -1.2692387104034424, "loss": 1.0207, "step": 570 }, { "beta_dpo/beta_used": 0.023714642971754074, "beta_dpo/beta_used_raw": -0.01072479598224163, "beta_dpo/gap_mean": 26.145469665527344, "beta_dpo/gap_std": 25.934444427490234, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8384728340675477, "grad_norm": 13.779581069946289, "learning_rate": 3.9497599116513705e-08, "logits/chosen": -1.3093101978302002, "logits/rejected": -1.4198007583618164, "loss": 1.0302, "step": 571 }, { "beta_dpo/beta_used": 0.08039849251508713, "beta_dpo/beta_used_raw": 0.009870670735836029, "beta_dpo/gap_mean": 26.3587646484375, "beta_dpo/gap_std": 26.530506134033203, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8399412628487518, "grad_norm": 20.452983856201172, "learning_rate": 3.880806698864086e-08, "logits/chosen": -1.8533153533935547, "logits/rejected": -2.2176146507263184, "loss": 0.8942, "step": 572 }, { "beta_dpo/beta_used": 0.10495683550834656, "beta_dpo/beta_used_raw": -0.05813818424940109, "beta_dpo/gap_mean": 25.619884490966797, "beta_dpo/gap_std": 26.07189178466797, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8414096916299559, "grad_norm": 31.09406089782715, "learning_rate": 3.812409996461275e-08, "logits/chosen": -1.6736924648284912, "logits/rejected": -1.4833910465240479, "loss": 0.8138, "step": 573 }, { "beta_dpo/beta_used": 0.08065403997898102, "beta_dpo/beta_used_raw": 0.053314968943595886, "beta_dpo/gap_mean": 25.877761840820312, "beta_dpo/gap_std": 26.195384979248047, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8428781204111601, "grad_norm": 17.340517044067383, "learning_rate": 3.74457160675965e-08, "logits/chosen": -2.150381088256836, "logits/rejected": -1.5433233976364136, "loss": 0.8075, "step": 574 }, { "beta_dpo/beta_used": 0.10325983911752701, "beta_dpo/beta_used_raw": 0.05682985112071037, "beta_dpo/gap_mean": 26.40753173828125, "beta_dpo/gap_std": 27.02420425415039, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8443465491923642, "grad_norm": 32.89806365966797, "learning_rate": 3.677293317363864e-08, "logits/chosen": -1.5819101333618164, "logits/rejected": -0.894752562046051, "loss": 0.8303, "step": 575 }, { "beta_dpo/beta_used": 0.04864989221096039, "beta_dpo/beta_used_raw": 0.023738402873277664, "beta_dpo/gap_mean": 26.20545196533203, "beta_dpo/gap_std": 27.297000885009766, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8458149779735683, "grad_norm": 21.144390106201172, "learning_rate": 3.6105769011194224e-08, "logits/chosen": -1.6136865615844727, "logits/rejected": -1.4469764232635498, "loss": 0.9443, "step": 576 }, { "beta_dpo/beta_used": 0.05051780119538307, "beta_dpo/beta_used_raw": -0.00780254602432251, "beta_dpo/gap_mean": 26.54003143310547, "beta_dpo/gap_std": 27.425918579101562, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8472834067547724, "grad_norm": 36.830814361572266, "learning_rate": 3.5444241160659304e-08, "logits/chosen": -2.3327486515045166, "logits/rejected": -1.5140879154205322, "loss": 1.0336, "step": 577 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10638011991977692, "beta_dpo/gap_mean": 25.88504409790039, "beta_dpo/gap_std": 26.577232360839844, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8487518355359766, "grad_norm": 0.6997745633125305, "learning_rate": 3.478836705390808e-08, "logits/chosen": -0.6083793640136719, "logits/rejected": -0.7344316840171814, "loss": 1.3638, "step": 578 }, { "beta_dpo/beta_used": 0.023828348144888878, "beta_dpo/beta_used_raw": 0.023828348144888878, "beta_dpo/gap_mean": 25.424047470092773, "beta_dpo/gap_std": 26.196489334106445, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8502202643171806, "grad_norm": 11.364592552185059, "learning_rate": 3.41381639738331e-08, "logits/chosen": -2.4373326301574707, "logits/rejected": -1.5058765411376953, "loss": 0.9338, "step": 579 }, { "beta_dpo/beta_used": 0.01163013931363821, "beta_dpo/beta_used_raw": -0.044407907873392105, "beta_dpo/gap_mean": 25.5772705078125, "beta_dpo/gap_std": 25.849599838256836, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8516886930983847, "grad_norm": 7.67764949798584, "learning_rate": 3.349364905389032e-08, "logits/chosen": -1.0445411205291748, "logits/rejected": -0.7093319892883301, "loss": 1.1626, "step": 580 }, { "beta_dpo/beta_used": 0.19128121435642242, "beta_dpo/beta_used_raw": 0.11997915059328079, "beta_dpo/gap_mean": 25.443954467773438, "beta_dpo/gap_std": 25.912918090820312, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8531571218795888, "grad_norm": 99.09800720214844, "learning_rate": 3.285483927764726e-08, "logits/chosen": -1.3885164260864258, "logits/rejected": -1.6896731853485107, "loss": 0.7616, "step": 581 }, { "beta_dpo/beta_used": 0.07839217782020569, "beta_dpo/beta_used_raw": -0.11643987149000168, "beta_dpo/gap_mean": 25.567218780517578, "beta_dpo/gap_std": 26.388256072998047, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8546255506607929, "grad_norm": 27.022602081298828, "learning_rate": 3.222175147833556e-08, "logits/chosen": -1.7658624649047852, "logits/rejected": -1.426491141319275, "loss": 0.8402, "step": 582 }, { "beta_dpo/beta_used": 0.067328542470932, "beta_dpo/beta_used_raw": -0.10475502163171768, "beta_dpo/gap_mean": 25.635608673095703, "beta_dpo/gap_std": 26.159648895263672, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.856093979441997, "grad_norm": 21.52065086364746, "learning_rate": 3.159440233840763e-08, "logits/chosen": -1.7464110851287842, "logits/rejected": -1.3861937522888184, "loss": 0.8295, "step": 583 }, { "beta_dpo/beta_used": 0.30127981305122375, "beta_dpo/beta_used_raw": 0.25634637475013733, "beta_dpo/gap_mean": 25.642108917236328, "beta_dpo/gap_std": 26.18915557861328, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8575624082232012, "grad_norm": 55.7706298828125, "learning_rate": 3.0972808389096635e-08, "logits/chosen": -1.611590027809143, "logits/rejected": -1.0209101438522339, "loss": 0.7086, "step": 584 }, { "beta_dpo/beta_used": 0.029697787016630173, "beta_dpo/beta_used_raw": 0.014374672435224056, "beta_dpo/gap_mean": 25.895137786865234, "beta_dpo/gap_std": 26.302135467529297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8590308370044053, "grad_norm": 16.616464614868164, "learning_rate": 3.035698600998121e-08, "logits/chosen": -2.070708990097046, "logits/rejected": -1.8533859252929688, "loss": 0.9978, "step": 585 }, { "beta_dpo/beta_used": 0.055410776287317276, "beta_dpo/beta_used_raw": -0.021417979151010513, "beta_dpo/gap_mean": 25.659912109375, "beta_dpo/gap_std": 26.0551815032959, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8604992657856094, "grad_norm": 23.458040237426758, "learning_rate": 2.974695142855388e-08, "logits/chosen": -0.9927098751068115, "logits/rejected": -0.9853583574295044, "loss": 0.9897, "step": 586 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2464330941438675, "beta_dpo/gap_mean": 25.266849517822266, "beta_dpo/gap_std": 26.114063262939453, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8619676945668135, "grad_norm": 0.6116196513175964, "learning_rate": 2.9142720719793122e-08, "logits/chosen": -2.70188045501709, "logits/rejected": -2.299798011779785, "loss": 1.3669, "step": 587 }, { "beta_dpo/beta_used": 0.03825625404715538, "beta_dpo/beta_used_raw": -0.05128197744488716, "beta_dpo/gap_mean": 25.785541534423828, "beta_dpo/gap_std": 27.123783111572266, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8634361233480177, "grad_norm": 19.513792037963867, "learning_rate": 2.8544309805740018e-08, "logits/chosen": -1.5824187994003296, "logits/rejected": -1.2183144092559814, "loss": 0.985, "step": 588 }, { "beta_dpo/beta_used": 0.362135648727417, "beta_dpo/beta_used_raw": 0.362135648727417, "beta_dpo/gap_mean": 26.436059951782227, "beta_dpo/gap_std": 26.827016830444336, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8649045521292217, "grad_norm": 78.58889770507812, "learning_rate": 2.7951734455078786e-08, "logits/chosen": -2.444143056869507, "logits/rejected": -1.7856106758117676, "loss": 0.6102, "step": 589 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0802900567650795, "beta_dpo/gap_mean": 27.119421005249023, "beta_dpo/gap_std": 26.420612335205078, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8663729809104258, "grad_norm": 0.7165948748588562, "learning_rate": 2.736501028272095e-08, "logits/chosen": -1.8171558380126953, "logits/rejected": -1.760206937789917, "loss": 1.362, "step": 590 }, { "beta_dpo/beta_used": 0.054503440856933594, "beta_dpo/beta_used_raw": 0.054503440856933594, "beta_dpo/gap_mean": 27.428272247314453, "beta_dpo/gap_std": 25.88664436340332, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8678414096916299, "grad_norm": 13.594551086425781, "learning_rate": 2.678415274939408e-08, "logits/chosen": -1.202202558517456, "logits/rejected": -1.1097545623779297, "loss": 0.7712, "step": 591 }, { "beta_dpo/beta_used": 0.018850848078727722, "beta_dpo/beta_used_raw": -0.1262149065732956, "beta_dpo/gap_mean": 27.040130615234375, "beta_dpo/gap_std": 25.773883819580078, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.869309838472834, "grad_norm": 10.164037704467773, "learning_rate": 2.6209177161234442e-08, "logits/chosen": -1.5192279815673828, "logits/rejected": -1.4401228427886963, "loss": 1.0384, "step": 592 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22714272141456604, "beta_dpo/gap_mean": 26.155765533447266, "beta_dpo/gap_std": 26.15311622619629, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8707782672540382, "grad_norm": 0.6460676193237305, "learning_rate": 2.564009866938349e-08, "logits/chosen": -1.8813679218292236, "logits/rejected": -1.5969023704528809, "loss": 1.3662, "step": 593 }, { "beta_dpo/beta_used": 0.0679648295044899, "beta_dpo/beta_used_raw": -0.10102081298828125, "beta_dpo/gap_mean": 25.800151824951172, "beta_dpo/gap_std": 26.050155639648438, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8722466960352423, "grad_norm": 20.22568130493164, "learning_rate": 2.5076932269588708e-08, "logits/chosen": -1.7823147773742676, "logits/rejected": -1.2715544700622559, "loss": 0.8081, "step": 594 }, { "beta_dpo/beta_used": 0.006703744176775217, "beta_dpo/beta_used_raw": -0.09921251982450485, "beta_dpo/gap_mean": 25.734390258789062, "beta_dpo/gap_std": 26.45250701904297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8737151248164464, "grad_norm": 4.529580593109131, "learning_rate": 2.451969280180849e-08, "logits/chosen": -2.684058666229248, "logits/rejected": -2.5586256980895996, "loss": 1.2393, "step": 595 }, { "beta_dpo/beta_used": 0.16087494790554047, "beta_dpo/beta_used_raw": 0.10710237175226212, "beta_dpo/gap_mean": 25.589027404785156, "beta_dpo/gap_std": 26.904956817626953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8751835535976505, "grad_norm": 56.43586730957031, "learning_rate": 2.396839494982103e-08, "logits/chosen": -0.9374496936798096, "logits/rejected": -0.8124482035636902, "loss": 0.8874, "step": 596 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07549858838319778, "beta_dpo/gap_mean": 26.283626556396484, "beta_dpo/gap_std": 26.850948333740234, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8766519823788547, "grad_norm": 0.7111362814903259, "learning_rate": 2.3423053240837514e-08, "logits/chosen": -1.090895175933838, "logits/rejected": -1.0381711721420288, "loss": 1.3635, "step": 597 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04087088257074356, "beta_dpo/gap_mean": 26.07274627685547, "beta_dpo/gap_std": 26.833065032958984, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8781204111600588, "grad_norm": 0.7083949446678162, "learning_rate": 2.2883682045119062e-08, "logits/chosen": -3.575258255004883, "logits/rejected": -3.1597683429718018, "loss": 1.3634, "step": 598 }, { "beta_dpo/beta_used": 0.10630277544260025, "beta_dpo/beta_used_raw": 0.004977069795131683, "beta_dpo/gap_mean": 25.62136459350586, "beta_dpo/gap_std": 26.38665008544922, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8795888399412628, "grad_norm": 73.53407287597656, "learning_rate": 2.2350295575598367e-08, "logits/chosen": -1.3346710205078125, "logits/rejected": -1.1897668838500977, "loss": 0.971, "step": 599 }, { "beta_dpo/beta_used": 0.09986226260662079, "beta_dpo/beta_used_raw": 0.061525192111730576, "beta_dpo/gap_mean": 25.4707088470459, "beta_dpo/gap_std": 26.178421020507812, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8810572687224669, "grad_norm": 25.936363220214844, "learning_rate": 2.1822907887504932e-08, "logits/chosen": -1.6731359958648682, "logits/rejected": -1.5466392040252686, "loss": 0.8177, "step": 600 }, { "epoch": 0.8810572687224669, "eval_beta_dpo/beta_used": 0.021189022809267044, "eval_beta_dpo/beta_used_raw": -0.25365081429481506, "eval_beta_dpo/gap_mean": 25.718292236328125, "eval_beta_dpo/gap_std": 26.182870864868164, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -1.6122984886169434, "eval_logits/rejected": -1.2796200513839722, "eval_loss": 0.6505056619644165, "eval_runtime": 44.1187, "eval_samples_per_second": 53.016, "eval_steps_per_second": 1.677, "step": 600 }, { "beta_dpo/beta_used": 0.12984035909175873, "beta_dpo/beta_used_raw": 0.12984035909175873, "beta_dpo/gap_mean": 26.179027557373047, "beta_dpo/gap_std": 26.613693237304688, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.882525697503671, "grad_norm": 107.63837432861328, "learning_rate": 2.1301532877994742e-08, "logits/chosen": -1.7914152145385742, "logits/rejected": -1.9762234687805176, "loss": 1.0178, "step": 601 }, { "beta_dpo/beta_used": 0.017126336693763733, "beta_dpo/beta_used_raw": -0.1130400225520134, "beta_dpo/gap_mean": 26.409027099609375, "beta_dpo/gap_std": 26.42571449279785, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8839941262848752, "grad_norm": 8.777104377746582, "learning_rate": 2.0786184285784298e-08, "logits/chosen": -2.1035757064819336, "logits/rejected": -2.0259461402893066, "loss": 1.0727, "step": 602 }, { "beta_dpo/beta_used": 0.21063096821308136, "beta_dpo/beta_used_raw": 0.21063096821308136, "beta_dpo/gap_mean": 26.976688385009766, "beta_dpo/gap_std": 26.73578643798828, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8854625550660793, "grad_norm": 53.16716003417969, "learning_rate": 2.0276875690788204e-08, "logits/chosen": -2.0368385314941406, "logits/rejected": -1.4027612209320068, "loss": 0.431, "step": 603 }, { "beta_dpo/beta_used": 0.1964634358882904, "beta_dpo/beta_used_raw": 0.07125077396631241, "beta_dpo/gap_mean": 27.32293701171875, "beta_dpo/gap_std": 26.424095153808594, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8869309838472834, "grad_norm": 15.568916320800781, "learning_rate": 1.977362051376158e-08, "logits/chosen": -2.341930866241455, "logits/rejected": -1.854198932647705, "loss": 0.6969, "step": 604 }, { "beta_dpo/beta_used": 0.13269981741905212, "beta_dpo/beta_used_raw": 0.04611806571483612, "beta_dpo/gap_mean": 27.005504608154297, "beta_dpo/gap_std": 26.37883758544922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8883994126284875, "grad_norm": 20.861234664916992, "learning_rate": 1.9276432015946446e-08, "logits/chosen": -1.0397030115127563, "logits/rejected": -0.052525296807289124, "loss": 0.7124, "step": 605 }, { "beta_dpo/beta_used": 0.04934832826256752, "beta_dpo/beta_used_raw": 0.0479503832757473, "beta_dpo/gap_mean": 27.078205108642578, "beta_dpo/gap_std": 26.055770874023438, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8898678414096917, "grad_norm": 20.553180694580078, "learning_rate": 1.8785323298722093e-08, "logits/chosen": -1.6765646934509277, "logits/rejected": -1.3924915790557861, "loss": 0.8645, "step": 606 }, { "beta_dpo/beta_used": 0.04134753346443176, "beta_dpo/beta_used_raw": 0.018087653443217278, "beta_dpo/gap_mean": 27.655214309692383, "beta_dpo/gap_std": 26.571216583251953, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8913362701908958, "grad_norm": 16.795377731323242, "learning_rate": 1.8300307303259904e-08, "logits/chosen": -1.4865968227386475, "logits/rejected": -0.9536029696464539, "loss": 0.938, "step": 607 }, { "beta_dpo/beta_used": 0.19230753183364868, "beta_dpo/beta_used_raw": 0.19230753183364868, "beta_dpo/gap_mean": 27.888565063476562, "beta_dpo/gap_std": 26.88840103149414, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8928046989720999, "grad_norm": 60.357391357421875, "learning_rate": 1.7821396810182437e-08, "logits/chosen": -1.0982064008712769, "logits/rejected": -0.5400052666664124, "loss": 0.7477, "step": 608 }, { "beta_dpo/beta_used": 0.12406554818153381, "beta_dpo/beta_used_raw": 0.07590121030807495, "beta_dpo/gap_mean": 28.08792495727539, "beta_dpo/gap_std": 26.806245803833008, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.8942731277533039, "grad_norm": 44.63237380981445, "learning_rate": 1.7348604439226617e-08, "logits/chosen": -2.1751508712768555, "logits/rejected": -1.6474738121032715, "loss": 0.8564, "step": 609 }, { "beta_dpo/beta_used": 0.2061629593372345, "beta_dpo/beta_used_raw": 0.2061629593372345, "beta_dpo/gap_mean": 27.960865020751953, "beta_dpo/gap_std": 26.91543960571289, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.895741556534508, "grad_norm": 101.64844512939453, "learning_rate": 1.6881942648911074e-08, "logits/chosen": -1.126846194267273, "logits/rejected": -0.9453625082969666, "loss": 0.4032, "step": 610 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3135502338409424, "beta_dpo/gap_mean": 27.73170280456543, "beta_dpo/gap_std": 27.425939559936523, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8972099853157122, "grad_norm": 0.6740864515304565, "learning_rate": 1.6421423736208e-08, "logits/chosen": -1.3016668558120728, "logits/rejected": -1.5816915035247803, "loss": 1.3659, "step": 611 }, { "beta_dpo/beta_used": 0.019122015684843063, "beta_dpo/beta_used_raw": -0.04090452194213867, "beta_dpo/gap_mean": 27.90454864501953, "beta_dpo/gap_std": 28.067529678344727, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8986784140969163, "grad_norm": 11.847250938415527, "learning_rate": 1.5967059836219042e-08, "logits/chosen": -2.828847646713257, "logits/rejected": -1.5389125347137451, "loss": 1.0534, "step": 612 }, { "beta_dpo/beta_used": 0.08581092953681946, "beta_dpo/beta_used_raw": 0.08581092953681946, "beta_dpo/gap_mean": 28.143531799316406, "beta_dpo/gap_std": 27.665599822998047, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9001468428781204, "grad_norm": 32.16731643676758, "learning_rate": 1.551886292185553e-08, "logits/chosen": -0.8254266977310181, "logits/rejected": -0.2951732873916626, "loss": 0.8487, "step": 613 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0637907013297081, "beta_dpo/gap_mean": 27.91874122619629, "beta_dpo/gap_std": 26.732559204101562, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9016152716593245, "grad_norm": 0.9008828997612, "learning_rate": 1.507684480352292e-08, "logits/chosen": -1.2602735757827759, "logits/rejected": -1.2624118328094482, "loss": 1.3614, "step": 614 }, { "beta_dpo/beta_used": 0.20127537846565247, "beta_dpo/beta_used_raw": 0.09561780840158463, "beta_dpo/gap_mean": 26.90203857421875, "beta_dpo/gap_std": 26.413787841796875, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9030837004405287, "grad_norm": 35.35264205932617, "learning_rate": 1.4641017128809801e-08, "logits/chosen": -3.1033453941345215, "logits/rejected": -1.8919872045516968, "loss": 0.7225, "step": 615 }, { "beta_dpo/beta_used": 0.035515908151865005, "beta_dpo/beta_used_raw": 0.030270632356405258, "beta_dpo/gap_mean": 26.748119354248047, "beta_dpo/gap_std": 26.577213287353516, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9045521292217328, "grad_norm": 18.992765426635742, "learning_rate": 1.4211391382180637e-08, "logits/chosen": -1.7871594429016113, "logits/rejected": -1.0935783386230469, "loss": 0.9495, "step": 616 }, { "beta_dpo/beta_used": 0.1273186355829239, "beta_dpo/beta_used_raw": -0.08706134557723999, "beta_dpo/gap_mean": 25.87103843688965, "beta_dpo/gap_std": 26.988807678222656, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9060205580029369, "grad_norm": 33.350303649902344, "learning_rate": 1.378797888467345e-08, "logits/chosen": -1.6357133388519287, "logits/rejected": -0.8707866072654724, "loss": 1.0214, "step": 617 }, { "beta_dpo/beta_used": 0.28145626187324524, "beta_dpo/beta_used_raw": 0.28145626187324524, "beta_dpo/gap_mean": 25.480789184570312, "beta_dpo/gap_std": 26.98863983154297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9074889867841409, "grad_norm": 74.97677612304688, "learning_rate": 1.3370790793601371e-08, "logits/chosen": -2.809556245803833, "logits/rejected": -2.3729324340820312, "loss": 0.2889, "step": 618 }, { "beta_dpo/beta_used": 0.24077007174491882, "beta_dpo/beta_used_raw": 0.24077007174491882, "beta_dpo/gap_mean": 25.54322052001953, "beta_dpo/gap_std": 26.776268005371094, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.908957415565345, "grad_norm": 93.3560791015625, "learning_rate": 1.2959838102258535e-08, "logits/chosen": -1.683895468711853, "logits/rejected": -0.9932112097740173, "loss": 0.4014, "step": 619 }, { "beta_dpo/beta_used": 0.3967885673046112, "beta_dpo/beta_used_raw": 0.1724894642829895, "beta_dpo/gap_mean": 25.977336883544922, "beta_dpo/gap_std": 27.07024383544922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9104258443465492, "grad_norm": 122.45503234863281, "learning_rate": 1.2555131639630567e-08, "logits/chosen": -1.6116111278533936, "logits/rejected": -1.515150785446167, "loss": 1.0302, "step": 620 }, { "beta_dpo/beta_used": 0.14680472016334534, "beta_dpo/beta_used_raw": 0.05492217093706131, "beta_dpo/gap_mean": 25.018667221069336, "beta_dpo/gap_std": 27.220109939575195, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9118942731277533, "grad_norm": 60.58484649658203, "learning_rate": 1.2156682070109086e-08, "logits/chosen": -1.7409666776657104, "logits/rejected": -1.3526718616485596, "loss": 0.9069, "step": 621 }, { "beta_dpo/beta_used": 0.17629124224185944, "beta_dpo/beta_used_raw": 0.17629124224185944, "beta_dpo/gap_mean": 25.912124633789062, "beta_dpo/gap_std": 26.764728546142578, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9133627019089574, "grad_norm": 45.40700912475586, "learning_rate": 1.1764499893210878e-08, "logits/chosen": -0.560531497001648, "logits/rejected": 0.17780210077762604, "loss": 0.2976, "step": 622 }, { "beta_dpo/beta_used": 0.05971324071288109, "beta_dpo/beta_used_raw": -0.05395708605647087, "beta_dpo/gap_mean": 25.94444465637207, "beta_dpo/gap_std": 27.35669708251953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9148311306901615, "grad_norm": 19.917722702026367, "learning_rate": 1.1378595443300998e-08, "logits/chosen": -2.4165191650390625, "logits/rejected": -2.535884141921997, "loss": 0.8965, "step": 623 }, { "beta_dpo/beta_used": 0.2743593156337738, "beta_dpo/beta_used_raw": 0.2545613944530487, "beta_dpo/gap_mean": 26.42656135559082, "beta_dpo/gap_std": 27.526315689086914, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9162995594713657, "grad_norm": 53.50072479248047, "learning_rate": 1.0998978889320582e-08, "logits/chosen": -1.464743733406067, "logits/rejected": -0.29761308431625366, "loss": 0.7317, "step": 624 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.34066319465637207, "beta_dpo/gap_mean": 26.23963737487793, "beta_dpo/gap_std": 27.129396438598633, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9177679882525698, "grad_norm": 0.6834034323692322, "learning_rate": 1.0625660234518913e-08, "logits/chosen": -2.4838218688964844, "logits/rejected": -2.34793758392334, "loss": 1.3677, "step": 625 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2781468331813812, "beta_dpo/gap_mean": 25.508012771606445, "beta_dpo/gap_std": 27.87520408630371, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9192364170337739, "grad_norm": 0.693095862865448, "learning_rate": 1.0258649316189721e-08, "logits/chosen": -2.392935276031494, "logits/rejected": -2.3022007942199707, "loss": 1.3675, "step": 626 }, { "beta_dpo/beta_used": 0.1320492923259735, "beta_dpo/beta_used_raw": 0.1320492923259735, "beta_dpo/gap_mean": 25.508769989013672, "beta_dpo/gap_std": 28.16644287109375, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.920704845814978, "grad_norm": 66.25963592529297, "learning_rate": 9.897955805412e-09, "logits/chosen": -1.5383257865905762, "logits/rejected": -1.2918891906738281, "loss": 1.0095, "step": 627 }, { "beta_dpo/beta_used": 0.1707049161195755, "beta_dpo/beta_used_raw": 0.1004292219877243, "beta_dpo/gap_mean": 25.589433670043945, "beta_dpo/gap_std": 27.666522979736328, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.922173274596182, "grad_norm": 43.68147659301758, "learning_rate": 9.543589206795238e-09, "logits/chosen": -1.54828679561615, "logits/rejected": -1.1098421812057495, "loss": 0.7534, "step": 628 }, { "beta_dpo/beta_used": 0.03018147312104702, "beta_dpo/beta_used_raw": 0.028293948620557785, "beta_dpo/gap_mean": 25.53607749938965, "beta_dpo/gap_std": 27.197887420654297, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9236417033773862, "grad_norm": 19.315677642822266, "learning_rate": 9.19555885822887e-09, "logits/chosen": -2.1567575931549072, "logits/rejected": -1.6035345792770386, "loss": 1.0793, "step": 629 }, { "beta_dpo/beta_used": 0.034268446266651154, "beta_dpo/beta_used_raw": -0.07359858602285385, "beta_dpo/gap_mean": 24.495513916015625, "beta_dpo/gap_std": 26.417217254638672, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.9251101321585903, "grad_norm": 20.050827026367188, "learning_rate": 8.85387393063622e-09, "logits/chosen": -2.140623092651367, "logits/rejected": -0.20457985997200012, "loss": 1.0099, "step": 630 }, { "beta_dpo/beta_used": 0.046777982264757156, "beta_dpo/beta_used_raw": 0.023703955113887787, "beta_dpo/gap_mean": 24.764623641967773, "beta_dpo/gap_std": 25.562318801879883, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9265785609397944, "grad_norm": 16.772321701049805, "learning_rate": 8.518543427732949e-09, "logits/chosen": -3.0626821517944336, "logits/rejected": -1.7369043827056885, "loss": 0.9139, "step": 631 }, { "beta_dpo/beta_used": 0.03596285730600357, "beta_dpo/beta_used_raw": 0.03169193118810654, "beta_dpo/gap_mean": 25.46234703063965, "beta_dpo/gap_std": 25.864564895629883, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9280469897209985, "grad_norm": 15.070470809936523, "learning_rate": 8.189576185789637e-09, "logits/chosen": -2.1580801010131836, "logits/rejected": -1.8691926002502441, "loss": 0.9446, "step": 632 }, { "beta_dpo/beta_used": 0.037844493985176086, "beta_dpo/beta_used_raw": -0.249537855386734, "beta_dpo/gap_mean": 24.667333602905273, "beta_dpo/gap_std": 27.10137939453125, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9295154185022027, "grad_norm": 40.30422592163086, "learning_rate": 7.866980873399015e-09, "logits/chosen": -3.4099979400634766, "logits/rejected": -2.978597640991211, "loss": 1.0363, "step": 633 }, { "beta_dpo/beta_used": 0.02373570203781128, "beta_dpo/beta_used_raw": -0.027643514797091484, "beta_dpo/gap_mean": 24.457666397094727, "beta_dpo/gap_std": 26.525691986083984, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9309838472834068, "grad_norm": 10.418085098266602, "learning_rate": 7.550765991247654e-09, "logits/chosen": -1.44098699092865, "logits/rejected": -1.1311659812927246, "loss": 1.031, "step": 634 }, { "beta_dpo/beta_used": 0.014054001308977604, "beta_dpo/beta_used_raw": -0.1692088097333908, "beta_dpo/gap_mean": 24.24664306640625, "beta_dpo/gap_std": 26.660503387451172, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9324522760646109, "grad_norm": 7.660090923309326, "learning_rate": 7.240939871891699e-09, "logits/chosen": -1.804953694343567, "logits/rejected": -1.106596827507019, "loss": 1.1452, "step": 635 }, { "beta_dpo/beta_used": 0.11992073059082031, "beta_dpo/beta_used_raw": 0.11992073059082031, "beta_dpo/gap_mean": 24.010807037353516, "beta_dpo/gap_std": 26.382919311523438, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.933920704845815, "grad_norm": 54.858829498291016, "learning_rate": 6.937510679537628e-09, "logits/chosen": -1.4366188049316406, "logits/rejected": -0.8485536575317383, "loss": 0.6125, "step": 636 }, { "beta_dpo/beta_used": 0.012525239959359169, "beta_dpo/beta_used_raw": -0.11114945262670517, "beta_dpo/gap_mean": 24.7161808013916, "beta_dpo/gap_std": 27.203895568847656, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.9353891336270191, "grad_norm": 8.482524871826172, "learning_rate": 6.640486409826785e-09, "logits/chosen": -1.6223981380462646, "logits/rejected": -1.295461893081665, "loss": 1.1776, "step": 637 }, { "beta_dpo/beta_used": 0.17386887967586517, "beta_dpo/beta_used_raw": 0.17386887967586517, "beta_dpo/gap_mean": 25.465686798095703, "beta_dpo/gap_std": 26.684253692626953, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9368575624082232, "grad_norm": 42.16156005859375, "learning_rate": 6.349874889624962e-09, "logits/chosen": -2.806030750274658, "logits/rejected": -0.8436174988746643, "loss": 0.3056, "step": 638 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14556750655174255, "beta_dpo/gap_mean": 24.781633377075195, "beta_dpo/gap_std": 26.568370819091797, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9383259911894273, "grad_norm": 0.7115759253501892, "learning_rate": 6.065683776815933e-09, "logits/chosen": -2.160405158996582, "logits/rejected": -0.9737479090690613, "loss": 1.3648, "step": 639 }, { "beta_dpo/beta_used": 0.3657962381839752, "beta_dpo/beta_used_raw": 0.3394223153591156, "beta_dpo/gap_mean": 24.88129234313965, "beta_dpo/gap_std": 26.40639877319336, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9397944199706314, "grad_norm": 6.0283379554748535, "learning_rate": 5.7879205600998296e-09, "logits/chosen": -0.7498547434806824, "logits/rejected": -0.22841249406337738, "loss": 0.6861, "step": 640 }, { "beta_dpo/beta_used": 0.21244415640830994, "beta_dpo/beta_used_raw": 0.21244415640830994, "beta_dpo/gap_mean": 25.962242126464844, "beta_dpo/gap_std": 26.106050491333008, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9412628487518355, "grad_norm": 93.76725006103516, "learning_rate": 5.516592558795746e-09, "logits/chosen": -1.8660616874694824, "logits/rejected": -1.753645896911621, "loss": 0.625, "step": 641 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.39809074997901917, "beta_dpo/gap_mean": 25.666969299316406, "beta_dpo/gap_std": 27.017744064331055, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9427312775330396, "grad_norm": 0.6580331921577454, "learning_rate": 5.251706922648868e-09, "logits/chosen": -0.9073336124420166, "logits/rejected": -0.7449837923049927, "loss": 1.3687, "step": 642 }, { "beta_dpo/beta_used": 0.07014614343643188, "beta_dpo/beta_used_raw": 0.07014614343643188, "beta_dpo/gap_mean": 25.45240020751953, "beta_dpo/gap_std": 26.006860733032227, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9441997063142438, "grad_norm": 21.325944900512695, "learning_rate": 4.993270631642038e-09, "logits/chosen": -1.990086555480957, "logits/rejected": -1.38441801071167, "loss": 0.6708, "step": 643 }, { "beta_dpo/beta_used": 0.10355755686759949, "beta_dpo/beta_used_raw": -0.07786447554826736, "beta_dpo/gap_mean": 24.64284896850586, "beta_dpo/gap_std": 26.2049617767334, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9456681350954479, "grad_norm": 22.50238037109375, "learning_rate": 4.741290495811873e-09, "logits/chosen": -1.9852948188781738, "logits/rejected": -1.767737865447998, "loss": 0.7768, "step": 644 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3595721125602722, "beta_dpo/gap_mean": 23.876996994018555, "beta_dpo/gap_std": 26.21371078491211, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.947136563876652, "grad_norm": 0.7167469263076782, "learning_rate": 4.495773155069299e-09, "logits/chosen": -2.1556835174560547, "logits/rejected": -1.0982701778411865, "loss": 1.3709, "step": 645 }, { "beta_dpo/beta_used": 0.27249401807785034, "beta_dpo/beta_used_raw": 0.27249401807785034, "beta_dpo/gap_mean": 24.075374603271484, "beta_dpo/gap_std": 25.402545928955078, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.9486049926578561, "grad_norm": 78.34426879882812, "learning_rate": 4.256725079024553e-09, "logits/chosen": -1.273078203201294, "logits/rejected": -1.0210223197937012, "loss": 0.2992, "step": 646 }, { "beta_dpo/beta_used": 0.1067810207605362, "beta_dpo/beta_used_raw": 0.013192906975746155, "beta_dpo/gap_mean": 23.82613754272461, "beta_dpo/gap_std": 25.484859466552734, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9500734214390602, "grad_norm": 73.75732421875, "learning_rate": 4.024152566816791e-09, "logits/chosen": -0.9040647745132446, "logits/rejected": -0.8365122079849243, "loss": 1.2658, "step": 647 }, { "beta_dpo/beta_used": 0.183420792222023, "beta_dpo/beta_used_raw": 0.10964904725551605, "beta_dpo/gap_mean": 24.101375579833984, "beta_dpo/gap_std": 25.932687759399414, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9515418502202643, "grad_norm": 102.65180969238281, "learning_rate": 3.798061746947995e-09, "logits/chosen": -1.5339770317077637, "logits/rejected": -1.5160984992980957, "loss": 0.8779, "step": 648 }, { "beta_dpo/beta_used": 0.1578582227230072, "beta_dpo/beta_used_raw": 0.0430719330906868, "beta_dpo/gap_mean": 24.48971176147461, "beta_dpo/gap_std": 25.154579162597656, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9530102790014684, "grad_norm": 35.50594711303711, "learning_rate": 3.5784585771215235e-09, "logits/chosen": -2.3788206577301025, "logits/rejected": -1.731053113937378, "loss": 0.8886, "step": 649 }, { "beta_dpo/beta_used": 0.08072511106729507, "beta_dpo/beta_used_raw": 0.04472571983933449, "beta_dpo/gap_mean": 24.677059173583984, "beta_dpo/gap_std": 25.670509338378906, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9544787077826725, "grad_norm": 32.246803283691406, "learning_rate": 3.3653488440851253e-09, "logits/chosen": -1.5401017665863037, "logits/rejected": -1.5119943618774414, "loss": 0.8795, "step": 650 }, { "beta_dpo/beta_used": 0.266397625207901, "beta_dpo/beta_used_raw": 0.266397625207901, "beta_dpo/gap_mean": 25.562501907348633, "beta_dpo/gap_std": 25.796520233154297, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9559471365638766, "grad_norm": 75.5047836303711, "learning_rate": 3.158738163478475e-09, "logits/chosen": -2.793809413909912, "logits/rejected": -2.2159769535064697, "loss": 0.4221, "step": 651 }, { "beta_dpo/beta_used": 0.23461079597473145, "beta_dpo/beta_used_raw": 0.23461079597473145, "beta_dpo/gap_mean": 25.857515335083008, "beta_dpo/gap_std": 25.84009552001953, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.9574155653450808, "grad_norm": 62.18571090698242, "learning_rate": 2.9586319796851555e-09, "logits/chosen": -1.5306251049041748, "logits/rejected": -1.4811850786209106, "loss": 0.3882, "step": 652 }, { "beta_dpo/beta_used": 0.3161761462688446, "beta_dpo/beta_used_raw": 0.3161761462688446, "beta_dpo/gap_mean": 25.806331634521484, "beta_dpo/gap_std": 25.399927139282227, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9588839941262849, "grad_norm": 105.35147094726562, "learning_rate": 2.7650355656892166e-09, "logits/chosen": -2.4217348098754883, "logits/rejected": -2.5725555419921875, "loss": 0.3804, "step": 653 }, { "beta_dpo/beta_used": 0.11472293734550476, "beta_dpo/beta_used_raw": 0.04106505215167999, "beta_dpo/gap_mean": 25.800559997558594, "beta_dpo/gap_std": 25.6098690032959, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.960352422907489, "grad_norm": 39.34584045410156, "learning_rate": 2.577954022936174e-09, "logits/chosen": -2.2942886352539062, "logits/rejected": -1.7099244594573975, "loss": 0.9204, "step": 654 }, { "beta_dpo/beta_used": 0.09627825766801834, "beta_dpo/beta_used_raw": -0.00997423380613327, "beta_dpo/gap_mean": 25.342327117919922, "beta_dpo/gap_std": 26.117286682128906, "beta_dpo/mask_keep_frac": 1.0, "epoch": 0.9618208516886931, "grad_norm": 35.4230842590332, "learning_rate": 2.397392281198729e-09, "logits/chosen": -1.9102520942687988, "logits/rejected": -1.835174322128296, "loss": 0.8524, "step": 655 }, { "beta_dpo/beta_used": 0.02918560430407524, "beta_dpo/beta_used_raw": 0.019424546509981155, "beta_dpo/gap_mean": 25.836530685424805, "beta_dpo/gap_std": 26.004581451416016, "beta_dpo/mask_keep_frac": 0.5, "epoch": 0.9632892804698973, "grad_norm": 15.73218059539795, "learning_rate": 2.223355098446622e-09, "logits/chosen": -2.0381622314453125, "logits/rejected": -2.282194137573242, "loss": 0.9823, "step": 656 }, { "beta_dpo/beta_used": 0.14083074033260345, "beta_dpo/beta_used_raw": -0.011161044239997864, "beta_dpo/gap_mean": 26.164352416992188, "beta_dpo/gap_std": 26.37403106689453, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9647577092511013, "grad_norm": 36.36809158325195, "learning_rate": 2.055847060721566e-09, "logits/chosen": -1.8447120189666748, "logits/rejected": -1.3986084461212158, "loss": 0.846, "step": 657 }, { "beta_dpo/beta_used": 0.09425677359104156, "beta_dpo/beta_used_raw": -0.04021822661161423, "beta_dpo/gap_mean": 25.671611785888672, "beta_dpo/gap_std": 26.10125732421875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9662261380323054, "grad_norm": 41.52330780029297, "learning_rate": 1.8948725820160662e-09, "logits/chosen": -2.079184055328369, "logits/rejected": -1.5296201705932617, "loss": 0.8892, "step": 658 }, { "beta_dpo/beta_used": 0.300968736410141, "beta_dpo/beta_used_raw": 0.300968736410141, "beta_dpo/gap_mean": 26.545684814453125, "beta_dpo/gap_std": 26.43194580078125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9676945668135095, "grad_norm": 127.47429656982422, "learning_rate": 1.7404359041573723e-09, "logits/chosen": -2.7157797813415527, "logits/rejected": -2.105652093887329, "loss": 0.7889, "step": 659 }, { "beta_dpo/beta_used": 0.10049507021903992, "beta_dpo/beta_used_raw": 0.0901188924908638, "beta_dpo/gap_mean": 27.32921600341797, "beta_dpo/gap_std": 26.936813354492188, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9691629955947136, "grad_norm": 40.15045928955078, "learning_rate": 1.592541096695571e-09, "logits/chosen": -2.3412139415740967, "logits/rejected": -1.4522958993911743, "loss": 0.7886, "step": 660 }, { "beta_dpo/beta_used": 0.08502917736768723, "beta_dpo/beta_used_raw": -0.018630720674991608, "beta_dpo/gap_mean": 27.41143798828125, "beta_dpo/gap_std": 27.408355712890625, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9706314243759178, "grad_norm": 45.90566635131836, "learning_rate": 1.4511920567963908e-09, "logits/chosen": -1.7122316360473633, "logits/rejected": -1.2374494075775146, "loss": 0.9341, "step": 661 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2160995602607727, "beta_dpo/gap_mean": 27.318252563476562, "beta_dpo/gap_std": 26.791423797607422, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9720998531571219, "grad_norm": 0.7011018395423889, "learning_rate": 1.3163925091384532e-09, "logits/chosen": -1.200093388557434, "logits/rejected": -0.6818442940711975, "loss": 1.3643, "step": 662 }, { "beta_dpo/beta_used": 0.12379209697246552, "beta_dpo/beta_used_raw": 0.12379209697246552, "beta_dpo/gap_mean": 27.517772674560547, "beta_dpo/gap_std": 26.901273727416992, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.973568281938326, "grad_norm": 45.353824615478516, "learning_rate": 1.1881460058152382e-09, "logits/chosen": -1.6320724487304688, "logits/rejected": -1.3605599403381348, "loss": 0.7098, "step": 663 }, { "beta_dpo/beta_used": 0.08696911484003067, "beta_dpo/beta_used_raw": -0.04690101742744446, "beta_dpo/gap_mean": 27.899158477783203, "beta_dpo/gap_std": 26.73941421508789, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9750367107195301, "grad_norm": 39.758148193359375, "learning_rate": 1.066455926241383e-09, "logits/chosen": -1.6941994428634644, "logits/rejected": -1.2433688640594482, "loss": 0.889, "step": 664 }, { "beta_dpo/beta_used": 0.042768318206071854, "beta_dpo/beta_used_raw": -0.043198782950639725, "beta_dpo/gap_mean": 27.266984939575195, "beta_dpo/gap_std": 26.420181274414062, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9765051395007343, "grad_norm": 16.841934204101562, "learning_rate": 9.513254770636137e-10, "logits/chosen": -2.474729537963867, "logits/rejected": -2.2313597202301025, "loss": 0.8784, "step": 665 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2649265229701996, "beta_dpo/gap_mean": 26.876924514770508, "beta_dpo/gap_std": 26.414169311523438, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9779735682819384, "grad_norm": 0.7575967311859131, "learning_rate": 8.427576920763956e-10, "logits/chosen": -1.7433545589447021, "logits/rejected": -1.490132451057434, "loss": 1.3656, "step": 666 }, { "beta_dpo/beta_used": 0.04926947504281998, "beta_dpo/beta_used_raw": -0.007738005369901657, "beta_dpo/gap_mean": 27.131610870361328, "beta_dpo/gap_std": 26.764110565185547, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9794419970631424, "grad_norm": 20.461111068725586, "learning_rate": 7.407554321417764e-10, "logits/chosen": -1.7287938594818115, "logits/rejected": -1.0780360698699951, "loss": 0.8868, "step": 667 }, { "beta_dpo/beta_used": 0.11481890082359314, "beta_dpo/beta_used_raw": -0.020425312221050262, "beta_dpo/gap_mean": 27.668758392333984, "beta_dpo/gap_std": 26.51095962524414, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9809104258443465, "grad_norm": 51.90634536743164, "learning_rate": 6.453213851142225e-10, "logits/chosen": -1.7414575815200806, "logits/rejected": -1.8504266738891602, "loss": 0.881, "step": 668 }, { "beta_dpo/beta_used": 0.089015893638134, "beta_dpo/beta_used_raw": 0.031783655285835266, "beta_dpo/gap_mean": 28.0015869140625, "beta_dpo/gap_std": 26.40846061706543, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9823788546255506, "grad_norm": 21.52220916748047, "learning_rate": 5.564580657695939e-10, "logits/chosen": -1.7665306329727173, "logits/rejected": -1.1365385055541992, "loss": 0.8646, "step": 669 }, { "beta_dpo/beta_used": 0.11147114634513855, "beta_dpo/beta_used_raw": 0.09976670891046524, "beta_dpo/gap_mean": 28.321340560913086, "beta_dpo/gap_std": 26.639236450195312, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9838472834067548, "grad_norm": 49.622772216796875, "learning_rate": 4.741678157389739e-10, "logits/chosen": -1.5533534288406372, "logits/rejected": -1.235931634902954, "loss": 0.8828, "step": 670 }, { "beta_dpo/beta_used": 0.15611684322357178, "beta_dpo/beta_used_raw": -0.028427034616470337, "beta_dpo/gap_mean": 28.04564094543457, "beta_dpo/gap_std": 26.574295043945312, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9853157121879589, "grad_norm": 8.204655647277832, "learning_rate": 3.9845280344705245e-10, "logits/chosen": -0.9096536040306091, "logits/rejected": -0.6570190191268921, "loss": 0.7011, "step": 671 }, { "beta_dpo/beta_used": 0.10520876944065094, "beta_dpo/beta_used_raw": 0.05002519115805626, "beta_dpo/gap_mean": 27.482242584228516, "beta_dpo/gap_std": 26.394447326660156, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.986784140969163, "grad_norm": 51.78870391845703, "learning_rate": 3.293150240547549e-10, "logits/chosen": -2.238128185272217, "logits/rejected": -2.1167397499084473, "loss": 0.8612, "step": 672 }, { "beta_dpo/beta_used": 0.0886186733841896, "beta_dpo/beta_used_raw": 0.07913055270910263, "beta_dpo/gap_mean": 27.103679656982422, "beta_dpo/gap_std": 26.009197235107422, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9882525697503671, "grad_norm": 30.509424209594727, "learning_rate": 2.6675629940689504e-10, "logits/chosen": -1.1244845390319824, "logits/rejected": -1.012803077697754, "loss": 0.7891, "step": 673 }, { "beta_dpo/beta_used": 0.04007472097873688, "beta_dpo/beta_used_raw": 0.018813492730259895, "beta_dpo/gap_mean": 27.488269805908203, "beta_dpo/gap_std": 25.604915618896484, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9897209985315712, "grad_norm": 11.617090225219727, "learning_rate": 2.1077827798404725e-10, "logits/chosen": -1.8489723205566406, "logits/rejected": -1.3508100509643555, "loss": 0.8895, "step": 674 }, { "beta_dpo/beta_used": 0.10179030150175095, "beta_dpo/beta_used_raw": 0.0895192101597786, "beta_dpo/gap_mean": 27.70388412475586, "beta_dpo/gap_std": 26.114885330200195, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9911894273127754, "grad_norm": 25.02197265625, "learning_rate": 1.6138243485910863e-10, "logits/chosen": -1.0838446617126465, "logits/rejected": -0.8627390265464783, "loss": 0.8013, "step": 675 }, { "beta_dpo/beta_used": 0.25374189019203186, "beta_dpo/beta_used_raw": 0.25374189019203186, "beta_dpo/gap_mean": 27.704769134521484, "beta_dpo/gap_std": 26.05438995361328, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9926578560939795, "grad_norm": 109.19337463378906, "learning_rate": 1.1857007165852472e-10, "logits/chosen": -2.0344905853271484, "logits/rejected": -1.4180444478988647, "loss": 0.6234, "step": 676 }, { "beta_dpo/beta_used": 0.05424497649073601, "beta_dpo/beta_used_raw": 0.05424497649073601, "beta_dpo/gap_mean": 28.181198120117188, "beta_dpo/gap_std": 25.20414924621582, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9941262848751835, "grad_norm": 18.980443954467773, "learning_rate": 8.23423165278725e-11, "logits/chosen": -2.439286708831787, "logits/rejected": -2.477078437805176, "loss": 0.6725, "step": 677 }, { "beta_dpo/beta_used": 0.24579310417175293, "beta_dpo/beta_used_raw": 0.24579310417175293, "beta_dpo/gap_mean": 28.24429702758789, "beta_dpo/gap_std": 24.93535614013672, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9955947136563876, "grad_norm": 58.01787185668945, "learning_rate": 5.270012410216185e-11, "logits/chosen": -1.2655192613601685, "logits/rejected": -0.5176064968109131, "loss": 0.6773, "step": 678 }, { "beta_dpo/beta_used": 0.09529916197061539, "beta_dpo/beta_used_raw": 0.09048715978860855, "beta_dpo/gap_mean": 28.419395446777344, "beta_dpo/gap_std": 24.707523345947266, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9970631424375918, "grad_norm": 21.957143783569336, "learning_rate": 2.9644275480772416e-11, "logits/chosen": -2.128697633743286, "logits/rejected": -1.8750262260437012, "loss": 0.7942, "step": 679 }, { "beta_dpo/beta_used": 0.02317173406481743, "beta_dpo/beta_used_raw": -0.06876889616250992, "beta_dpo/gap_mean": 27.96090316772461, "beta_dpo/gap_std": 25.136274337768555, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9985315712187959, "grad_norm": 12.635197639465332, "learning_rate": 1.31753782067201e-11, "logits/chosen": -2.110539436340332, "logits/rejected": -2.0527472496032715, "loss": 1.0468, "step": 680 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19943192601203918, "beta_dpo/gap_mean": 27.429988861083984, "beta_dpo/gap_std": 24.85990333557129, "beta_dpo/mask_keep_frac": 0.75, "epoch": 1.0, "grad_norm": 0.6805768609046936, "learning_rate": 3.2938662507808745e-12, "logits/chosen": -1.4662553071975708, "logits/rejected": -1.3521288633346558, "loss": 1.3642, "step": 681 }, { "epoch": 1.0, "step": 681, "total_flos": 0.0, "train_loss": 0.9969710769807365, "train_runtime": 3178.2358, "train_samples_per_second": 13.718, "train_steps_per_second": 0.214 } ], "logging_steps": 1, "max_steps": 681, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }