{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 681, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta": 0.10115084052085876, "beta_dpo/beta_margin_grad_mean": -0.5005621910095215, "beta_dpo/beta_margin_grad_std": 0.010608955286443233, "beta_dpo/beta_margin_mean": -0.002253394341096282, "beta_dpo/beta_margin_std": 0.042461980134248734, "beta_dpo/beta_used": 0.10115084052085876, "beta_dpo/beta_used_raw": 0.10115084052085876, "beta_dpo/gap_mean": -0.004527175799012184, "beta_dpo/gap_std": 0.06229356676340103, "beta_dpo/loss_margin_mean": -0.02287048101425171, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0014684287812041115, "grad_norm": 83.69244384765625, "learning_rate": 0.0, "logits/chosen": -0.4974287748336792, "logits/rejected": -0.43299180269241333, "loss": 1.3849, "step": 1 }, { "beta_dpo/beta": 0.09928660839796066, "beta_dpo/beta_margin_grad_mean": -0.501632034778595, "beta_dpo/beta_margin_grad_std": 0.008741416968405247, "beta_dpo/beta_margin_mean": -0.006530125625431538, "beta_dpo/beta_margin_std": 0.034978773444890976, "beta_dpo/beta_used": 0.09928660839796066, "beta_dpo/beta_used_raw": 0.09928660839796066, "beta_dpo/gap_mean": -0.0141224917024374, "beta_dpo/gap_std": 0.1194789782166481, "beta_dpo/loss_margin_mean": -0.06572240591049194, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.002936857562408223, "grad_norm": 72.02227783203125, "learning_rate": 7.246376811594203e-09, "logits/chosen": -0.4953641891479492, "logits/rejected": -0.4594460129737854, "loss": 1.389, "step": 2 }, { "beta_dpo/beta": 0.09881577640771866, "beta_dpo/beta_margin_grad_mean": -0.49877238273620605, "beta_dpo/beta_margin_grad_std": 0.008976051583886147, "beta_dpo/beta_margin_mean": 0.00491556478664279, "beta_dpo/beta_margin_std": 0.03592138737440109, "beta_dpo/beta_used": 0.09881577640771866, "beta_dpo/beta_used_raw": 0.09881577640771866, "beta_dpo/gap_mean": -0.006174812093377113, "beta_dpo/gap_std": 0.16936704516410828, "beta_dpo/loss_margin_mean": 0.04976421594619751, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.004405286343612335, "grad_norm": 67.19432067871094, "learning_rate": 1.4492753623188406e-08, "logits/chosen": -0.48158758878707886, "logits/rejected": -0.4422696828842163, "loss": 1.389, "step": 3 }, { "beta_dpo/beta": 0.09335151314735413, "beta_dpo/beta_margin_grad_mean": -0.5010735988616943, "beta_dpo/beta_margin_grad_std": 0.009380017407238483, "beta_dpo/beta_margin_mean": -0.004296026658266783, "beta_dpo/beta_margin_std": 0.03754071146249771, "beta_dpo/beta_used": 0.09335151314735413, "beta_dpo/beta_used_raw": 0.09335151314735413, "beta_dpo/gap_mean": -0.00973600521683693, "beta_dpo/gap_std": 0.2109805941581726, "beta_dpo/loss_margin_mean": -0.04590195417404175, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.005873715124816446, "grad_norm": 67.43733215332031, "learning_rate": 2.1739130434782606e-08, "logits/chosen": -0.45274418592453003, "logits/rejected": -0.42465052008628845, "loss": 1.3977, "step": 4 }, { "beta_dpo/beta": 0.10049673914909363, "beta_dpo/beta_margin_grad_mean": -0.4986048936843872, "beta_dpo/beta_margin_grad_std": 0.009488900192081928, "beta_dpo/beta_margin_mean": 0.005582462064921856, "beta_dpo/beta_margin_std": 0.03796974569559097, "beta_dpo/beta_used": 0.10049673914909363, "beta_dpo/beta_used_raw": 0.10049673914909363, "beta_dpo/gap_mean": -0.0020640306174755096, "beta_dpo/gap_std": 0.2421741932630539, "beta_dpo/loss_margin_mean": 0.05585688352584839, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.007342143906020558, "grad_norm": 87.71318817138672, "learning_rate": 2.898550724637681e-08, "logits/chosen": -0.4956454932689667, "logits/rejected": -0.4505915641784668, "loss": 1.3858, "step": 5 }, { "beta_dpo/beta": 0.10047884285449982, "beta_dpo/beta_margin_grad_mean": -0.500208854675293, "beta_dpo/beta_margin_grad_std": 0.008933261968195438, "beta_dpo/beta_margin_mean": -0.0008351176511496305, "beta_dpo/beta_margin_std": 0.03574404865503311, "beta_dpo/beta_used": 0.10047884285449982, "beta_dpo/beta_used_raw": 0.10047884285449982, "beta_dpo/gap_mean": 0.0017710481770336628, "beta_dpo/gap_std": 0.2680016756057739, "beta_dpo/loss_margin_mean": -0.007976382970809937, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.00881057268722467, "grad_norm": 90.84674072265625, "learning_rate": 3.6231884057971014e-08, "logits/chosen": -0.5020167827606201, "logits/rejected": -0.4594297409057617, "loss": 1.3854, "step": 6 }, { "beta_dpo/beta": 0.09998422861099243, "beta_dpo/beta_margin_grad_mean": -0.5002336502075195, "beta_dpo/beta_margin_grad_std": 0.01014900952577591, "beta_dpo/beta_margin_mean": -0.0009349790052510798, "beta_dpo/beta_margin_std": 0.04061206057667732, "beta_dpo/beta_used": 0.09998422861099243, "beta_dpo/beta_used_raw": 0.09998422861099243, "beta_dpo/gap_mean": 6.500491872429848e-05, "beta_dpo/gap_std": 0.2939686179161072, "beta_dpo/loss_margin_mean": -0.009219467639923096, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.010279001468428781, "grad_norm": 83.6563491821289, "learning_rate": 4.347826086956521e-08, "logits/chosen": -0.5061265826225281, "logits/rejected": -0.4723086953163147, "loss": 1.3865, "step": 7 }, { "beta_dpo/beta": 0.1022939383983612, "beta_dpo/beta_margin_grad_mean": -0.5015852451324463, "beta_dpo/beta_margin_grad_std": 0.010492443107068539, "beta_dpo/beta_margin_mean": -0.006352751050144434, "beta_dpo/beta_margin_std": 0.042014747858047485, "beta_dpo/beta_used": 0.1022939383983612, "beta_dpo/beta_used_raw": 0.1022939383983612, "beta_dpo/gap_mean": -0.009944056160748005, "beta_dpo/gap_std": 0.3154027462005615, "beta_dpo/loss_margin_mean": -0.061917901039123535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.011747430249632892, "grad_norm": 77.50525665283203, "learning_rate": 5.0724637681159424e-08, "logits/chosen": -0.5334175825119019, "logits/rejected": -0.510188102722168, "loss": 1.3836, "step": 8 }, { "beta_dpo/beta": 0.09855471551418304, "beta_dpo/beta_margin_grad_mean": -0.49951478838920593, "beta_dpo/beta_margin_grad_std": 0.011848426423966885, "beta_dpo/beta_margin_mean": 0.00194238789845258, "beta_dpo/beta_margin_std": 0.04742159694433212, "beta_dpo/beta_used": 0.09855471551418304, "beta_dpo/beta_used_raw": 0.09855471551418304, "beta_dpo/gap_mean": -0.005505750421434641, "beta_dpo/gap_std": 0.34114253520965576, "beta_dpo/loss_margin_mean": 0.02003002166748047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.013215859030837005, "grad_norm": 77.50155639648438, "learning_rate": 5.797101449275362e-08, "logits/chosen": -0.523200511932373, "logits/rejected": -0.478301465511322, "loss": 1.3895, "step": 9 }, { "beta_dpo/beta": 0.0998501181602478, "beta_dpo/beta_margin_grad_mean": -0.5005317330360413, "beta_dpo/beta_margin_grad_std": 0.010131197981536388, "beta_dpo/beta_margin_mean": -0.002129613421857357, "beta_dpo/beta_margin_std": 0.04054965451359749, "beta_dpo/beta_used": 0.0998501181602478, "beta_dpo/beta_used_raw": 0.0998501181602478, "beta_dpo/gap_mean": -0.010290170088410378, "beta_dpo/gap_std": 0.3536257743835449, "beta_dpo/loss_margin_mean": -0.021320700645446777, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.014684287812041116, "grad_norm": 72.39192962646484, "learning_rate": 6.521739130434782e-08, "logits/chosen": -0.518991231918335, "logits/rejected": -0.4768357276916504, "loss": 1.3878, "step": 10 }, { "beta_dpo/beta": 0.10206712037324905, "beta_dpo/beta_margin_grad_mean": -0.49908754229545593, "beta_dpo/beta_margin_grad_std": 0.008663208223879337, "beta_dpo/beta_margin_mean": 0.003652524435892701, "beta_dpo/beta_margin_std": 0.03466520085930824, "beta_dpo/beta_used": 0.10206712037324905, "beta_dpo/beta_used_raw": 0.10206712037324905, "beta_dpo/gap_mean": -0.004253363702446222, "beta_dpo/gap_std": 0.35756930708885193, "beta_dpo/loss_margin_mean": 0.03583630919456482, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.016152716593245228, "grad_norm": 66.96553802490234, "learning_rate": 7.246376811594203e-08, "logits/chosen": -0.4826526641845703, "logits/rejected": -0.4586416780948639, "loss": 1.3833, "step": 11 }, { "beta_dpo/beta": 0.09693565964698792, "beta_dpo/beta_margin_grad_mean": -0.500446617603302, "beta_dpo/beta_margin_grad_std": 0.01050448976457119, "beta_dpo/beta_margin_mean": -0.0017924468265846372, "beta_dpo/beta_margin_std": 0.042050570249557495, "beta_dpo/beta_used": 0.09693565964698792, "beta_dpo/beta_used_raw": 0.09693565964698792, "beta_dpo/gap_mean": -0.00683976337313652, "beta_dpo/gap_std": 0.3720043897628784, "beta_dpo/loss_margin_mean": -0.017470553517341614, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.01762114537444934, "grad_norm": 83.22624206542969, "learning_rate": 7.971014492753623e-08, "logits/chosen": -0.547247052192688, "logits/rejected": -0.5113379955291748, "loss": 1.392, "step": 12 }, { "beta_dpo/beta": 0.09837324917316437, "beta_dpo/beta_margin_grad_mean": -0.4999392330646515, "beta_dpo/beta_margin_grad_std": 0.00992752518504858, "beta_dpo/beta_margin_mean": 0.00025006092619150877, "beta_dpo/beta_margin_std": 0.03974674642086029, "beta_dpo/beta_used": 0.09837324917316437, "beta_dpo/beta_used_raw": 0.09837324917316437, "beta_dpo/gap_mean": -0.006056391168385744, "beta_dpo/gap_std": 0.3698127865791321, "beta_dpo/loss_margin_mean": 0.002656310796737671, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.01908957415565345, "grad_norm": 82.04718017578125, "learning_rate": 8.695652173913042e-08, "logits/chosen": -0.4824892282485962, "logits/rejected": -0.45439815521240234, "loss": 1.3897, "step": 13 }, { "beta_dpo/beta": 0.09926562756299973, "beta_dpo/beta_margin_grad_mean": -0.5004010200500488, "beta_dpo/beta_margin_grad_std": 0.009033882059156895, "beta_dpo/beta_margin_mean": -0.001605634461157024, "beta_dpo/beta_margin_std": 0.03615177050232887, "beta_dpo/beta_used": 0.09926562756299973, "beta_dpo/beta_used_raw": 0.09926562756299973, "beta_dpo/gap_mean": -0.0021513975225389004, "beta_dpo/gap_std": 0.37402260303497314, "beta_dpo/loss_margin_mean": -0.01792725920677185, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.020558002936857563, "grad_norm": 89.19822692871094, "learning_rate": 9.420289855072464e-08, "logits/chosen": -0.47684037685394287, "logits/rejected": -0.437483549118042, "loss": 1.3877, "step": 14 }, { "beta_dpo/beta": 0.1028667539358139, "beta_dpo/beta_margin_grad_mean": -0.49847865104675293, "beta_dpo/beta_margin_grad_std": 0.009109060280025005, "beta_dpo/beta_margin_mean": 0.006086469162255526, "beta_dpo/beta_margin_std": 0.03645266592502594, "beta_dpo/beta_used": 0.1028667539358139, "beta_dpo/beta_used_raw": 0.1028667539358139, "beta_dpo/gap_mean": 0.0069586304016411304, "beta_dpo/gap_std": 0.3670150637626648, "beta_dpo/loss_margin_mean": 0.05616268515586853, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.022026431718061675, "grad_norm": 72.2989501953125, "learning_rate": 1.0144927536231885e-07, "logits/chosen": -0.4976166784763336, "logits/rejected": -0.47907328605651855, "loss": 1.3806, "step": 15 }, { "beta_dpo/beta": 0.10129574686288834, "beta_dpo/beta_margin_grad_mean": -0.4988415837287903, "beta_dpo/beta_margin_grad_std": 0.009300184436142445, "beta_dpo/beta_margin_mean": 0.004635946359485388, "beta_dpo/beta_margin_std": 0.03721487522125244, "beta_dpo/beta_used": 0.10129574686288834, "beta_dpo/beta_used_raw": 0.10129574686288834, "beta_dpo/gap_mean": 0.01056666485965252, "beta_dpo/gap_std": 0.369087815284729, "beta_dpo/loss_margin_mean": 0.04578801989555359, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.023494860499265784, "grad_norm": 85.27164459228516, "learning_rate": 1.0869565217391303e-07, "logits/chosen": -0.5442918539047241, "logits/rejected": -0.5051777362823486, "loss": 1.3833, "step": 16 }, { "beta_dpo/beta": 0.10490189492702484, "beta_dpo/beta_margin_grad_mean": -0.4973750412464142, "beta_dpo/beta_margin_grad_std": 0.009830176830291748, "beta_dpo/beta_margin_mean": 0.010502819903194904, "beta_dpo/beta_margin_std": 0.039345428347587585, "beta_dpo/beta_used": 0.10490189492702484, "beta_dpo/beta_used_raw": 0.10490189492702484, "beta_dpo/gap_mean": 0.023403100669384003, "beta_dpo/gap_std": 0.37113308906555176, "beta_dpo/loss_margin_mean": 0.10013490915298462, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.024963289280469897, "grad_norm": 80.40909576416016, "learning_rate": 1.1594202898550725e-07, "logits/chosen": -0.4828060269355774, "logits/rejected": -0.44346535205841064, "loss": 1.3755, "step": 17 }, { "beta_dpo/beta": 0.1001388430595398, "beta_dpo/beta_margin_grad_mean": -0.4992016553878784, "beta_dpo/beta_margin_grad_std": 0.008092939853668213, "beta_dpo/beta_margin_mean": 0.00319434585981071, "beta_dpo/beta_margin_std": 0.03238019719719887, "beta_dpo/beta_used": 0.1001388430595398, "beta_dpo/beta_used_raw": 0.1001388430595398, "beta_dpo/gap_mean": 0.029124243184924126, "beta_dpo/gap_std": 0.3635770082473755, "beta_dpo/loss_margin_mean": 0.03153112530708313, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02643171806167401, "grad_norm": 82.2762680053711, "learning_rate": 1.2318840579710146e-07, "logits/chosen": -0.5221867561340332, "logits/rejected": -0.4699585437774658, "loss": 1.3833, "step": 18 }, { "beta_dpo/beta": 0.10230091959238052, "beta_dpo/beta_margin_grad_mean": -0.4976135194301605, "beta_dpo/beta_margin_grad_std": 0.009962659329175949, "beta_dpo/beta_margin_mean": 0.009549921378493309, "beta_dpo/beta_margin_std": 0.03987620025873184, "beta_dpo/beta_used": 0.10230091959238052, "beta_dpo/beta_used_raw": 0.10230091959238052, "beta_dpo/gap_mean": 0.03644995018839836, "beta_dpo/gap_std": 0.36511197686195374, "beta_dpo/loss_margin_mean": 0.09297522902488708, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.027900146842878122, "grad_norm": 67.32933807373047, "learning_rate": 1.3043478260869563e-07, "logits/chosen": -0.49089670181274414, "logits/rejected": -0.4410245716571808, "loss": 1.3788, "step": 19 }, { "beta_dpo/beta": 0.10144417732954025, "beta_dpo/beta_margin_grad_mean": -0.49894100427627563, "beta_dpo/beta_margin_grad_std": 0.007821588777005672, "beta_dpo/beta_margin_mean": 0.0042366455309093, "beta_dpo/beta_margin_std": 0.031295765191316605, "beta_dpo/beta_used": 0.10144417732954025, "beta_dpo/beta_used_raw": 0.10144417732954025, "beta_dpo/gap_mean": 0.04330967366695404, "beta_dpo/gap_std": 0.36020204424858093, "beta_dpo/loss_margin_mean": 0.0418030321598053, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02936857562408223, "grad_norm": 77.79698944091797, "learning_rate": 1.3768115942028986e-07, "logits/chosen": -0.5257374048233032, "logits/rejected": -0.4667814075946808, "loss": 1.3796, "step": 20 }, { "beta_dpo/beta": 0.10282687842845917, "beta_dpo/beta_margin_grad_mean": -0.49695706367492676, "beta_dpo/beta_margin_grad_std": 0.010617760010063648, "beta_dpo/beta_margin_mean": 0.012177429161965847, "beta_dpo/beta_margin_std": 0.04252319782972336, "beta_dpo/beta_used": 0.10282687842845917, "beta_dpo/beta_used_raw": 0.10282687842845917, "beta_dpo/gap_mean": 0.052578218281269073, "beta_dpo/gap_std": 0.3585847020149231, "beta_dpo/loss_margin_mean": 0.1178915798664093, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.030837004405286344, "grad_norm": 84.59689331054688, "learning_rate": 1.4492753623188405e-07, "logits/chosen": -0.5015609860420227, "logits/rejected": -0.4782274663448334, "loss": 1.3762, "step": 21 }, { "beta_dpo/beta": 0.1021641194820404, "beta_dpo/beta_margin_grad_mean": -0.494739294052124, "beta_dpo/beta_margin_grad_std": 0.011074875481426716, "beta_dpo/beta_margin_mean": 0.021053766831755638, "beta_dpo/beta_margin_std": 0.04432320222258568, "beta_dpo/beta_used": 0.1021641194820404, "beta_dpo/beta_used_raw": 0.1021641194820404, "beta_dpo/gap_mean": 0.07795767486095428, "beta_dpo/gap_std": 0.37775668501853943, "beta_dpo/loss_margin_mean": 0.2064528465270996, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.032305433186490456, "grad_norm": 82.02935028076172, "learning_rate": 1.5217391304347825e-07, "logits/chosen": -0.5076688528060913, "logits/rejected": -0.46508467197418213, "loss": 1.375, "step": 22 }, { "beta_dpo/beta": 0.10281073302030563, "beta_dpo/beta_margin_grad_mean": -0.4947512447834015, "beta_dpo/beta_margin_grad_std": 0.009751598350703716, "beta_dpo/beta_margin_mean": 0.02100636623799801, "beta_dpo/beta_margin_std": 0.03903375566005707, "beta_dpo/beta_used": 0.10281073302030563, "beta_dpo/beta_used_raw": 0.10281073302030563, "beta_dpo/gap_mean": 0.10390491783618927, "beta_dpo/gap_std": 0.3772027790546417, "beta_dpo/loss_margin_mean": 0.2033129334449768, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.033773861967694566, "grad_norm": 76.44645690917969, "learning_rate": 1.5942028985507245e-07, "logits/chosen": -0.5184653997421265, "logits/rejected": -0.4976601004600525, "loss": 1.3708, "step": 23 }, { "beta_dpo/beta": 0.10454396903514862, "beta_dpo/beta_margin_grad_mean": -0.4934062063694, "beta_dpo/beta_margin_grad_std": 0.010538320057094097, "beta_dpo/beta_margin_mean": 0.026394186541438103, "beta_dpo/beta_margin_std": 0.04219713807106018, "beta_dpo/beta_used": 0.10454396903514862, "beta_dpo/beta_used_raw": 0.10454396903514862, "beta_dpo/gap_mean": 0.12391284108161926, "beta_dpo/gap_std": 0.37767690420150757, "beta_dpo/loss_margin_mean": 0.2502744197845459, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03524229074889868, "grad_norm": 94.25565338134766, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -0.5424538254737854, "logits/rejected": -0.5254075527191162, "loss": 1.3656, "step": 24 }, { "beta_dpo/beta": 0.100839763879776, "beta_dpo/beta_margin_grad_mean": -0.4942309856414795, "beta_dpo/beta_margin_grad_std": 0.009877659380435944, "beta_dpo/beta_margin_mean": 0.02309180237352848, "beta_dpo/beta_margin_std": 0.03954963758587837, "beta_dpo/beta_used": 0.100839763879776, "beta_dpo/beta_used_raw": 0.100839763879776, "beta_dpo/gap_mean": 0.14912059903144836, "beta_dpo/gap_std": 0.3832852840423584, "beta_dpo/loss_margin_mean": 0.22906917333602905, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03671071953010279, "grad_norm": 75.07634735107422, "learning_rate": 1.7391304347826085e-07, "logits/chosen": -0.4918757677078247, "logits/rejected": -0.46183332800865173, "loss": 1.37, "step": 25 }, { "beta_dpo/beta": 0.10145638883113861, "beta_dpo/beta_margin_grad_mean": -0.4906671941280365, "beta_dpo/beta_margin_grad_std": 0.012507390230894089, "beta_dpo/beta_margin_mean": 0.037368275225162506, "beta_dpo/beta_margin_std": 0.050109487026929855, "beta_dpo/beta_used": 0.10145638883113861, "beta_dpo/beta_used_raw": 0.10145638883113861, "beta_dpo/gap_mean": 0.1847640573978424, "beta_dpo/gap_std": 0.4011450409889221, "beta_dpo/loss_margin_mean": 0.3683029115200043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0381791483113069, "grad_norm": 78.68896484375, "learning_rate": 1.8115942028985507e-07, "logits/chosen": -0.5504664182662964, "logits/rejected": -0.5192441344261169, "loss": 1.3654, "step": 26 }, { "beta_dpo/beta": 0.10302956402301788, "beta_dpo/beta_margin_grad_mean": -0.4876747727394104, "beta_dpo/beta_margin_grad_std": 0.01424187608063221, "beta_dpo/beta_margin_mean": 0.049370817840099335, "beta_dpo/beta_margin_std": 0.057142678648233414, "beta_dpo/beta_used": 0.10302956402301788, "beta_dpo/beta_used_raw": 0.10302956402301788, "beta_dpo/gap_mean": 0.23974978923797607, "beta_dpo/gap_std": 0.42792779207229614, "beta_dpo/loss_margin_mean": 0.47268885374069214, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.039647577092511016, "grad_norm": 87.7347183227539, "learning_rate": 1.8840579710144927e-07, "logits/chosen": -0.5187373161315918, "logits/rejected": -0.4824272394180298, "loss": 1.3563, "step": 27 }, { "beta_dpo/beta": 0.102115698158741, "beta_dpo/beta_margin_grad_mean": -0.49245062470436096, "beta_dpo/beta_margin_grad_std": 0.014135321602225304, "beta_dpo/beta_margin_mean": 0.03022361919283867, "beta_dpo/beta_margin_std": 0.056595128029584885, "beta_dpo/beta_used": 0.102115698158741, "beta_dpo/beta_used_raw": 0.102115698158741, "beta_dpo/gap_mean": 0.2491932511329651, "beta_dpo/gap_std": 0.4498485326766968, "beta_dpo/loss_margin_mean": 0.295854777097702, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.041116005873715125, "grad_norm": 75.64714050292969, "learning_rate": 1.9565217391304347e-07, "logits/chosen": -0.5084043741226196, "logits/rejected": -0.4534956216812134, "loss": 1.3579, "step": 28 }, { "beta_dpo/beta": 0.10585251450538635, "beta_dpo/beta_margin_grad_mean": -0.4868943691253662, "beta_dpo/beta_margin_grad_std": 0.015499315224587917, "beta_dpo/beta_margin_mean": 0.05249761790037155, "beta_dpo/beta_margin_std": 0.062127504497766495, "beta_dpo/beta_used": 0.10585251450538635, "beta_dpo/beta_used_raw": 0.10585251450538635, "beta_dpo/gap_mean": 0.29277026653289795, "beta_dpo/gap_std": 0.47807806730270386, "beta_dpo/loss_margin_mean": 0.4953559637069702, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.042584434654919234, "grad_norm": 94.25686645507812, "learning_rate": 2.028985507246377e-07, "logits/chosen": -0.5720121264457703, "logits/rejected": -0.5272256731987, "loss": 1.346, "step": 29 }, { "beta_dpo/beta": 0.10716623067855835, "beta_dpo/beta_margin_grad_mean": -0.48364534974098206, "beta_dpo/beta_margin_grad_std": 0.016273001208901405, "beta_dpo/beta_margin_mean": 0.06553145498037338, "beta_dpo/beta_margin_std": 0.06532347202301025, "beta_dpo/beta_used": 0.10716623067855835, "beta_dpo/beta_used_raw": 0.10716623067855835, "beta_dpo/gap_mean": 0.3511636555194855, "beta_dpo/gap_std": 0.5038948059082031, "beta_dpo/loss_margin_mean": 0.6101883053779602, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04405286343612335, "grad_norm": 91.32884979248047, "learning_rate": 2.1014492753623187e-07, "logits/chosen": -0.4944499731063843, "logits/rejected": -0.4637511968612671, "loss": 1.3372, "step": 30 }, { "beta_dpo/beta": 0.09747521579265594, "beta_dpo/beta_margin_grad_mean": -0.48976314067840576, "beta_dpo/beta_margin_grad_std": 0.014673292636871338, "beta_dpo/beta_margin_mean": 0.041009921580553055, "beta_dpo/beta_margin_std": 0.05886054411530495, "beta_dpo/beta_used": 0.09747521579265594, "beta_dpo/beta_used_raw": 0.09747521579265594, "beta_dpo/gap_mean": 0.36561119556427, "beta_dpo/gap_std": 0.5108226537704468, "beta_dpo/loss_margin_mean": 0.4201761782169342, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04552129221732746, "grad_norm": 68.29032135009766, "learning_rate": 2.1739130434782607e-07, "logits/chosen": -0.5044275522232056, "logits/rejected": -0.4597151279449463, "loss": 1.3554, "step": 31 }, { "beta_dpo/beta": 0.10314959287643433, "beta_dpo/beta_margin_grad_mean": -0.4819798171520233, "beta_dpo/beta_margin_grad_std": 0.020708369091153145, "beta_dpo/beta_margin_mean": 0.07229103147983551, "beta_dpo/beta_margin_std": 0.08329294621944427, "beta_dpo/beta_used": 0.10314959287643433, "beta_dpo/beta_used_raw": 0.10314959287643433, "beta_dpo/gap_mean": 0.4219781458377838, "beta_dpo/gap_std": 0.56684410572052, "beta_dpo/loss_margin_mean": 0.7036821842193604, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04698972099853157, "grad_norm": 78.29996490478516, "learning_rate": 2.2463768115942027e-07, "logits/chosen": -0.5184359550476074, "logits/rejected": -0.4776637554168701, "loss": 1.338, "step": 32 }, { "beta_dpo/beta": 0.10217119753360748, "beta_dpo/beta_margin_grad_mean": -0.48680615425109863, "beta_dpo/beta_margin_grad_std": 0.016086775809526443, "beta_dpo/beta_margin_mean": 0.05285169929265976, "beta_dpo/beta_margin_std": 0.0644962415099144, "beta_dpo/beta_used": 0.10217119753360748, "beta_dpo/beta_used_raw": 0.10217119753360748, "beta_dpo/gap_mean": 0.4387624263763428, "beta_dpo/gap_std": 0.5823417901992798, "beta_dpo/loss_margin_mean": 0.5102719664573669, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.048458149779735685, "grad_norm": 75.79508209228516, "learning_rate": 2.318840579710145e-07, "logits/chosen": -0.47731277346611023, "logits/rejected": -0.4508548974990845, "loss": 1.3384, "step": 33 }, { "beta_dpo/beta": 0.09928236901760101, "beta_dpo/beta_margin_grad_mean": -0.4819219708442688, "beta_dpo/beta_margin_grad_std": 0.01917845755815506, "beta_dpo/beta_margin_mean": 0.07247772812843323, "beta_dpo/beta_margin_std": 0.07699740678071976, "beta_dpo/beta_used": 0.09928236901760101, "beta_dpo/beta_used_raw": 0.09928236901760101, "beta_dpo/gap_mean": 0.48840245604515076, "beta_dpo/gap_std": 0.6152428388595581, "beta_dpo/loss_margin_mean": 0.7295181751251221, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.049926578560939794, "grad_norm": 66.3543930053711, "learning_rate": 2.391304347826087e-07, "logits/chosen": -0.5100936889648438, "logits/rejected": -0.4925019145011902, "loss": 1.3401, "step": 34 }, { "beta_dpo/beta": 0.10785353183746338, "beta_dpo/beta_margin_grad_mean": -0.47311800718307495, "beta_dpo/beta_margin_grad_std": 0.02489115111529827, "beta_dpo/beta_margin_mean": 0.10804824531078339, "beta_dpo/beta_margin_std": 0.10100562125444412, "beta_dpo/beta_used": 0.10785353183746338, "beta_dpo/beta_used_raw": 0.10785353183746338, "beta_dpo/gap_mean": 0.5772824883460999, "beta_dpo/gap_std": 0.6622889637947083, "beta_dpo/loss_margin_mean": 0.9983453750610352, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0513950073421439, "grad_norm": 77.56873321533203, "learning_rate": 2.463768115942029e-07, "logits/chosen": -0.5331075191497803, "logits/rejected": -0.49618980288505554, "loss": 1.3114, "step": 35 }, { "beta_dpo/beta": 0.10545908659696579, "beta_dpo/beta_margin_grad_mean": -0.4742385447025299, "beta_dpo/beta_margin_grad_std": 0.030729172751307487, "beta_dpo/beta_margin_mean": 0.10363934934139252, "beta_dpo/beta_margin_std": 0.12403807044029236, "beta_dpo/beta_used": 0.10545908659696579, "beta_dpo/beta_used_raw": 0.10545908659696579, "beta_dpo/gap_mean": 0.6375015377998352, "beta_dpo/gap_std": 0.7486386299133301, "beta_dpo/loss_margin_mean": 0.9657546281814575, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05286343612334802, "grad_norm": 73.26063537597656, "learning_rate": 2.536231884057971e-07, "logits/chosen": -0.5830014944076538, "logits/rejected": -0.5479526519775391, "loss": 1.3121, "step": 36 }, { "beta_dpo/beta": 0.0942203551530838, "beta_dpo/beta_margin_grad_mean": -0.47738873958587646, "beta_dpo/beta_margin_grad_std": 0.03081784024834633, "beta_dpo/beta_margin_mean": 0.09107129275798798, "beta_dpo/beta_margin_std": 0.1248544380068779, "beta_dpo/beta_used": 0.0942203551530838, "beta_dpo/beta_used_raw": 0.0942203551530838, "beta_dpo/gap_mean": 0.7214508056640625, "beta_dpo/gap_std": 0.8505280017852783, "beta_dpo/loss_margin_mean": 0.9462437629699707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05433186490455213, "grad_norm": 50.44397735595703, "learning_rate": 2.6086956521739126e-07, "logits/chosen": -0.5012315511703491, "logits/rejected": -0.45690277218818665, "loss": 1.3286, "step": 37 }, { "beta_dpo/beta": 0.1041734591126442, "beta_dpo/beta_margin_grad_mean": -0.4682784676551819, "beta_dpo/beta_margin_grad_std": 0.03961404040455818, "beta_dpo/beta_margin_mean": 0.12841160595417023, "beta_dpo/beta_margin_std": 0.16240736842155457, "beta_dpo/beta_used": 0.1041734591126442, "beta_dpo/beta_used_raw": 0.1041734591126442, "beta_dpo/gap_mean": 0.7879455089569092, "beta_dpo/gap_std": 0.9812790155410767, "beta_dpo/loss_margin_mean": 1.224595069885254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.055800293685756244, "grad_norm": 67.5627212524414, "learning_rate": 2.681159420289855e-07, "logits/chosen": -0.5276659727096558, "logits/rejected": -0.4949561655521393, "loss": 1.2998, "step": 38 }, { "beta_dpo/beta": 0.10857867449522018, "beta_dpo/beta_margin_grad_mean": -0.46116903424263, "beta_dpo/beta_margin_grad_std": 0.03715595230460167, "beta_dpo/beta_margin_mean": 0.15660372376441956, "beta_dpo/beta_margin_std": 0.15102945268154144, "beta_dpo/beta_used": 0.10857867449522018, "beta_dpo/beta_used_raw": 0.10857867449522018, "beta_dpo/gap_mean": 0.9118002653121948, "beta_dpo/gap_std": 1.0534446239471436, "beta_dpo/loss_margin_mean": 1.4260352849960327, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05726872246696035, "grad_norm": 74.21395874023438, "learning_rate": 2.753623188405797e-07, "logits/chosen": -0.5588313341140747, "logits/rejected": -0.5193623304367065, "loss": 1.275, "step": 39 }, { "beta_dpo/beta": 0.0998622328042984, "beta_dpo/beta_margin_grad_mean": -0.4660206437110901, "beta_dpo/beta_margin_grad_std": 0.03987602889537811, "beta_dpo/beta_margin_mean": 0.13751423358917236, "beta_dpo/beta_margin_std": 0.16336165368556976, "beta_dpo/beta_used": 0.0998622328042984, "beta_dpo/beta_used_raw": 0.0998622328042984, "beta_dpo/gap_mean": 0.9838204383850098, "beta_dpo/gap_std": 1.121214509010315, "beta_dpo/loss_margin_mean": 1.3697092533111572, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05873715124816446, "grad_norm": 55.91511154174805, "learning_rate": 2.8260869565217386e-07, "logits/chosen": -0.4998742341995239, "logits/rejected": -0.46878963708877563, "loss": 1.2931, "step": 40 }, { "beta_dpo/beta": 0.09814733266830444, "beta_dpo/beta_margin_grad_mean": -0.4590160846710205, "beta_dpo/beta_margin_grad_std": 0.04150310531258583, "beta_dpo/beta_margin_mean": 0.1658371239900589, "beta_dpo/beta_margin_std": 0.16969500482082367, "beta_dpo/beta_used": 0.09814733266830444, "beta_dpo/beta_used_raw": 0.09814733266830444, "beta_dpo/gap_mean": 1.111755609512329, "beta_dpo/gap_std": 1.2354657649993896, "beta_dpo/loss_margin_mean": 1.6996898651123047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06020558002936858, "grad_norm": 59.53895950317383, "learning_rate": 2.898550724637681e-07, "logits/chosen": -0.5415744781494141, "logits/rejected": -0.5051206350326538, "loss": 1.2849, "step": 41 }, { "beta_dpo/beta": 0.11233452707529068, "beta_dpo/beta_margin_grad_mean": -0.43761613965034485, "beta_dpo/beta_margin_grad_std": 0.055440664291381836, "beta_dpo/beta_margin_mean": 0.25520431995391846, "beta_dpo/beta_margin_std": 0.23295927047729492, "beta_dpo/beta_used": 0.11233452707529068, "beta_dpo/beta_used_raw": 0.11233452707529068, "beta_dpo/gap_mean": 1.3095552921295166, "beta_dpo/gap_std": 1.4133354425430298, "beta_dpo/loss_margin_mean": 2.2716450691223145, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06167400881057269, "grad_norm": 74.77738189697266, "learning_rate": 2.971014492753623e-07, "logits/chosen": -0.5276546478271484, "logits/rejected": -0.4807955324649811, "loss": 1.2274, "step": 42 }, { "beta_dpo/beta": 0.11648497730493546, "beta_dpo/beta_margin_grad_mean": -0.4362444281578064, "beta_dpo/beta_margin_grad_std": 0.05007302016019821, "beta_dpo/beta_margin_mean": 0.2601800560951233, "beta_dpo/beta_margin_std": 0.2120179980993271, "beta_dpo/beta_used": 0.11648497730493546, "beta_dpo/beta_used_raw": 0.11648497730493546, "beta_dpo/gap_mean": 1.495275855064392, "beta_dpo/gap_std": 1.494248390197754, "beta_dpo/loss_margin_mean": 2.2226815223693848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0631424375917768, "grad_norm": 79.2459487915039, "learning_rate": 3.043478260869565e-07, "logits/chosen": -0.5580309629440308, "logits/rejected": -0.5340878963470459, "loss": 1.1947, "step": 43 }, { "beta_dpo/beta": 0.11155369877815247, "beta_dpo/beta_margin_grad_mean": -0.4344336986541748, "beta_dpo/beta_margin_grad_std": 0.05017215758562088, "beta_dpo/beta_margin_mean": 0.2672099471092224, "beta_dpo/beta_margin_std": 0.20892754197120667, "beta_dpo/beta_used": 0.11155369877815247, "beta_dpo/beta_used_raw": 0.11155369877815247, "beta_dpo/gap_mean": 1.653472900390625, "beta_dpo/gap_std": 1.5553144216537476, "beta_dpo/loss_margin_mean": 2.398895740509033, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06461086637298091, "grad_norm": 80.41355895996094, "learning_rate": 3.115942028985507e-07, "logits/chosen": -0.47205644845962524, "logits/rejected": -0.45171642303466797, "loss": 1.1951, "step": 44 }, { "beta_dpo/beta": 0.07954459637403488, "beta_dpo/beta_margin_grad_mean": -0.4617185890674591, "beta_dpo/beta_margin_grad_std": 0.043333351612091064, "beta_dpo/beta_margin_mean": 0.15512201189994812, "beta_dpo/beta_margin_std": 0.17768782377243042, "beta_dpo/beta_used": 0.07954459637403488, "beta_dpo/beta_used_raw": 0.07954459637403488, "beta_dpo/gap_mean": 1.7186641693115234, "beta_dpo/gap_std": 1.6547086238861084, "beta_dpo/loss_margin_mean": 1.9536571502685547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06607929515418502, "grad_norm": 47.4119987487793, "learning_rate": 3.188405797101449e-07, "logits/chosen": -0.45731085538864136, "logits/rejected": -0.4441610276699066, "loss": 1.2831, "step": 45 }, { "beta_dpo/beta": 0.08992807567119598, "beta_dpo/beta_margin_grad_mean": -0.44419437646865845, "beta_dpo/beta_margin_grad_std": 0.060576457530260086, "beta_dpo/beta_margin_mean": 0.22959379851818085, "beta_dpo/beta_margin_std": 0.2589755356311798, "beta_dpo/beta_used": 0.08992807567119598, "beta_dpo/beta_used_raw": 0.08992807567119598, "beta_dpo/gap_mean": 1.8407939672470093, "beta_dpo/gap_std": 1.877316951751709, "beta_dpo/loss_margin_mean": 2.509418249130249, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06754772393538913, "grad_norm": 66.04317474365234, "learning_rate": 3.260869565217391e-07, "logits/chosen": -0.517339289188385, "logits/rejected": -0.46569010615348816, "loss": 1.244, "step": 46 }, { "beta_dpo/beta": 0.10393651574850082, "beta_dpo/beta_margin_grad_mean": -0.43122005462646484, "beta_dpo/beta_margin_grad_std": 0.062102172523736954, "beta_dpo/beta_margin_mean": 0.28237393498420715, "beta_dpo/beta_margin_std": 0.2598910629749298, "beta_dpo/beta_used": 0.10393651574850082, "beta_dpo/beta_used_raw": 0.10393651574850082, "beta_dpo/gap_mean": 1.97328519821167, "beta_dpo/gap_std": 1.9843567609786987, "beta_dpo/loss_margin_mean": 2.603851795196533, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06901615271659324, "grad_norm": 67.16490173339844, "learning_rate": 3.333333333333333e-07, "logits/chosen": -0.5952492952346802, "logits/rejected": -0.5439423322677612, "loss": 1.1832, "step": 47 }, { "beta_dpo/beta": 0.09790638089179993, "beta_dpo/beta_margin_grad_mean": -0.43831878900527954, "beta_dpo/beta_margin_grad_std": 0.06469718366861343, "beta_dpo/beta_margin_mean": 0.2532716393470764, "beta_dpo/beta_margin_std": 0.272605299949646, "beta_dpo/beta_used": 0.09790638089179993, "beta_dpo/beta_used_raw": 0.09790638089179993, "beta_dpo/gap_mean": 2.1250531673431396, "beta_dpo/gap_std": 2.0948853492736816, "beta_dpo/loss_margin_mean": 2.544447422027588, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07048458149779736, "grad_norm": 78.59500122070312, "learning_rate": 3.4057971014492755e-07, "logits/chosen": -0.5685693025588989, "logits/rejected": -0.5092687606811523, "loss": 1.1987, "step": 48 }, { "beta_dpo/beta": 0.11987863481044769, "beta_dpo/beta_margin_grad_mean": -0.41220971941947937, "beta_dpo/beta_margin_grad_std": 0.08246695250272751, "beta_dpo/beta_margin_mean": 0.3685282766819, "beta_dpo/beta_margin_std": 0.3620261251926422, "beta_dpo/beta_used": 0.11987863481044769, "beta_dpo/beta_used_raw": 0.11987863481044769, "beta_dpo/gap_mean": 2.2471675872802734, "beta_dpo/gap_std": 2.2004098892211914, "beta_dpo/loss_margin_mean": 3.125662088394165, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07195301027900147, "grad_norm": 240.3484344482422, "learning_rate": 3.478260869565217e-07, "logits/chosen": -0.5591256618499756, "logits/rejected": -0.5024401545524597, "loss": 1.1095, "step": 49 }, { "beta_dpo/beta": 0.10016916692256927, "beta_dpo/beta_margin_grad_mean": -0.4190990924835205, "beta_dpo/beta_margin_grad_std": 0.07547645270824432, "beta_dpo/beta_margin_mean": 0.3364598751068115, "beta_dpo/beta_margin_std": 0.32345935702323914, "beta_dpo/beta_used": 0.10016916692256927, "beta_dpo/beta_used_raw": 0.10016916692256927, "beta_dpo/gap_mean": 2.4781899452209473, "beta_dpo/gap_std": 2.4213905334472656, "beta_dpo/loss_margin_mean": 3.3676936626434326, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07342143906020558, "grad_norm": 64.82975769042969, "learning_rate": 3.5507246376811595e-07, "logits/chosen": -0.5334613919258118, "logits/rejected": -0.497406542301178, "loss": 1.1672, "step": 50 }, { "beta_dpo/beta": 0.0657687559723854, "beta_dpo/beta_margin_grad_mean": -0.44602659344673157, "beta_dpo/beta_margin_grad_std": 0.06567390263080597, "beta_dpo/beta_margin_mean": 0.22254019975662231, "beta_dpo/beta_margin_std": 0.2765715718269348, "beta_dpo/beta_used": 0.0657687559723854, "beta_dpo/beta_used_raw": 0.0657687559723854, "beta_dpo/gap_mean": 2.662703275680542, "beta_dpo/gap_std": 2.715353012084961, "beta_dpo/loss_margin_mean": 3.3309483528137207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07488986784140969, "grad_norm": 36.31479263305664, "learning_rate": 3.6231884057971015e-07, "logits/chosen": -0.5016952753067017, "logits/rejected": -0.4681543707847595, "loss": 1.2592, "step": 51 }, { "beta_dpo/beta": 0.13919858634471893, "beta_dpo/beta_margin_grad_mean": -0.34051814675331116, "beta_dpo/beta_margin_grad_std": 0.11514287441968918, "beta_dpo/beta_margin_mean": 0.7171680927276611, "beta_dpo/beta_margin_std": 0.5753344297409058, "beta_dpo/beta_used": 0.13919858634471893, "beta_dpo/beta_used_raw": 0.13919858634471893, "beta_dpo/gap_mean": 3.020768404006958, "beta_dpo/gap_std": 2.9662249088287354, "beta_dpo/loss_margin_mean": 5.1557536125183105, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0763582966226138, "grad_norm": 85.15430450439453, "learning_rate": 3.695652173913043e-07, "logits/chosen": -0.5425816774368286, "logits/rejected": -0.4867020845413208, "loss": 0.9776, "step": 52 }, { "beta_dpo/beta": 0.10637001693248749, "beta_dpo/beta_margin_grad_mean": -0.3794803321361542, "beta_dpo/beta_margin_grad_std": 0.10878144204616547, "beta_dpo/beta_margin_mean": 0.5370194315910339, "beta_dpo/beta_margin_std": 0.5486578345298767, "beta_dpo/beta_used": 0.10637001693248749, "beta_dpo/beta_used_raw": 0.10637001693248749, "beta_dpo/gap_mean": 3.373033046722412, "beta_dpo/gap_std": 3.254366874694824, "beta_dpo/loss_margin_mean": 5.15134334564209, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07782672540381791, "grad_norm": 66.78472137451172, "learning_rate": 3.7681159420289855e-07, "logits/chosen": -0.5930138826370239, "logits/rejected": -0.5710781812667847, "loss": 1.0761, "step": 53 }, { "beta_dpo/beta": 0.09235785901546478, "beta_dpo/beta_margin_grad_mean": -0.4021127223968506, "beta_dpo/beta_margin_grad_std": 0.09637561440467834, "beta_dpo/beta_margin_mean": 0.42590391635894775, "beta_dpo/beta_margin_std": 0.46513980627059937, "beta_dpo/beta_used": 0.09235785901546478, "beta_dpo/beta_used_raw": 0.09235785901546478, "beta_dpo/gap_mean": 3.6533608436584473, "beta_dpo/gap_std": 3.5544323921203613, "beta_dpo/loss_margin_mean": 4.466633319854736, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07929515418502203, "grad_norm": 54.912174224853516, "learning_rate": 3.8405797101449274e-07, "logits/chosen": -0.5769028663635254, "logits/rejected": -0.5225714445114136, "loss": 1.0957, "step": 54 }, { "beta_dpo/beta": 0.12684877216815948, "beta_dpo/beta_margin_grad_mean": -0.3536130487918854, "beta_dpo/beta_margin_grad_std": 0.14455373585224152, "beta_dpo/beta_margin_mean": 0.690856397151947, "beta_dpo/beta_margin_std": 0.7624755501747131, "beta_dpo/beta_used": 0.12684877216815948, "beta_dpo/beta_used_raw": 0.12684877216815948, "beta_dpo/gap_mean": 3.942603826522827, "beta_dpo/gap_std": 3.9598231315612793, "beta_dpo/loss_margin_mean": 5.50035285949707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08076358296622614, "grad_norm": 70.0872573852539, "learning_rate": 3.9130434782608694e-07, "logits/chosen": -0.6278643608093262, "logits/rejected": -0.5644968748092651, "loss": 0.9505, "step": 55 }, { "beta_dpo/beta": 0.08802211284637451, "beta_dpo/beta_margin_grad_mean": -0.39712223410606384, "beta_dpo/beta_margin_grad_std": 0.1159137487411499, "beta_dpo/beta_margin_mean": 0.45057377219200134, "beta_dpo/beta_margin_std": 0.5337446928024292, "beta_dpo/beta_used": 0.08802211284637451, "beta_dpo/beta_used_raw": 0.08802211284637451, "beta_dpo/gap_mean": 4.207155227661133, "beta_dpo/gap_std": 4.369948387145996, "beta_dpo/loss_margin_mean": 5.1305742263793945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08223201174743025, "grad_norm": 50.04378128051758, "learning_rate": 3.9855072463768114e-07, "logits/chosen": -0.6108717918395996, "logits/rejected": -0.5681912899017334, "loss": 1.0989, "step": 56 }, { "beta_dpo/beta": 0.151127427816391, "beta_dpo/beta_margin_grad_mean": -0.31904980540275574, "beta_dpo/beta_margin_grad_std": 0.14913946390151978, "beta_dpo/beta_margin_mean": 0.8648303747177124, "beta_dpo/beta_margin_std": 0.7930364012718201, "beta_dpo/beta_used": 0.151127427816391, "beta_dpo/beta_used_raw": 0.151127427816391, "beta_dpo/gap_mean": 4.442320823669434, "beta_dpo/gap_std": 4.536768436431885, "beta_dpo/loss_margin_mean": 5.748650074005127, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08370044052863436, "grad_norm": 76.4854736328125, "learning_rate": 4.057971014492754e-07, "logits/chosen": -0.5923542976379395, "logits/rejected": -0.5654958486557007, "loss": 0.8215, "step": 57 }, { "beta_dpo/beta": 0.09416334331035614, "beta_dpo/beta_margin_grad_mean": -0.3633388876914978, "beta_dpo/beta_margin_grad_std": 0.13083474338054657, "beta_dpo/beta_margin_mean": 0.6299749612808228, "beta_dpo/beta_margin_std": 0.6659680008888245, "beta_dpo/beta_used": 0.09416334331035614, "beta_dpo/beta_used_raw": 0.09416334331035614, "beta_dpo/gap_mean": 4.803388595581055, "beta_dpo/gap_std": 4.8988494873046875, "beta_dpo/loss_margin_mean": 6.5755534172058105, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08516886930983847, "grad_norm": 63.09685134887695, "learning_rate": 4.1304347826086954e-07, "logits/chosen": -0.595874547958374, "logits/rejected": -0.5206152200698853, "loss": 1.0303, "step": 58 }, { "beta_dpo/beta": 0.10466543585062027, "beta_dpo/beta_margin_grad_mean": -0.34448105096817017, "beta_dpo/beta_margin_grad_std": 0.15328913927078247, "beta_dpo/beta_margin_mean": 0.829659640789032, "beta_dpo/beta_margin_std": 1.0400630235671997, "beta_dpo/beta_used": 0.10466543585062027, "beta_dpo/beta_used_raw": 0.10466543585062027, "beta_dpo/gap_mean": 5.30738639831543, "beta_dpo/gap_std": 5.2926130294799805, "beta_dpo/loss_margin_mean": 6.950667381286621, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08663729809104258, "grad_norm": 60.62688064575195, "learning_rate": 4.2028985507246374e-07, "logits/chosen": -0.5974893569946289, "logits/rejected": -0.5545705556869507, "loss": 0.9537, "step": 59 }, { "beta_dpo/beta": 0.11850239336490631, "beta_dpo/beta_margin_grad_mean": -0.3506718575954437, "beta_dpo/beta_margin_grad_std": 0.15503977239131927, "beta_dpo/beta_margin_mean": 0.7030664086341858, "beta_dpo/beta_margin_std": 0.7772324085235596, "beta_dpo/beta_used": 0.11850239336490631, "beta_dpo/beta_used_raw": 0.11850239336490631, "beta_dpo/gap_mean": 5.407642364501953, "beta_dpo/gap_std": 5.513436317443848, "beta_dpo/loss_margin_mean": 5.766895294189453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0881057268722467, "grad_norm": 69.3149185180664, "learning_rate": 4.2753623188405794e-07, "logits/chosen": -0.5805940628051758, "logits/rejected": -0.5189210772514343, "loss": 0.8759, "step": 60 }, { "beta_dpo/beta": 0.08738794177770615, "beta_dpo/beta_margin_grad_mean": -0.37938931584358215, "beta_dpo/beta_margin_grad_std": 0.15377961099147797, "beta_dpo/beta_margin_mean": 0.6398810744285583, "beta_dpo/beta_margin_std": 1.0747108459472656, "beta_dpo/beta_used": 0.08738794177770615, "beta_dpo/beta_used_raw": 0.08738794177770615, "beta_dpo/gap_mean": 5.656585693359375, "beta_dpo/gap_std": 6.2068586349487305, "beta_dpo/loss_margin_mean": 6.976743221282959, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08957415565345081, "grad_norm": 49.676326751708984, "learning_rate": 4.3478260869565214e-07, "logits/chosen": -0.5972954034805298, "logits/rejected": -0.5621410608291626, "loss": 1.0428, "step": 61 }, { "beta_dpo/beta": 0.07970167696475983, "beta_dpo/beta_margin_grad_mean": -0.4036404490470886, "beta_dpo/beta_margin_grad_std": 0.1279177963733673, "beta_dpo/beta_margin_mean": 0.45351850986480713, "beta_dpo/beta_margin_std": 0.6815299987792969, "beta_dpo/beta_used": 0.07970167696475983, "beta_dpo/beta_used_raw": 0.07970167696475983, "beta_dpo/gap_mean": 5.591924667358398, "beta_dpo/gap_std": 6.288469314575195, "beta_dpo/loss_margin_mean": 5.3183794021606445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09104258443465492, "grad_norm": 49.01858901977539, "learning_rate": 4.420289855072464e-07, "logits/chosen": -0.5811234712600708, "logits/rejected": -0.5460039973258972, "loss": 1.0477, "step": 62 }, { "beta_dpo/beta": 0.10061165690422058, "beta_dpo/beta_margin_grad_mean": -0.3452926576137543, "beta_dpo/beta_margin_grad_std": 0.16270661354064941, "beta_dpo/beta_margin_mean": 0.8012576103210449, "beta_dpo/beta_margin_std": 0.977336049079895, "beta_dpo/beta_used": 0.10061165690422058, "beta_dpo/beta_used_raw": 0.10061165690422058, "beta_dpo/gap_mean": 5.912351608276367, "beta_dpo/gap_std": 6.507175445556641, "beta_dpo/loss_margin_mean": 7.235960006713867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09251101321585903, "grad_norm": 54.96387481689453, "learning_rate": 4.4927536231884053e-07, "logits/chosen": -0.5760135650634766, "logits/rejected": -0.5288089513778687, "loss": 0.934, "step": 63 }, { "beta_dpo/beta": 0.11127346754074097, "beta_dpo/beta_margin_grad_mean": -0.32286009192466736, "beta_dpo/beta_margin_grad_std": 0.17790742218494415, "beta_dpo/beta_margin_mean": 1.0696979761123657, "beta_dpo/beta_margin_std": 1.435511589050293, "beta_dpo/beta_used": 0.11127346754074097, "beta_dpo/beta_used_raw": 0.11127346754074097, "beta_dpo/gap_mean": 6.382755279541016, "beta_dpo/gap_std": 7.030701637268066, "beta_dpo/loss_margin_mean": 8.447539329528809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09397944199706314, "grad_norm": 54.98874282836914, "learning_rate": 4.5652173913043473e-07, "logits/chosen": -0.5682976245880127, "logits/rejected": -0.5359951257705688, "loss": 0.892, "step": 64 }, { "beta_dpo/beta": 0.15355268120765686, "beta_dpo/beta_margin_grad_mean": -0.28559890389442444, "beta_dpo/beta_margin_grad_std": 0.21047906577587128, "beta_dpo/beta_margin_mean": 1.3807626962661743, "beta_dpo/beta_margin_std": 1.8169898986816406, "beta_dpo/beta_used": 0.15355268120765686, "beta_dpo/beta_used_raw": 0.15355268120765686, "beta_dpo/gap_mean": 6.738654136657715, "beta_dpo/gap_std": 7.486597061157227, "beta_dpo/loss_margin_mean": 8.504437446594238, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09544787077826726, "grad_norm": 84.47888946533203, "learning_rate": 4.63768115942029e-07, "logits/chosen": -0.6392999887466431, "logits/rejected": -0.6247435808181763, "loss": 0.7454, "step": 65 }, { "beta_dpo/beta": 0.038759633898735046, "beta_dpo/beta_margin_grad_mean": -0.42082634568214417, "beta_dpo/beta_margin_grad_std": 0.11057644337415695, "beta_dpo/beta_margin_mean": 0.3621111810207367, "beta_dpo/beta_margin_std": 0.5689931511878967, "beta_dpo/beta_used": 0.038759633898735046, "beta_dpo/beta_used_raw": 0.038759633898735046, "beta_dpo/gap_mean": 7.011206150054932, "beta_dpo/gap_std": 7.803816795349121, "beta_dpo/loss_margin_mean": 7.870203971862793, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09691629955947137, "grad_norm": 30.142791748046875, "learning_rate": 4.7101449275362313e-07, "logits/chosen": -0.6175287365913391, "logits/rejected": -0.5830913186073303, "loss": 1.1833, "step": 66 }, { "beta_dpo/beta": 0.06989531219005585, "beta_dpo/beta_margin_grad_mean": -0.38001659512519836, "beta_dpo/beta_margin_grad_std": 0.14094047248363495, "beta_dpo/beta_margin_mean": 0.59562087059021, "beta_dpo/beta_margin_std": 0.8447734117507935, "beta_dpo/beta_used": 0.06989531219005585, "beta_dpo/beta_used_raw": 0.06989531219005585, "beta_dpo/gap_mean": 7.094534873962402, "beta_dpo/gap_std": 8.07803726196289, "beta_dpo/loss_margin_mean": 8.12269401550293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09838472834067548, "grad_norm": 44.186004638671875, "learning_rate": 4.782608695652174e-07, "logits/chosen": -0.6101835370063782, "logits/rejected": -0.5699295997619629, "loss": 1.0324, "step": 67 }, { "beta_dpo/beta": 0.09041684120893478, "beta_dpo/beta_margin_grad_mean": -0.3650799095630646, "beta_dpo/beta_margin_grad_std": 0.1839817315340042, "beta_dpo/beta_margin_mean": 0.7865732908248901, "beta_dpo/beta_margin_std": 1.181038498878479, "beta_dpo/beta_used": 0.09041684120893478, "beta_dpo/beta_used_raw": 0.06118408590555191, "beta_dpo/gap_mean": 7.258274078369141, "beta_dpo/gap_std": 8.184741973876953, "beta_dpo/loss_margin_mean": 7.898317813873291, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09985315712187959, "grad_norm": 40.886878967285156, "learning_rate": 4.855072463768116e-07, "logits/chosen": -0.6218951940536499, "logits/rejected": -0.5630506873130798, "loss": 0.953, "step": 68 }, { "beta_dpo/beta": 0.12943625450134277, "beta_dpo/beta_margin_grad_mean": -0.28914546966552734, "beta_dpo/beta_margin_grad_std": 0.1749580055475235, "beta_dpo/beta_margin_mean": 1.2787585258483887, "beta_dpo/beta_margin_std": 1.491976022720337, "beta_dpo/beta_used": 0.12943625450134277, "beta_dpo/beta_used_raw": 0.12943625450134277, "beta_dpo/gap_mean": 7.689189434051514, "beta_dpo/gap_std": 8.327251434326172, "beta_dpo/loss_margin_mean": 9.463652610778809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1013215859030837, "grad_norm": 72.10195922851562, "learning_rate": 4.927536231884058e-07, "logits/chosen": -0.5902745723724365, "logits/rejected": -0.5661255717277527, "loss": 0.7568, "step": 69 }, { "beta_dpo/beta": 0.0740790069103241, "beta_dpo/beta_margin_grad_mean": -0.369037926197052, "beta_dpo/beta_margin_grad_std": 0.1858556717634201, "beta_dpo/beta_margin_mean": 0.8269989490509033, "beta_dpo/beta_margin_std": 1.3370610475540161, "beta_dpo/beta_used": 0.0740790069103241, "beta_dpo/beta_used_raw": 0.06600124388933182, "beta_dpo/gap_mean": 8.018512725830078, "beta_dpo/gap_std": 8.71467399597168, "beta_dpo/loss_margin_mean": 10.070143699645996, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1027900146842878, "grad_norm": 58.23539352416992, "learning_rate": 5e-07, "logits/chosen": -0.6226065158843994, "logits/rejected": -0.5874596834182739, "loss": 1.0241, "step": 70 }, { "beta_dpo/beta": 0.10465647280216217, "beta_dpo/beta_margin_grad_mean": -0.29003310203552246, "beta_dpo/beta_margin_grad_std": 0.17214025557041168, "beta_dpo/beta_margin_mean": 1.1773220300674438, "beta_dpo/beta_margin_std": 1.2341235876083374, "beta_dpo/beta_used": 0.10465647280216217, "beta_dpo/beta_used_raw": 0.10465647280216217, "beta_dpo/gap_mean": 8.682525634765625, "beta_dpo/gap_std": 9.29095458984375, "beta_dpo/loss_margin_mean": 11.49172306060791, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10425844346549193, "grad_norm": 47.67396545410156, "learning_rate": 4.999967061337492e-07, "logits/chosen": -0.6586190462112427, "logits/rejected": -0.6172687411308289, "loss": 0.8167, "step": 71 }, { "beta_dpo/beta": 0.1546517014503479, "beta_dpo/beta_margin_grad_mean": -0.24096769094467163, "beta_dpo/beta_margin_grad_std": 0.22502072155475616, "beta_dpo/beta_margin_mean": 1.905733585357666, "beta_dpo/beta_margin_std": 2.095893383026123, "beta_dpo/beta_used": 0.1546517014503479, "beta_dpo/beta_used_raw": 0.1546517014503479, "beta_dpo/gap_mean": 9.315265655517578, "beta_dpo/gap_std": 9.664226531982422, "beta_dpo/loss_margin_mean": 12.12594985961914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10572687224669604, "grad_norm": 75.66039276123047, "learning_rate": 4.999868246217933e-07, "logits/chosen": -0.6442696452140808, "logits/rejected": -0.6082816123962402, "loss": 0.5912, "step": 72 }, { "beta_dpo/beta": 0.09382159262895584, "beta_dpo/beta_margin_grad_mean": -0.3226276934146881, "beta_dpo/beta_margin_grad_std": 0.23689226806163788, "beta_dpo/beta_margin_mean": 1.1267133951187134, "beta_dpo/beta_margin_std": 1.6691551208496094, "beta_dpo/beta_used": 0.09382159262895584, "beta_dpo/beta_used_raw": 0.09382159262895584, "beta_dpo/gap_mean": 9.892107009887695, "beta_dpo/gap_std": 10.947005271911621, "beta_dpo/loss_margin_mean": 12.176095008850098, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10719530102790015, "grad_norm": 63.61186981201172, "learning_rate": 4.999703557245192e-07, "logits/chosen": -0.6697020530700684, "logits/rejected": -0.6270005702972412, "loss": 0.877, "step": 73 }, { "beta_dpo/beta": 0.0458955280482769, "beta_dpo/beta_margin_grad_mean": -0.38849544525146484, "beta_dpo/beta_margin_grad_std": 0.181712806224823, "beta_dpo/beta_margin_mean": 0.6362202763557434, "beta_dpo/beta_margin_std": 1.2357457876205444, "beta_dpo/beta_used": 0.0458955280482769, "beta_dpo/beta_used_raw": 0.04306982085108757, "beta_dpo/gap_mean": 10.440993309020996, "beta_dpo/gap_std": 12.396344184875488, "beta_dpo/loss_margin_mean": 13.167186737060547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10866372980910426, "grad_norm": 36.97188949584961, "learning_rate": 4.999472998758977e-07, "logits/chosen": -0.605143129825592, "logits/rejected": -0.5923604965209961, "loss": 1.0827, "step": 74 }, { "beta_dpo/beta": 0.1566300094127655, "beta_dpo/beta_margin_grad_mean": -0.19501835107803345, "beta_dpo/beta_margin_grad_std": 0.2327680140733719, "beta_dpo/beta_margin_mean": 3.023698568344116, "beta_dpo/beta_margin_std": 3.2827866077423096, "beta_dpo/beta_used": 0.1566300094127655, "beta_dpo/beta_used_raw": 0.1566300094127655, "beta_dpo/gap_mean": 11.546646118164062, "beta_dpo/gap_std": 13.614230155944824, "beta_dpo/loss_margin_mean": 18.43977165222168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11013215859030837, "grad_norm": 92.53497314453125, "learning_rate": 4.999176576834721e-07, "logits/chosen": -0.6607112288475037, "logits/rejected": -0.6499860286712646, "loss": 0.6467, "step": 75 }, { "beta_dpo/beta": 0.05693836510181427, "beta_dpo/beta_margin_grad_mean": -0.3819631040096283, "beta_dpo/beta_margin_grad_std": 0.20355312526226044, "beta_dpo/beta_margin_mean": 0.6884029507637024, "beta_dpo/beta_margin_std": 1.4083665609359741, "beta_dpo/beta_used": 0.05693836510181427, "beta_dpo/beta_used_raw": 0.004215408116579056, "beta_dpo/gap_mean": 12.032630920410156, "beta_dpo/gap_std": 13.884933471679688, "beta_dpo/loss_margin_mean": 11.839239120483398, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11160058737151249, "grad_norm": 44.36159133911133, "learning_rate": 4.998814299283415e-07, "logits/chosen": -0.6945298910140991, "logits/rejected": -0.6507744789123535, "loss": 1.0088, "step": 76 }, { "beta_dpo/beta": 0.3072592616081238, "beta_dpo/beta_margin_grad_mean": -0.16621431708335876, "beta_dpo/beta_margin_grad_std": 0.2623097002506256, "beta_dpo/beta_margin_mean": 6.070537090301514, "beta_dpo/beta_margin_std": 7.8197712898254395, "beta_dpo/beta_used": 0.3072592616081238, "beta_dpo/beta_used_raw": 0.3072592616081238, "beta_dpo/gap_mean": 13.085380554199219, "beta_dpo/gap_std": 14.796323776245117, "beta_dpo/loss_margin_mean": 18.652969360351562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1130690161527166, "grad_norm": 122.56193542480469, "learning_rate": 4.998386175651409e-07, "logits/chosen": -0.6592667102813721, "logits/rejected": -0.6153388023376465, "loss": 0.3922, "step": 77 }, { "beta_dpo/beta": 0.14949087798595428, "beta_dpo/beta_margin_grad_mean": -0.3154319226741791, "beta_dpo/beta_margin_grad_std": 0.24938298761844635, "beta_dpo/beta_margin_mean": 2.418715238571167, "beta_dpo/beta_margin_std": 3.7272212505340576, "beta_dpo/beta_used": 0.14949087798595428, "beta_dpo/beta_used_raw": 0.12561628222465515, "beta_dpo/gap_mean": 13.365839958190918, "beta_dpo/gap_std": 15.315971374511719, "beta_dpo/loss_margin_mean": 14.452160835266113, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1145374449339207, "grad_norm": 55.331443786621094, "learning_rate": 4.997892217220159e-07, "logits/chosen": -0.6192628145217896, "logits/rejected": -0.5899114608764648, "loss": 0.7759, "step": 78 }, { "beta_dpo/beta": 0.11019716411828995, "beta_dpo/beta_margin_grad_mean": -0.3492397964000702, "beta_dpo/beta_margin_grad_std": 0.24441301822662354, "beta_dpo/beta_margin_mean": 1.7321070432662964, "beta_dpo/beta_margin_std": 3.166022777557373, "beta_dpo/beta_used": 0.11019716411828995, "beta_dpo/beta_used_raw": -0.001482747495174408, "beta_dpo/gap_mean": 13.848381042480469, "beta_dpo/gap_std": 16.022428512573242, "beta_dpo/loss_margin_mean": 15.7933349609375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11600587371512482, "grad_norm": 69.28112030029297, "learning_rate": 4.997332437005931e-07, "logits/chosen": -0.6417551636695862, "logits/rejected": -0.608524739742279, "loss": 0.8819, "step": 79 }, { "beta_dpo/beta": 0.001718068728223443, "beta_dpo/beta_margin_grad_mean": -0.49326348304748535, "beta_dpo/beta_margin_grad_std": 0.011248563416302204, "beta_dpo/beta_margin_mean": 0.02697627618908882, "beta_dpo/beta_margin_std": 0.04508247226476669, "beta_dpo/beta_used": 0.001718068728223443, "beta_dpo/beta_used_raw": -0.12951478362083435, "beta_dpo/gap_mean": 14.141023635864258, "beta_dpo/gap_std": 16.736181259155273, "beta_dpo/loss_margin_mean": 14.089604377746582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11747430249632893, "grad_norm": 2.357767343521118, "learning_rate": 4.996706849759452e-07, "logits/chosen": -0.7271322011947632, "logits/rejected": -0.6814069747924805, "loss": 1.3671, "step": 80 }, { "beta_dpo/beta": 0.1173420324921608, "beta_dpo/beta_margin_grad_mean": -0.3296668529510498, "beta_dpo/beta_margin_grad_std": 0.2772652506828308, "beta_dpo/beta_margin_mean": 2.6220462322235107, "beta_dpo/beta_margin_std": 4.677156925201416, "beta_dpo/beta_used": 0.1173420324921608, "beta_dpo/beta_used_raw": 0.08890701830387115, "beta_dpo/gap_mean": 14.902729034423828, "beta_dpo/gap_std": 17.593263626098633, "beta_dpo/loss_margin_mean": 19.815006256103516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11894273127753303, "grad_norm": 137.00436401367188, "learning_rate": 4.996015471965529e-07, "logits/chosen": -0.7320711016654968, "logits/rejected": -0.699401319026947, "loss": 1.0778, "step": 81 }, { "beta_dpo/beta": 0.04351024702191353, "beta_dpo/beta_margin_grad_mean": -0.40177345275878906, "beta_dpo/beta_margin_grad_std": 0.19916068017482758, "beta_dpo/beta_margin_mean": 0.48920586705207825, "beta_dpo/beta_margin_std": 1.2577557563781738, "beta_dpo/beta_used": 0.04351024702191353, "beta_dpo/beta_used_raw": 0.04351024702191353, "beta_dpo/gap_mean": 14.832651138305664, "beta_dpo/gap_std": 18.701509475708008, "beta_dpo/loss_margin_mean": 15.354249954223633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12041116005873716, "grad_norm": 50.82543182373047, "learning_rate": 4.995258321842611e-07, "logits/chosen": -0.649533748626709, "logits/rejected": -0.6332418918609619, "loss": 1.0506, "step": 82 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4955040216445923, "beta_dpo/beta_margin_grad_std": 0.0052512530237436295, "beta_dpo/beta_margin_mean": 0.017986806109547615, "beta_dpo/beta_margin_std": 0.021009791642427444, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06825613230466843, "beta_dpo/gap_mean": 15.605181694030762, "beta_dpo/gap_std": 19.392963409423828, "beta_dpo/loss_margin_mean": 17.986804962158203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12187958883994127, "grad_norm": 1.6841143369674683, "learning_rate": 4.994435419342304e-07, "logits/chosen": -0.6862367391586304, "logits/rejected": -0.643555760383606, "loss": 1.3736, "step": 83 }, { "beta_dpo/beta": 0.14811725914478302, "beta_dpo/beta_margin_grad_mean": -0.2708915174007416, "beta_dpo/beta_margin_grad_std": 0.20906409621238708, "beta_dpo/beta_margin_mean": 2.8868696689605713, "beta_dpo/beta_margin_std": 4.1358442306518555, "beta_dpo/beta_used": 0.14811725914478302, "beta_dpo/beta_used_raw": 0.14811725914478302, "beta_dpo/gap_mean": 15.893194198608398, "beta_dpo/gap_std": 18.990737915039062, "beta_dpo/loss_margin_mean": 15.966986656188965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12334801762114538, "grad_norm": 86.9267349243164, "learning_rate": 4.993546786148857e-07, "logits/chosen": -0.6301603317260742, "logits/rejected": -0.5886775851249695, "loss": 0.7014, "step": 84 }, { "beta_dpo/beta": 0.05548453703522682, "beta_dpo/beta_margin_grad_mean": -0.3620225489139557, "beta_dpo/beta_margin_grad_std": 0.21889419853687286, "beta_dpo/beta_margin_mean": 1.1414363384246826, "beta_dpo/beta_margin_std": 1.9398654699325562, "beta_dpo/beta_used": 0.05548453703522682, "beta_dpo/beta_used_raw": -0.06038748845458031, "beta_dpo/gap_mean": 15.512821197509766, "beta_dpo/gap_std": 18.84861183166504, "beta_dpo/loss_margin_mean": 15.94063663482666, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12481644640234948, "grad_norm": 61.42685317993164, "learning_rate": 4.992592445678582e-07, "logits/chosen": -0.6268604397773743, "logits/rejected": -0.5931763648986816, "loss": 1.0304, "step": 85 }, { "beta_dpo/beta": 0.08018074184656143, "beta_dpo/beta_margin_grad_mean": -0.3861086666584015, "beta_dpo/beta_margin_grad_std": 0.2810457944869995, "beta_dpo/beta_margin_mean": 1.180087924003601, "beta_dpo/beta_margin_std": 3.0287249088287354, "beta_dpo/beta_used": 0.08018074184656143, "beta_dpo/beta_used_raw": -0.07008485496044159, "beta_dpo/gap_mean": 15.852239608764648, "beta_dpo/gap_std": 20.208812713623047, "beta_dpo/loss_margin_mean": 16.962554931640625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1262848751835536, "grad_norm": 116.6102523803711, "learning_rate": 4.991572423079235e-07, "logits/chosen": -0.6792384386062622, "logits/rejected": -0.6633239984512329, "loss": 1.17, "step": 86 }, { "beta_dpo/beta": 0.12275532633066177, "beta_dpo/beta_margin_grad_mean": -0.2609297037124634, "beta_dpo/beta_margin_grad_std": 0.26698076725006104, "beta_dpo/beta_margin_mean": 2.5129313468933105, "beta_dpo/beta_margin_std": 3.3721165657043457, "beta_dpo/beta_used": 0.12275532633066177, "beta_dpo/beta_used_raw": 0.12275532633066177, "beta_dpo/gap_mean": 16.574663162231445, "beta_dpo/gap_std": 21.20650863647461, "beta_dpo/loss_margin_mean": 18.905868530273438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1277533039647577, "grad_norm": 81.023681640625, "learning_rate": 4.990486745229364e-07, "logits/chosen": -0.7079585790634155, "logits/rejected": -0.675015389919281, "loss": 0.7054, "step": 87 }, { "beta_dpo/beta": 0.10302203893661499, "beta_dpo/beta_margin_grad_mean": -0.38228002190589905, "beta_dpo/beta_margin_grad_std": 0.26822036504745483, "beta_dpo/beta_margin_mean": 1.8344087600708008, "beta_dpo/beta_margin_std": 4.733022689819336, "beta_dpo/beta_used": 0.10302203893661499, "beta_dpo/beta_used_raw": 0.07114126533269882, "beta_dpo/gap_mean": 16.420879364013672, "beta_dpo/gap_std": 22.033344268798828, "beta_dpo/loss_margin_mean": 14.693923950195312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12922173274596183, "grad_norm": 91.79285430908203, "learning_rate": 4.989335440737586e-07, "logits/chosen": -0.661591649055481, "logits/rejected": -0.6481854915618896, "loss": 1.0505, "step": 88 }, { "beta_dpo/beta": 0.12089363485574722, "beta_dpo/beta_margin_grad_mean": -0.32127439975738525, "beta_dpo/beta_margin_grad_std": 0.2475607842206955, "beta_dpo/beta_margin_mean": 2.3120830059051514, "beta_dpo/beta_margin_std": 3.9636423587799072, "beta_dpo/beta_used": 0.12089363485574722, "beta_dpo/beta_used_raw": -0.0026644468307495117, "beta_dpo/gap_mean": 15.963903427124023, "beta_dpo/gap_std": 21.23855209350586, "beta_dpo/loss_margin_mean": 15.816018104553223, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13069016152716592, "grad_norm": 84.89918518066406, "learning_rate": 4.988118539941847e-07, "logits/chosen": -0.7054777145385742, "logits/rejected": -0.666853129863739, "loss": 0.8893, "step": 89 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4948834478855133, "beta_dpo/beta_margin_grad_std": 0.007280984427779913, "beta_dpo/beta_margin_mean": 0.020473351702094078, "beta_dpo/beta_margin_std": 0.029139788821339607, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10932803153991699, "beta_dpo/gap_mean": 16.511451721191406, "beta_dpo/gap_std": 22.19609832763672, "beta_dpo/loss_margin_mean": 20.473350524902344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13215859030837004, "grad_norm": 1.6320456266403198, "learning_rate": 4.986836074908615e-07, "logits/chosen": -0.6601126194000244, "logits/rejected": -0.6607536673545837, "loss": 1.3734, "step": 90 }, { "beta_dpo/beta": 0.0956064909696579, "beta_dpo/beta_margin_grad_mean": -0.36107704043388367, "beta_dpo/beta_margin_grad_std": 0.26534104347229004, "beta_dpo/beta_margin_mean": 1.9452344179153442, "beta_dpo/beta_margin_std": 3.7261810302734375, "beta_dpo/beta_used": 0.0956064909696579, "beta_dpo/beta_used_raw": 0.060106635093688965, "beta_dpo/gap_mean": 16.999650955200195, "beta_dpo/gap_std": 22.816213607788086, "beta_dpo/loss_margin_mean": 17.7425594329834, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13362701908957417, "grad_norm": 163.5145721435547, "learning_rate": 4.985488079432037e-07, "logits/chosen": -0.683163583278656, "logits/rejected": -0.6435012817382812, "loss": 1.135, "step": 91 }, { "beta_dpo/beta": 0.004416329320520163, "beta_dpo/beta_margin_grad_mean": -0.47850051522254944, "beta_dpo/beta_margin_grad_std": 0.033828821033239365, "beta_dpo/beta_margin_mean": 0.08677387237548828, "beta_dpo/beta_margin_std": 0.1371731013059616, "beta_dpo/beta_used": 0.004416329320520163, "beta_dpo/beta_used_raw": -0.09906575083732605, "beta_dpo/gap_mean": 17.035350799560547, "beta_dpo/gap_std": 22.991302490234375, "beta_dpo/loss_margin_mean": 17.186429977416992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13509544787077826, "grad_norm": 7.026480197906494, "learning_rate": 4.984074589033043e-07, "logits/chosen": -0.714478611946106, "logits/rejected": -0.685989499092102, "loss": 1.3231, "step": 92 }, { "beta_dpo/beta": 0.012795208021998405, "beta_dpo/beta_margin_grad_mean": -0.4487362504005432, "beta_dpo/beta_margin_grad_std": 0.08327450603246689, "beta_dpo/beta_margin_mean": 0.21816346049308777, "beta_dpo/beta_margin_std": 0.3640429377555847, "beta_dpo/beta_used": 0.012795208021998405, "beta_dpo/beta_used_raw": 0.003189191222190857, "beta_dpo/gap_mean": 17.194652557373047, "beta_dpo/gap_std": 22.38436508178711, "beta_dpo/loss_margin_mean": 17.124244689941406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13656387665198239, "grad_norm": 17.654693603515625, "learning_rate": 4.982595640958425e-07, "logits/chosen": -0.7458562850952148, "logits/rejected": -0.6881492137908936, "loss": 1.216, "step": 93 }, { "beta_dpo/beta": 0.08266030997037888, "beta_dpo/beta_margin_grad_mean": -0.34286096692085266, "beta_dpo/beta_margin_grad_std": 0.254118949174881, "beta_dpo/beta_margin_mean": 1.8386805057525635, "beta_dpo/beta_margin_std": 3.1514334678649902, "beta_dpo/beta_used": 0.08266030997037888, "beta_dpo/beta_used_raw": 0.05387556180357933, "beta_dpo/gap_mean": 17.62067222595215, "beta_dpo/gap_std": 22.231197357177734, "beta_dpo/loss_margin_mean": 19.16136932373047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13803230543318648, "grad_norm": 86.43866729736328, "learning_rate": 4.98105127417984e-07, "logits/chosen": -0.6766291260719299, "logits/rejected": -0.6523104310035706, "loss": 0.9494, "step": 94 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4960479736328125, "beta_dpo/beta_margin_grad_std": 0.00468993978574872, "beta_dpo/beta_margin_mean": 0.015809904783964157, "beta_dpo/beta_margin_std": 0.018763281404972076, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1939472258090973, "beta_dpo/gap_mean": 17.355606079101562, "beta_dpo/gap_std": 21.673551559448242, "beta_dpo/loss_margin_mean": 15.809903144836426, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1395007342143906, "grad_norm": 1.29397714138031, "learning_rate": 4.979441529392784e-07, "logits/chosen": -0.737293004989624, "logits/rejected": -0.7039185166358948, "loss": 1.3739, "step": 95 }, { "beta_dpo/beta": 0.1486305147409439, "beta_dpo/beta_margin_grad_mean": -0.30508890748023987, "beta_dpo/beta_margin_grad_std": 0.2417270988225937, "beta_dpo/beta_margin_mean": 3.7028400897979736, "beta_dpo/beta_margin_std": 6.17563533782959, "beta_dpo/beta_used": 0.1486305147409439, "beta_dpo/beta_used_raw": -0.02500748634338379, "beta_dpo/gap_mean": 17.98691177368164, "beta_dpo/gap_std": 21.86615753173828, "beta_dpo/loss_margin_mean": 21.639110565185547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14096916299559473, "grad_norm": 48.836517333984375, "learning_rate": 4.977766449015534e-07, "logits/chosen": -0.7020214796066284, "logits/rejected": -0.6632054448127747, "loss": 0.7946, "step": 96 }, { "beta_dpo/beta": 0.06436537951231003, "beta_dpo/beta_margin_grad_mean": -0.3319231867790222, "beta_dpo/beta_margin_grad_std": 0.21798565983772278, "beta_dpo/beta_margin_mean": 1.3948326110839844, "beta_dpo/beta_margin_std": 2.1092705726623535, "beta_dpo/beta_used": 0.06436537951231003, "beta_dpo/beta_used_raw": 0.023374740034341812, "beta_dpo/gap_mean": 17.544296264648438, "beta_dpo/gap_std": 21.351360321044922, "beta_dpo/loss_margin_mean": 16.46492576599121, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14243759177679882, "grad_norm": 62.58485794067383, "learning_rate": 4.976026077188012e-07, "logits/chosen": -0.6425115466117859, "logits/rejected": -0.5889946818351746, "loss": 0.9477, "step": 97 }, { "beta_dpo/beta": 0.1993415206670761, "beta_dpo/beta_margin_grad_mean": -0.3418026566505432, "beta_dpo/beta_margin_grad_std": 0.29540500044822693, "beta_dpo/beta_margin_mean": 3.733274221420288, "beta_dpo/beta_margin_std": 8.150524139404297, "beta_dpo/beta_used": 0.1993415206670761, "beta_dpo/beta_used_raw": 0.16680875420570374, "beta_dpo/gap_mean": 17.85407257080078, "beta_dpo/gap_std": 21.613468170166016, "beta_dpo/loss_margin_mean": 18.21445655822754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14390602055800295, "grad_norm": 155.92921447753906, "learning_rate": 4.974220459770639e-07, "logits/chosen": -0.6993780136108398, "logits/rejected": -0.6774000525474548, "loss": 1.0858, "step": 98 }, { "beta_dpo/beta": 0.05922618508338928, "beta_dpo/beta_margin_grad_mean": -0.29211270809173584, "beta_dpo/beta_margin_grad_std": 0.1934242695569992, "beta_dpo/beta_margin_mean": 1.2846572399139404, "beta_dpo/beta_margin_std": 1.4927436113357544, "beta_dpo/beta_used": 0.05922618508338928, "beta_dpo/beta_used_raw": 0.05922618508338928, "beta_dpo/gap_mean": 18.435466766357422, "beta_dpo/gap_std": 22.153942108154297, "beta_dpo/loss_margin_mean": 21.74091911315918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14537444933920704, "grad_norm": 45.9489860534668, "learning_rate": 4.972349644343108e-07, "logits/chosen": -0.6738119125366211, "logits/rejected": -0.6671220660209656, "loss": 0.7627, "step": 99 }, { "beta_dpo/beta": 0.049059588462114334, "beta_dpo/beta_margin_grad_mean": -0.376477986574173, "beta_dpo/beta_margin_grad_std": 0.19105187058448792, "beta_dpo/beta_margin_mean": 0.7820718884468079, "beta_dpo/beta_margin_std": 1.3751544952392578, "beta_dpo/beta_used": 0.049059588462114334, "beta_dpo/beta_used_raw": 0.027484482154250145, "beta_dpo/gap_mean": 17.79035186767578, "beta_dpo/gap_std": 22.48064422607422, "beta_dpo/loss_margin_mean": 13.807634353637695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14684287812041116, "grad_norm": 40.60963821411133, "learning_rate": 4.970413680203148e-07, "logits/chosen": -0.6705986261367798, "logits/rejected": -0.62305748462677, "loss": 0.9552, "step": 100 }, { "epoch": 0.14684287812041116, "eval_beta_dpo/beta": 0.004559545312076807, "eval_beta_dpo/beta_margin_grad_mean": -0.489467054605484, "eval_beta_dpo/beta_margin_grad_std": 0.016749924048781395, "eval_beta_dpo/beta_margin_mean": 0.07250447571277618, "eval_beta_dpo/beta_margin_std": 0.1095583513379097, "eval_beta_dpo/beta_used": 0.004559545312076807, "eval_beta_dpo/beta_used_raw": -0.37313562631607056, "eval_beta_dpo/gap_mean": 17.69536590576172, "eval_beta_dpo/gap_std": 22.184284210205078, "eval_beta_dpo/loss_margin_mean": 9.809774398803711, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.6697728037834167, "eval_logits/rejected": -0.641778290271759, "eval_loss": 0.6785586476325989, "eval_runtime": 40.2677, "eval_samples_per_second": 58.086, "eval_steps_per_second": 1.838, "step": 100 }, { "beta_dpo/beta": 0.07321029156446457, "beta_dpo/beta_margin_grad_mean": -0.3686811923980713, "beta_dpo/beta_margin_grad_std": 0.25677189230918884, "beta_dpo/beta_margin_mean": 1.4722819328308105, "beta_dpo/beta_margin_std": 3.1001694202423096, "beta_dpo/beta_used": 0.07321029156446457, "beta_dpo/beta_used_raw": -0.08677682280540466, "beta_dpo/gap_mean": 17.66168975830078, "beta_dpo/gap_std": 22.67660903930664, "beta_dpo/loss_margin_mean": 16.247787475585938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14831130690161526, "grad_norm": 78.9544448852539, "learning_rate": 4.968412618365215e-07, "logits/chosen": -0.6895424127578735, "logits/rejected": -0.650581955909729, "loss": 1.0062, "step": 101 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4970797300338745, "beta_dpo/beta_margin_grad_std": 0.005516585893929005, "beta_dpo/beta_margin_mean": 0.011683052405714989, "beta_dpo/beta_margin_std": 0.022071124985814095, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20228251814842224, "beta_dpo/gap_mean": 16.36496925354004, "beta_dpo/gap_std": 22.436237335205078, "beta_dpo/loss_margin_mean": 11.683052062988281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14977973568281938, "grad_norm": 1.3097914457321167, "learning_rate": 4.966346511559149e-07, "logits/chosen": -0.7123202681541443, "logits/rejected": -0.6617774963378906, "loss": 1.3751, "step": 102 }, { "beta_dpo/beta": 0.12889112532138824, "beta_dpo/beta_margin_grad_mean": -0.33258166909217834, "beta_dpo/beta_margin_grad_std": 0.2698776423931122, "beta_dpo/beta_margin_mean": 2.756754159927368, "beta_dpo/beta_margin_std": 4.887447357177734, "beta_dpo/beta_used": 0.12889112532138824, "beta_dpo/beta_used_raw": 0.11348496377468109, "beta_dpo/gap_mean": 17.27025032043457, "beta_dpo/gap_std": 22.856536865234375, "beta_dpo/loss_margin_mean": 22.42643928527832, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1512481644640235, "grad_norm": 182.22842407226562, "learning_rate": 4.964215414228785e-07, "logits/chosen": -0.6717199087142944, "logits/rejected": -0.6303577423095703, "loss": 1.0281, "step": 103 }, { "beta_dpo/beta": 0.06325404345989227, "beta_dpo/beta_margin_grad_mean": -0.36961308121681213, "beta_dpo/beta_margin_grad_std": 0.2353500872850418, "beta_dpo/beta_margin_mean": 1.4412060976028442, "beta_dpo/beta_margin_std": 3.0654594898223877, "beta_dpo/beta_used": 0.06325404345989227, "beta_dpo/beta_used_raw": 0.046856410801410675, "beta_dpo/gap_mean": 17.94310760498047, "beta_dpo/gap_std": 23.2835693359375, "beta_dpo/loss_margin_mean": 21.150423049926758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1527165932452276, "grad_norm": 88.83678436279297, "learning_rate": 4.96201938253052e-07, "logits/chosen": -0.7104257345199585, "logits/rejected": -0.6671864986419678, "loss": 1.0529, "step": 104 }, { "beta_dpo/beta": 0.36780738830566406, "beta_dpo/beta_margin_grad_mean": -0.08803264796733856, "beta_dpo/beta_margin_grad_std": 0.23639245331287384, "beta_dpo/beta_margin_mean": 10.949403762817383, "beta_dpo/beta_margin_std": 9.001367568969727, "beta_dpo/beta_used": 0.36780738830566406, "beta_dpo/beta_used_raw": 0.36780738830566406, "beta_dpo/gap_mean": 19.79109764099121, "beta_dpo/gap_std": 23.633255004882812, "beta_dpo/loss_margin_mean": 29.683706283569336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15418502202643172, "grad_norm": 242.09918212890625, "learning_rate": 4.959758474331832e-07, "logits/chosen": -0.732721209526062, "logits/rejected": -0.7001240849494934, "loss": 0.522, "step": 105 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4954530894756317, "beta_dpo/beta_margin_grad_std": 0.005140354391187429, "beta_dpo/beta_margin_mean": 0.01819043606519699, "beta_dpo/beta_margin_std": 0.020566506311297417, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09649403393268585, "beta_dpo/gap_mean": 20.10454750061035, "beta_dpo/gap_std": 23.268360137939453, "beta_dpo/loss_margin_mean": 18.190433502197266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15565345080763582, "grad_norm": 1.546743392944336, "learning_rate": 4.957432749209755e-07, "logits/chosen": -0.6345574855804443, "logits/rejected": -0.5829192399978638, "loss": 1.3696, "step": 106 }, { "beta_dpo/beta": 0.15242286026477814, "beta_dpo/beta_margin_grad_mean": -0.2269459068775177, "beta_dpo/beta_margin_grad_std": 0.27589160203933716, "beta_dpo/beta_margin_mean": 3.2626266479492188, "beta_dpo/beta_margin_std": 3.9688947200775146, "beta_dpo/beta_used": 0.15242286026477814, "beta_dpo/beta_used_raw": 0.15242286026477814, "beta_dpo/gap_mean": 20.150800704956055, "beta_dpo/gap_std": 23.207382202148438, "beta_dpo/loss_margin_mean": 20.48804473876953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15712187958883994, "grad_norm": 113.67718505859375, "learning_rate": 4.955042268449307e-07, "logits/chosen": -0.7242048978805542, "logits/rejected": -0.6693615317344666, "loss": 0.559, "step": 107 }, { "beta_dpo/beta": 0.035163089632987976, "beta_dpo/beta_margin_grad_mean": -0.3810098469257355, "beta_dpo/beta_margin_grad_std": 0.18505938351154327, "beta_dpo/beta_margin_mean": 0.7840278148651123, "beta_dpo/beta_margin_std": 1.483026146888733, "beta_dpo/beta_used": 0.035163089632987976, "beta_dpo/beta_used_raw": 0.017950953915715218, "beta_dpo/gap_mean": 20.339149475097656, "beta_dpo/gap_std": 24.504940032958984, "beta_dpo/loss_margin_mean": 22.11771011352539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15859030837004406, "grad_norm": 30.485048294067383, "learning_rate": 4.952587095041881e-07, "logits/chosen": -0.7240760326385498, "logits/rejected": -0.6844010949134827, "loss": 1.0071, "step": 108 }, { "beta_dpo/beta": 0.08255766332149506, "beta_dpo/beta_margin_grad_mean": -0.24582688510417938, "beta_dpo/beta_margin_grad_std": 0.24141037464141846, "beta_dpo/beta_margin_mean": 2.0719735622406006, "beta_dpo/beta_margin_std": 2.4666452407836914, "beta_dpo/beta_used": 0.08255766332149506, "beta_dpo/beta_used_raw": 0.08255766332149506, "beta_dpo/gap_mean": 21.11379051208496, "beta_dpo/gap_std": 24.862241744995117, "beta_dpo/loss_margin_mean": 24.515644073486328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16005873715124816, "grad_norm": 72.3277359008789, "learning_rate": 4.95006729368358e-07, "logits/chosen": -0.6247401833534241, "logits/rejected": -0.5946367979049683, "loss": 0.7082, "step": 109 }, { "beta_dpo/beta": 0.007696578744798899, "beta_dpo/beta_margin_grad_mean": -0.446740984916687, "beta_dpo/beta_margin_grad_std": 0.07942461222410202, "beta_dpo/beta_margin_mean": 0.2259088009595871, "beta_dpo/beta_margin_std": 0.35111066699028015, "beta_dpo/beta_used": 0.007696578744798899, "beta_dpo/beta_used_raw": -0.11108442395925522, "beta_dpo/gap_mean": 21.73975372314453, "beta_dpo/gap_std": 25.069347381591797, "beta_dpo/loss_margin_mean": 22.480884552001953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16152716593245228, "grad_norm": 12.362462043762207, "learning_rate": 4.947482930773511e-07, "logits/chosen": -0.6756146550178528, "logits/rejected": -0.6260861158370972, "loss": 1.2462, "step": 110 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49404606223106384, "beta_dpo/beta_margin_grad_std": 0.007231460884213448, "beta_dpo/beta_margin_mean": 0.023822510614991188, "beta_dpo/beta_margin_std": 0.028938332572579384, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0781029760837555, "beta_dpo/gap_mean": 21.86594009399414, "beta_dpo/gap_std": 25.79961395263672, "beta_dpo/loss_margin_mean": 23.822509765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16299559471365638, "grad_norm": 1.6921226978302002, "learning_rate": 4.944834074412042e-07, "logits/chosen": -0.6994168758392334, "logits/rejected": -0.6692637801170349, "loss": 1.3676, "step": 111 }, { "beta_dpo/beta": 0.15165650844573975, "beta_dpo/beta_margin_grad_mean": -0.34309816360473633, "beta_dpo/beta_margin_grad_std": 0.28766128420829773, "beta_dpo/beta_margin_mean": 2.929415702819824, "beta_dpo/beta_margin_std": 5.559157848358154, "beta_dpo/beta_used": 0.15165650844573975, "beta_dpo/beta_used_raw": -0.08103512227535248, "beta_dpo/gap_mean": 21.06276512145996, "beta_dpo/gap_std": 25.198822021484375, "beta_dpo/loss_margin_mean": 15.551918029785156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1644640234948605, "grad_norm": 29.376951217651367, "learning_rate": 4.942120794399002e-07, "logits/chosen": -0.6914588212966919, "logits/rejected": -0.6384344100952148, "loss": 0.7237, "step": 112 }, { "beta_dpo/beta": 0.06923054903745651, "beta_dpo/beta_margin_grad_mean": -0.37122005224227905, "beta_dpo/beta_margin_grad_std": 0.23908159136772156, "beta_dpo/beta_margin_mean": 1.2863365411758423, "beta_dpo/beta_margin_std": 2.375030994415283, "beta_dpo/beta_used": 0.06923054903745651, "beta_dpo/beta_used_raw": 0.0037154704332351685, "beta_dpo/gap_mean": 20.280515670776367, "beta_dpo/gap_std": 24.34324836730957, "beta_dpo/loss_margin_mean": 18.54205894470215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16593245227606462, "grad_norm": 73.8025894165039, "learning_rate": 4.939343162231841e-07, "logits/chosen": -0.6436402797698975, "logits/rejected": -0.5879355669021606, "loss": 1.0275, "step": 113 }, { "beta_dpo/beta": 0.10792845487594604, "beta_dpo/beta_margin_grad_mean": -0.3395880460739136, "beta_dpo/beta_margin_grad_std": 0.2739325165748596, "beta_dpo/beta_margin_mean": 3.3032939434051514, "beta_dpo/beta_margin_std": 6.392845630645752, "beta_dpo/beta_used": 0.10792845487594604, "beta_dpo/beta_used_raw": 0.0014918148517608643, "beta_dpo/gap_mean": 21.269786834716797, "beta_dpo/gap_std": 25.5091552734375, "beta_dpo/loss_margin_mean": 26.308393478393555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16740088105726872, "grad_norm": 182.32144165039062, "learning_rate": 4.936501251103751e-07, "logits/chosen": -0.6645747423171997, "logits/rejected": -0.6166965365409851, "loss": 1.1018, "step": 114 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4950384795665741, "beta_dpo/beta_margin_grad_std": 0.008213133551180363, "beta_dpo/beta_margin_mean": 0.019854702055454254, "beta_dpo/beta_margin_std": 0.03287569805979729, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13187555968761444, "beta_dpo/gap_mean": 20.974491119384766, "beta_dpo/gap_std": 26.741947174072266, "beta_dpo/loss_margin_mean": 19.854700088500977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16886930983847284, "grad_norm": 1.579263687133789, "learning_rate": 4.933595135901732e-07, "logits/chosen": -0.7198022603988647, "logits/rejected": -0.6816444396972656, "loss": 1.3694, "step": 115 }, { "beta_dpo/beta": 0.14748090505599976, "beta_dpo/beta_margin_grad_mean": -0.3244438171386719, "beta_dpo/beta_margin_grad_std": 0.25262880325317383, "beta_dpo/beta_margin_mean": 3.44006085395813, "beta_dpo/beta_margin_std": 5.296873569488525, "beta_dpo/beta_used": 0.14748090505599976, "beta_dpo/beta_used_raw": 7.016956806182861e-05, "beta_dpo/gap_mean": 20.802410125732422, "beta_dpo/gap_std": 26.271785736083984, "beta_dpo/loss_margin_mean": 20.255638122558594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17033773861967694, "grad_norm": 229.8918914794922, "learning_rate": 4.930624893204624e-07, "logits/chosen": -0.703331708908081, "logits/rejected": -0.6744290590286255, "loss": 0.9244, "step": 116 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49576959013938904, "beta_dpo/beta_margin_grad_std": 0.0058947219513356686, "beta_dpo/beta_margin_mean": 0.016924891620874405, "beta_dpo/beta_margin_std": 0.023586571216583252, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11201402544975281, "beta_dpo/gap_mean": 20.31169891357422, "beta_dpo/gap_std": 25.738601684570312, "beta_dpo/loss_margin_mean": 16.92489242553711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17180616740088106, "grad_norm": 1.411366581916809, "learning_rate": 4.927590601281083e-07, "logits/chosen": -0.6768569946289062, "logits/rejected": -0.6319583654403687, "loss": 1.3696, "step": 117 }, { "beta_dpo/beta": 0.05472584441304207, "beta_dpo/beta_margin_grad_mean": -0.3660266399383545, "beta_dpo/beta_margin_grad_std": 0.20633184909820557, "beta_dpo/beta_margin_mean": 1.0326712131500244, "beta_dpo/beta_margin_std": 1.876607060432434, "beta_dpo/beta_used": 0.05472584441304207, "beta_dpo/beta_used_raw": -0.008384305983781815, "beta_dpo/gap_mean": 19.919933319091797, "beta_dpo/gap_std": 25.017112731933594, "beta_dpo/loss_margin_mean": 18.486082077026367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17327459618208516, "grad_norm": 34.99771499633789, "learning_rate": 4.924492340087524e-07, "logits/chosen": -0.7068147659301758, "logits/rejected": -0.6715903282165527, "loss": 0.9355, "step": 118 }, { "beta_dpo/beta": 0.10491637140512466, "beta_dpo/beta_margin_grad_mean": -0.33534765243530273, "beta_dpo/beta_margin_grad_std": 0.2645687460899353, "beta_dpo/beta_margin_mean": 2.3737905025482178, "beta_dpo/beta_margin_std": 4.37314510345459, "beta_dpo/beta_used": 0.10491637140512466, "beta_dpo/beta_used_raw": -0.03881131112575531, "beta_dpo/gap_mean": 19.92425537109375, "beta_dpo/gap_std": 24.904251098632812, "beta_dpo/loss_margin_mean": 19.200084686279297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17474302496328928, "grad_norm": 146.54409790039062, "learning_rate": 4.92133019126601e-07, "logits/chosen": -0.7222672700881958, "logits/rejected": -0.6985388994216919, "loss": 1.0162, "step": 119 }, { "beta_dpo/beta": 0.21818916499614716, "beta_dpo/beta_margin_grad_mean": -0.1877627968788147, "beta_dpo/beta_margin_grad_std": 0.3119850158691406, "beta_dpo/beta_margin_mean": 5.290563106536865, "beta_dpo/beta_margin_std": 6.116404056549072, "beta_dpo/beta_used": 0.21818916499614716, "beta_dpo/beta_used_raw": 0.21818916499614716, "beta_dpo/gap_mean": 20.34283447265625, "beta_dpo/gap_std": 25.407583236694336, "beta_dpo/loss_margin_mean": 24.195154190063477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1762114537444934, "grad_norm": 204.10935974121094, "learning_rate": 4.918104238142103e-07, "logits/chosen": -0.7220108509063721, "logits/rejected": -0.6780139207839966, "loss": 0.6997, "step": 120 }, { "beta_dpo/beta": 0.01847536489367485, "beta_dpo/beta_margin_grad_mean": -0.3978184163570404, "beta_dpo/beta_margin_grad_std": 0.16114307940006256, "beta_dpo/beta_margin_mean": 0.5521989464759827, "beta_dpo/beta_margin_std": 0.942378044128418, "beta_dpo/beta_used": 0.01847536489367485, "beta_dpo/beta_used_raw": 0.012065595015883446, "beta_dpo/gap_mean": 21.67633819580078, "beta_dpo/gap_std": 26.208454132080078, "beta_dpo/loss_margin_mean": 27.461488723754883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1776798825256975, "grad_norm": 23.686054229736328, "learning_rate": 4.91481456572267e-07, "logits/chosen": -0.6834492087364197, "logits/rejected": -0.6698263883590698, "loss": 1.1437, "step": 121 }, { "beta_dpo/beta": 0.45700308680534363, "beta_dpo/beta_margin_grad_mean": -0.08143386244773865, "beta_dpo/beta_margin_grad_std": 0.2316262423992157, "beta_dpo/beta_margin_mean": 15.591158866882324, "beta_dpo/beta_margin_std": 16.326433181762695, "beta_dpo/beta_used": 0.45700308680534363, "beta_dpo/beta_used_raw": 0.45700308680534363, "beta_dpo/gap_mean": 23.661128997802734, "beta_dpo/gap_std": 26.714675903320312, "beta_dpo/loss_margin_mean": 34.74848175048828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17914831130690162, "grad_norm": 238.48973083496094, "learning_rate": 4.911461260693638e-07, "logits/chosen": -0.6555478572845459, "logits/rejected": -0.6584290266036987, "loss": 0.3756, "step": 122 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4943942129611969, "beta_dpo/beta_margin_grad_std": 0.008313042111694813, "beta_dpo/beta_margin_mean": 0.02243146486580372, "beta_dpo/beta_margin_std": 0.033270444720983505, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1897989809513092, "beta_dpo/gap_mean": 24.12826919555664, "beta_dpo/gap_std": 27.712556838989258, "beta_dpo/loss_margin_mean": 22.43146324157715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18061674008810572, "grad_norm": 1.7068573236465454, "learning_rate": 4.908044411417711e-07, "logits/chosen": -0.6849197149276733, "logits/rejected": -0.6539350152015686, "loss": 1.3672, "step": 123 }, { "beta_dpo/beta": 0.03426466882228851, "beta_dpo/beta_margin_grad_mean": -0.3685888350009918, "beta_dpo/beta_margin_grad_std": 0.2092631459236145, "beta_dpo/beta_margin_mean": 1.0015679597854614, "beta_dpo/beta_margin_std": 1.8775554895401, "beta_dpo/beta_used": 0.03426466882228851, "beta_dpo/beta_used_raw": -0.07082332670688629, "beta_dpo/gap_mean": 24.793880462646484, "beta_dpo/gap_std": 30.202411651611328, "beta_dpo/loss_margin_mean": 29.717092514038086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18208516886930984, "grad_norm": 38.06232833862305, "learning_rate": 4.904564107932048e-07, "logits/chosen": -0.6627172231674194, "logits/rejected": -0.6565097570419312, "loss": 1.0074, "step": 124 }, { "beta_dpo/beta": 0.0012786721345037222, "beta_dpo/beta_margin_grad_mean": -0.4913226366043091, "beta_dpo/beta_margin_grad_std": 0.009015963412821293, "beta_dpo/beta_margin_mean": 0.03472711890935898, "beta_dpo/beta_margin_std": 0.03609345108270645, "beta_dpo/beta_used": 0.0012786721345037222, "beta_dpo/beta_used_raw": -0.12858377397060394, "beta_dpo/gap_mean": 25.316532135009766, "beta_dpo/gap_std": 30.038803100585938, "beta_dpo/loss_margin_mean": 27.47158432006836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18355359765051396, "grad_norm": 2.35784649848938, "learning_rate": 4.90102044194588e-07, "logits/chosen": -0.6600132584571838, "logits/rejected": -0.6433917284011841, "loss": 1.3587, "step": 125 }, { "beta_dpo/beta": 0.0375693216919899, "beta_dpo/beta_margin_grad_mean": -0.3544065058231354, "beta_dpo/beta_margin_grad_std": 0.2188321352005005, "beta_dpo/beta_margin_mean": 0.9979441165924072, "beta_dpo/beta_margin_std": 1.667494297027588, "beta_dpo/beta_used": 0.0375693216919899, "beta_dpo/beta_used_raw": -0.0951186865568161, "beta_dpo/gap_mean": 25.63544464111328, "beta_dpo/gap_std": 29.89664649963379, "beta_dpo/loss_margin_mean": 26.32620620727539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18502202643171806, "grad_norm": 47.1953125, "learning_rate": 4.897413506838102e-07, "logits/chosen": -0.6728538274765015, "logits/rejected": -0.6457461714744568, "loss": 0.9888, "step": 126 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49485456943511963, "beta_dpo/beta_margin_grad_std": 0.006044152192771435, "beta_dpo/beta_margin_mean": 0.020585671067237854, "beta_dpo/beta_margin_std": 0.02418256551027298, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22815854847431183, "beta_dpo/gap_mean": 24.85071563720703, "beta_dpo/gap_std": 29.267414093017578, "beta_dpo/loss_margin_mean": 20.585670471191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18649045521292218, "grad_norm": 1.646366834640503, "learning_rate": 4.89374339765481e-07, "logits/chosen": -0.6736807227134705, "logits/rejected": -0.6437186002731323, "loss": 1.3671, "step": 127 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4946313500404358, "beta_dpo/beta_margin_grad_std": 0.007378284819424152, "beta_dpo/beta_margin_mean": 0.021480618044734, "beta_dpo/beta_margin_std": 0.029524413868784904, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13018402457237244, "beta_dpo/gap_mean": 24.056352615356445, "beta_dpo/gap_std": 29.08978271484375, "beta_dpo/loss_margin_mean": 21.48061752319336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18795888399412627, "grad_norm": 1.6088193655014038, "learning_rate": 4.890010211106795e-07, "logits/chosen": -0.6654571294784546, "logits/rejected": -0.62144935131073, "loss": 1.3663, "step": 128 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4943726360797882, "beta_dpo/beta_margin_grad_std": 0.009340907447040081, "beta_dpo/beta_margin_mean": 0.02252124436199665, "beta_dpo/beta_margin_std": 0.037395406514406204, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12518861889839172, "beta_dpo/gap_mean": 23.854827880859375, "beta_dpo/gap_std": 30.1458740234375, "beta_dpo/loss_margin_mean": 22.521244049072266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1894273127753304, "grad_norm": 1.709029197692871, "learning_rate": 4.88621404556699e-07, "logits/chosen": -0.6773253679275513, "logits/rejected": -0.6511229276657104, "loss": 1.3665, "step": 129 }, { "beta_dpo/beta": 0.31604424118995667, "beta_dpo/beta_margin_grad_mean": -0.3025071620941162, "beta_dpo/beta_margin_grad_std": 0.28252968192100525, "beta_dpo/beta_margin_mean": 13.17651653289795, "beta_dpo/beta_margin_std": 20.62891387939453, "beta_dpo/beta_used": 0.31604424118995667, "beta_dpo/beta_used_raw": 0.2993201017379761, "beta_dpo/gap_mean": 24.651588439941406, "beta_dpo/gap_std": 31.216594696044922, "beta_dpo/loss_margin_mean": 32.33549880981445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19089574155653452, "grad_norm": 376.320556640625, "learning_rate": 4.882355001067891e-07, "logits/chosen": -0.6749308705329895, "logits/rejected": -0.6657023429870605, "loss": 1.8792, "step": 130 }, { "beta_dpo/beta": 0.31704258918762207, "beta_dpo/beta_margin_grad_mean": -0.13735538721084595, "beta_dpo/beta_margin_grad_std": 0.2471843659877777, "beta_dpo/beta_margin_mean": 10.250054359436035, "beta_dpo/beta_margin_std": 14.311327934265137, "beta_dpo/beta_used": 0.31704258918762207, "beta_dpo/beta_used_raw": 0.31704258918762207, "beta_dpo/gap_mean": 26.40115737915039, "beta_dpo/gap_std": 31.766616821289062, "beta_dpo/loss_margin_mean": 31.684356689453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19236417033773862, "grad_norm": 32.02171325683594, "learning_rate": 4.878433179298909e-07, "logits/chosen": -0.6722965240478516, "logits/rejected": -0.6686934232711792, "loss": 0.2626, "step": 131 }, { "beta_dpo/beta": 0.07168679684400558, "beta_dpo/beta_margin_grad_mean": -0.3351624310016632, "beta_dpo/beta_margin_grad_std": 0.2476821094751358, "beta_dpo/beta_margin_mean": 2.218749761581421, "beta_dpo/beta_margin_std": 3.9757206439971924, "beta_dpo/beta_used": 0.07168679684400558, "beta_dpo/beta_used_raw": -0.14731627702713013, "beta_dpo/gap_mean": 27.047744750976562, "beta_dpo/gap_std": 32.3395881652832, "beta_dpo/loss_margin_mean": 28.040454864501953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19383259911894274, "grad_norm": 109.02853393554688, "learning_rate": 4.874448683603694e-07, "logits/chosen": -0.7075382471084595, "logits/rejected": -0.6883822679519653, "loss": 1.0233, "step": 132 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4935277998447418, "beta_dpo/beta_margin_grad_std": 0.009248698130249977, "beta_dpo/beta_margin_mean": 0.025902841240167618, "beta_dpo/beta_margin_std": 0.037025336176157, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21696753799915314, "beta_dpo/gap_mean": 26.851011276245117, "beta_dpo/gap_std": 33.47434997558594, "beta_dpo/loss_margin_mean": 25.90283966064453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19530102790014683, "grad_norm": 1.9039520025253296, "learning_rate": 4.870401618977415e-07, "logits/chosen": -0.6817045211791992, "logits/rejected": -0.6619011163711548, "loss": 1.365, "step": 133 }, { "beta_dpo/beta": 0.06148982420563698, "beta_dpo/beta_margin_grad_mean": -0.30357643961906433, "beta_dpo/beta_margin_grad_std": 0.22313292324543, "beta_dpo/beta_margin_mean": 1.999468207359314, "beta_dpo/beta_margin_std": 3.1728920936584473, "beta_dpo/beta_used": 0.06148982420563698, "beta_dpo/beta_used_raw": -0.11817823350429535, "beta_dpo/gap_mean": 26.90046501159668, "beta_dpo/gap_std": 33.11681365966797, "beta_dpo/loss_margin_mean": 28.993818283081055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19676945668135096, "grad_norm": 41.841182708740234, "learning_rate": 4.866292092063986e-07, "logits/chosen": -0.682968020439148, "logits/rejected": -0.6522467136383057, "loss": 0.8588, "step": 134 }, { "beta_dpo/beta": 0.2148977667093277, "beta_dpo/beta_margin_grad_mean": -0.30633312463760376, "beta_dpo/beta_margin_grad_std": 0.27675861120224, "beta_dpo/beta_margin_mean": 7.662230968475342, "beta_dpo/beta_margin_std": 12.879323959350586, "beta_dpo/beta_used": 0.2148977667093277, "beta_dpo/beta_used_raw": 0.14902925491333008, "beta_dpo/gap_mean": 28.413272857666016, "beta_dpo/gap_std": 33.834190368652344, "beta_dpo/loss_margin_mean": 35.534969329833984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19823788546255505, "grad_norm": 202.7677459716797, "learning_rate": 4.862120211153265e-07, "logits/chosen": -0.6785898208618164, "logits/rejected": -0.6952340602874756, "loss": 0.8377, "step": 135 }, { "beta_dpo/beta": 0.008590362034738064, "beta_dpo/beta_margin_grad_mean": -0.4468691945075989, "beta_dpo/beta_margin_grad_std": 0.11143101006746292, "beta_dpo/beta_margin_mean": 0.2566927969455719, "beta_dpo/beta_margin_std": 0.5989749431610107, "beta_dpo/beta_used": 0.008590362034738064, "beta_dpo/beta_used_raw": -0.34298622608184814, "beta_dpo/gap_mean": 28.481983184814453, "beta_dpo/gap_std": 35.90342330932617, "beta_dpo/loss_margin_mean": 24.999900817871094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19970631424375918, "grad_norm": 16.698957443237305, "learning_rate": 4.857886086178193e-07, "logits/chosen": -0.6989582777023315, "logits/rejected": -0.6765180826187134, "loss": 1.2094, "step": 136 }, { "beta_dpo/beta": 0.1424337774515152, "beta_dpo/beta_margin_grad_mean": -0.29480937123298645, "beta_dpo/beta_margin_grad_std": 0.2575525939464569, "beta_dpo/beta_margin_mean": 5.513402938842773, "beta_dpo/beta_margin_std": 9.72019100189209, "beta_dpo/beta_used": 0.1424337774515152, "beta_dpo/beta_used_raw": -0.3468559980392456, "beta_dpo/gap_mean": 29.415794372558594, "beta_dpo/gap_std": 37.33689498901367, "beta_dpo/loss_margin_mean": 37.266632080078125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2011747430249633, "grad_norm": 72.38736724853516, "learning_rate": 4.853589828711902e-07, "logits/chosen": -0.6640630960464478, "logits/rejected": -0.6742027401924133, "loss": 0.8399, "step": 137 }, { "beta_dpo/beta": 0.11884160339832306, "beta_dpo/beta_margin_grad_mean": -0.3148714303970337, "beta_dpo/beta_margin_grad_std": 0.2580578923225403, "beta_dpo/beta_margin_mean": 3.768012046813965, "beta_dpo/beta_margin_std": 7.199725151062012, "beta_dpo/beta_used": 0.11884160339832306, "beta_dpo/beta_used_raw": 0.07956646382808685, "beta_dpo/gap_mean": 29.399906158447266, "beta_dpo/gap_std": 37.75701141357422, "beta_dpo/loss_margin_mean": 28.36585235595703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2026431718061674, "grad_norm": 56.19173812866211, "learning_rate": 4.849231551964771e-07, "logits/chosen": -0.6535968780517578, "logits/rejected": -0.6304416656494141, "loss": 0.7798, "step": 138 }, { "beta_dpo/beta": 0.39647993445396423, "beta_dpo/beta_margin_grad_mean": -0.17857220768928528, "beta_dpo/beta_margin_grad_std": 0.34773120284080505, "beta_dpo/beta_margin_mean": 13.393891334533691, "beta_dpo/beta_margin_std": 16.2137393951416, "beta_dpo/beta_used": 0.39647993445396423, "beta_dpo/beta_used_raw": 0.39647993445396423, "beta_dpo/gap_mean": 30.00773811340332, "beta_dpo/gap_std": 37.6620979309082, "beta_dpo/loss_margin_mean": 33.331172943115234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20411160058737152, "grad_norm": 566.4978637695312, "learning_rate": 4.844811370781446e-07, "logits/chosen": -0.672115683555603, "logits/rejected": -0.6479353904724121, "loss": 1.5227, "step": 139 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4924333691596985, "beta_dpo/beta_margin_grad_std": 0.009864427149295807, "beta_dpo/beta_margin_mean": 0.030282003805041313, "beta_dpo/beta_margin_std": 0.03948511183261871, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07030771672725677, "beta_dpo/gap_mean": 30.288619995117188, "beta_dpo/gap_std": 38.075069427490234, "beta_dpo/loss_margin_mean": 30.282001495361328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2055800293685756, "grad_norm": 2.395193576812744, "learning_rate": 4.840329401637809e-07, "logits/chosen": -0.6980350017547607, "logits/rejected": -0.6766492128372192, "loss": 1.3593, "step": 140 }, { "beta_dpo/beta": 0.20956987142562866, "beta_dpo/beta_margin_grad_mean": -0.19632378220558167, "beta_dpo/beta_margin_grad_std": 0.3448325991630554, "beta_dpo/beta_margin_mean": 6.225460052490234, "beta_dpo/beta_margin_std": 8.140890121459961, "beta_dpo/beta_used": 0.20956987142562866, "beta_dpo/beta_used_raw": 0.20956987142562866, "beta_dpo/gap_mean": 30.184303283691406, "beta_dpo/gap_std": 38.3173828125, "beta_dpo/loss_margin_mean": 29.690879821777344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20704845814977973, "grad_norm": 249.5445098876953, "learning_rate": 4.83578576263792e-07, "logits/chosen": -0.6725201606750488, "logits/rejected": -0.6453160047531128, "loss": 0.6739, "step": 141 }, { "beta_dpo/beta": 0.31159713864326477, "beta_dpo/beta_margin_grad_mean": -0.3195469379425049, "beta_dpo/beta_margin_grad_std": 0.2950160801410675, "beta_dpo/beta_margin_mean": 12.820926666259766, "beta_dpo/beta_margin_std": 21.074304580688477, "beta_dpo/beta_used": 0.31159713864326477, "beta_dpo/beta_used_raw": 0.20784735679626465, "beta_dpo/gap_mean": 30.36768341064453, "beta_dpo/gap_std": 39.55695343017578, "beta_dpo/loss_margin_mean": 33.904624938964844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20851688693098386, "grad_norm": 155.07594299316406, "learning_rate": 4.83118057351089e-07, "logits/chosen": -0.6555507183074951, "logits/rejected": -0.6462887525558472, "loss": 0.9589, "step": 142 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49460893869400024, "beta_dpo/beta_margin_grad_std": 0.008932164870202541, "beta_dpo/beta_margin_mean": 0.021574243903160095, "beta_dpo/beta_margin_std": 0.035751067101955414, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2274360954761505, "beta_dpo/gap_mean": 29.773212432861328, "beta_dpo/gap_std": 39.13104248046875, "beta_dpo/loss_margin_mean": 21.574241638183594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20998531571218795, "grad_norm": 2.0018367767333984, "learning_rate": 4.826513955607734e-07, "logits/chosen": -0.6578415632247925, "logits/rejected": -0.6270245313644409, "loss": 1.3623, "step": 143 }, { "beta_dpo/beta": 0.0523165799677372, "beta_dpo/beta_margin_grad_mean": -0.3649788200855255, "beta_dpo/beta_margin_grad_std": 0.2543392479419708, "beta_dpo/beta_margin_mean": 1.4633898735046387, "beta_dpo/beta_margin_std": 2.898139238357544, "beta_dpo/beta_used": 0.0523165799677372, "beta_dpo/beta_used_raw": -0.1444738358259201, "beta_dpo/gap_mean": 28.968791961669922, "beta_dpo/gap_std": 37.89672088623047, "beta_dpo/loss_margin_mean": 28.176090240478516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21145374449339208, "grad_norm": 75.73566436767578, "learning_rate": 4.821786031898176e-07, "logits/chosen": -0.6322454214096069, "logits/rejected": -0.5843130350112915, "loss": 1.0443, "step": 144 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4928479492664337, "beta_dpo/beta_margin_grad_std": 0.008205600082874298, "beta_dpo/beta_margin_mean": 0.028618808835744858, "beta_dpo/beta_margin_std": 0.03283938020467758, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06517390161752701, "beta_dpo/gap_mean": 28.99945640563965, "beta_dpo/gap_std": 36.7828254699707, "beta_dpo/loss_margin_mean": 28.61880874633789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21292217327459617, "grad_norm": 2.1011862754821777, "learning_rate": 4.816996926967401e-07, "logits/chosen": -0.6506826877593994, "logits/rejected": -0.6125441789627075, "loss": 1.3604, "step": 145 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4944137632846832, "beta_dpo/beta_margin_grad_std": 0.008961321786046028, "beta_dpo/beta_margin_mean": 0.022353263571858406, "beta_dpo/beta_margin_std": 0.035862602293491364, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.353656530380249, "beta_dpo/gap_mean": 27.972278594970703, "beta_dpo/gap_std": 36.326202392578125, "beta_dpo/loss_margin_mean": 22.353261947631836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2143906020558003, "grad_norm": 1.9944401979446411, "learning_rate": 4.812146767012779e-07, "logits/chosen": -0.6603084802627563, "logits/rejected": -0.608822226524353, "loss": 1.3663, "step": 146 }, { "beta_dpo/beta": 0.06166262924671173, "beta_dpo/beta_margin_grad_mean": -0.35987135767936707, "beta_dpo/beta_margin_grad_std": 0.25737276673316956, "beta_dpo/beta_margin_mean": 2.0429532527923584, "beta_dpo/beta_margin_std": 4.145395278930664, "beta_dpo/beta_used": 0.06166262924671173, "beta_dpo/beta_used_raw": -0.02383120357990265, "beta_dpo/gap_mean": 28.153667449951172, "beta_dpo/gap_std": 37.32135009765625, "beta_dpo/loss_margin_mean": 30.588022232055664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21585903083700442, "grad_norm": 132.78941345214844, "learning_rate": 4.807235679840536e-07, "logits/chosen": -0.6173018217086792, "logits/rejected": -0.5760653018951416, "loss": 1.1016, "step": 147 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49364525079727173, "beta_dpo/beta_margin_grad_std": 0.008732988499104977, "beta_dpo/beta_margin_mean": 0.02543247863650322, "beta_dpo/beta_margin_std": 0.03496631607413292, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20661629736423492, "beta_dpo/gap_mean": 27.30005645751953, "beta_dpo/gap_std": 36.48552703857422, "beta_dpo/loss_margin_mean": 25.432477951049805, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2173274596182085, "grad_norm": 2.2173750400543213, "learning_rate": 4.802263794862384e-07, "logits/chosen": -0.6911383271217346, "logits/rejected": -0.6686098575592041, "loss": 1.3644, "step": 148 }, { "beta_dpo/beta": 0.2956673502922058, "beta_dpo/beta_margin_grad_mean": -0.30034562945365906, "beta_dpo/beta_margin_grad_std": 0.2807537615299225, "beta_dpo/beta_margin_mean": 12.983054161071777, "beta_dpo/beta_margin_std": 21.63262176513672, "beta_dpo/beta_used": 0.2956673502922058, "beta_dpo/beta_used_raw": 0.21206964552402496, "beta_dpo/gap_mean": 29.230058670043945, "beta_dpo/gap_std": 37.05466079711914, "beta_dpo/loss_margin_mean": 35.43006896972656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21879588839941264, "grad_norm": 433.311279296875, "learning_rate": 4.797231243092118e-07, "logits/chosen": -0.6822164058685303, "logits/rejected": -0.6580536365509033, "loss": 0.9517, "step": 149 }, { "beta_dpo/beta": 0.06045344099402428, "beta_dpo/beta_margin_grad_mean": -0.3100597858428955, "beta_dpo/beta_margin_grad_std": 0.23861265182495117, "beta_dpo/beta_margin_mean": 2.6005029678344727, "beta_dpo/beta_margin_std": 4.630469799041748, "beta_dpo/beta_used": 0.06045344099402428, "beta_dpo/beta_used_raw": -0.12643922865390778, "beta_dpo/gap_mean": 30.138582229614258, "beta_dpo/gap_std": 38.105072021484375, "beta_dpo/loss_margin_mean": 33.075477600097656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22026431718061673, "grad_norm": 67.81720733642578, "learning_rate": 4.792138157142157e-07, "logits/chosen": -0.6348152160644531, "logits/rejected": -0.6246376037597656, "loss": 0.8393, "step": 150 }, { "beta_dpo/beta": 0.23719097673892975, "beta_dpo/beta_margin_grad_mean": -0.16241075098514557, "beta_dpo/beta_margin_grad_std": 0.31397631764411926, "beta_dpo/beta_margin_mean": 8.125089645385742, "beta_dpo/beta_margin_std": 11.137140274047852, "beta_dpo/beta_used": 0.23719097673892975, "beta_dpo/beta_used_raw": 0.23719097673892975, "beta_dpo/gap_mean": 30.40851593017578, "beta_dpo/gap_std": 38.59818649291992, "beta_dpo/loss_margin_mean": 34.17258071899414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22173274596182085, "grad_norm": 232.2014923095703, "learning_rate": 4.786984671220053e-07, "logits/chosen": -0.7024506330490112, "logits/rejected": -0.6618235111236572, "loss": 0.7063, "step": 151 }, { "beta_dpo/beta": 0.007499909959733486, "beta_dpo/beta_margin_grad_mean": -0.43083834648132324, "beta_dpo/beta_margin_grad_std": 0.12350592017173767, "beta_dpo/beta_margin_mean": 0.3257027268409729, "beta_dpo/beta_margin_std": 0.6100393533706665, "beta_dpo/beta_used": 0.007499909959733486, "beta_dpo/beta_used_raw": -0.09531690180301666, "beta_dpo/gap_mean": 31.590972900390625, "beta_dpo/gap_std": 38.75636291503906, "beta_dpo/loss_margin_mean": 39.02500534057617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22320117474302498, "grad_norm": 16.272369384765625, "learning_rate": 4.78177092112495e-07, "logits/chosen": -0.6470739245414734, "logits/rejected": -0.6299198865890503, "loss": 1.2106, "step": 152 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4919048547744751, "beta_dpo/beta_margin_grad_std": 0.011256770230829716, "beta_dpo/beta_margin_mean": 0.03240638226270676, "beta_dpo/beta_margin_std": 0.04508744925260544, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2720775008201599, "beta_dpo/gap_mean": 32.21462631225586, "beta_dpo/gap_std": 40.65864944458008, "beta_dpo/loss_margin_mean": 32.40637969970703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22466960352422907, "grad_norm": 2.2857508659362793, "learning_rate": 4.776497044244016e-07, "logits/chosen": -0.6825876235961914, "logits/rejected": -0.6697901487350464, "loss": 1.3607, "step": 153 }, { "beta_dpo/beta": 0.05751248076558113, "beta_dpo/beta_margin_grad_mean": -0.37671908736228943, "beta_dpo/beta_margin_grad_std": 0.2820754051208496, "beta_dpo/beta_margin_mean": 2.0761663913726807, "beta_dpo/beta_margin_std": 4.313288688659668, "beta_dpo/beta_used": 0.05751248076558113, "beta_dpo/beta_used_raw": -0.1990230530500412, "beta_dpo/gap_mean": 31.79343605041504, "beta_dpo/gap_std": 42.261234283447266, "beta_dpo/loss_margin_mean": 30.59186553955078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2261380323054332, "grad_norm": 210.48843383789062, "learning_rate": 4.771163179548808e-07, "logits/chosen": -0.6370819807052612, "logits/rejected": -0.6220649480819702, "loss": 1.5172, "step": 154 }, { "beta_dpo/beta": 0.05650586262345314, "beta_dpo/beta_margin_grad_mean": -0.34304705262184143, "beta_dpo/beta_margin_grad_std": 0.25319162011146545, "beta_dpo/beta_margin_mean": 2.0134613513946533, "beta_dpo/beta_margin_std": 3.595337152481079, "beta_dpo/beta_used": 0.05650586262345314, "beta_dpo/beta_used_raw": -0.056411731988191605, "beta_dpo/gap_mean": 31.931396484375, "beta_dpo/gap_std": 42.397926330566406, "beta_dpo/loss_margin_mean": 32.39974594116211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2276064610866373, "grad_norm": 98.22732543945312, "learning_rate": 4.7657694675916247e-07, "logits/chosen": -0.659138560295105, "logits/rejected": -0.6297906041145325, "loss": 1.0261, "step": 155 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49459147453308105, "beta_dpo/beta_margin_grad_std": 0.011738932691514492, "beta_dpo/beta_margin_mean": 0.02164500020444393, "beta_dpo/beta_margin_std": 0.04698922485113144, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.28176349401474, "beta_dpo/gap_mean": 30.717424392700195, "beta_dpo/gap_std": 43.143798828125, "beta_dpo/loss_margin_mean": 21.64499855041504, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2290748898678414, "grad_norm": 2.090728759765625, "learning_rate": 4.7603160505017893e-07, "logits/chosen": -0.6189935803413391, "logits/rejected": -0.5884729623794556, "loss": 1.3623, "step": 156 }, { "beta_dpo/beta": 0.5229025483131409, "beta_dpo/beta_margin_grad_mean": -0.17714013159275055, "beta_dpo/beta_margin_grad_std": 0.34146979451179504, "beta_dpo/beta_margin_mean": 23.52101707458496, "beta_dpo/beta_margin_std": 28.525287628173828, "beta_dpo/beta_used": 0.5229025483131409, "beta_dpo/beta_used_raw": 0.5229025483131409, "beta_dpo/gap_mean": 31.968534469604492, "beta_dpo/gap_std": 43.23138427734375, "beta_dpo/loss_margin_mean": 41.19452667236328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2305433186490455, "grad_norm": 566.1207885742188, "learning_rate": 4.7548030719819154e-07, "logits/chosen": -0.6501755714416504, "logits/rejected": -0.6394015550613403, "loss": 1.3783, "step": 157 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48920392990112305, "beta_dpo/beta_margin_grad_std": 0.014421283267438412, "beta_dpo/beta_margin_mean": 0.04324439913034439, "beta_dpo/beta_margin_std": 0.057812485843896866, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2828848361968994, "beta_dpo/gap_mean": 33.84852600097656, "beta_dpo/gap_std": 46.209800720214844, "beta_dpo/loss_margin_mean": 43.2443962097168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23201174743024963, "grad_norm": 2.377941131591797, "learning_rate": 4.7492306773041136e-07, "logits/chosen": -0.6457036733627319, "logits/rejected": -0.6453630328178406, "loss": 1.3594, "step": 158 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49216148257255554, "beta_dpo/beta_margin_grad_std": 0.01246555708348751, "beta_dpo/beta_margin_mean": 0.031383663415908813, "beta_dpo/beta_margin_std": 0.04992513731122017, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.592555582523346, "beta_dpo/gap_mean": 33.542808532714844, "beta_dpo/gap_std": 47.51161193847656, "beta_dpo/loss_margin_mean": 31.38365936279297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23348017621145375, "grad_norm": 2.286787748336792, "learning_rate": 4.743599013306165e-07, "logits/chosen": -0.6784383058547974, "logits/rejected": -0.6375674605369568, "loss": 1.3647, "step": 159 }, { "beta_dpo/beta": 0.16555535793304443, "beta_dpo/beta_margin_grad_mean": -0.3650355041027069, "beta_dpo/beta_margin_grad_std": 0.28689926862716675, "beta_dpo/beta_margin_mean": 5.125879287719727, "beta_dpo/beta_margin_std": 12.860206604003906, "beta_dpo/beta_used": 0.16555535793304443, "beta_dpo/beta_used_raw": -0.04903079569339752, "beta_dpo/gap_mean": 34.68842697143555, "beta_dpo/gap_std": 48.956329345703125, "beta_dpo/loss_margin_mean": 37.921714782714844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23494860499265785, "grad_norm": 179.4539794921875, "learning_rate": 4.737908228387656e-07, "logits/chosen": -0.6441961526870728, "logits/rejected": -0.6223233342170715, "loss": 1.2241, "step": 160 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49170783162117004, "beta_dpo/beta_margin_grad_std": 0.011308044195175171, "beta_dpo/beta_margin_mean": 0.03319420665502548, "beta_dpo/beta_margin_std": 0.04527975618839264, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6533927917480469, "beta_dpo/gap_mean": 34.30507278442383, "beta_dpo/gap_std": 48.81843948364258, "beta_dpo/loss_margin_mean": 33.1942024230957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23641703377386197, "grad_norm": 2.1890041828155518, "learning_rate": 4.7321584725060594e-07, "logits/chosen": -0.6661697626113892, "logits/rejected": -0.646446704864502, "loss": 1.365, "step": 161 }, { "beta_dpo/beta": 0.0633186399936676, "beta_dpo/beta_margin_grad_mean": -0.33688822388648987, "beta_dpo/beta_margin_grad_std": 0.2615722715854645, "beta_dpo/beta_margin_mean": 3.0562398433685303, "beta_dpo/beta_margin_std": 6.056267261505127, "beta_dpo/beta_used": 0.0633186399936676, "beta_dpo/beta_used_raw": 0.001413147896528244, "beta_dpo/gap_mean": 34.92676544189453, "beta_dpo/gap_std": 49.209938049316406, "beta_dpo/loss_margin_mean": 34.6696891784668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23788546255506607, "grad_norm": 66.83883666992188, "learning_rate": 4.7263498971727905e-07, "logits/chosen": -0.6479376554489136, "logits/rejected": -0.6184839010238647, "loss": 0.9902, "step": 162 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4915504455566406, "beta_dpo/beta_margin_grad_std": 0.010625366121530533, "beta_dpo/beta_margin_mean": 0.03381972759962082, "beta_dpo/beta_margin_std": 0.04253895580768585, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.27464038133621216, "beta_dpo/gap_mean": 34.13161849975586, "beta_dpo/gap_std": 47.94363021850586, "beta_dpo/loss_margin_mean": 33.819725036621094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2393538913362702, "grad_norm": 2.279186487197876, "learning_rate": 4.720482655449212e-07, "logits/chosen": -0.6360805630683899, "logits/rejected": -0.6068499684333801, "loss": 1.3589, "step": 163 }, { "beta_dpo/beta": 0.33781903982162476, "beta_dpo/beta_margin_grad_mean": -0.3097226321697235, "beta_dpo/beta_margin_grad_std": 0.29108506441116333, "beta_dpo/beta_margin_mean": 16.98153305053711, "beta_dpo/beta_margin_std": 27.556440353393555, "beta_dpo/beta_used": 0.33781903982162476, "beta_dpo/beta_used_raw": 0.19692449271678925, "beta_dpo/gap_mean": 35.37065124511719, "beta_dpo/gap_std": 47.215484619140625, "beta_dpo/loss_margin_mean": 39.373016357421875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24082232011747431, "grad_norm": 561.4354858398438, "learning_rate": 4.714556901942599e-07, "logits/chosen": -0.653258740901947, "logits/rejected": -0.6248881816864014, "loss": 2.7102, "step": 164 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.493878573179245, "beta_dpo/beta_margin_grad_std": 0.01068994589149952, "beta_dpo/beta_margin_mean": 0.024500226601958275, "beta_dpo/beta_margin_std": 0.04279119148850441, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4569028615951538, "beta_dpo/gap_mean": 33.61799621582031, "beta_dpo/gap_std": 46.200439453125, "beta_dpo/loss_margin_mean": 24.500225067138672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2422907488986784, "grad_norm": 2.3229660987854004, "learning_rate": 4.708572792802069e-07, "logits/chosen": -0.6612030267715454, "logits/rejected": -0.6200650930404663, "loss": 1.3624, "step": 165 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48799970746040344, "beta_dpo/beta_margin_grad_std": 0.015960004180669785, "beta_dpo/beta_margin_mean": 0.04809439927339554, "beta_dpo/beta_margin_std": 0.06407385319471359, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17358143627643585, "beta_dpo/gap_mean": 35.345130920410156, "beta_dpo/gap_std": 49.15497589111328, "beta_dpo/loss_margin_mean": 48.09439468383789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24375917767988253, "grad_norm": 2.4269919395446777, "learning_rate": 4.702530485714461e-07, "logits/chosen": -0.6051408648490906, "logits/rejected": -0.609076738357544, "loss": 1.356, "step": 166 }, { "beta_dpo/beta": 0.4378092288970947, "beta_dpo/beta_margin_grad_mean": -0.16084226965904236, "beta_dpo/beta_margin_grad_std": 0.285220742225647, "beta_dpo/beta_margin_mean": 22.484102249145508, "beta_dpo/beta_margin_std": 31.359235763549805, "beta_dpo/beta_used": 0.4378092288970947, "beta_dpo/beta_used_raw": 0.4378092288970947, "beta_dpo/gap_mean": 37.78612518310547, "beta_dpo/gap_std": 49.52611541748047, "beta_dpo/loss_margin_mean": 48.357120513916016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24522760646108663, "grad_norm": 453.6042175292969, "learning_rate": 4.6964301399001877e-07, "logits/chosen": -0.5798200368881226, "logits/rejected": -0.5775001049041748, "loss": 1.1373, "step": 167 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4908505380153656, "beta_dpo/beta_margin_grad_std": 0.011183447204530239, "beta_dpo/beta_margin_mean": 0.036625493317842484, "beta_dpo/beta_margin_std": 0.04478682205080986, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22355516254901886, "beta_dpo/gap_mean": 38.02488327026367, "beta_dpo/gap_std": 49.34698486328125, "beta_dpo/loss_margin_mean": 36.625492095947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24669603524229075, "grad_norm": 2.242811679840088, "learning_rate": 4.690271916109034e-07, "logits/chosen": -0.632524847984314, "logits/rejected": -0.611569344997406, "loss": 1.3542, "step": 168 }, { "beta_dpo/beta": 0.2027139812707901, "beta_dpo/beta_margin_grad_mean": -0.3231821358203888, "beta_dpo/beta_margin_grad_std": 0.286211758852005, "beta_dpo/beta_margin_mean": 9.191436767578125, "beta_dpo/beta_margin_std": 18.672359466552734, "beta_dpo/beta_used": 0.2027139812707901, "beta_dpo/beta_used_raw": -0.21766288578510284, "beta_dpo/gap_mean": 37.77613830566406, "beta_dpo/gap_std": 50.35961151123047, "beta_dpo/loss_margin_mean": 33.702911376953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24816446402349487, "grad_norm": 235.57923889160156, "learning_rate": 4.6840559766159235e-07, "logits/chosen": -0.6554511189460754, "logits/rejected": -0.632649838924408, "loss": 1.2102, "step": 169 }, { "beta_dpo/beta": 0.28471559286117554, "beta_dpo/beta_margin_grad_mean": -0.29542797803878784, "beta_dpo/beta_margin_grad_std": 0.2833177447319031, "beta_dpo/beta_margin_mean": 14.588911056518555, "beta_dpo/beta_margin_std": 24.41724967956543, "beta_dpo/beta_used": 0.28471559286117554, "beta_dpo/beta_used_raw": 0.07621648907661438, "beta_dpo/gap_mean": 38.005332946777344, "beta_dpo/gap_std": 50.405731201171875, "beta_dpo/loss_margin_mean": 41.08964538574219, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24963289280469897, "grad_norm": 200.22003173828125, "learning_rate": 4.6777824852166437e-07, "logits/chosen": -0.6105868816375732, "logits/rejected": -0.5924707651138306, "loss": 0.8339, "step": 170 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49198728799819946, "beta_dpo/beta_margin_grad_std": 0.01487717404961586, "beta_dpo/beta_margin_mean": 0.03210390359163284, "beta_dpo/beta_margin_std": 0.059691086411476135, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4475504755973816, "beta_dpo/gap_mean": 36.77374267578125, "beta_dpo/gap_std": 51.403194427490234, "beta_dpo/loss_margin_mean": 32.103904724121094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2511013215859031, "grad_norm": 2.3106272220611572, "learning_rate": 4.6714516072235273e-07, "logits/chosen": -0.6592748761177063, "logits/rejected": -0.6177250146865845, "loss": 1.3592, "step": 171 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49181634187698364, "beta_dpo/beta_margin_grad_std": 0.012845886871218681, "beta_dpo/beta_margin_mean": 0.03276326134800911, "beta_dpo/beta_margin_std": 0.05145728588104248, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.24236111342906952, "beta_dpo/gap_mean": 36.16783905029297, "beta_dpo/gap_std": 52.5489501953125, "beta_dpo/loss_margin_mean": 32.76325988769531, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2525697503671072, "grad_norm": 2.4713857173919678, "learning_rate": 4.6650635094610966e-07, "logits/chosen": -0.6973352432250977, "logits/rejected": -0.6602545976638794, "loss": 1.3565, "step": 172 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4913583993911743, "beta_dpo/beta_margin_grad_std": 0.011392601765692234, "beta_dpo/beta_margin_mean": 0.03459496796131134, "beta_dpo/beta_margin_std": 0.04563411697745323, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4711419939994812, "beta_dpo/gap_mean": 35.43373107910156, "beta_dpo/gap_std": 51.29859161376953, "beta_dpo/loss_margin_mean": 34.594966888427734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2540381791483113, "grad_norm": 2.825610637664795, "learning_rate": 4.6586183602616687e-07, "logits/chosen": -0.658379316329956, "logits/rejected": -0.6150857210159302, "loss": 1.3608, "step": 173 }, { "beta_dpo/beta": 0.21091538667678833, "beta_dpo/beta_margin_grad_mean": -0.3304974436759949, "beta_dpo/beta_margin_grad_std": 0.27658578753471375, "beta_dpo/beta_margin_mean": 10.612117767333984, "beta_dpo/beta_margin_std": 22.01104736328125, "beta_dpo/beta_used": 0.21091538667678833, "beta_dpo/beta_used_raw": 0.15830256044864655, "beta_dpo/gap_mean": 35.927711486816406, "beta_dpo/gap_std": 51.60816192626953, "beta_dpo/loss_margin_mean": 40.59208679199219, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2555066079295154, "grad_norm": 239.2338409423828, "learning_rate": 4.652116329460919e-07, "logits/chosen": -0.5859450101852417, "logits/rejected": -0.6018394231796265, "loss": 0.906, "step": 174 }, { "beta_dpo/beta": 0.4976291060447693, "beta_dpo/beta_margin_grad_mean": -0.14816464483737946, "beta_dpo/beta_margin_grad_std": 0.34046775102615356, "beta_dpo/beta_margin_mean": 28.725933074951172, "beta_dpo/beta_margin_std": 33.13698196411133, "beta_dpo/beta_used": 0.4976291060447693, "beta_dpo/beta_used_raw": 0.4976291060447693, "beta_dpo/gap_mean": 39.685943603515625, "beta_dpo/gap_std": 53.021728515625, "beta_dpo/loss_margin_mean": 56.412723541259766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25697503671071953, "grad_norm": 1072.8525390625, "learning_rate": 4.645557588393406e-07, "logits/chosen": -0.5577561855316162, "logits/rejected": -0.5457127690315247, "loss": 1.7614, "step": 175 }, { "beta_dpo/beta": 0.05489476025104523, "beta_dpo/beta_margin_grad_mean": -0.35436689853668213, "beta_dpo/beta_margin_grad_std": 0.25154080986976624, "beta_dpo/beta_margin_mean": 2.9585845470428467, "beta_dpo/beta_margin_std": 6.016172885894775, "beta_dpo/beta_used": 0.05489476025104523, "beta_dpo/beta_used_raw": -0.031695641577243805, "beta_dpo/gap_mean": 41.4671630859375, "beta_dpo/gap_std": 53.848289489746094, "beta_dpo/loss_margin_mean": 49.21371078491211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25844346549192365, "grad_norm": 136.2383270263672, "learning_rate": 4.638942309888058e-07, "logits/chosen": -0.5958969593048096, "logits/rejected": -0.6080245971679688, "loss": 1.1292, "step": 176 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48863834142684937, "beta_dpo/beta_margin_grad_std": 0.015036150813102722, "beta_dpo/beta_margin_mean": 0.045510660856962204, "beta_dpo/beta_margin_std": 0.06027425080537796, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20864097774028778, "beta_dpo/gap_mean": 42.564491271972656, "beta_dpo/gap_std": 55.388065338134766, "beta_dpo/loss_margin_mean": 45.510658264160156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2599118942731278, "grad_norm": 3.158673048019409, "learning_rate": 4.6322706682636137e-07, "logits/chosen": -0.612617015838623, "logits/rejected": -0.5953266024589539, "loss": 1.3498, "step": 177 }, { "beta_dpo/beta": 0.18339481949806213, "beta_dpo/beta_margin_grad_mean": -0.1955973207950592, "beta_dpo/beta_margin_grad_std": 0.3506017029285431, "beta_dpo/beta_margin_mean": 11.09678840637207, "beta_dpo/beta_margin_std": 12.919548988342285, "beta_dpo/beta_used": 0.18339481949806213, "beta_dpo/beta_used_raw": 0.18339481949806213, "beta_dpo/gap_mean": 45.61469650268555, "beta_dpo/gap_std": 58.27642059326172, "beta_dpo/loss_margin_mean": 61.0152587890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26138032305433184, "grad_norm": 433.4047546386719, "learning_rate": 4.6255428393240354e-07, "logits/chosen": -0.5409312844276428, "logits/rejected": -0.5512675642967224, "loss": 1.3433, "step": 178 }, { "beta_dpo/beta": 0.04584415256977081, "beta_dpo/beta_margin_grad_mean": -0.33513152599334717, "beta_dpo/beta_margin_grad_std": 0.3240673542022705, "beta_dpo/beta_margin_mean": 1.8824143409729004, "beta_dpo/beta_margin_std": 4.4029927253723145, "beta_dpo/beta_used": 0.04584415256977081, "beta_dpo/beta_used_raw": 0.04584415256977081, "beta_dpo/gap_mean": 46.626121520996094, "beta_dpo/gap_std": 60.98898696899414, "beta_dpo/loss_margin_mean": 46.46109390258789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26284875183553597, "grad_norm": 119.420166015625, "learning_rate": 4.6187590003538724e-07, "logits/chosen": -0.555252730846405, "logits/rejected": -0.5443023443222046, "loss": 0.977, "step": 179 }, { "beta_dpo/beta": 0.34115684032440186, "beta_dpo/beta_margin_grad_mean": -0.3222728371620178, "beta_dpo/beta_margin_grad_std": 0.29305145144462585, "beta_dpo/beta_margin_mean": 18.263864517211914, "beta_dpo/beta_margin_std": 32.01734924316406, "beta_dpo/beta_used": 0.34115684032440186, "beta_dpo/beta_used_raw": 0.2832660675048828, "beta_dpo/gap_mean": 47.3960075378418, "beta_dpo/gap_std": 61.42702102661133, "beta_dpo/loss_margin_mean": 54.383392333984375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2643171806167401, "grad_norm": 654.2474365234375, "learning_rate": 4.611919330113591e-07, "logits/chosen": -0.5455374717712402, "logits/rejected": -0.5354658961296082, "loss": 2.241, "step": 180 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49211806058883667, "beta_dpo/beta_margin_grad_std": 0.01292695663869381, "beta_dpo/beta_margin_mean": 0.031555600464344025, "beta_dpo/beta_margin_std": 0.051769278943538666, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6792909502983093, "beta_dpo/gap_mean": 45.22578430175781, "beta_dpo/gap_std": 59.60420608520508, "beta_dpo/loss_margin_mean": 31.555599212646484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2657856093979442, "grad_norm": 2.6833486557006836, "learning_rate": 4.605024008834863e-07, "logits/chosen": -0.6246634721755981, "logits/rejected": -0.5926576852798462, "loss": 1.3547, "step": 181 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48541346192359924, "beta_dpo/beta_margin_grad_std": 0.016013246029615402, "beta_dpo/beta_margin_mean": 0.05842866376042366, "beta_dpo/beta_margin_std": 0.06418631225824356, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13890297710895538, "beta_dpo/gap_mean": 46.82099533081055, "beta_dpo/gap_std": 60.325225830078125, "beta_dpo/loss_margin_mean": 58.42866134643555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26725403817914833, "grad_norm": 3.020500659942627, "learning_rate": 4.598073218215817e-07, "logits/chosen": -0.568708062171936, "logits/rejected": -0.5688773989677429, "loss": 1.3445, "step": 182 }, { "beta_dpo/beta": 0.21429939568042755, "beta_dpo/beta_margin_grad_mean": -0.31174200773239136, "beta_dpo/beta_margin_grad_std": 0.29498615860939026, "beta_dpo/beta_margin_mean": 11.448225975036621, "beta_dpo/beta_margin_std": 18.331274032592773, "beta_dpo/beta_used": 0.21429939568042755, "beta_dpo/beta_used_raw": -0.7067348957061768, "beta_dpo/gap_mean": 46.11639404296875, "beta_dpo/gap_std": 58.62394714355469, "beta_dpo/loss_margin_mean": 33.00257873535156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2687224669603524, "grad_norm": 198.4879608154297, "learning_rate": 4.5910671414162484e-07, "logits/chosen": -0.5793416500091553, "logits/rejected": -0.5485885739326477, "loss": 1.1118, "step": 183 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4913460612297058, "beta_dpo/beta_margin_grad_std": 0.012888733297586441, "beta_dpo/beta_margin_mean": 0.03465365990996361, "beta_dpo/beta_margin_std": 0.05163479968905449, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6931981444358826, "beta_dpo/gap_mean": 42.51811981201172, "beta_dpo/gap_std": 57.046356201171875, "beta_dpo/loss_margin_mean": 34.653656005859375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2701908957415565, "grad_norm": 2.891065835952759, "learning_rate": 4.5840059630527985e-07, "logits/chosen": -0.6110912561416626, "logits/rejected": -0.5948389768600464, "loss": 1.3576, "step": 184 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49350568652153015, "beta_dpo/beta_margin_grad_std": 0.013207558542490005, "beta_dpo/beta_margin_mean": 0.02600991725921631, "beta_dpo/beta_margin_std": 0.052919141948223114, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0860377550125122, "beta_dpo/gap_mean": 40.281700134277344, "beta_dpo/gap_std": 56.37439727783203, "beta_dpo/loss_margin_mean": 26.009916305541992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.27165932452276065, "grad_norm": 2.7128992080688477, "learning_rate": 4.5768898691940836e-07, "logits/chosen": -0.6103675961494446, "logits/rejected": -0.566834032535553, "loss": 1.3663, "step": 185 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4881168007850647, "beta_dpo/beta_margin_grad_std": 0.014697042293846607, "beta_dpo/beta_margin_mean": 0.0475916862487793, "beta_dpo/beta_margin_std": 0.05890846624970436, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08622078597545624, "beta_dpo/gap_mean": 40.561866760253906, "beta_dpo/gap_std": 56.94186019897461, "beta_dpo/loss_margin_mean": 47.59168243408203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.27312775330396477, "grad_norm": 2.8734123706817627, "learning_rate": 4.5697190473557947e-07, "logits/chosen": -0.5887362360954285, "logits/rejected": -0.5443171262741089, "loss": 1.3496, "step": 186 }, { "beta_dpo/beta": 0.09833470731973648, "beta_dpo/beta_margin_grad_mean": -0.29737338423728943, "beta_dpo/beta_margin_grad_std": 0.27039891481399536, "beta_dpo/beta_margin_mean": 5.131124019622803, "beta_dpo/beta_margin_std": 8.384321212768555, "beta_dpo/beta_used": 0.09833470731973648, "beta_dpo/beta_used_raw": -0.1899646818637848, "beta_dpo/gap_mean": 40.478797912597656, "beta_dpo/gap_std": 56.40562438964844, "beta_dpo/loss_margin_mean": 40.979331970214844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2745961820851689, "grad_norm": 143.63812255859375, "learning_rate": 4.5624936864957555e-07, "logits/chosen": -0.5601129531860352, "logits/rejected": -0.5561456680297852, "loss": 0.8623, "step": 187 }, { "beta_dpo/beta": 0.15021683275699615, "beta_dpo/beta_margin_grad_mean": -0.3293021023273468, "beta_dpo/beta_margin_grad_std": 0.28328651189804077, "beta_dpo/beta_margin_mean": 7.610217571258545, "beta_dpo/beta_margin_std": 14.164756774902344, "beta_dpo/beta_used": 0.15021683275699615, "beta_dpo/beta_used_raw": -0.06993640959262848, "beta_dpo/gap_mean": 42.536376953125, "beta_dpo/gap_std": 56.52394104003906, "beta_dpo/loss_margin_mean": 51.156455993652344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.27606461086637296, "grad_norm": 319.3719787597656, "learning_rate": 4.5552139770089454e-07, "logits/chosen": -0.5592831373214722, "logits/rejected": -0.5565686225891113, "loss": 1.1264, "step": 188 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4891382157802582, "beta_dpo/beta_margin_grad_std": 0.01579362154006958, "beta_dpo/beta_margin_mean": 0.04351968318223953, "beta_dpo/beta_margin_std": 0.06336233019828796, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.27419549226760864, "beta_dpo/gap_mean": 42.98434066772461, "beta_dpo/gap_std": 57.9720573425293, "beta_dpo/loss_margin_mean": 43.51968002319336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2775330396475771, "grad_norm": 2.7365245819091797, "learning_rate": 4.5478801107224794e-07, "logits/chosen": -0.5560922026634216, "logits/rejected": -0.5184494853019714, "loss": 1.3503, "step": 189 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48745664954185486, "beta_dpo/beta_margin_grad_std": 0.016793405637145042, "beta_dpo/beta_margin_mean": 0.05025511234998703, "beta_dpo/beta_margin_std": 0.0673254132270813, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.20993714034557343, "beta_dpo/gap_mean": 43.333824157714844, "beta_dpo/gap_std": 59.198699951171875, "beta_dpo/loss_margin_mean": 50.25510787963867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2790014684287812, "grad_norm": 3.3970863819122314, "learning_rate": 4.5404922808905543e-07, "logits/chosen": -0.6286275386810303, "logits/rejected": -0.6088818907737732, "loss": 1.349, "step": 190 }, { "beta_dpo/beta": 0.46209681034088135, "beta_dpo/beta_margin_grad_mean": -0.24650421738624573, "beta_dpo/beta_margin_grad_std": 0.2589731812477112, "beta_dpo/beta_margin_mean": 36.4068603515625, "beta_dpo/beta_margin_std": 57.78350067138672, "beta_dpo/beta_used": 0.46209681034088135, "beta_dpo/beta_used_raw": 0.46209681034088135, "beta_dpo/gap_mean": 48.85545349121094, "beta_dpo/gap_std": 62.08613586425781, "beta_dpo/loss_margin_mean": 71.1089096069336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28046989720998533, "grad_norm": 471.61907958984375, "learning_rate": 4.5330506821893565e-07, "logits/chosen": -0.5496765375137329, "logits/rejected": -0.5357059240341187, "loss": 1.0202, "step": 191 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48698392510414124, "beta_dpo/beta_margin_grad_std": 0.018710140138864517, "beta_dpo/beta_margin_mean": 0.05218232050538063, "beta_dpo/beta_margin_std": 0.07510577142238617, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5453534126281738, "beta_dpo/gap_mean": 49.61614227294922, "beta_dpo/gap_std": 65.27165222167969, "beta_dpo/loss_margin_mean": 52.18231964111328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28193832599118945, "grad_norm": 2.940540075302124, "learning_rate": 4.5255555107119336e-07, "logits/chosen": -0.5459074974060059, "logits/rejected": -0.5400164127349854, "loss": 1.3484, "step": 192 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4925091862678528, "beta_dpo/beta_margin_grad_std": 0.017181508243083954, "beta_dpo/beta_margin_mean": 0.02998378500342369, "beta_dpo/beta_margin_std": 0.06887201964855194, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6699286103248596, "beta_dpo/gap_mean": 46.75275421142578, "beta_dpo/gap_std": 65.9295425415039, "beta_dpo/loss_margin_mean": 29.983781814575195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2834067547723935, "grad_norm": 3.5088882446289062, "learning_rate": 4.5180069639630236e-07, "logits/chosen": -0.5572738647460938, "logits/rejected": -0.5282651782035828, "loss": 1.3531, "step": 193 }, { "beta_dpo/beta": 0.19052860140800476, "beta_dpo/beta_margin_grad_mean": -0.20192070305347443, "beta_dpo/beta_margin_grad_std": 0.34633687138557434, "beta_dpo/beta_margin_mean": 8.937496185302734, "beta_dpo/beta_margin_std": 12.730754852294922, "beta_dpo/beta_used": 0.19052860140800476, "beta_dpo/beta_used_raw": 0.19052860140800476, "beta_dpo/gap_mean": 46.84581756591797, "beta_dpo/gap_std": 64.2998046875, "beta_dpo/loss_margin_mean": 48.318939208984375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28487518355359764, "grad_norm": 662.7078857421875, "learning_rate": 4.510405240853854e-07, "logits/chosen": -0.46685880422592163, "logits/rejected": -0.44785797595977783, "loss": 1.67, "step": 194 }, { "beta_dpo/beta": 0.34017544984817505, "beta_dpo/beta_margin_grad_mean": -0.24558886885643005, "beta_dpo/beta_margin_grad_std": 0.3135336637496948, "beta_dpo/beta_margin_mean": 17.846120834350586, "beta_dpo/beta_margin_std": 31.463382720947266, "beta_dpo/beta_used": 0.34017544984817505, "beta_dpo/beta_used_raw": 0.34017544984817505, "beta_dpo/gap_mean": 47.83788299560547, "beta_dpo/gap_std": 64.01758575439453, "beta_dpo/loss_margin_mean": 53.63036346435547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28634361233480177, "grad_norm": 1409.7032470703125, "learning_rate": 4.5027505416968985e-07, "logits/chosen": -0.4705166518688202, "logits/rejected": -0.48458150029182434, "loss": 2.5194, "step": 195 }, { "beta_dpo/beta": 0.13741353154182434, "beta_dpo/beta_margin_grad_mean": -0.3610975742340088, "beta_dpo/beta_margin_grad_std": 0.3129690885543823, "beta_dpo/beta_margin_mean": 9.195865631103516, "beta_dpo/beta_margin_std": 19.96077537536621, "beta_dpo/beta_used": 0.13741353154182434, "beta_dpo/beta_used_raw": -0.07183443009853363, "beta_dpo/gap_mean": 49.88597869873047, "beta_dpo/gap_std": 66.26655578613281, "beta_dpo/loss_margin_mean": 57.657196044921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2878120411160059, "grad_norm": 580.5075073242188, "learning_rate": 4.495043068200599e-07, "logits/chosen": -0.49783796072006226, "logits/rejected": -0.4742533564567566, "loss": 2.7513, "step": 196 }, { "beta_dpo/beta": 0.009012533351778984, "beta_dpo/beta_margin_grad_mean": -0.4102240204811096, "beta_dpo/beta_margin_grad_std": 0.14067673683166504, "beta_dpo/beta_margin_mean": 0.44567611813545227, "beta_dpo/beta_margin_std": 0.761162519454956, "beta_dpo/beta_used": 0.009012533351778984, "beta_dpo/beta_used_raw": -0.20918835699558258, "beta_dpo/gap_mean": 48.81125259399414, "beta_dpo/gap_std": 64.20172119140625, "beta_dpo/loss_margin_mean": 44.6383056640625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28928046989721, "grad_norm": 23.286901473999023, "learning_rate": 4.4872830234640493e-07, "logits/chosen": -0.48581668734550476, "logits/rejected": -0.47287267446517944, "loss": 1.1075, "step": 197 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4868345856666565, "beta_dpo/beta_margin_grad_std": 0.0170100387185812, "beta_dpo/beta_margin_mean": 0.05277407914400101, "beta_dpo/beta_margin_std": 0.06834717839956284, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7153933048248291, "beta_dpo/gap_mean": 49.21632385253906, "beta_dpo/gap_std": 63.374412536621094, "beta_dpo/loss_margin_mean": 52.77407455444336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2907488986784141, "grad_norm": 3.0157718658447266, "learning_rate": 4.479470611971645e-07, "logits/chosen": -0.5509780645370483, "logits/rejected": -0.5530319213867188, "loss": 1.3515, "step": 198 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48704952001571655, "beta_dpo/beta_margin_grad_std": 0.01737978495657444, "beta_dpo/beta_margin_mean": 0.05188674107193947, "beta_dpo/beta_margin_std": 0.06968604773283005, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5050678253173828, "beta_dpo/gap_mean": 50.510684967041016, "beta_dpo/gap_std": 65.09575653076172, "beta_dpo/loss_margin_mean": 51.886741638183594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2922173274596182, "grad_norm": 3.9049272537231445, "learning_rate": 4.471606039587695e-07, "logits/chosen": -0.5343225002288818, "logits/rejected": -0.5195610523223877, "loss": 1.3469, "step": 199 }, { "beta_dpo/beta": 0.09858327358961105, "beta_dpo/beta_margin_grad_mean": -0.3039037585258484, "beta_dpo/beta_margin_grad_std": 0.26042643189430237, "beta_dpo/beta_margin_mean": 5.793294429779053, "beta_dpo/beta_margin_std": 12.536651611328125, "beta_dpo/beta_used": 0.09858327358961105, "beta_dpo/beta_used_raw": -0.24377571046352386, "beta_dpo/gap_mean": 50.77419662475586, "beta_dpo/gap_std": 67.68488311767578, "beta_dpo/loss_margin_mean": 54.00712966918945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2936857562408223, "grad_norm": 191.8094024658203, "learning_rate": 4.4636895135509966e-07, "logits/chosen": -0.5364083647727966, "logits/rejected": -0.5175313949584961, "loss": 0.8706, "step": 200 }, { "epoch": 0.2936857562408223, "eval_beta_dpo/beta": 0.004597905091941357, "eval_beta_dpo/beta_margin_grad_mean": -0.49029847979545593, "eval_beta_dpo/beta_margin_grad_std": 0.022800996899604797, "eval_beta_dpo/beta_margin_mean": 0.21994154155254364, "eval_beta_dpo/beta_margin_std": 0.3260399401187897, "eval_beta_dpo/beta_used": 0.004597905091941357, "eval_beta_dpo/beta_used_raw": -1.2767338752746582, "eval_beta_dpo/gap_mean": 50.691349029541016, "eval_beta_dpo/gap_std": 68.24334716796875, "eval_beta_dpo/loss_margin_mean": 27.745779037475586, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.6063677668571472, "eval_logits/rejected": -0.5873017311096191, "eval_loss": 0.6904171705245972, "eval_runtime": 40.1818, "eval_samples_per_second": 58.21, "eval_steps_per_second": 1.842, "step": 200 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48646533489227295, "beta_dpo/beta_margin_grad_std": 0.017951475456357002, "beta_dpo/beta_margin_mean": 0.05423285812139511, "beta_dpo/beta_margin_std": 0.07199931889772415, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.24471929669380188, "beta_dpo/gap_mean": 51.0998420715332, "beta_dpo/gap_std": 69.32807922363281, "beta_dpo/loss_margin_mean": 54.232852935791016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29515418502202645, "grad_norm": 3.234513282775879, "learning_rate": 4.455721242469372e-07, "logits/chosen": -0.5788037776947021, "logits/rejected": -0.5658458471298218, "loss": 1.3421, "step": 201 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4899270534515381, "beta_dpo/beta_margin_grad_std": 0.017768291756510735, "beta_dpo/beta_margin_mean": 0.040362436324357986, "beta_dpo/beta_margin_std": 0.07123276591300964, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5578510165214539, "beta_dpo/gap_mean": 49.74256896972656, "beta_dpo/gap_std": 69.538330078125, "beta_dpo/loss_margin_mean": 40.36243438720703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2966226138032305, "grad_norm": 3.0596237182617188, "learning_rate": 4.4477014363141755e-07, "logits/chosen": -0.5542974472045898, "logits/rejected": -0.557321310043335, "loss": 1.3486, "step": 202 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48856452107429504, "beta_dpo/beta_margin_grad_std": 0.015608040615916252, "beta_dpo/beta_margin_mean": 0.04581226408481598, "beta_dpo/beta_margin_std": 0.06257802248001099, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.41438037157058716, "beta_dpo/gap_mean": 48.89398193359375, "beta_dpo/gap_std": 68.63645935058594, "beta_dpo/loss_margin_mean": 45.81226348876953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29809104258443464, "grad_norm": 3.645709753036499, "learning_rate": 4.439630306414758e-07, "logits/chosen": -0.571040153503418, "logits/rejected": -0.5497109293937683, "loss": 1.347, "step": 203 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.489501029253006, "beta_dpo/beta_margin_grad_std": 0.01954388990998268, "beta_dpo/beta_margin_mean": 0.04208584129810333, "beta_dpo/beta_margin_std": 0.07838640362024307, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0310747623443604, "beta_dpo/gap_mean": 47.7497673034668, "beta_dpo/gap_std": 70.519287109375, "beta_dpo/loss_margin_mean": 42.08584213256836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29955947136563876, "grad_norm": 2.720808982849121, "learning_rate": 4.431508065452897e-07, "logits/chosen": -0.5842176675796509, "logits/rejected": -0.5408717393875122, "loss": 1.3582, "step": 204 }, { "beta_dpo/beta": 0.2624741196632385, "beta_dpo/beta_margin_grad_mean": -0.29318341612815857, "beta_dpo/beta_margin_grad_std": 0.2785731852054596, "beta_dpo/beta_margin_mean": 21.14405059814453, "beta_dpo/beta_margin_std": 34.92091369628906, "beta_dpo/beta_used": 0.2624741196632385, "beta_dpo/beta_used_raw": 0.12516099214553833, "beta_dpo/gap_mean": 50.11834716796875, "beta_dpo/gap_std": 70.84585571289062, "beta_dpo/loss_margin_mean": 59.972965240478516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3010279001468429, "grad_norm": 358.5487365722656, "learning_rate": 4.4233349274571974e-07, "logits/chosen": -0.54551100730896, "logits/rejected": -0.5079349875450134, "loss": 0.9147, "step": 205 }, { "beta_dpo/beta": 0.8118077516555786, "beta_dpo/beta_margin_grad_mean": -0.1911478042602539, "beta_dpo/beta_margin_grad_std": 0.3803271949291229, "beta_dpo/beta_margin_mean": 56.539398193359375, "beta_dpo/beta_margin_std": 60.37042236328125, "beta_dpo/beta_used": 0.8118077516555786, "beta_dpo/beta_used_raw": 0.8118077516555786, "beta_dpo/gap_mean": 52.5726318359375, "beta_dpo/gap_std": 71.26499938964844, "beta_dpo/loss_margin_mean": 68.52181243896484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.302496328928047, "grad_norm": 1746.28271484375, "learning_rate": 4.415111107797445e-07, "logits/chosen": -0.5080777406692505, "logits/rejected": -0.5112833976745605, "loss": 5.7592, "step": 206 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4823157787322998, "beta_dpo/beta_margin_grad_std": 0.02179008349776268, "beta_dpo/beta_margin_mean": 0.07096053659915924, "beta_dpo/beta_margin_std": 0.08763889223337173, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.22176781296730042, "beta_dpo/gap_mean": 55.76563262939453, "beta_dpo/gap_std": 74.22966766357422, "beta_dpo/loss_margin_mean": 70.96053314208984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3039647577092511, "grad_norm": 3.9254820346832275, "learning_rate": 4.4068368231789365e-07, "logits/chosen": -0.5645418167114258, "logits/rejected": -0.5385115742683411, "loss": 1.3372, "step": 207 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48605671525001526, "beta_dpo/beta_margin_grad_std": 0.021491041406989098, "beta_dpo/beta_margin_mean": 0.055905092507600784, "beta_dpo/beta_margin_std": 0.08626676350831985, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.131654754281044, "beta_dpo/gap_mean": 56.717201232910156, "beta_dpo/gap_std": 76.8087158203125, "beta_dpo/loss_margin_mean": 55.905086517333984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3054331864904552, "grad_norm": 3.8811442852020264, "learning_rate": 4.398512291636768e-07, "logits/chosen": -0.5704125761985779, "logits/rejected": -0.5577903985977173, "loss": 1.3351, "step": 208 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48689284920692444, "beta_dpo/beta_margin_grad_std": 0.019833343103528023, "beta_dpo/beta_margin_mean": 0.05253633111715317, "beta_dpo/beta_margin_std": 0.07954316586256027, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.45698946714401245, "beta_dpo/gap_mean": 55.72069549560547, "beta_dpo/gap_std": 78.26738739013672, "beta_dpo/loss_margin_mean": 52.53633117675781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3069016152716593, "grad_norm": 3.4770359992980957, "learning_rate": 4.3901377325300857e-07, "logits/chosen": -0.5012378692626953, "logits/rejected": -0.4895186424255371, "loss": 1.3415, "step": 209 }, { "beta_dpo/beta": 0.15351513028144836, "beta_dpo/beta_margin_grad_mean": -0.3444797396659851, "beta_dpo/beta_margin_grad_std": 0.28890836238861084, "beta_dpo/beta_margin_mean": 9.529181480407715, "beta_dpo/beta_margin_std": 20.73506736755371, "beta_dpo/beta_used": 0.15351513028144836, "beta_dpo/beta_used_raw": -0.5257502794265747, "beta_dpo/gap_mean": 55.32640075683594, "beta_dpo/gap_std": 78.07096862792969, "beta_dpo/loss_margin_mean": 55.78252029418945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30837004405286345, "grad_norm": 341.5815124511719, "learning_rate": 4.381713366536311e-07, "logits/chosen": -0.4934021234512329, "logits/rejected": -0.48370587825775146, "loss": 1.2834, "step": 210 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4880537688732147, "beta_dpo/beta_margin_grad_std": 0.0234391950070858, "beta_dpo/beta_margin_mean": 0.047958794981241226, "beta_dpo/beta_margin_std": 0.09425321221351624, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.437325358390808, "beta_dpo/gap_mean": 54.482818603515625, "beta_dpo/gap_std": 79.86414337158203, "beta_dpo/loss_margin_mean": 47.95879364013672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30983847283406757, "grad_norm": 3.5843217372894287, "learning_rate": 4.373239415645323e-07, "logits/chosen": -0.4944462776184082, "logits/rejected": -0.4566226005554199, "loss": 1.3584, "step": 211 }, { "beta_dpo/beta": 1.3223354816436768, "beta_dpo/beta_margin_grad_mean": -0.35223668813705444, "beta_dpo/beta_margin_grad_std": 0.32164767384529114, "beta_dpo/beta_margin_mean": 141.0015869140625, "beta_dpo/beta_margin_std": 267.85894775390625, "beta_dpo/beta_used": 1.3223354816436768, "beta_dpo/beta_used_raw": 1.0547301769256592, "beta_dpo/gap_mean": 59.36201477050781, "beta_dpo/gap_std": 85.50032043457031, "beta_dpo/loss_margin_mean": 81.92825317382812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31130690161527164, "grad_norm": 7063.01416015625, "learning_rate": 4.3647161031536086e-07, "logits/chosen": -0.4703846573829651, "logits/rejected": -0.4657232165336609, "loss": 29.8368, "step": 212 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4821443259716034, "beta_dpo/beta_margin_grad_std": 0.025429587811231613, "beta_dpo/beta_margin_mean": 0.07165674865245819, "beta_dpo/beta_margin_std": 0.10214556753635406, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.37106069922447205, "beta_dpo/gap_mean": 61.29865646362305, "beta_dpo/gap_std": 87.67449951171875, "beta_dpo/loss_margin_mean": 71.65673828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31277533039647576, "grad_norm": 4.132566452026367, "learning_rate": 4.3561436536583774e-07, "logits/chosen": -0.47617167234420776, "logits/rejected": -0.44875389337539673, "loss": 1.335, "step": 213 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4833696484565735, "beta_dpo/beta_margin_grad_std": 0.023280689492821693, "beta_dpo/beta_margin_mean": 0.06673929840326309, "beta_dpo/beta_margin_std": 0.09353061765432358, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2031300812959671, "beta_dpo/gap_mean": 62.14265823364258, "beta_dpo/gap_std": 89.926513671875, "beta_dpo/loss_margin_mean": 66.73929595947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3142437591776799, "grad_norm": 5.018362998962402, "learning_rate": 4.3475222930516473e-07, "logits/chosen": -0.4100716710090637, "logits/rejected": -0.41462287306785583, "loss": 1.3312, "step": 214 }, { "beta_dpo/beta": 0.3104745149612427, "beta_dpo/beta_margin_grad_mean": -0.35084572434425354, "beta_dpo/beta_margin_grad_std": 0.3201132118701935, "beta_dpo/beta_margin_mean": 24.29639434814453, "beta_dpo/beta_margin_std": 55.270938873291016, "beta_dpo/beta_used": 0.3104745149612427, "beta_dpo/beta_used_raw": -0.06115126609802246, "beta_dpo/gap_mean": 64.10518646240234, "beta_dpo/gap_std": 91.72321319580078, "beta_dpo/loss_margin_mean": 70.83655548095703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.315712187958884, "grad_norm": 1893.756103515625, "learning_rate": 4.3388522485142885e-07, "logits/chosen": -0.4227758049964905, "logits/rejected": -0.41368818283081055, "loss": 5.4992, "step": 215 }, { "beta_dpo/beta": 0.1452518105506897, "beta_dpo/beta_margin_grad_mean": -0.3200395703315735, "beta_dpo/beta_margin_grad_std": 0.28639811277389526, "beta_dpo/beta_margin_mean": 12.094311714172363, "beta_dpo/beta_margin_std": 23.100305557250977, "beta_dpo/beta_used": 0.1452518105506897, "beta_dpo/beta_used_raw": -0.4864157736301422, "beta_dpo/gap_mean": 63.70437240600586, "beta_dpo/gap_std": 92.65457153320312, "beta_dpo/loss_margin_mean": 66.72907257080078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31718061674008813, "grad_norm": 478.4328918457031, "learning_rate": 4.330133748510036e-07, "logits/chosen": -0.4186558425426483, "logits/rejected": -0.40211576223373413, "loss": 1.6065, "step": 216 }, { "beta_dpo/beta": 0.052402470260858536, "beta_dpo/beta_margin_grad_mean": -0.38916242122650146, "beta_dpo/beta_margin_grad_std": 0.3128577768802643, "beta_dpo/beta_margin_mean": 3.8621439933776855, "beta_dpo/beta_margin_std": 9.069067001342773, "beta_dpo/beta_used": 0.052402470260858536, "beta_dpo/beta_used_raw": -0.15126293897628784, "beta_dpo/gap_mean": 67.39096069335938, "beta_dpo/gap_std": 93.9806137084961, "beta_dpo/loss_margin_mean": 80.78280639648438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3186490455212922, "grad_norm": 547.8667602539062, "learning_rate": 4.3213670227794757e-07, "logits/chosen": -0.4100034236907959, "logits/rejected": -0.407045841217041, "loss": 1.4452, "step": 217 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4856181740760803, "beta_dpo/beta_margin_grad_std": 0.024022625759243965, "beta_dpo/beta_margin_mean": 0.05773010477423668, "beta_dpo/beta_margin_std": 0.09657855331897736, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1618341207504272, "beta_dpo/gap_mean": 66.28788757324219, "beta_dpo/gap_std": 94.35865783691406, "beta_dpo/loss_margin_mean": 57.7301025390625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3201174743024963, "grad_norm": 5.246548652648926, "learning_rate": 4.3125523023339815e-07, "logits/chosen": -0.431363046169281, "logits/rejected": -0.4271088242530823, "loss": 1.3429, "step": 218 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4872594475746155, "beta_dpo/beta_margin_grad_std": 0.02623271755874157, "beta_dpo/beta_margin_mean": 0.05115894228219986, "beta_dpo/beta_margin_std": 0.10570129752159119, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3351142406463623, "beta_dpo/gap_mean": 62.747528076171875, "beta_dpo/gap_std": 96.75794982910156, "beta_dpo/loss_margin_mean": 51.15894317626953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.32158590308370044, "grad_norm": 4.286383152008057, "learning_rate": 4.303689819449636e-07, "logits/chosen": -0.4274938106536865, "logits/rejected": -0.41343453526496887, "loss": 1.3488, "step": 219 }, { "beta_dpo/beta": 0.17918218672275543, "beta_dpo/beta_margin_grad_mean": -0.34378835558891296, "beta_dpo/beta_margin_grad_std": 0.3021136224269867, "beta_dpo/beta_margin_mean": 9.130165100097656, "beta_dpo/beta_margin_std": 20.58268928527832, "beta_dpo/beta_used": 0.17918218672275543, "beta_dpo/beta_used_raw": 0.05747605115175247, "beta_dpo/gap_mean": 60.67655944824219, "beta_dpo/gap_std": 93.46902465820312, "beta_dpo/loss_margin_mean": 43.123931884765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.32305433186490456, "grad_norm": 1213.65625, "learning_rate": 4.2947798076611047e-07, "logits/chosen": -0.45730453729629517, "logits/rejected": -0.43929389119148254, "loss": 0.914, "step": 220 }, { "beta_dpo/beta": 1.0828216075897217, "beta_dpo/beta_margin_grad_mean": -0.285607248544693, "beta_dpo/beta_margin_grad_std": 0.28007781505584717, "beta_dpo/beta_margin_mean": 110.4359359741211, "beta_dpo/beta_margin_std": 165.42660522460938, "beta_dpo/beta_used": 1.0828216075897217, "beta_dpo/beta_used_raw": 0.8143908977508545, "beta_dpo/gap_mean": 63.86392593383789, "beta_dpo/gap_std": 92.72855377197266, "beta_dpo/loss_margin_mean": 93.2608871459961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3245227606461087, "grad_norm": 3353.1982421875, "learning_rate": 4.285822501755485e-07, "logits/chosen": -0.41670554876327515, "logits/rejected": -0.42472323775291443, "loss": 8.7201, "step": 221 }, { "beta_dpo/beta": 0.24051879346370697, "beta_dpo/beta_margin_grad_mean": -0.19494900107383728, "beta_dpo/beta_margin_grad_std": 0.3727710545063019, "beta_dpo/beta_margin_mean": 14.743354797363281, "beta_dpo/beta_margin_std": 23.80963897705078, "beta_dpo/beta_used": 0.24051879346370697, "beta_dpo/beta_used_raw": 0.24051879346370697, "beta_dpo/gap_mean": 65.32881164550781, "beta_dpo/gap_std": 91.67716979980469, "beta_dpo/loss_margin_mean": 64.1358642578125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.32599118942731276, "grad_norm": 1461.958251953125, "learning_rate": 4.276818137766118e-07, "logits/chosen": -0.4541017413139343, "logits/rejected": -0.45362943410873413, "loss": 3.3505, "step": 222 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48626866936683655, "beta_dpo/beta_margin_grad_std": 0.02382073365151882, "beta_dpo/beta_margin_mean": 0.05513327941298485, "beta_dpo/beta_margin_std": 0.09587711095809937, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6068298816680908, "beta_dpo/gap_mean": 64.32170104980469, "beta_dpo/gap_std": 92.70675659179688, "beta_dpo/loss_margin_mean": 55.133277893066406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3274596182085169, "grad_norm": 5.079369068145752, "learning_rate": 4.2677669529663686e-07, "logits/chosen": -0.4292357563972473, "logits/rejected": -0.41810518503189087, "loss": 1.3355, "step": 223 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4834619462490082, "beta_dpo/beta_margin_grad_std": 0.024282945320010185, "beta_dpo/beta_margin_mean": 0.06639501452445984, "beta_dpo/beta_margin_std": 0.09770266711711884, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0090042352676392, "beta_dpo/gap_mean": 64.98542785644531, "beta_dpo/gap_std": 92.75971221923828, "beta_dpo/loss_margin_mean": 66.39501190185547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.328928046989721, "grad_norm": 5.289470672607422, "learning_rate": 4.2586691858633747e-07, "logits/chosen": -0.40946879982948303, "logits/rejected": -0.3898620009422302, "loss": 1.3414, "step": 224 }, { "beta_dpo/beta": 0.5190803408622742, "beta_dpo/beta_margin_grad_mean": -0.22263871133327484, "beta_dpo/beta_margin_grad_std": 0.4009822607040405, "beta_dpo/beta_margin_mean": 43.59006881713867, "beta_dpo/beta_margin_std": 67.32926940917969, "beta_dpo/beta_used": 0.5190803408622742, "beta_dpo/beta_used_raw": 0.5190803408622742, "beta_dpo/gap_mean": 66.35330200195312, "beta_dpo/gap_std": 93.56597137451172, "beta_dpo/loss_margin_mean": 78.7562484741211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3303964757709251, "grad_norm": 5388.49951171875, "learning_rate": 4.249525076191759e-07, "logits/chosen": -0.42718806862831116, "logits/rejected": -0.4125264286994934, "loss": 2.7657, "step": 225 }, { "beta_dpo/beta": 0.004440045915544033, "beta_dpo/beta_margin_grad_mean": -0.433518648147583, "beta_dpo/beta_margin_grad_std": 0.11666657030582428, "beta_dpo/beta_margin_mean": 0.30315059423446655, "beta_dpo/beta_margin_std": 0.5602424740791321, "beta_dpo/beta_used": 0.004440045915544033, "beta_dpo/beta_used_raw": -0.38874343037605286, "beta_dpo/gap_mean": 65.67891693115234, "beta_dpo/gap_std": 93.4427490234375, "beta_dpo/loss_margin_mean": 54.554115295410156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.33186490455212925, "grad_norm": 17.83782958984375, "learning_rate": 4.2403348649073167e-07, "logits/chosen": -0.48651188611984253, "logits/rejected": -0.4508872628211975, "loss": 1.1641, "step": 226 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4800347089767456, "beta_dpo/beta_margin_grad_std": 0.023754583671689034, "beta_dpo/beta_margin_mean": 0.08015818148851395, "beta_dpo/beta_margin_std": 0.09570427238941193, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21814611554145813, "beta_dpo/gap_mean": 66.89671325683594, "beta_dpo/gap_std": 93.85809326171875, "beta_dpo/loss_margin_mean": 80.1581802368164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3333333333333333, "grad_norm": 4.970055103302002, "learning_rate": 4.2310987941806615e-07, "logits/chosen": -0.46300429105758667, "logits/rejected": -0.4537394046783447, "loss": 1.3266, "step": 227 }, { "beta_dpo/beta": 0.23101337254047394, "beta_dpo/beta_margin_grad_mean": -0.36090514063835144, "beta_dpo/beta_margin_grad_std": 0.31774094700813293, "beta_dpo/beta_margin_mean": 11.812125205993652, "beta_dpo/beta_margin_std": 30.85622215270996, "beta_dpo/beta_used": 0.23101337254047394, "beta_dpo/beta_used_raw": -0.04854981601238251, "beta_dpo/gap_mean": 65.2583999633789, "beta_dpo/gap_std": 93.36762237548828, "beta_dpo/loss_margin_mean": 55.80860137939453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.33480176211453744, "grad_norm": 576.836669921875, "learning_rate": 4.2218171073908463e-07, "logits/chosen": -0.46720415353775024, "logits/rejected": -0.4512375593185425, "loss": 2.1248, "step": 228 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.486246794462204, "beta_dpo/beta_margin_grad_std": 0.022847512736916542, "beta_dpo/beta_margin_mean": 0.05515596643090248, "beta_dpo/beta_margin_std": 0.09170445799827576, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1934027224779129, "beta_dpo/gap_mean": 63.560447692871094, "beta_dpo/gap_std": 93.40653991699219, "beta_dpo/loss_margin_mean": 55.15596389770508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.33627019089574156, "grad_norm": 5.151296615600586, "learning_rate": 4.212490049118951e-07, "logits/chosen": -0.52690190076828, "logits/rejected": -0.4995231628417969, "loss": 1.3298, "step": 229 }, { "beta_dpo/beta": 0.7624739408493042, "beta_dpo/beta_margin_grad_mean": -0.14170564711093903, "beta_dpo/beta_margin_grad_std": 0.3462100327014923, "beta_dpo/beta_margin_mean": 65.0893325805664, "beta_dpo/beta_margin_std": 68.40202331542969, "beta_dpo/beta_used": 0.7624739408493042, "beta_dpo/beta_used_raw": 0.7624739408493042, "beta_dpo/gap_mean": 66.75623321533203, "beta_dpo/gap_std": 92.87422180175781, "beta_dpo/loss_margin_mean": 86.35228729248047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3377386196769457, "grad_norm": 2202.667724609375, "learning_rate": 4.203117865141635e-07, "logits/chosen": -0.4267687201499939, "logits/rejected": -0.43476104736328125, "loss": 6.1252, "step": 230 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4862736463546753, "beta_dpo/beta_margin_grad_std": 0.020299429073929787, "beta_dpo/beta_margin_mean": 0.05506381019949913, "beta_dpo/beta_margin_std": 0.0815558135509491, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.2731541395187378, "beta_dpo/gap_mean": 65.62940979003906, "beta_dpo/gap_std": 90.5175552368164, "beta_dpo/loss_margin_mean": 55.06380844116211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3392070484581498, "grad_norm": 3.6118087768554688, "learning_rate": 4.1937008024246625e-07, "logits/chosen": -0.48225754499435425, "logits/rejected": -0.4550408124923706, "loss": 1.3451, "step": 231 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4886838495731354, "beta_dpo/beta_margin_grad_std": 0.02164299599826336, "beta_dpo/beta_margin_mean": 0.04541696980595589, "beta_dpo/beta_margin_std": 0.08694743365049362, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.4023609161376953, "beta_dpo/gap_mean": 62.820167541503906, "beta_dpo/gap_std": 90.34293365478516, "beta_dpo/loss_margin_mean": 45.41696548461914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3406754772393539, "grad_norm": 3.735759973526001, "learning_rate": 4.1842391091163933e-07, "logits/chosen": -0.459547221660614, "logits/rejected": -0.43855172395706177, "loss": 1.3498, "step": 232 }, { "beta_dpo/beta": 0.6553887128829956, "beta_dpo/beta_margin_grad_mean": -0.2702082693576813, "beta_dpo/beta_margin_grad_std": 0.43462416529655457, "beta_dpo/beta_margin_mean": 55.271568298339844, "beta_dpo/beta_margin_std": 99.48710632324219, "beta_dpo/beta_used": 0.6553887128829956, "beta_dpo/beta_used_raw": 0.6553887128829956, "beta_dpo/gap_mean": 63.577369689941406, "beta_dpo/gap_std": 93.35490417480469, "beta_dpo/loss_margin_mean": 80.09815216064453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.342143906020558, "grad_norm": 2282.5595703125, "learning_rate": 4.174733034541245e-07, "logits/chosen": -0.4606332778930664, "logits/rejected": -0.46368852257728577, "loss": 13.3621, "step": 233 }, { "beta_dpo/beta": 0.7232382297515869, "beta_dpo/beta_margin_grad_mean": -0.2983703017234802, "beta_dpo/beta_margin_grad_std": 0.284095823764801, "beta_dpo/beta_margin_mean": 74.6033935546875, "beta_dpo/beta_margin_std": 122.55489349365234, "beta_dpo/beta_used": 0.7232382297515869, "beta_dpo/beta_used_raw": 0.4511352777481079, "beta_dpo/gap_mean": 68.05307006835938, "beta_dpo/gap_std": 95.49946594238281, "beta_dpo/loss_margin_mean": 78.76392364501953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3436123348017621, "grad_norm": 2767.52880859375, "learning_rate": 4.165182829193126e-07, "logits/chosen": -0.43197929859161377, "logits/rejected": -0.4625827670097351, "loss": 8.0627, "step": 234 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4878506064414978, "beta_dpo/beta_margin_grad_std": 0.024142302572727203, "beta_dpo/beta_margin_mean": 0.04878600686788559, "beta_dpo/beta_margin_std": 0.09710308909416199, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6707329750061035, "beta_dpo/gap_mean": 64.27421569824219, "beta_dpo/gap_std": 95.8262939453125, "beta_dpo/loss_margin_mean": 48.786006927490234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34508076358296624, "grad_norm": 6.3571929931640625, "learning_rate": 4.1555887447288255e-07, "logits/chosen": -0.4989432692527771, "logits/rejected": -0.4859057068824768, "loss": 1.3526, "step": 235 }, { "beta_dpo/beta": 0.12677739560604095, "beta_dpo/beta_margin_grad_mean": -0.2837068736553192, "beta_dpo/beta_margin_grad_std": 0.26055774092674255, "beta_dpo/beta_margin_mean": 10.552834510803223, "beta_dpo/beta_margin_std": 17.651796340942383, "beta_dpo/beta_used": 0.12677739560604095, "beta_dpo/beta_used_raw": -0.10223083198070526, "beta_dpo/gap_mean": 65.10395050048828, "beta_dpo/gap_std": 94.22532653808594, "beta_dpo/loss_margin_mean": 74.87934875488281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3465491923641703, "grad_norm": 211.59228515625, "learning_rate": 4.1459510339613946e-07, "logits/chosen": -0.46568238735198975, "logits/rejected": -0.4750595688819885, "loss": 0.7942, "step": 236 }, { "beta_dpo/beta": 0.23019856214523315, "beta_dpo/beta_margin_grad_mean": -0.3403577208518982, "beta_dpo/beta_margin_grad_std": 0.2993144690990448, "beta_dpo/beta_margin_mean": 16.547616958618164, "beta_dpo/beta_margin_std": 33.88982391357422, "beta_dpo/beta_used": 0.23019856214523315, "beta_dpo/beta_used_raw": 0.05189155042171478, "beta_dpo/gap_mean": 66.33110046386719, "beta_dpo/gap_std": 94.28207397460938, "beta_dpo/loss_margin_mean": 68.86071014404297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34801762114537443, "grad_norm": 1457.3970947265625, "learning_rate": 4.136269950853473e-07, "logits/chosen": -0.4683570861816406, "logits/rejected": -0.4693116545677185, "loss": 4.2522, "step": 237 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48502317070961, "beta_dpo/beta_margin_grad_std": 0.021210981532931328, "beta_dpo/beta_margin_mean": 0.06012243032455444, "beta_dpo/beta_margin_std": 0.08544077724218369, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6816811561584473, "beta_dpo/gap_mean": 66.35641479492188, "beta_dpo/gap_std": 93.38137817382812, "beta_dpo/loss_margin_mean": 60.12242889404297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34948604992657856, "grad_norm": 4.830297946929932, "learning_rate": 4.126545750510605e-07, "logits/chosen": -0.4283304214477539, "logits/rejected": -0.4415178894996643, "loss": 1.3348, "step": 238 }, { "beta_dpo/beta": 1.085011601448059, "beta_dpo/beta_margin_grad_mean": -0.3376123607158661, "beta_dpo/beta_margin_grad_std": 0.3144451677799225, "beta_dpo/beta_margin_mean": 92.08358001708984, "beta_dpo/beta_margin_std": 171.84555053710938, "beta_dpo/beta_used": 1.085011601448059, "beta_dpo/beta_used_raw": 0.1778862476348877, "beta_dpo/gap_mean": 66.31056213378906, "beta_dpo/gap_std": 91.886962890625, "beta_dpo/loss_margin_mean": 67.11627197265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3509544787077827, "grad_norm": 3145.790283203125, "learning_rate": 4.116778689174514e-07, "logits/chosen": -0.46962711215019226, "logits/rejected": -0.45392659306526184, "loss": 5.1543, "step": 239 }, { "beta_dpo/beta": 0.01710430718958378, "beta_dpo/beta_margin_grad_mean": -0.35482582449913025, "beta_dpo/beta_margin_grad_std": 0.23242245614528656, "beta_dpo/beta_margin_mean": 1.1268202066421509, "beta_dpo/beta_margin_std": 2.160505771636963, "beta_dpo/beta_used": 0.01710430718958378, "beta_dpo/beta_used_raw": -0.3118809163570404, "beta_dpo/gap_mean": 64.25852966308594, "beta_dpo/gap_std": 89.93122863769531, "beta_dpo/loss_margin_mean": 53.26530075073242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3524229074889868, "grad_norm": 40.69264602661133, "learning_rate": 4.106969024216348e-07, "logits/chosen": -0.4985065460205078, "logits/rejected": -0.48068171739578247, "loss": 1.003, "step": 240 }, { "beta_dpo/beta": 0.15345998108386993, "beta_dpo/beta_margin_grad_mean": -0.2729555368423462, "beta_dpo/beta_margin_grad_std": 0.2604886293411255, "beta_dpo/beta_margin_mean": 10.851144790649414, "beta_dpo/beta_margin_std": 16.8941593170166, "beta_dpo/beta_used": 0.15345998108386993, "beta_dpo/beta_used_raw": -0.44774329662323, "beta_dpo/gap_mean": 65.07862854003906, "beta_dpo/gap_std": 90.019287109375, "beta_dpo/loss_margin_mean": 80.56120300292969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35389133627019087, "grad_norm": 423.2123107910156, "learning_rate": 4.097117014129903e-07, "logits/chosen": -0.509527862071991, "logits/rejected": -0.4832276701927185, "loss": 0.7624, "step": 241 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4855991005897522, "beta_dpo/beta_margin_grad_std": 0.023397963494062424, "beta_dpo/beta_margin_mean": 0.05778844282031059, "beta_dpo/beta_margin_std": 0.094021275639534, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.40156319737434387, "beta_dpo/gap_mean": 64.53580474853516, "beta_dpo/gap_std": 93.37384033203125, "beta_dpo/loss_margin_mean": 57.7884407043457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.355359765051395, "grad_norm": 4.439099311828613, "learning_rate": 4.087222918524807e-07, "logits/chosen": -0.45422351360321045, "logits/rejected": -0.42984485626220703, "loss": 1.3321, "step": 242 }, { "beta_dpo/beta": 0.30491340160369873, "beta_dpo/beta_margin_grad_mean": -0.3281807005405426, "beta_dpo/beta_margin_grad_std": 0.29721781611442566, "beta_dpo/beta_margin_mean": 22.148780822753906, "beta_dpo/beta_margin_std": 43.37929153442383, "beta_dpo/beta_used": 0.30491340160369873, "beta_dpo/beta_used_raw": -0.12471228837966919, "beta_dpo/gap_mean": 65.9927978515625, "beta_dpo/gap_std": 92.37184143066406, "beta_dpo/loss_margin_mean": 72.20804595947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3568281938325991, "grad_norm": 1469.662841796875, "learning_rate": 4.07728699811968e-07, "logits/chosen": -0.44465339183807373, "logits/rejected": -0.4099007844924927, "loss": 4.1283, "step": 243 }, { "beta_dpo/beta": 0.12539884448051453, "beta_dpo/beta_margin_grad_mean": -0.33266112208366394, "beta_dpo/beta_margin_grad_std": 0.2994270622730255, "beta_dpo/beta_margin_mean": 8.772866249084473, "beta_dpo/beta_margin_std": 14.808113098144531, "beta_dpo/beta_used": 0.12539884448051453, "beta_dpo/beta_used_raw": -0.010177649557590485, "beta_dpo/gap_mean": 67.40866088867188, "beta_dpo/gap_std": 90.40948486328125, "beta_dpo/loss_margin_mean": 73.00894165039062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35829662261380324, "grad_norm": 321.0909118652344, "learning_rate": 4.067309514735267e-07, "logits/chosen": -0.49787259101867676, "logits/rejected": -0.4910111427307129, "loss": 1.3698, "step": 244 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48563241958618164, "beta_dpo/beta_margin_grad_std": 0.025404594838619232, "beta_dpo/beta_margin_mean": 0.057682327926158905, "beta_dpo/beta_margin_std": 0.10235247761011124, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.7166011333465576, "beta_dpo/gap_mean": 67.58259582519531, "beta_dpo/gap_std": 91.15482330322266, "beta_dpo/loss_margin_mean": 57.68232727050781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35976505139500736, "grad_norm": 3.6279404163360596, "learning_rate": 4.057290731287531e-07, "logits/chosen": -0.5001641511917114, "logits/rejected": -0.4671769142150879, "loss": 1.3501, "step": 245 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48544150590896606, "beta_dpo/beta_margin_grad_std": 0.021831955760717392, "beta_dpo/beta_margin_mean": 0.05841972678899765, "beta_dpo/beta_margin_std": 0.08776440471410751, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5867970585823059, "beta_dpo/gap_mean": 64.41853332519531, "beta_dpo/gap_std": 90.19287872314453, "beta_dpo/loss_margin_mean": 58.41972351074219, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36123348017621143, "grad_norm": 4.705582618713379, "learning_rate": 4.047230911780736e-07, "logits/chosen": -0.5271105766296387, "logits/rejected": -0.49014580249786377, "loss": 1.3351, "step": 246 }, { "beta_dpo/beta": 0.1847115010023117, "beta_dpo/beta_margin_grad_mean": -0.2965923547744751, "beta_dpo/beta_margin_grad_std": 0.28494712710380554, "beta_dpo/beta_margin_mean": 15.352115631103516, "beta_dpo/beta_margin_std": 25.77711296081543, "beta_dpo/beta_used": 0.1847115010023117, "beta_dpo/beta_used_raw": 0.06344389915466309, "beta_dpo/gap_mean": 68.1501235961914, "beta_dpo/gap_std": 92.23121643066406, "beta_dpo/loss_margin_mean": 90.32220458984375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36270190895741555, "grad_norm": 471.9046325683594, "learning_rate": 4.0371303213004814e-07, "logits/chosen": -0.44840526580810547, "logits/rejected": -0.45401185750961304, "loss": 2.3625, "step": 247 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48346683382987976, "beta_dpo/beta_margin_grad_std": 0.018162554129958153, "beta_dpo/beta_margin_mean": 0.06628952920436859, "beta_dpo/beta_margin_std": 0.07302607595920563, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5669313669204712, "beta_dpo/gap_mean": 68.87054443359375, "beta_dpo/gap_std": 89.18070220947266, "beta_dpo/loss_margin_mean": 66.2895278930664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3641703377386197, "grad_norm": 4.8645339012146, "learning_rate": 4.0269892260067197e-07, "logits/chosen": -0.45725005865097046, "logits/rejected": -0.47495073080062866, "loss": 1.3302, "step": 248 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48953747749328613, "beta_dpo/beta_margin_grad_std": 0.019871097058057785, "beta_dpo/beta_margin_mean": 0.041934434324502945, "beta_dpo/beta_margin_std": 0.07966778427362442, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1384837627410889, "beta_dpo/gap_mean": 64.90923309326172, "beta_dpo/gap_std": 87.11177825927734, "beta_dpo/loss_margin_mean": 41.93442916870117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3656387665198238, "grad_norm": 6.0476884841918945, "learning_rate": 4.0168078931267426e-07, "logits/chosen": -0.47966477274894714, "logits/rejected": -0.45807725191116333, "loss": 1.3437, "step": 249 }, { "beta_dpo/beta": 0.3004174530506134, "beta_dpo/beta_margin_grad_mean": -0.277651309967041, "beta_dpo/beta_margin_grad_std": 0.2621324062347412, "beta_dpo/beta_margin_mean": 21.81104278564453, "beta_dpo/beta_margin_std": 32.62987518310547, "beta_dpo/beta_used": 0.3004174530506134, "beta_dpo/beta_used_raw": 0.17717288434505463, "beta_dpo/gap_mean": 62.63585662841797, "beta_dpo/gap_std": 83.05741119384766, "beta_dpo/loss_margin_mean": 62.211456298828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3671071953010279, "grad_norm": 598.4202270507812, "learning_rate": 4.006586590948141e-07, "logits/chosen": -0.458739697933197, "logits/rejected": -0.40397346019744873, "loss": 0.7885, "step": 250 }, { "beta_dpo/beta": 0.25873419642448425, "beta_dpo/beta_margin_grad_mean": -0.35796087980270386, "beta_dpo/beta_margin_grad_std": 0.3145868182182312, "beta_dpo/beta_margin_mean": 16.468461990356445, "beta_dpo/beta_margin_std": 38.37507629394531, "beta_dpo/beta_used": 0.25873419642448425, "beta_dpo/beta_used_raw": -0.07390487194061279, "beta_dpo/gap_mean": 62.36948013305664, "beta_dpo/gap_std": 82.18414306640625, "beta_dpo/loss_margin_mean": 55.00004959106445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.368575624082232, "grad_norm": 993.1358032226562, "learning_rate": 3.9963255888117325e-07, "logits/chosen": -0.45881718397140503, "logits/rejected": -0.43461471796035767, "loss": 1.1642, "step": 251 }, { "beta_dpo/beta": 0.43845975399017334, "beta_dpo/beta_margin_grad_mean": -0.27507588267326355, "beta_dpo/beta_margin_grad_std": 0.2723042070865631, "beta_dpo/beta_margin_mean": 30.300657272338867, "beta_dpo/beta_margin_std": 50.934173583984375, "beta_dpo/beta_used": 0.43845975399017334, "beta_dpo/beta_used_raw": 0.2833039164543152, "beta_dpo/gap_mean": 61.836875915527344, "beta_dpo/gap_std": 78.91354370117188, "beta_dpo/loss_margin_mean": 61.227333068847656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3700440528634361, "grad_norm": 1931.18115234375, "learning_rate": 3.9860251571044666e-07, "logits/chosen": -0.5125927925109863, "logits/rejected": -0.47563207149505615, "loss": 1.6671, "step": 252 }, { "beta_dpo/beta": 0.13669037818908691, "beta_dpo/beta_margin_grad_mean": -0.31530076265335083, "beta_dpo/beta_margin_grad_std": 0.2846486270427704, "beta_dpo/beta_margin_mean": 11.10105037689209, "beta_dpo/beta_margin_std": 19.28214454650879, "beta_dpo/beta_used": 0.13669037818908691, "beta_dpo/beta_used_raw": -0.5888211727142334, "beta_dpo/gap_mean": 60.489776611328125, "beta_dpo/gap_std": 77.81178283691406, "beta_dpo/loss_margin_mean": 61.38548278808594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37151248164464024, "grad_norm": 569.2015991210938, "learning_rate": 3.9756855672522986e-07, "logits/chosen": -0.49557358026504517, "logits/rejected": -0.4879855513572693, "loss": 1.364, "step": 253 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4855990707874298, "beta_dpo/beta_margin_grad_std": 0.02238706313073635, "beta_dpo/beta_margin_mean": 0.05776969715952873, "beta_dpo/beta_margin_std": 0.08991079777479172, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.21809083223342896, "beta_dpo/gap_mean": 61.257843017578125, "beta_dpo/gap_std": 80.37059020996094, "beta_dpo/loss_margin_mean": 57.76969528198242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37298091042584436, "grad_norm": 4.005491733551025, "learning_rate": 3.965307091713037e-07, "logits/chosen": -0.47500473260879517, "logits/rejected": -0.46256011724472046, "loss": 1.3323, "step": 254 }, { "beta_dpo/beta": 0.21867026388645172, "beta_dpo/beta_margin_grad_mean": -0.3410184681415558, "beta_dpo/beta_margin_grad_std": 0.3134188652038574, "beta_dpo/beta_margin_mean": 18.47532844543457, "beta_dpo/beta_margin_std": 37.53182601928711, "beta_dpo/beta_used": 0.21867026388645172, "beta_dpo/beta_used_raw": -0.4761512279510498, "beta_dpo/gap_mean": 61.839927673339844, "beta_dpo/gap_std": 83.36296844482422, "beta_dpo/loss_margin_mean": 60.37783432006836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3744493392070485, "grad_norm": 1103.354248046875, "learning_rate": 3.954890003969163e-07, "logits/chosen": -0.4370883107185364, "logits/rejected": -0.4320235848426819, "loss": 2.8414, "step": 255 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48553693294525146, "beta_dpo/beta_margin_grad_std": 0.021008189767599106, "beta_dpo/beta_margin_mean": 0.05798688158392906, "beta_dpo/beta_margin_std": 0.08430825173854828, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7590247988700867, "beta_dpo/gap_mean": 60.238243103027344, "beta_dpo/gap_std": 83.42945861816406, "beta_dpo/loss_margin_mean": 57.98687744140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37591776798825255, "grad_norm": 5.088190078735352, "learning_rate": 3.944434578520628e-07, "logits/chosen": -0.43487805128097534, "logits/rejected": -0.4386810064315796, "loss": 1.3417, "step": 256 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48144304752349854, "beta_dpo/beta_margin_grad_std": 0.024765780195593834, "beta_dpo/beta_margin_mean": 0.07454907149076462, "beta_dpo/beta_margin_std": 0.09976498037576675, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8660670518875122, "beta_dpo/gap_mean": 63.066001892089844, "beta_dpo/gap_std": 86.74974060058594, "beta_dpo/loss_margin_mean": 74.549072265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37738619676945667, "grad_norm": 4.403605937957764, "learning_rate": 3.933941090877615e-07, "logits/chosen": -0.4413556456565857, "logits/rejected": -0.42769724130630493, "loss": 1.3407, "step": 257 }, { "beta_dpo/beta": 0.8561594486236572, "beta_dpo/beta_margin_grad_mean": -0.24124778807163239, "beta_dpo/beta_margin_grad_std": 0.42249229550361633, "beta_dpo/beta_margin_mean": 54.67416000366211, "beta_dpo/beta_margin_std": 84.30635070800781, "beta_dpo/beta_used": 0.8561594486236572, "beta_dpo/beta_used_raw": 0.8561594486236572, "beta_dpo/gap_mean": 62.323787689208984, "beta_dpo/gap_std": 87.7547607421875, "beta_dpo/loss_margin_mean": 63.648921966552734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3788546255506608, "grad_norm": 3212.95654296875, "learning_rate": 3.923409817553284e-07, "logits/chosen": -0.39796602725982666, "logits/rejected": -0.39811059832572937, "loss": 6.21, "step": 258 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48512884974479675, "beta_dpo/beta_margin_grad_std": 0.02116353064775467, "beta_dpo/beta_margin_mean": 0.05964351072907448, "beta_dpo/beta_margin_std": 0.08494514971971512, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3002283573150635, "beta_dpo/gap_mean": 62.94316864013672, "beta_dpo/gap_std": 88.68659973144531, "beta_dpo/loss_margin_mean": 59.64350891113281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3803230543318649, "grad_norm": 5.502564430236816, "learning_rate": 3.9128410360564793e-07, "logits/chosen": -0.45921239256858826, "logits/rejected": -0.4577338993549347, "loss": 1.3481, "step": 259 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48271456360816956, "beta_dpo/beta_margin_grad_std": 0.022007808089256287, "beta_dpo/beta_margin_mean": 0.06942640990018845, "beta_dpo/beta_margin_std": 0.08899199217557907, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4817598760128021, "beta_dpo/gap_mean": 62.76177215576172, "beta_dpo/gap_std": 87.07768249511719, "beta_dpo/loss_margin_mean": 69.42640686035156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.38179148311306904, "grad_norm": 6.234367847442627, "learning_rate": 3.9022350248844246e-07, "logits/chosen": -0.42374077439308167, "logits/rejected": -0.44464540481567383, "loss": 1.3345, "step": 260 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48329126834869385, "beta_dpo/beta_margin_grad_std": 0.022538091987371445, "beta_dpo/beta_margin_mean": 0.06707046180963516, "beta_dpo/beta_margin_std": 0.09085685759782791, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.943615198135376, "beta_dpo/gap_mean": 64.44114685058594, "beta_dpo/gap_std": 89.06988525390625, "beta_dpo/loss_margin_mean": 67.0704574584961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3832599118942731, "grad_norm": 4.799732208251953, "learning_rate": 3.891592063515376e-07, "logits/chosen": -0.3694385290145874, "logits/rejected": -0.3720252513885498, "loss": 1.3409, "step": 261 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4851832985877991, "beta_dpo/beta_margin_grad_std": 0.018579039722681046, "beta_dpo/beta_margin_mean": 0.05938103049993515, "beta_dpo/beta_margin_std": 0.07453680038452148, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3785286545753479, "beta_dpo/gap_mean": 63.341896057128906, "beta_dpo/gap_std": 86.87582397460938, "beta_dpo/loss_margin_mean": 59.38102722167969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.38472834067547723, "grad_norm": 4.288631916046143, "learning_rate": 3.880912432401264e-07, "logits/chosen": -0.3830975890159607, "logits/rejected": -0.3654525876045227, "loss": 1.3326, "step": 262 }, { "beta_dpo/beta": 0.7164207696914673, "beta_dpo/beta_margin_grad_mean": -0.31974849104881287, "beta_dpo/beta_margin_grad_std": 0.30375197529792786, "beta_dpo/beta_margin_mean": 71.56385803222656, "beta_dpo/beta_margin_std": 131.27561950683594, "beta_dpo/beta_used": 0.7164207696914673, "beta_dpo/beta_used_raw": 0.651368260383606, "beta_dpo/gap_mean": 66.71192932128906, "beta_dpo/gap_std": 88.3709487915039, "beta_dpo/loss_margin_mean": 81.06204986572266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.38619676945668135, "grad_norm": 1847.1041259765625, "learning_rate": 3.870196412960302e-07, "logits/chosen": -0.43336668610572815, "logits/rejected": -0.40536999702453613, "loss": 3.3255, "step": 263 }, { "beta_dpo/beta": 0.32210445404052734, "beta_dpo/beta_margin_grad_mean": -0.3196498155593872, "beta_dpo/beta_margin_grad_std": 0.2992617189884186, "beta_dpo/beta_margin_mean": 26.927030563354492, "beta_dpo/beta_margin_std": 47.48490524291992, "beta_dpo/beta_used": 0.32210445404052734, "beta_dpo/beta_used_raw": -0.5231786966323853, "beta_dpo/gap_mean": 66.88683319091797, "beta_dpo/gap_std": 88.364501953125, "beta_dpo/loss_margin_mean": 62.8607292175293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3876651982378855, "grad_norm": 1272.9935302734375, "learning_rate": 3.8594442875695665e-07, "logits/chosen": -0.47683650255203247, "logits/rejected": -0.4689565896987915, "loss": 3.0123, "step": 264 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4836253225803375, "beta_dpo/beta_margin_grad_std": 0.024147428572177887, "beta_dpo/beta_margin_mean": 0.06574657559394836, "beta_dpo/beta_margin_std": 0.097164586186409, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7309384942054749, "beta_dpo/gap_mean": 65.43952941894531, "beta_dpo/gap_std": 89.35261535644531, "beta_dpo/loss_margin_mean": 65.74657440185547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3891336270190896, "grad_norm": 5.106090545654297, "learning_rate": 3.848656339557562e-07, "logits/chosen": -0.44355565309524536, "logits/rejected": -0.42892855405807495, "loss": 1.3367, "step": 265 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48562902212142944, "beta_dpo/beta_margin_grad_std": 0.02202366106212139, "beta_dpo/beta_margin_mean": 0.05766104906797409, "beta_dpo/beta_margin_std": 0.08847023546695709, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3030107021331787, "beta_dpo/gap_mean": 64.68086242675781, "beta_dpo/gap_std": 90.58798217773438, "beta_dpo/loss_margin_mean": 57.66104507446289, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.39060205580029367, "grad_norm": 3.584993362426758, "learning_rate": 3.8378328531967507e-07, "logits/chosen": -0.5003777146339417, "logits/rejected": -0.4550362229347229, "loss": 1.3305, "step": 266 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4839051365852356, "beta_dpo/beta_margin_grad_std": 0.021753991022706032, "beta_dpo/beta_margin_mean": 0.06459161639213562, "beta_dpo/beta_margin_std": 0.08754534274339676, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.679577112197876, "beta_dpo/gap_mean": 64.24443817138672, "beta_dpo/gap_std": 89.84454345703125, "beta_dpo/loss_margin_mean": 64.59161376953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3920704845814978, "grad_norm": 5.026149272918701, "learning_rate": 3.8269741136960646e-07, "logits/chosen": -0.46633046865463257, "logits/rejected": -0.4374736547470093, "loss": 1.3368, "step": 267 }, { "beta_dpo/beta": 0.4272679090499878, "beta_dpo/beta_margin_grad_mean": -0.3774115741252899, "beta_dpo/beta_margin_grad_std": 0.3255773186683655, "beta_dpo/beta_margin_mean": 26.88530731201172, "beta_dpo/beta_margin_std": 64.07011413574219, "beta_dpo/beta_used": 0.4272679090499878, "beta_dpo/beta_used_raw": 0.28859809041023254, "beta_dpo/gap_mean": 64.24163055419922, "beta_dpo/gap_std": 89.63772583007812, "beta_dpo/loss_margin_mean": 64.2711410522461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3935389133627019, "grad_norm": 2970.7353515625, "learning_rate": 3.8160804071933894e-07, "logits/chosen": -0.4358275532722473, "logits/rejected": -0.44389188289642334, "loss": 5.9535, "step": 268 }, { "beta_dpo/beta": 0.7696582078933716, "beta_dpo/beta_margin_grad_mean": -0.31902071833610535, "beta_dpo/beta_margin_grad_std": 0.45872315764427185, "beta_dpo/beta_margin_mean": 70.0230484008789, "beta_dpo/beta_margin_std": 98.9859390258789, "beta_dpo/beta_used": 0.7696582078933716, "beta_dpo/beta_used_raw": 0.7696582078933716, "beta_dpo/gap_mean": 67.56047821044922, "beta_dpo/gap_std": 95.0364990234375, "beta_dpo/loss_margin_mean": 89.27982330322266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.39500734214390604, "grad_norm": 6386.1025390625, "learning_rate": 3.8051520207480204e-07, "logits/chosen": -0.4257703721523285, "logits/rejected": -0.40664464235305786, "loss": 17.9159, "step": 269 }, { "beta_dpo/beta": 0.31599855422973633, "beta_dpo/beta_margin_grad_mean": -0.1990150511264801, "beta_dpo/beta_margin_grad_std": 0.38719597458839417, "beta_dpo/beta_margin_mean": 21.739652633666992, "beta_dpo/beta_margin_std": 33.68879318237305, "beta_dpo/beta_used": 0.31599855422973633, "beta_dpo/beta_used_raw": 0.31599855422973633, "beta_dpo/gap_mean": 68.88189697265625, "beta_dpo/gap_std": 98.04679870605469, "beta_dpo/loss_margin_mean": 69.03794860839844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3964757709251101, "grad_norm": 782.6886596679688, "learning_rate": 3.794189242333106e-07, "logits/chosen": -0.5228564739227295, "logits/rejected": -0.5192960500717163, "loss": 2.5132, "step": 270 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48039206862449646, "beta_dpo/beta_margin_grad_std": 0.026033930480480194, "beta_dpo/beta_margin_mean": 0.07876794040203094, "beta_dpo/beta_margin_std": 0.10474507510662079, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6152107119560242, "beta_dpo/gap_mean": 70.80068969726562, "beta_dpo/gap_std": 99.597412109375, "beta_dpo/loss_margin_mean": 78.76793670654297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.39794419970631423, "grad_norm": 5.687714576721191, "learning_rate": 3.7831923608280514e-07, "logits/chosen": -0.45079296827316284, "logits/rejected": -0.4350966811180115, "loss": 1.3298, "step": 271 }, { "beta_dpo/beta": 0.2916773557662964, "beta_dpo/beta_margin_grad_mean": -0.34444308280944824, "beta_dpo/beta_margin_grad_std": 0.3155882954597473, "beta_dpo/beta_margin_mean": 26.473766326904297, "beta_dpo/beta_margin_std": 43.10868835449219, "beta_dpo/beta_used": 0.2916773557662964, "beta_dpo/beta_used_raw": 0.21146634221076965, "beta_dpo/gap_mean": 74.29582214355469, "beta_dpo/gap_std": 98.69171905517578, "beta_dpo/loss_margin_mean": 96.75138854980469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.39941262848751835, "grad_norm": 3469.978759765625, "learning_rate": 3.772161666010912e-07, "logits/chosen": -0.4280936121940613, "logits/rejected": -0.4439089596271515, "loss": 2.403, "step": 272 }, { "beta_dpo/beta": 0.4497944712638855, "beta_dpo/beta_margin_grad_mean": -0.30444207787513733, "beta_dpo/beta_margin_grad_std": 0.294939249753952, "beta_dpo/beta_margin_mean": 40.1925048828125, "beta_dpo/beta_margin_std": 79.06779479980469, "beta_dpo/beta_used": 0.4497944712638855, "beta_dpo/beta_used_raw": -0.13546743988990784, "beta_dpo/gap_mean": 76.61572265625, "beta_dpo/gap_std": 100.23278045654297, "beta_dpo/loss_margin_mean": 77.89697265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4008810572687225, "grad_norm": 1977.9761962890625, "learning_rate": 3.761097448550755e-07, "logits/chosen": -0.4547615647315979, "logits/rejected": -0.4396814703941345, "loss": 3.9441, "step": 273 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48395276069641113, "beta_dpo/beta_margin_grad_std": 0.021796153858304024, "beta_dpo/beta_margin_mean": 0.06436125934123993, "beta_dpo/beta_margin_std": 0.08749227970838547, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6583735942840576, "beta_dpo/gap_mean": 73.87464141845703, "beta_dpo/gap_std": 97.98983001708984, "beta_dpo/loss_margin_mean": 64.36125183105469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4023494860499266, "grad_norm": 4.778660774230957, "learning_rate": 3.75e-07, "logits/chosen": -0.4540286064147949, "logits/rejected": -0.43437108397483826, "loss": 1.3435, "step": 274 }, { "beta_dpo/beta": 0.3658776581287384, "beta_dpo/beta_margin_grad_mean": -0.30500340461730957, "beta_dpo/beta_margin_grad_std": 0.2937050759792328, "beta_dpo/beta_margin_mean": 28.91089630126953, "beta_dpo/beta_margin_std": 53.39341354370117, "beta_dpo/beta_used": 0.3658776581287384, "beta_dpo/beta_used_raw": -0.2188054919242859, "beta_dpo/gap_mean": 73.8324203491211, "beta_dpo/gap_std": 97.02469635009766, "beta_dpo/loss_margin_mean": 74.46902465820312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40381791483113066, "grad_norm": 1107.3209228515625, "learning_rate": 3.738869612786737e-07, "logits/chosen": -0.48561912775039673, "logits/rejected": -0.4850524365901947, "loss": 1.4477, "step": 275 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48285502195358276, "beta_dpo/beta_margin_grad_std": 0.027132032439112663, "beta_dpo/beta_margin_mean": 0.06888844817876816, "beta_dpo/beta_margin_std": 0.10926186293363571, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6487220525741577, "beta_dpo/gap_mean": 73.41490173339844, "beta_dpo/gap_std": 99.17544555664062, "beta_dpo/loss_margin_mean": 68.88844299316406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4052863436123348, "grad_norm": 4.309329986572266, "learning_rate": 3.7277065802070204e-07, "logits/chosen": -0.4739760756492615, "logits/rejected": -0.4428936541080475, "loss": 1.328, "step": 276 }, { "beta_dpo/beta": 0.2809670865535736, "beta_dpo/beta_margin_grad_mean": -0.3351666331291199, "beta_dpo/beta_margin_grad_std": 0.3102318048477173, "beta_dpo/beta_margin_mean": 21.383647918701172, "beta_dpo/beta_margin_std": 38.81602478027344, "beta_dpo/beta_used": 0.2809670865535736, "beta_dpo/beta_used_raw": -0.2556490898132324, "beta_dpo/gap_mean": 71.44065856933594, "beta_dpo/gap_std": 96.77009582519531, "beta_dpo/loss_margin_mean": 65.0876693725586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4067547723935389, "grad_norm": 959.0798950195312, "learning_rate": 3.71651119641714e-07, "logits/chosen": -0.4775172770023346, "logits/rejected": -0.4674876928329468, "loss": 3.3232, "step": 277 }, { "beta_dpo/beta": 0.09777142852544785, "beta_dpo/beta_margin_grad_mean": -0.3435121774673462, "beta_dpo/beta_margin_grad_std": 0.3006548285484314, "beta_dpo/beta_margin_mean": 9.16122055053711, "beta_dpo/beta_margin_std": 16.98973274230957, "beta_dpo/beta_used": 0.09777142852544785, "beta_dpo/beta_used_raw": -0.4377209544181824, "beta_dpo/gap_mean": 73.7154541015625, "beta_dpo/gap_std": 97.09827423095703, "beta_dpo/loss_margin_mean": 81.99968719482422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40822320117474303, "grad_norm": 546.0422973632812, "learning_rate": 3.705283756425872e-07, "logits/chosen": -0.5163074731826782, "logits/rejected": -0.5155045390129089, "loss": 1.8696, "step": 278 }, { "beta_dpo/beta": 0.1939535290002823, "beta_dpo/beta_margin_grad_mean": -0.31403571367263794, "beta_dpo/beta_margin_grad_std": 0.2941286265850067, "beta_dpo/beta_margin_mean": 18.408206939697266, "beta_dpo/beta_margin_std": 33.89780807495117, "beta_dpo/beta_used": 0.1939535290002823, "beta_dpo/beta_used_raw": -0.6678704023361206, "beta_dpo/gap_mean": 75.48173522949219, "beta_dpo/gap_std": 98.2899169921875, "beta_dpo/loss_margin_mean": 83.40555572509766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40969162995594716, "grad_norm": 677.2081909179688, "learning_rate": 3.6940245560867e-07, "logits/chosen": -0.4336978495121002, "logits/rejected": -0.42833346128463745, "loss": 2.2112, "step": 279 }, { "beta_dpo/beta": 0.5752575993537903, "beta_dpo/beta_margin_grad_mean": -0.13274730741977692, "beta_dpo/beta_margin_grad_std": 0.31232884526252747, "beta_dpo/beta_margin_mean": 43.74085235595703, "beta_dpo/beta_margin_std": 54.59124755859375, "beta_dpo/beta_used": 0.5752575993537903, "beta_dpo/beta_used_raw": 0.5752575993537903, "beta_dpo/gap_mean": 75.63088989257812, "beta_dpo/gap_std": 95.76606750488281, "beta_dpo/loss_margin_mean": 77.03679656982422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4111600587371512, "grad_norm": 373.3504943847656, "learning_rate": 3.6827338920900253e-07, "logits/chosen": -0.4546999931335449, "logits/rejected": -0.45433032512664795, "loss": 0.6316, "step": 280 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.483308345079422, "beta_dpo/beta_margin_grad_std": 0.020045718178153038, "beta_dpo/beta_margin_mean": 0.06684371829032898, "beta_dpo/beta_margin_std": 0.0805417075753212, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.12925973534584045, "beta_dpo/gap_mean": 73.92355346679688, "beta_dpo/gap_std": 93.38307189941406, "beta_dpo/loss_margin_mean": 66.84371185302734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41262848751835535, "grad_norm": 8.058195114135742, "learning_rate": 3.6714120619553435e-07, "logits/chosen": -0.49369382858276367, "logits/rejected": -0.46913886070251465, "loss": 1.3182, "step": 281 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48673728108406067, "beta_dpo/beta_margin_grad_std": 0.019268635660409927, "beta_dpo/beta_margin_mean": 0.05318976566195488, "beta_dpo/beta_margin_std": 0.07743314653635025, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6386913061141968, "beta_dpo/gap_mean": 70.5438003540039, "beta_dpo/gap_std": 89.866455078125, "beta_dpo/loss_margin_mean": 53.189762115478516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41409691629955947, "grad_norm": 3.629554033279419, "learning_rate": 3.660059364023408e-07, "logits/chosen": -0.5274136066436768, "logits/rejected": -0.5010647773742676, "loss": 1.346, "step": 282 }, { "beta_dpo/beta": 0.838965654373169, "beta_dpo/beta_margin_grad_mean": -0.12155988812446594, "beta_dpo/beta_margin_grad_std": 0.31926241517066956, "beta_dpo/beta_margin_mean": 74.75791931152344, "beta_dpo/beta_margin_std": 82.98445892333984, "beta_dpo/beta_used": 0.838965654373169, "beta_dpo/beta_used_raw": 0.838965654373169, "beta_dpo/gap_mean": 71.56987762451172, "beta_dpo/gap_std": 89.52423095703125, "beta_dpo/loss_margin_mean": 84.6165771484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4155653450807636, "grad_norm": 462.5566711425781, "learning_rate": 3.6486760974483685e-07, "logits/chosen": -0.49745476245880127, "logits/rejected": -0.48693162202835083, "loss": 1.5468, "step": 283 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47972315549850464, "beta_dpo/beta_margin_grad_std": 0.02312047965824604, "beta_dpo/beta_margin_mean": 0.08142410963773727, "beta_dpo/beta_margin_std": 0.0932619571685791, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.19312117993831635, "beta_dpo/gap_mean": 74.1982650756836, "beta_dpo/gap_std": 90.27053833007812, "beta_dpo/loss_margin_mean": 81.42410278320312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4170337738619677, "grad_norm": 6.851167678833008, "learning_rate": 3.6372625621898863e-07, "logits/chosen": -0.5042980313301086, "logits/rejected": -0.4991450905799866, "loss": 1.3195, "step": 284 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48195090889930725, "beta_dpo/beta_margin_grad_std": 0.0216471329331398, "beta_dpo/beta_margin_mean": 0.07239013910293579, "beta_dpo/beta_margin_std": 0.08699988573789597, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.30619388818740845, "beta_dpo/gap_mean": 73.66974639892578, "beta_dpo/gap_std": 90.04093933105469, "beta_dpo/loss_margin_mean": 72.39013671875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4185022026431718, "grad_norm": 7.985069274902344, "learning_rate": 3.625819059005228e-07, "logits/chosen": -0.4940687417984009, "logits/rejected": -0.48543840646743774, "loss": 1.3215, "step": 285 }, { "beta_dpo/beta": 0.07317624241113663, "beta_dpo/beta_margin_grad_mean": -0.3578983247280121, "beta_dpo/beta_margin_grad_std": 0.3003653585910797, "beta_dpo/beta_margin_mean": 5.437658786773682, "beta_dpo/beta_margin_std": 11.020866394042969, "beta_dpo/beta_used": 0.07317624241113663, "beta_dpo/beta_used_raw": -0.39668411016464233, "beta_dpo/gap_mean": 74.31663513183594, "beta_dpo/gap_std": 90.61752319335938, "beta_dpo/loss_margin_mean": 73.65949249267578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4199706314243759, "grad_norm": 274.8042907714844, "learning_rate": 3.614345889441346e-07, "logits/chosen": -0.4832392930984497, "logits/rejected": -0.46001118421554565, "loss": 1.2468, "step": 286 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48475971817970276, "beta_dpo/beta_margin_grad_std": 0.022152835503220558, "beta_dpo/beta_margin_mean": 0.061166539788246155, "beta_dpo/beta_margin_std": 0.08909157663583755, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9690273404121399, "beta_dpo/gap_mean": 72.43344116210938, "beta_dpo/gap_std": 90.36245727539062, "beta_dpo/loss_margin_mean": 61.16653823852539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42143906020558003, "grad_norm": 4.072757720947266, "learning_rate": 3.6028433558269275e-07, "logits/chosen": -0.46784478425979614, "logits/rejected": -0.44443923234939575, "loss": 1.3334, "step": 287 }, { "beta_dpo/beta": 0.7897164821624756, "beta_dpo/beta_margin_grad_mean": -0.3040521442890167, "beta_dpo/beta_margin_grad_std": 0.2914998233318329, "beta_dpo/beta_margin_mean": 85.403076171875, "beta_dpo/beta_margin_std": 137.9335479736328, "beta_dpo/beta_used": 0.7897164821624756, "beta_dpo/beta_used_raw": 0.6619566082954407, "beta_dpo/gap_mean": 74.12348937988281, "beta_dpo/gap_std": 91.28290557861328, "beta_dpo/loss_margin_mean": 79.14554595947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42290748898678415, "grad_norm": 2659.9658203125, "learning_rate": 3.5913117612644327e-07, "logits/chosen": -0.44504302740097046, "logits/rejected": -0.4315459430217743, "loss": 1.2203, "step": 288 }, { "beta_dpo/beta": 0.629094123840332, "beta_dpo/beta_margin_grad_mean": -0.3004843592643738, "beta_dpo/beta_margin_grad_std": 0.28447577357292175, "beta_dpo/beta_margin_mean": 68.1593246459961, "beta_dpo/beta_margin_std": 111.16494750976562, "beta_dpo/beta_used": 0.629094123840332, "beta_dpo/beta_used_raw": -0.10066229104995728, "beta_dpo/gap_mean": 74.27970886230469, "beta_dpo/gap_std": 92.71040344238281, "beta_dpo/loss_margin_mean": 88.7456283569336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4243759177679883, "grad_norm": 1766.22216796875, "learning_rate": 3.5797514096221024e-07, "logits/chosen": -0.35877037048339844, "logits/rejected": -0.3598015010356903, "loss": 2.5223, "step": 289 }, { "beta_dpo/beta": 0.4477105140686035, "beta_dpo/beta_margin_grad_mean": -0.3150025010108948, "beta_dpo/beta_margin_grad_std": 0.30229073762893677, "beta_dpo/beta_margin_mean": 46.98125076293945, "beta_dpo/beta_margin_std": 88.1680908203125, "beta_dpo/beta_used": 0.4477105140686035, "beta_dpo/beta_used_raw": -0.1657930314540863, "beta_dpo/gap_mean": 80.18174743652344, "beta_dpo/gap_std": 98.11917877197266, "beta_dpo/loss_margin_mean": 104.16016387939453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42584434654919234, "grad_norm": 1004.2230224609375, "learning_rate": 3.568162605525952e-07, "logits/chosen": -0.3618127703666687, "logits/rejected": -0.38121217489242554, "loss": 3.118, "step": 290 }, { "beta_dpo/beta": 0.3404870629310608, "beta_dpo/beta_margin_grad_mean": -0.2316586971282959, "beta_dpo/beta_margin_grad_std": 0.40322452783584595, "beta_dpo/beta_margin_mean": 25.296192169189453, "beta_dpo/beta_margin_std": 31.709936141967773, "beta_dpo/beta_used": 0.3404870629310608, "beta_dpo/beta_used_raw": 0.3404870629310608, "beta_dpo/gap_mean": 79.38957214355469, "beta_dpo/gap_std": 99.54486083984375, "beta_dpo/loss_margin_mean": 72.23591613769531, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42731277533039647, "grad_norm": 952.44775390625, "learning_rate": 3.5565456543517485e-07, "logits/chosen": -0.42019423842430115, "logits/rejected": -0.40653547644615173, "loss": 2.0799, "step": 291 }, { "beta_dpo/beta": 0.12015949934720993, "beta_dpo/beta_margin_grad_mean": -0.33290329575538635, "beta_dpo/beta_margin_grad_std": 0.30069440603256226, "beta_dpo/beta_margin_mean": 10.655224800109863, "beta_dpo/beta_margin_std": 21.715547561645508, "beta_dpo/beta_used": 0.12015949934720993, "beta_dpo/beta_used_raw": -0.7626643776893616, "beta_dpo/gap_mean": 79.20477294921875, "beta_dpo/gap_std": 100.69721984863281, "beta_dpo/loss_margin_mean": 78.73019409179688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4287812041116006, "grad_norm": 439.33978271484375, "learning_rate": 3.5449008622169583e-07, "logits/chosen": -0.3626874089241028, "logits/rejected": -0.3548169732093811, "loss": 1.6587, "step": 292 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4831177890300751, "beta_dpo/beta_margin_grad_std": 0.025702647864818573, "beta_dpo/beta_margin_mean": 0.06781422346830368, "beta_dpo/beta_margin_std": 0.10339030623435974, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7132205963134766, "beta_dpo/gap_mean": 77.53086853027344, "beta_dpo/gap_std": 101.82347106933594, "beta_dpo/loss_margin_mean": 67.81421661376953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4302496328928047, "grad_norm": 4.400468349456787, "learning_rate": 3.5332285359726846e-07, "logits/chosen": -0.41298121213912964, "logits/rejected": -0.40352344512939453, "loss": 1.3249, "step": 293 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48102009296417236, "beta_dpo/beta_margin_grad_std": 0.026048097759485245, "beta_dpo/beta_margin_mean": 0.07623665034770966, "beta_dpo/beta_margin_std": 0.10481663793325424, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.2027143239974976, "beta_dpo/gap_mean": 78.0030517578125, "beta_dpo/gap_std": 102.60092163085938, "beta_dpo/loss_margin_mean": 76.23664855957031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43171806167400884, "grad_norm": 4.342075347900391, "learning_rate": 3.5215289831955786e-07, "logits/chosen": -0.380662739276886, "logits/rejected": -0.3861265182495117, "loss": 1.3324, "step": 294 }, { "beta_dpo/beta": 0.2947583496570587, "beta_dpo/beta_margin_grad_mean": -0.40385448932647705, "beta_dpo/beta_margin_grad_std": 0.32800954580307007, "beta_dpo/beta_margin_mean": 19.130741119384766, "beta_dpo/beta_margin_std": 50.656394958496094, "beta_dpo/beta_used": 0.2947583496570587, "beta_dpo/beta_used_raw": 0.05702996253967285, "beta_dpo/gap_mean": 76.71334075927734, "beta_dpo/gap_std": 102.96287536621094, "beta_dpo/loss_margin_mean": 78.26499938964844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4331864904552129, "grad_norm": 1862.5281982421875, "learning_rate": 3.509802512179737e-07, "logits/chosen": -0.37672334909439087, "logits/rejected": -0.3786112368106842, "loss": 8.3528, "step": 295 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48530909419059753, "beta_dpo/beta_margin_grad_std": 0.02513442374765873, "beta_dpo/beta_margin_mean": 0.05899207293987274, "beta_dpo/beta_margin_std": 0.10110720992088318, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0527881383895874, "beta_dpo/gap_mean": 75.4265365600586, "beta_dpo/gap_std": 102.43699645996094, "beta_dpo/loss_margin_mean": 58.992069244384766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.434654919236417, "grad_norm": 4.538437366485596, "learning_rate": 3.498049431928577e-07, "logits/chosen": -0.41676008701324463, "logits/rejected": -0.3972277343273163, "loss": 1.3325, "step": 296 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48259493708610535, "beta_dpo/beta_margin_grad_std": 0.022371800616383553, "beta_dpo/beta_margin_mean": 0.06982959061861038, "beta_dpo/beta_margin_std": 0.0898992121219635, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6135008335113525, "beta_dpo/gap_mean": 73.90296936035156, "beta_dpo/gap_std": 100.1375961303711, "beta_dpo/loss_margin_mean": 69.82958984375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43612334801762115, "grad_norm": 5.51907205581665, "learning_rate": 3.486270052146694e-07, "logits/chosen": -0.35295820236206055, "logits/rejected": -0.3571382761001587, "loss": 1.3264, "step": 297 }, { "beta_dpo/beta": 0.27431046962738037, "beta_dpo/beta_margin_grad_mean": -0.263118177652359, "beta_dpo/beta_margin_grad_std": 0.33494073152542114, "beta_dpo/beta_margin_mean": 20.479074478149414, "beta_dpo/beta_margin_std": 45.33749008178711, "beta_dpo/beta_used": 0.27431046962738037, "beta_dpo/beta_used_raw": 0.27431046962738037, "beta_dpo/gap_mean": 74.29185485839844, "beta_dpo/gap_std": 102.38994598388672, "beta_dpo/loss_margin_mean": 86.00110626220703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43759177679882527, "grad_norm": 738.2294311523438, "learning_rate": 3.474464683231698e-07, "logits/chosen": -0.4162539839744568, "logits/rejected": -0.4425868093967438, "loss": 1.6437, "step": 298 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4808002710342407, "beta_dpo/beta_margin_grad_std": 0.02673073299229145, "beta_dpo/beta_margin_mean": 0.07717499881982803, "beta_dpo/beta_margin_std": 0.10790830105543137, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10428804159164429, "beta_dpo/gap_mean": 74.65848541259766, "beta_dpo/gap_std": 103.56509399414062, "beta_dpo/loss_margin_mean": 77.17499542236328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4390602055800294, "grad_norm": 5.145935535430908, "learning_rate": 3.462633636266041e-07, "logits/chosen": -0.39493227005004883, "logits/rejected": -0.40073153376579285, "loss": 1.3176, "step": 299 }, { "beta_dpo/beta": 0.09059438109397888, "beta_dpo/beta_margin_grad_mean": -0.3587842583656311, "beta_dpo/beta_margin_grad_std": 0.31596502661705017, "beta_dpo/beta_margin_mean": 9.376904487609863, "beta_dpo/beta_margin_std": 18.19443702697754, "beta_dpo/beta_used": 0.09059438109397888, "beta_dpo/beta_used_raw": -0.37631434202194214, "beta_dpo/gap_mean": 77.76226806640625, "beta_dpo/gap_std": 109.28889465332031, "beta_dpo/loss_margin_mean": 91.35057830810547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.44052863436123346, "grad_norm": 932.0242919921875, "learning_rate": 3.4507772230088147e-07, "logits/chosen": -0.3533056974411011, "logits/rejected": -0.35223710536956787, "loss": 2.8698, "step": 300 }, { "epoch": 0.44052863436123346, "eval_beta_dpo/beta": 0.021529680117964745, "eval_beta_dpo/beta_margin_grad_mean": -0.47100841999053955, "eval_beta_dpo/beta_margin_grad_std": 0.05001794174313545, "eval_beta_dpo/beta_margin_mean": 1.7761027812957764, "eval_beta_dpo/beta_margin_std": 2.521554946899414, "eval_beta_dpo/beta_used": 0.021529680117964745, "eval_beta_dpo/beta_used_raw": -1.8358967304229736, "eval_beta_dpo/gap_mean": 79.1242446899414, "eval_beta_dpo/gap_std": 110.1003646850586, "eval_beta_dpo/loss_margin_mean": 46.85930633544922, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.4178045988082886, "eval_logits/rejected": -0.40100225806236267, "eval_loss": 0.8542339205741882, "eval_runtime": 40.2324, "eval_samples_per_second": 58.137, "eval_steps_per_second": 1.839, "step": 300 }, { "beta_dpo/beta": 0.2012784332036972, "beta_dpo/beta_margin_grad_mean": -0.31333795189857483, "beta_dpo/beta_margin_grad_std": 0.2758391499519348, "beta_dpo/beta_margin_mean": 21.419300079345703, "beta_dpo/beta_margin_std": 39.65841293334961, "beta_dpo/beta_used": 0.2012784332036972, "beta_dpo/beta_used_raw": 0.05099296569824219, "beta_dpo/gap_mean": 82.58103942871094, "beta_dpo/gap_std": 111.84109497070312, "beta_dpo/loss_margin_mean": 101.39947509765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4419970631424376, "grad_norm": 1095.8056640625, "learning_rate": 3.4388957558875316e-07, "logits/chosen": -0.36673691868782043, "logits/rejected": -0.3641397953033447, "loss": 2.4685, "step": 301 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48131224513053894, "beta_dpo/beta_margin_grad_std": 0.028863143175840378, "beta_dpo/beta_margin_mean": 0.07514145970344543, "beta_dpo/beta_margin_std": 0.1161830946803093, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.307005763053894, "beta_dpo/gap_mean": 82.75358581542969, "beta_dpo/gap_std": 113.17562103271484, "beta_dpo/loss_margin_mean": 75.1414566040039, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4434654919236417, "grad_norm": 5.6372833251953125, "learning_rate": 3.426989547989902e-07, "logits/chosen": -0.37512508034706116, "logits/rejected": -0.38196590542793274, "loss": 1.3296, "step": 302 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4821236729621887, "beta_dpo/beta_margin_grad_std": 0.03194034472107887, "beta_dpo/beta_margin_mean": 0.07196322828531265, "beta_dpo/beta_margin_std": 0.12872378528118134, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.7821969985961914, "beta_dpo/gap_mean": 80.88421630859375, "beta_dpo/gap_std": 116.31727600097656, "beta_dpo/loss_margin_mean": 71.96322631835938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.44493392070484583, "grad_norm": 5.255090236663818, "learning_rate": 3.4150589130555773e-07, "logits/chosen": -0.38791757822036743, "logits/rejected": -0.36933159828186035, "loss": 1.3394, "step": 303 }, { "beta_dpo/beta": 0.45528510212898254, "beta_dpo/beta_margin_grad_mean": -0.3141389787197113, "beta_dpo/beta_margin_grad_std": 0.2951262891292572, "beta_dpo/beta_margin_mean": 40.59646224975586, "beta_dpo/beta_margin_std": 64.21621704101562, "beta_dpo/beta_used": 0.45528510212898254, "beta_dpo/beta_used_raw": 0.308247447013855, "beta_dpo/gap_mean": 78.1776123046875, "beta_dpo/gap_std": 113.8460693359375, "beta_dpo/loss_margin_mean": 74.2430419921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.44640234948604995, "grad_norm": 2268.49462890625, "learning_rate": 3.403104165467883e-07, "logits/chosen": -0.42312443256378174, "logits/rejected": -0.4081481993198395, "loss": 2.6158, "step": 304 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47965607047080994, "beta_dpo/beta_margin_grad_std": 0.0331178717315197, "beta_dpo/beta_margin_mean": 0.08205502480268478, "beta_dpo/beta_margin_std": 0.13452649116516113, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13185091316699982, "beta_dpo/gap_mean": 79.73931884765625, "beta_dpo/gap_std": 116.2216567993164, "beta_dpo/loss_margin_mean": 82.05501556396484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.447870778267254, "grad_norm": 5.800883769989014, "learning_rate": 3.391125620245535e-07, "logits/chosen": -0.4113423824310303, "logits/rejected": -0.39184314012527466, "loss": 1.3143, "step": 305 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4851844310760498, "beta_dpo/beta_margin_grad_std": 0.02573644183576107, "beta_dpo/beta_margin_mean": 0.05950712412595749, "beta_dpo/beta_margin_std": 0.10371576249599457, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1865973472595215, "beta_dpo/gap_mean": 76.38526916503906, "beta_dpo/gap_std": 114.24805450439453, "beta_dpo/loss_margin_mean": 59.507118225097656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.44933920704845814, "grad_norm": 5.251987457275391, "learning_rate": 3.3791235930343417e-07, "logits/chosen": -0.400661826133728, "logits/rejected": -0.38528114557266235, "loss": 1.3335, "step": 306 }, { "beta_dpo/beta": 0.3818568289279938, "beta_dpo/beta_margin_grad_mean": -0.34861063957214355, "beta_dpo/beta_margin_grad_std": 0.32014045119285583, "beta_dpo/beta_margin_mean": 27.862314224243164, "beta_dpo/beta_margin_std": 64.0865249633789, "beta_dpo/beta_used": 0.3818568289279938, "beta_dpo/beta_used_raw": 0.1832038164138794, "beta_dpo/gap_mean": 76.5669937133789, "beta_dpo/gap_std": 113.13117218017578, "beta_dpo/loss_margin_mean": 80.41350555419922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.45080763582966227, "grad_norm": 1632.386474609375, "learning_rate": 3.367098400098881e-07, "logits/chosen": -0.39632394909858704, "logits/rejected": -0.37825697660446167, "loss": 7.6211, "step": 307 }, { "beta_dpo/beta": 0.35672301054000854, "beta_dpo/beta_margin_grad_mean": -0.21785807609558105, "beta_dpo/beta_margin_grad_std": 0.3981392979621887, "beta_dpo/beta_margin_mean": 32.29079055786133, "beta_dpo/beta_margin_std": 39.89966583251953, "beta_dpo/beta_used": 0.35672301054000854, "beta_dpo/beta_used_raw": 0.35672301054000854, "beta_dpo/gap_mean": 78.43896484375, "beta_dpo/gap_std": 113.47734069824219, "beta_dpo/loss_margin_mean": 90.54447174072266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4522760646108664, "grad_norm": 1437.0244140625, "learning_rate": 3.355050358314172e-07, "logits/chosen": -0.3438449501991272, "logits/rejected": -0.31777122616767883, "loss": 5.1079, "step": 308 }, { "beta_dpo/beta": 0.36354923248291016, "beta_dpo/beta_margin_grad_mean": -0.22801323235034943, "beta_dpo/beta_margin_grad_std": 0.36051756143569946, "beta_dpo/beta_margin_mean": 37.24231719970703, "beta_dpo/beta_margin_std": 56.5272102355957, "beta_dpo/beta_used": 0.36354923248291016, "beta_dpo/beta_used_raw": 0.36354923248291016, "beta_dpo/gap_mean": 78.77679443359375, "beta_dpo/gap_std": 111.2503433227539, "beta_dpo/loss_margin_mean": 83.59400177001953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.45374449339207046, "grad_norm": 1346.0263671875, "learning_rate": 3.3429797851573183e-07, "logits/chosen": -0.3582022190093994, "logits/rejected": -0.3475998640060425, "loss": 3.7664, "step": 309 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4826306700706482, "beta_dpo/beta_margin_grad_std": 0.0230739563703537, "beta_dpo/beta_margin_mean": 0.06967519968748093, "beta_dpo/beta_margin_std": 0.09265855699777603, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7171174883842468, "beta_dpo/gap_mean": 78.03067016601562, "beta_dpo/gap_std": 107.44921875, "beta_dpo/loss_margin_mean": 69.67520141601562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4552129221732746, "grad_norm": 5.082400321960449, "learning_rate": 3.3308869986991487e-07, "logits/chosen": -0.39452001452445984, "logits/rejected": -0.3749483823776245, "loss": 1.3247, "step": 310 }, { "beta_dpo/beta": 0.2502756416797638, "beta_dpo/beta_margin_grad_mean": -0.40929269790649414, "beta_dpo/beta_margin_grad_std": 0.3354220688343048, "beta_dpo/beta_margin_mean": 23.12792205810547, "beta_dpo/beta_margin_std": 54.33233642578125, "beta_dpo/beta_used": 0.2502756416797638, "beta_dpo/beta_used_raw": -0.058561310172080994, "beta_dpo/gap_mean": 79.78893280029297, "beta_dpo/gap_std": 111.02082824707031, "beta_dpo/loss_margin_mean": 86.99933624267578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4566813509544787, "grad_norm": 2260.131591796875, "learning_rate": 3.3187723175958346e-07, "logits/chosen": -0.3645089864730835, "logits/rejected": -0.349958598613739, "loss": 6.6088, "step": 311 }, { "beta_dpo/beta": 0.07158917188644409, "beta_dpo/beta_margin_grad_mean": -0.3369253873825073, "beta_dpo/beta_margin_grad_std": 0.29506799578666687, "beta_dpo/beta_margin_mean": 6.374331474304199, "beta_dpo/beta_margin_std": 13.631952285766602, "beta_dpo/beta_used": 0.07158917188644409, "beta_dpo/beta_used_raw": -0.2892300486564636, "beta_dpo/gap_mean": 79.41765594482422, "beta_dpo/gap_std": 112.60750579833984, "beta_dpo/loss_margin_mean": 79.63638305664062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4581497797356828, "grad_norm": 368.40875244140625, "learning_rate": 3.306636061080487e-07, "logits/chosen": -0.3711026608943939, "logits/rejected": -0.36203962564468384, "loss": 2.0408, "step": 312 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48098939657211304, "beta_dpo/beta_margin_grad_std": 0.025076594203710556, "beta_dpo/beta_margin_mean": 0.07637631893157959, "beta_dpo/beta_margin_std": 0.10121695697307587, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5578416585922241, "beta_dpo/gap_mean": 78.54216003417969, "beta_dpo/gap_std": 110.89816284179688, "beta_dpo/loss_margin_mean": 76.3763198852539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.45961820851688695, "grad_norm": 4.683560371398926, "learning_rate": 3.2944785489547537e-07, "logits/chosen": -0.39467549324035645, "logits/rejected": -0.38951510190963745, "loss": 1.321, "step": 313 }, { "beta_dpo/beta": 0.2938551902770996, "beta_dpo/beta_margin_grad_mean": -0.3812112510204315, "beta_dpo/beta_margin_grad_std": 0.33289316296577454, "beta_dpo/beta_margin_mean": 23.5745906829834, "beta_dpo/beta_margin_std": 54.09528350830078, "beta_dpo/beta_used": 0.2938551902770996, "beta_dpo/beta_used_raw": -0.6258662939071655, "beta_dpo/gap_mean": 79.06829833984375, "beta_dpo/gap_std": 112.64566040039062, "beta_dpo/loss_margin_mean": 75.2780532836914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.461086637298091, "grad_norm": 1395.1510009765625, "learning_rate": 3.2823001015803857e-07, "logits/chosen": -0.4507661461830139, "logits/rejected": -0.44838911294937134, "loss": 3.7032, "step": 314 }, { "beta_dpo/beta": 0.2281070500612259, "beta_dpo/beta_margin_grad_mean": -0.34743332862854004, "beta_dpo/beta_margin_grad_std": 0.3025640845298767, "beta_dpo/beta_margin_mean": 16.0867919921875, "beta_dpo/beta_margin_std": 31.88882827758789, "beta_dpo/beta_used": 0.2281070500612259, "beta_dpo/beta_used_raw": -0.3934894800186157, "beta_dpo/gap_mean": 76.99462890625, "beta_dpo/gap_std": 110.44047546386719, "beta_dpo/loss_margin_mean": 66.10765075683594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46255506607929514, "grad_norm": 879.7577514648438, "learning_rate": 3.270101039870797e-07, "logits/chosen": -0.3693186044692993, "logits/rejected": -0.3700152039527893, "loss": 2.1612, "step": 315 }, { "beta_dpo/beta": 0.716162919998169, "beta_dpo/beta_margin_grad_mean": -0.17893782258033752, "beta_dpo/beta_margin_grad_std": 0.3721279203891754, "beta_dpo/beta_margin_mean": 75.56684112548828, "beta_dpo/beta_margin_std": 110.35047149658203, "beta_dpo/beta_used": 0.716162919998169, "beta_dpo/beta_used_raw": 0.716162919998169, "beta_dpo/gap_mean": 79.88623046875, "beta_dpo/gap_std": 111.00810241699219, "beta_dpo/loss_margin_mean": 95.93524169921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46402349486049926, "grad_norm": 1523.668701171875, "learning_rate": 3.2578816852826086e-07, "logits/chosen": -0.4141780734062195, "logits/rejected": -0.426508367061615, "loss": 2.74, "step": 316 }, { "beta_dpo/beta": 0.6636589169502258, "beta_dpo/beta_margin_grad_mean": -0.2624233067035675, "beta_dpo/beta_margin_grad_std": 0.2798077166080475, "beta_dpo/beta_margin_mean": 66.06298828125, "beta_dpo/beta_margin_std": 109.42732238769531, "beta_dpo/beta_used": 0.6636589169502258, "beta_dpo/beta_used_raw": 0.6636589169502258, "beta_dpo/gap_mean": 83.21510314941406, "beta_dpo/gap_std": 109.098876953125, "beta_dpo/loss_margin_mean": 105.4694595336914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4654919236417034, "grad_norm": 2744.883056640625, "learning_rate": 3.2456423598071783e-07, "logits/chosen": -0.43852800130844116, "logits/rejected": -0.4249088168144226, "loss": 7.8427, "step": 317 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4800810217857361, "beta_dpo/beta_margin_grad_std": 0.024429909884929657, "beta_dpo/beta_margin_mean": 0.07998356968164444, "beta_dpo/beta_margin_std": 0.09835181385278702, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7813102602958679, "beta_dpo/gap_mean": 83.25845336914062, "beta_dpo/gap_std": 107.83091735839844, "beta_dpo/loss_margin_mean": 79.98356628417969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4669603524229075, "grad_norm": 5.620822906494141, "learning_rate": 3.233383385962115e-07, "logits/chosen": -0.4964483976364136, "logits/rejected": -0.46540865302085876, "loss": 1.3202, "step": 318 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4783901572227478, "beta_dpo/beta_margin_grad_std": 0.02400689758360386, "beta_dpo/beta_margin_mean": 0.08676531910896301, "beta_dpo/beta_margin_std": 0.09681374579668045, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.41069674491882324, "beta_dpo/gap_mean": 84.244140625, "beta_dpo/gap_std": 106.17644500732422, "beta_dpo/loss_margin_mean": 86.76531219482422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4684287812041116, "grad_norm": 5.136927604675293, "learning_rate": 3.2211050867827805e-07, "logits/chosen": -0.44059932231903076, "logits/rejected": -0.4612247943878174, "loss": 1.3132, "step": 319 }, { "beta_dpo/beta": 0.028877759352326393, "beta_dpo/beta_margin_grad_mean": -0.3182305097579956, "beta_dpo/beta_margin_grad_std": 0.24575480818748474, "beta_dpo/beta_margin_mean": 2.645266532897949, "beta_dpo/beta_margin_std": 4.285132884979248, "beta_dpo/beta_used": 0.028877759352326393, "beta_dpo/beta_used_raw": -0.33262649178504944, "beta_dpo/gap_mean": 85.014892578125, "beta_dpo/gap_std": 103.82169342041016, "beta_dpo/loss_margin_mean": 86.08537292480469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4698972099853157, "grad_norm": 51.687992095947266, "learning_rate": 3.208807785813777e-07, "logits/chosen": -0.4929655194282532, "logits/rejected": -0.4922058582305908, "loss": 0.9036, "step": 320 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4800475537776947, "beta_dpo/beta_margin_grad_std": 0.028980152681469917, "beta_dpo/beta_margin_mean": 0.08030161261558533, "beta_dpo/beta_margin_std": 0.11732209473848343, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5375549793243408, "beta_dpo/gap_mean": 84.98190307617188, "beta_dpo/gap_std": 106.54576110839844, "beta_dpo/loss_margin_mean": 80.3016128540039, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4713656387665198, "grad_norm": 5.877539157867432, "learning_rate": 3.1964918071004217e-07, "logits/chosen": -0.42156291007995605, "logits/rejected": -0.41359078884124756, "loss": 1.3152, "step": 321 }, { "beta_dpo/beta": 0.6184810400009155, "beta_dpo/beta_margin_grad_mean": -0.202738955616951, "beta_dpo/beta_margin_grad_std": 0.3781369626522064, "beta_dpo/beta_margin_mean": 58.11309814453125, "beta_dpo/beta_margin_std": 91.10275268554688, "beta_dpo/beta_used": 0.6184810400009155, "beta_dpo/beta_used_raw": 0.6184810400009155, "beta_dpo/gap_mean": 84.80892944335938, "beta_dpo/gap_std": 106.46078491210938, "beta_dpo/loss_margin_mean": 88.51132202148438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47283406754772395, "grad_norm": 2252.3525390625, "learning_rate": 3.184157475180207e-07, "logits/chosen": -0.4382587671279907, "logits/rejected": -0.4316785931587219, "loss": 5.5448, "step": 322 }, { "beta_dpo/beta": 0.09758946299552917, "beta_dpo/beta_margin_grad_mean": -0.3393961191177368, "beta_dpo/beta_margin_grad_std": 0.30425986647605896, "beta_dpo/beta_margin_mean": 9.701543807983398, "beta_dpo/beta_margin_std": 19.077165603637695, "beta_dpo/beta_used": 0.09758946299552917, "beta_dpo/beta_used_raw": -0.6850037574768066, "beta_dpo/gap_mean": 85.3616943359375, "beta_dpo/gap_std": 106.0413818359375, "beta_dpo/loss_margin_mean": 85.88945770263672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47430249632892807, "grad_norm": 346.4527282714844, "learning_rate": 3.171805115074251e-07, "logits/chosen": -0.4343454837799072, "logits/rejected": -0.4204588532447815, "loss": 1.375, "step": 323 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48019298911094666, "beta_dpo/beta_margin_grad_std": 0.029619457200169563, "beta_dpo/beta_margin_mean": 0.0796787217259407, "beta_dpo/beta_margin_std": 0.11951327323913574, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1255923509597778, "beta_dpo/gap_mean": 82.46531677246094, "beta_dpo/gap_std": 104.10604858398438, "beta_dpo/loss_margin_mean": 79.67871856689453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47577092511013214, "grad_norm": 6.610962390899658, "learning_rate": 3.1594350522787295e-07, "logits/chosen": -0.44873249530792236, "logits/rejected": -0.4262539744377136, "loss": 1.327, "step": 324 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4845367670059204, "beta_dpo/beta_margin_grad_std": 0.02106996439397335, "beta_dpo/beta_margin_mean": 0.06200973317027092, "beta_dpo/beta_margin_std": 0.08459888398647308, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.2990741729736328, "beta_dpo/gap_mean": 81.02421569824219, "beta_dpo/gap_std": 103.86199951171875, "beta_dpo/loss_margin_mean": 62.00973129272461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47723935389133626, "grad_norm": 4.3783674240112305, "learning_rate": 3.147047612756302e-07, "logits/chosen": -0.494614839553833, "logits/rejected": -0.46266597509384155, "loss": 1.3307, "step": 325 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4840051829814911, "beta_dpo/beta_margin_grad_std": 0.022391587495803833, "beta_dpo/beta_margin_mean": 0.06417535245418549, "beta_dpo/beta_margin_std": 0.08998852968215942, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.766413688659668, "beta_dpo/gap_mean": 77.61061096191406, "beta_dpo/gap_std": 99.97056579589844, "beta_dpo/loss_margin_mean": 64.17534637451172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4787077826725404, "grad_norm": 4.2274274826049805, "learning_rate": 3.134643122927519e-07, "logits/chosen": -0.516798198223114, "logits/rejected": -0.48323750495910645, "loss": 1.3416, "step": 326 }, { "beta_dpo/beta": 0.5268765687942505, "beta_dpo/beta_margin_grad_mean": -0.19231468439102173, "beta_dpo/beta_margin_grad_std": 0.38571032881736755, "beta_dpo/beta_margin_mean": 54.37653732299805, "beta_dpo/beta_margin_std": 60.839786529541016, "beta_dpo/beta_used": 0.5268765687942505, "beta_dpo/beta_used_raw": 0.5268765687942505, "beta_dpo/gap_mean": 81.72610473632812, "beta_dpo/gap_std": 102.8271713256836, "beta_dpo/loss_margin_mean": 105.36248779296875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4801762114537445, "grad_norm": 2141.1357421875, "learning_rate": 3.1222219096622264e-07, "logits/chosen": -0.436132550239563, "logits/rejected": -0.41571980714797974, "loss": 7.9926, "step": 327 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47792962193489075, "beta_dpo/beta_margin_grad_std": 0.027522550895810127, "beta_dpo/beta_margin_mean": 0.08871802687644958, "beta_dpo/beta_margin_std": 0.1109635978937149, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8078003525733948, "beta_dpo/gap_mean": 83.3543701171875, "beta_dpo/gap_std": 105.11456298828125, "beta_dpo/loss_margin_mean": 88.71802520751953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48164464023494863, "grad_norm": 5.572524547576904, "learning_rate": 3.1097843002709427e-07, "logits/chosen": -0.42520958185195923, "logits/rejected": -0.4174392819404602, "loss": 1.3207, "step": 328 }, { "beta_dpo/beta": 1.0845508575439453, "beta_dpo/beta_margin_grad_mean": -0.1470133364200592, "beta_dpo/beta_margin_grad_std": 0.3484492897987366, "beta_dpo/beta_margin_mean": 114.5302505493164, "beta_dpo/beta_margin_std": 163.5399627685547, "beta_dpo/beta_used": 1.0845508575439453, "beta_dpo/beta_used_raw": 1.0845508575439453, "beta_dpo/gap_mean": 87.77429962158203, "beta_dpo/gap_std": 108.42816162109375, "beta_dpo/loss_margin_mean": 109.0322265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4831130690161527, "grad_norm": 3878.8359375, "learning_rate": 3.0973306224962437e-07, "logits/chosen": -0.3832947015762329, "logits/rejected": -0.36104413866996765, "loss": 12.0021, "step": 329 }, { "beta_dpo/beta": 0.020872846245765686, "beta_dpo/beta_margin_grad_mean": -0.356696754693985, "beta_dpo/beta_margin_grad_std": 0.2544516921043396, "beta_dpo/beta_margin_mean": 2.022984027862549, "beta_dpo/beta_margin_std": 3.9013209342956543, "beta_dpo/beta_used": 0.020872846245765686, "beta_dpo/beta_used_raw": -0.501742959022522, "beta_dpo/gap_mean": 89.45821380615234, "beta_dpo/gap_std": 113.25923156738281, "beta_dpo/loss_margin_mean": 98.53221893310547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4845814977973568, "grad_norm": 94.45597839355469, "learning_rate": 3.084861204504122e-07, "logits/chosen": -0.3601895570755005, "logits/rejected": -0.36258891224861145, "loss": 1.1195, "step": 330 }, { "beta_dpo/beta": 0.3303714692592621, "beta_dpo/beta_margin_grad_mean": -0.3008911907672882, "beta_dpo/beta_margin_grad_std": 0.292959600687027, "beta_dpo/beta_margin_mean": 34.38044357299805, "beta_dpo/beta_margin_std": 57.2767333984375, "beta_dpo/beta_used": 0.3303714692592621, "beta_dpo/beta_used_raw": 0.10756845772266388, "beta_dpo/gap_mean": 91.51332092285156, "beta_dpo/gap_std": 111.50447082519531, "beta_dpo/loss_margin_mean": 98.58274841308594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48604992657856094, "grad_norm": 4.018616199493408, "learning_rate": 3.072376374875335e-07, "logits/chosen": -0.40397077798843384, "logits/rejected": -0.3952021598815918, "loss": 0.6532, "step": 331 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48232200741767883, "beta_dpo/beta_margin_grad_std": 0.023256592452526093, "beta_dpo/beta_margin_mean": 0.07096298784017563, "beta_dpo/beta_margin_std": 0.0935334786772728, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.294619083404541, "beta_dpo/gap_mean": 88.39376831054688, "beta_dpo/gap_std": 107.6720962524414, "beta_dpo/loss_margin_mean": 70.96298217773438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48751835535976507, "grad_norm": 5.169561386108398, "learning_rate": 3.059876462596758e-07, "logits/chosen": -0.42340922355651855, "logits/rejected": -0.4118001163005829, "loss": 1.3397, "step": 332 }, { "beta_dpo/beta": 0.9148516654968262, "beta_dpo/beta_margin_grad_mean": -0.34807515144348145, "beta_dpo/beta_margin_grad_std": 0.3170374631881714, "beta_dpo/beta_margin_mean": 111.1144790649414, "beta_dpo/beta_margin_std": 201.45445251464844, "beta_dpo/beta_used": 0.9148516654968262, "beta_dpo/beta_used_raw": 0.4829646050930023, "beta_dpo/gap_mean": 90.23367309570312, "beta_dpo/gap_std": 110.23046875, "beta_dpo/loss_margin_mean": 97.64309692382812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4889867841409692, "grad_norm": 6724.30078125, "learning_rate": 3.0473617970527015e-07, "logits/chosen": -0.41021233797073364, "logits/rejected": -0.4079732298851013, "loss": 7.7022, "step": 333 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47854748368263245, "beta_dpo/beta_margin_grad_std": 0.028062039986252785, "beta_dpo/beta_margin_mean": 0.08618205785751343, "beta_dpo/beta_margin_std": 0.11291919648647308, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.775077760219574, "beta_dpo/gap_mean": 88.88426208496094, "beta_dpo/gap_std": 111.89533996582031, "beta_dpo/loss_margin_mean": 86.18205261230469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.49045521292217326, "grad_norm": 5.3802080154418945, "learning_rate": 3.034832708016243e-07, "logits/chosen": -0.45664849877357483, "logits/rejected": -0.44416582584381104, "loss": 1.3155, "step": 334 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48350557684898376, "beta_dpo/beta_margin_grad_std": 0.028902921825647354, "beta_dpo/beta_margin_mean": 0.0663158968091011, "beta_dpo/beta_margin_std": 0.11655885726213455, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3869401812553406, "beta_dpo/gap_mean": 84.96998596191406, "beta_dpo/gap_std": 111.31343078613281, "beta_dpo/loss_margin_mean": 66.3158950805664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4919236417033774, "grad_norm": 5.149271488189697, "learning_rate": 3.022289525640531e-07, "logits/chosen": -0.42109692096710205, "logits/rejected": -0.4054427742958069, "loss": 1.3126, "step": 335 }, { "beta_dpo/beta": 0.7048290371894836, "beta_dpo/beta_margin_grad_mean": -0.12527000904083252, "beta_dpo/beta_margin_grad_std": 0.3306175172328949, "beta_dpo/beta_margin_mean": 86.26851654052734, "beta_dpo/beta_margin_std": 136.75621032714844, "beta_dpo/beta_used": 0.7048290371894836, "beta_dpo/beta_used_raw": 0.7048290371894836, "beta_dpo/gap_mean": 88.45248413085938, "beta_dpo/gap_std": 115.87779998779297, "beta_dpo/loss_margin_mean": 115.32160186767578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4933920704845815, "grad_norm": 4854.24609375, "learning_rate": 3.009732580450086e-07, "logits/chosen": -0.39914026856422424, "logits/rejected": -0.40446028113365173, "loss": 6.4255, "step": 336 }, { "beta_dpo/beta": 0.14777547121047974, "beta_dpo/beta_margin_grad_mean": -0.33285483717918396, "beta_dpo/beta_margin_grad_std": 0.31178155541419983, "beta_dpo/beta_margin_mean": 18.477201461791992, "beta_dpo/beta_margin_std": 31.78298568725586, "beta_dpo/beta_used": 0.14777547121047974, "beta_dpo/beta_used_raw": 0.00013016164302825928, "beta_dpo/gap_mean": 93.61722564697266, "beta_dpo/gap_std": 119.06917572021484, "beta_dpo/loss_margin_mean": 105.2668228149414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4948604992657856, "grad_norm": 1186.75927734375, "learning_rate": 2.9971622033320914e-07, "logits/chosen": -0.4144825041294098, "logits/rejected": -0.41202259063720703, "loss": 2.2556, "step": 337 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47472554445266724, "beta_dpo/beta_margin_grad_std": 0.02603665366768837, "beta_dpo/beta_margin_mean": 0.10154020041227341, "beta_dpo/beta_margin_std": 0.10497380793094635, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8564766645431519, "beta_dpo/gap_mean": 94.33158874511719, "beta_dpo/gap_std": 117.08407592773438, "beta_dpo/loss_margin_mean": 101.54019927978516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.49632892804698975, "grad_norm": 7.202863693237305, "learning_rate": 2.984578725527675e-07, "logits/chosen": -0.4074411988258362, "logits/rejected": -0.3915863037109375, "loss": 1.3108, "step": 338 }, { "beta_dpo/beta": 1.0157151222229004, "beta_dpo/beta_margin_grad_mean": -0.3203289806842804, "beta_dpo/beta_margin_grad_std": 0.3050708770751953, "beta_dpo/beta_margin_mean": 132.27044677734375, "beta_dpo/beta_margin_std": 213.22317504882812, "beta_dpo/beta_used": 1.0157151222229004, "beta_dpo/beta_used_raw": 0.2719331383705139, "beta_dpo/gap_mean": 93.49383544921875, "beta_dpo/gap_std": 113.80068969726562, "beta_dpo/loss_margin_mean": 96.39201354980469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4977973568281938, "grad_norm": 4415.94677734375, "learning_rate": 2.9719824786231796e-07, "logits/chosen": -0.4132786989212036, "logits/rejected": -0.4065033197402954, "loss": 5.6395, "step": 339 }, { "beta_dpo/beta": 0.48150238394737244, "beta_dpo/beta_margin_grad_mean": -0.31963202357292175, "beta_dpo/beta_margin_grad_std": 0.30448395013809204, "beta_dpo/beta_margin_mean": 53.98538589477539, "beta_dpo/beta_margin_std": 104.22420501708984, "beta_dpo/beta_used": 0.48150238394737244, "beta_dpo/beta_used_raw": -0.11612993478775024, "beta_dpo/gap_mean": 95.82996368408203, "beta_dpo/gap_std": 116.46409606933594, "beta_dpo/loss_margin_mean": 91.91159057617188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.49926578560939794, "grad_norm": 2312.302001953125, "learning_rate": 2.959373794541426e-07, "logits/chosen": -0.36081361770629883, "logits/rejected": -0.33534175157546997, "loss": 2.6635, "step": 340 }, { "beta_dpo/beta": 0.6681861877441406, "beta_dpo/beta_margin_grad_mean": -0.29601871967315674, "beta_dpo/beta_margin_grad_std": 0.28802916407585144, "beta_dpo/beta_margin_mean": 76.64708709716797, "beta_dpo/beta_margin_std": 130.1103515625, "beta_dpo/beta_used": 0.6681861877441406, "beta_dpo/beta_used_raw": 0.584057629108429, "beta_dpo/gap_mean": 97.98030090332031, "beta_dpo/gap_std": 118.38259887695312, "beta_dpo/loss_margin_mean": 117.34190368652344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5007342143906021, "grad_norm": 2236.2265625, "learning_rate": 2.946753005532965e-07, "logits/chosen": -0.35989874601364136, "logits/rejected": -0.3705149292945862, "loss": 5.4623, "step": 341 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47810834646224976, "beta_dpo/beta_margin_grad_std": 0.031336311250925064, "beta_dpo/beta_margin_mean": 0.08804672211408615, "beta_dpo/beta_margin_std": 0.12628589570522308, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.7561790943145752, "beta_dpo/gap_mean": 96.36663818359375, "beta_dpo/gap_std": 119.49044799804688, "beta_dpo/loss_margin_mean": 88.04671478271484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5022026431718062, "grad_norm": 6.459059238433838, "learning_rate": 2.934120444167326e-07, "logits/chosen": -0.4166700839996338, "logits/rejected": -0.387287974357605, "loss": 1.3243, "step": 342 }, { "beta_dpo/beta": 0.4361698031425476, "beta_dpo/beta_margin_grad_mean": -0.25323668122291565, "beta_dpo/beta_margin_grad_std": 0.25362610816955566, "beta_dpo/beta_margin_mean": 53.51424026489258, "beta_dpo/beta_margin_std": 72.98287200927734, "beta_dpo/beta_used": 0.4361698031425476, "beta_dpo/beta_used_raw": -0.4439680874347687, "beta_dpo/gap_mean": 99.7641372680664, "beta_dpo/gap_std": 115.47168731689453, "beta_dpo/loss_margin_mean": 112.97042083740234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5036710719530103, "grad_norm": 1073.5543212890625, "learning_rate": 2.9214764433242476e-07, "logits/chosen": -0.41678088903427124, "logits/rejected": -0.4238309860229492, "loss": 1.0074, "step": 343 }, { "beta_dpo/beta": 0.5173386931419373, "beta_dpo/beta_margin_grad_mean": -0.30524685978889465, "beta_dpo/beta_margin_grad_std": 0.2957528233528137, "beta_dpo/beta_margin_mean": 64.93697357177734, "beta_dpo/beta_margin_std": 109.22602844238281, "beta_dpo/beta_used": 0.5173386931419373, "beta_dpo/beta_used_raw": -0.35434967279434204, "beta_dpo/gap_mean": 97.2651596069336, "beta_dpo/gap_std": 113.85401916503906, "beta_dpo/loss_margin_mean": 91.90724182128906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5051395007342144, "grad_norm": 2767.380859375, "learning_rate": 2.9088213361849126e-07, "logits/chosen": -0.3739524185657501, "logits/rejected": -0.3753708004951477, "loss": 1.9805, "step": 344 }, { "beta_dpo/beta": 0.40887582302093506, "beta_dpo/beta_margin_grad_mean": -0.19348150491714478, "beta_dpo/beta_margin_grad_std": 0.38543590903282166, "beta_dpo/beta_margin_mean": 49.93855667114258, "beta_dpo/beta_margin_std": 70.8931655883789, "beta_dpo/beta_used": 0.40887582302093506, "beta_dpo/beta_used_raw": 0.40887582302093506, "beta_dpo/gap_mean": 99.37379455566406, "beta_dpo/gap_std": 114.99360656738281, "beta_dpo/loss_margin_mean": 109.09276580810547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5066079295154186, "grad_norm": 2037.227294921875, "learning_rate": 2.896155456223163e-07, "logits/chosen": -0.41102027893066406, "logits/rejected": -0.3992459177970886, "loss": 3.0387, "step": 345 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4759877324104309, "beta_dpo/beta_margin_grad_std": 0.029918290674686432, "beta_dpo/beta_margin_mean": 0.09645290672779083, "beta_dpo/beta_margin_std": 0.1203700602054596, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.828955888748169, "beta_dpo/gap_mean": 100.52455139160156, "beta_dpo/gap_std": 115.36701965332031, "beta_dpo/loss_margin_mean": 96.45289611816406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5080763582966226, "grad_norm": 6.275406837463379, "learning_rate": 2.883479137196714e-07, "logits/chosen": -0.37849825620651245, "logits/rejected": -0.3621870279312134, "loss": 1.3049, "step": 346 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47555938363075256, "beta_dpo/beta_margin_grad_std": 0.030918261036276817, "beta_dpo/beta_margin_mean": 0.0982765406370163, "beta_dpo/beta_margin_std": 0.12459293752908707, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1044821739196777, "beta_dpo/gap_mean": 100.19987487792969, "beta_dpo/gap_std": 118.36697387695312, "beta_dpo/loss_margin_mean": 98.27653503417969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5095447870778267, "grad_norm": 7.38579797744751, "learning_rate": 2.8707927131383614e-07, "logits/chosen": -0.3721884787082672, "logits/rejected": -0.3643941581249237, "loss": 1.3105, "step": 347 }, { "beta_dpo/beta": 0.11714650690555573, "beta_dpo/beta_margin_grad_mean": -0.3267797529697418, "beta_dpo/beta_margin_grad_std": 0.28449147939682007, "beta_dpo/beta_margin_mean": 12.065768241882324, "beta_dpo/beta_margin_std": 22.209104537963867, "beta_dpo/beta_used": 0.11714650690555573, "beta_dpo/beta_used_raw": -0.1274842917919159, "beta_dpo/gap_mean": 98.06617736816406, "beta_dpo/gap_std": 117.04876708984375, "beta_dpo/loss_margin_mean": 82.79788970947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5110132158590308, "grad_norm": 457.3243713378906, "learning_rate": 2.858096518347179e-07, "logits/chosen": -0.4001210629940033, "logits/rejected": -0.4025808572769165, "loss": 1.652, "step": 348 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47573983669281006, "beta_dpo/beta_margin_grad_std": 0.03337588906288147, "beta_dpo/beta_margin_mean": 0.09776747226715088, "beta_dpo/beta_margin_std": 0.13501474261283875, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7904142737388611, "beta_dpo/gap_mean": 95.7049331665039, "beta_dpo/gap_std": 117.83291625976562, "beta_dpo/loss_margin_mean": 97.76747131347656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5124816446402349, "grad_norm": 7.413999080657959, "learning_rate": 2.845390887379706e-07, "logits/chosen": -0.354410856962204, "logits/rejected": -0.34631115198135376, "loss": 1.3096, "step": 349 }, { "beta_dpo/beta": 0.3681584596633911, "beta_dpo/beta_margin_grad_mean": -0.2865951657295227, "beta_dpo/beta_margin_grad_std": 0.2829616367816925, "beta_dpo/beta_margin_mean": 44.03104782104492, "beta_dpo/beta_margin_std": 82.63276672363281, "beta_dpo/beta_used": 0.3681584596633911, "beta_dpo/beta_used_raw": 0.18125556409358978, "beta_dpo/gap_mean": 98.13190460205078, "beta_dpo/gap_std": 121.19251251220703, "beta_dpo/loss_margin_mean": 101.13141632080078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5139500734214391, "grad_norm": 363.3417663574219, "learning_rate": 2.8326761550411346e-07, "logits/chosen": -0.39443519711494446, "logits/rejected": -0.37324777245521545, "loss": 1.6184, "step": 350 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46981188654899597, "beta_dpo/beta_margin_grad_std": 0.038395486772060394, "beta_dpo/beta_margin_mean": 0.12189145386219025, "beta_dpo/beta_margin_std": 0.15585792064666748, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8729835748672485, "beta_dpo/gap_mean": 101.26289367675781, "beta_dpo/gap_std": 127.14557647705078, "beta_dpo/loss_margin_mean": 121.89144897460938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5154185022026432, "grad_norm": 6.668581008911133, "learning_rate": 2.819952656376487e-07, "logits/chosen": -0.43974393606185913, "logits/rejected": -0.4313165545463562, "loss": 1.3058, "step": 351 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48466551303863525, "beta_dpo/beta_margin_grad_std": 0.030659163370728493, "beta_dpo/beta_margin_mean": 0.06168793886899948, "beta_dpo/beta_margin_std": 0.12350940704345703, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.4254016876220703, "beta_dpo/gap_mean": 96.96006774902344, "beta_dpo/gap_std": 127.05946350097656, "beta_dpo/loss_margin_mean": 61.68793487548828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5168869309838473, "grad_norm": 6.023617267608643, "learning_rate": 2.8072207266617854e-07, "logits/chosen": -0.4059138894081116, "logits/rejected": -0.37293723225593567, "loss": 1.3343, "step": 352 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47603118419647217, "beta_dpo/beta_margin_grad_std": 0.037852540612220764, "beta_dpo/beta_margin_mean": 0.09669725596904755, "beta_dpo/beta_margin_std": 0.15294483304023743, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3700706958770752, "beta_dpo/gap_mean": 95.6932373046875, "beta_dpo/gap_std": 130.9846954345703, "beta_dpo/loss_margin_mean": 96.69725036621094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5183553597650514, "grad_norm": 6.5913310050964355, "learning_rate": 2.794480701395219e-07, "logits/chosen": -0.40948837995529175, "logits/rejected": -0.39385730028152466, "loss": 1.3195, "step": 353 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4750857651233673, "beta_dpo/beta_margin_grad_std": 0.026884840801358223, "beta_dpo/beta_margin_mean": 0.10007837414741516, "beta_dpo/beta_margin_std": 0.10820183157920837, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0040392875671387, "beta_dpo/gap_mean": 95.46762084960938, "beta_dpo/gap_std": 128.0501708984375, "beta_dpo/loss_margin_mean": 100.078369140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5198237885462555, "grad_norm": 5.958319664001465, "learning_rate": 2.781732916288303e-07, "logits/chosen": -0.3586284816265106, "logits/rejected": -0.34347254037857056, "loss": 1.3129, "step": 354 }, { "beta_dpo/beta": 0.803097665309906, "beta_dpo/beta_margin_grad_mean": -0.31661173701286316, "beta_dpo/beta_margin_grad_std": 0.30314168334007263, "beta_dpo/beta_margin_mean": 108.771484375, "beta_dpo/beta_margin_std": 188.28627014160156, "beta_dpo/beta_used": 0.803097665309906, "beta_dpo/beta_used_raw": 0.2829711437225342, "beta_dpo/gap_mean": 98.98455810546875, "beta_dpo/gap_std": 126.47689819335938, "beta_dpo/loss_margin_mean": 113.99552917480469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5212922173274597, "grad_norm": 3436.352294921875, "learning_rate": 2.7689777072570284e-07, "logits/chosen": -0.4502210021018982, "logits/rejected": -0.42753684520721436, "loss": 9.4138, "step": 355 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48544058203697205, "beta_dpo/beta_margin_grad_std": 0.03667362034320831, "beta_dpo/beta_margin_mean": 0.05878689885139465, "beta_dpo/beta_margin_std": 0.14823727309703827, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.0601253509521484, "beta_dpo/gap_mean": 92.14360046386719, "beta_dpo/gap_std": 127.69420623779297, "beta_dpo/loss_margin_mean": 58.786895751953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5227606461086637, "grad_norm": 5.952792644500732, "learning_rate": 2.7562154104130176e-07, "logits/chosen": -0.3653779625892639, "logits/rejected": -0.3463535010814667, "loss": 1.3335, "step": 356 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47698020935058594, "beta_dpo/beta_margin_grad_std": 0.030018875375390053, "beta_dpo/beta_margin_mean": 0.09258735179901123, "beta_dpo/beta_margin_std": 0.12106671184301376, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.46944957971572876, "beta_dpo/gap_mean": 92.12881469726562, "beta_dpo/gap_std": 128.6616973876953, "beta_dpo/loss_margin_mean": 92.58734893798828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5242290748898678, "grad_norm": 5.877689361572266, "learning_rate": 2.7434463620546594e-07, "logits/chosen": -0.39090579748153687, "logits/rejected": -0.3792232275009155, "loss": 1.3075, "step": 357 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4783032238483429, "beta_dpo/beta_margin_grad_std": 0.03480137139558792, "beta_dpo/beta_margin_mean": 0.08748451620340347, "beta_dpo/beta_margin_std": 0.14093735814094543, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.034654974937439, "beta_dpo/gap_mean": 91.1614990234375, "beta_dpo/gap_std": 129.97589111328125, "beta_dpo/loss_margin_mean": 87.48451232910156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5256975036710719, "grad_norm": 5.643310070037842, "learning_rate": 2.730670898658255e-07, "logits/chosen": -0.40258651971817017, "logits/rejected": -0.385714590549469, "loss": 1.3174, "step": 358 }, { "beta_dpo/beta": 0.03269139304757118, "beta_dpo/beta_margin_grad_mean": -0.34401631355285645, "beta_dpo/beta_margin_grad_std": 0.29530322551727295, "beta_dpo/beta_margin_mean": 3.772794485092163, "beta_dpo/beta_margin_std": 7.085198402404785, "beta_dpo/beta_used": 0.03269139304757118, "beta_dpo/beta_used_raw": -0.8927912712097168, "beta_dpo/gap_mean": 94.33483123779297, "beta_dpo/gap_std": 137.95599365234375, "beta_dpo/loss_margin_mean": 116.4987564086914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.527165932452276, "grad_norm": 417.1752624511719, "learning_rate": 2.717889356869146e-07, "logits/chosen": -0.3224365711212158, "logits/rejected": -0.2917293906211853, "loss": 1.5572, "step": 359 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4791829586029053, "beta_dpo/beta_margin_grad_std": 0.027085591107606888, "beta_dpo/beta_margin_mean": 0.08361884206533432, "beta_dpo/beta_margin_std": 0.10896874964237213, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.4150499105453491, "beta_dpo/gap_mean": 93.76991271972656, "beta_dpo/gap_std": 133.27197265625, "beta_dpo/loss_margin_mean": 83.61884307861328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5286343612334802, "grad_norm": 7.576156139373779, "learning_rate": 2.7051020734928443e-07, "logits/chosen": -0.33717477321624756, "logits/rejected": -0.32376694679260254, "loss": 1.3215, "step": 360 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.477610319852829, "beta_dpo/beta_margin_grad_std": 0.03370558097958565, "beta_dpo/beta_margin_mean": 0.09011209011077881, "beta_dpo/beta_margin_std": 0.13601025938987732, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9473916888237, "beta_dpo/gap_mean": 92.4726333618164, "beta_dpo/gap_std": 133.42208862304688, "beta_dpo/loss_margin_mean": 90.1120834350586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5301027900146843, "grad_norm": 9.286351203918457, "learning_rate": 2.6923093854861593e-07, "logits/chosen": -0.3260883092880249, "logits/rejected": -0.3336498737335205, "loss": 1.3155, "step": 361 }, { "beta_dpo/beta": 1.6271191835403442, "beta_dpo/beta_margin_grad_mean": -0.2851690351963043, "beta_dpo/beta_margin_grad_std": 0.2767854332923889, "beta_dpo/beta_margin_mean": 257.64166259765625, "beta_dpo/beta_margin_std": 433.18182373046875, "beta_dpo/beta_used": 1.6271191835403442, "beta_dpo/beta_used_raw": 1.0895951986312866, "beta_dpo/gap_mean": 101.06622314453125, "beta_dpo/gap_std": 136.33203125, "beta_dpo/loss_margin_mean": 147.14773559570312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5315712187958884, "grad_norm": 3908.80322265625, "learning_rate": 2.679511629948319e-07, "logits/chosen": -0.3415091633796692, "logits/rejected": -0.3479331135749817, "loss": 4.7128, "step": 362 }, { "beta_dpo/beta": 0.48806485533714294, "beta_dpo/beta_margin_grad_mean": -0.3278650641441345, "beta_dpo/beta_margin_grad_std": 0.3104459047317505, "beta_dpo/beta_margin_mean": 78.98721313476562, "beta_dpo/beta_margin_std": 153.0666961669922, "beta_dpo/beta_used": 0.48806485533714294, "beta_dpo/beta_used_raw": 0.030955523252487183, "beta_dpo/gap_mean": 108.2145004272461, "beta_dpo/gap_std": 139.0877227783203, "beta_dpo/loss_margin_mean": 144.82916259765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5330396475770925, "grad_norm": 2845.82421875, "learning_rate": 2.6667091441120816e-07, "logits/chosen": -0.3030551075935364, "logits/rejected": -0.288122296333313, "loss": 5.4489, "step": 363 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4774971008300781, "beta_dpo/beta_margin_grad_std": 0.03736840933561325, "beta_dpo/beta_margin_mean": 0.09085896611213684, "beta_dpo/beta_margin_std": 0.1515338271856308, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.154345989227295, "beta_dpo/gap_mean": 107.01531982421875, "beta_dpo/gap_std": 143.16229248046875, "beta_dpo/loss_margin_mean": 90.85896301269531, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5345080763582967, "grad_norm": 6.039409160614014, "learning_rate": 2.6539022653348575e-07, "logits/chosen": -0.3785761594772339, "logits/rejected": -0.3902500867843628, "loss": 1.3205, "step": 364 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47192299365997314, "beta_dpo/beta_margin_grad_std": 0.03479469195008278, "beta_dpo/beta_margin_mean": 0.11332341283559799, "beta_dpo/beta_margin_std": 0.14188429713249207, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.951167106628418, "beta_dpo/gap_mean": 108.13379669189453, "beta_dpo/gap_std": 142.005859375, "beta_dpo/loss_margin_mean": 113.32341003417969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5359765051395007, "grad_norm": 6.869436264038086, "learning_rate": 2.641091331089811e-07, "logits/chosen": -0.3123311996459961, "logits/rejected": -0.32426539063453674, "loss": 1.3158, "step": 365 }, { "beta_dpo/beta": 0.5055487155914307, "beta_dpo/beta_margin_grad_mean": -0.31768321990966797, "beta_dpo/beta_margin_grad_std": 0.30457475781440735, "beta_dpo/beta_margin_mean": 68.90055084228516, "beta_dpo/beta_margin_std": 133.3690948486328, "beta_dpo/beta_used": 0.5055487155914307, "beta_dpo/beta_used_raw": -0.3282930850982666, "beta_dpo/gap_mean": 110.02189636230469, "beta_dpo/gap_std": 146.34872436523438, "beta_dpo/loss_margin_mean": 110.3228759765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5374449339207048, "grad_norm": 2276.9384765625, "learning_rate": 2.6282766789569736e-07, "logits/chosen": -0.2959885001182556, "logits/rejected": -0.31161656975746155, "loss": 4.1092, "step": 366 }, { "beta_dpo/beta": 0.1259302943944931, "beta_dpo/beta_margin_grad_mean": -0.3177259862422943, "beta_dpo/beta_margin_grad_std": 0.2994624972343445, "beta_dpo/beta_margin_mean": 11.867181777954102, "beta_dpo/beta_margin_std": 23.115652084350586, "beta_dpo/beta_used": 0.1259302943944931, "beta_dpo/beta_used_raw": -0.25230592489242554, "beta_dpo/gap_mean": 106.54058837890625, "beta_dpo/gap_std": 143.6832275390625, "beta_dpo/loss_margin_mean": 93.8835220336914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5389133627019089, "grad_norm": 524.9410400390625, "learning_rate": 2.615458646614349e-07, "logits/chosen": -0.37701088190078735, "logits/rejected": -0.35184770822525024, "loss": 1.9352, "step": 367 }, { "beta_dpo/beta": 0.9328745603561401, "beta_dpo/beta_margin_grad_mean": -0.09676685929298401, "beta_dpo/beta_margin_grad_std": 0.2863916754722595, "beta_dpo/beta_margin_mean": 133.5645751953125, "beta_dpo/beta_margin_std": 201.60333251953125, "beta_dpo/beta_used": 0.9328745603561401, "beta_dpo/beta_used_raw": 0.9328745603561401, "beta_dpo/gap_mean": 109.11013793945312, "beta_dpo/gap_std": 139.04080200195312, "beta_dpo/loss_margin_mean": 132.6707000732422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.540381791483113, "grad_norm": 3995.7197265625, "learning_rate": 2.6026375718290083e-07, "logits/chosen": -0.38272589445114136, "logits/rejected": -0.38491058349609375, "loss": 1.3764, "step": 368 }, { "beta_dpo/beta": 0.09317570924758911, "beta_dpo/beta_margin_grad_mean": -0.3334360122680664, "beta_dpo/beta_margin_grad_std": 0.29883116483688354, "beta_dpo/beta_margin_mean": 9.50542163848877, "beta_dpo/beta_margin_std": 21.700239181518555, "beta_dpo/beta_used": 0.09317570924758911, "beta_dpo/beta_used_raw": -1.1680105924606323, "beta_dpo/gap_mean": 105.86335754394531, "beta_dpo/gap_std": 138.92889404296875, "beta_dpo/loss_margin_mean": 81.47874450683594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5418502202643172, "grad_norm": 338.8497619628906, "learning_rate": 2.589813792448196e-07, "logits/chosen": -0.40276187658309937, "logits/rejected": -0.3842419385910034, "loss": 1.2417, "step": 369 }, { "beta_dpo/beta": 0.32751452922821045, "beta_dpo/beta_margin_grad_mean": -0.33634528517723083, "beta_dpo/beta_margin_grad_std": 0.31442970037460327, "beta_dpo/beta_margin_mean": 26.184959411621094, "beta_dpo/beta_margin_std": 58.59195327758789, "beta_dpo/beta_used": 0.32751452922821045, "beta_dpo/beta_used_raw": -0.5632827281951904, "beta_dpo/gap_mean": 100.1129150390625, "beta_dpo/gap_std": 139.25335693359375, "beta_dpo/loss_margin_mean": 70.3260726928711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5433186490455213, "grad_norm": 2646.7197265625, "learning_rate": 2.5769876463904263e-07, "logits/chosen": -0.39602580666542053, "logits/rejected": -0.39256715774536133, "loss": 5.0036, "step": 370 }, { "beta_dpo/beta": 0.26400327682495117, "beta_dpo/beta_margin_grad_mean": -0.3500906229019165, "beta_dpo/beta_margin_grad_std": 0.31608888506889343, "beta_dpo/beta_margin_mean": 22.5057430267334, "beta_dpo/beta_margin_std": 54.71674346923828, "beta_dpo/beta_used": 0.26400327682495117, "beta_dpo/beta_used_raw": -0.1922587752342224, "beta_dpo/gap_mean": 97.70218658447266, "beta_dpo/gap_std": 137.7125244140625, "beta_dpo/loss_margin_mean": 91.76586151123047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5447870778267254, "grad_norm": 1841.630859375, "learning_rate": 2.5641594716365744e-07, "logits/chosen": -0.4191368520259857, "logits/rejected": -0.4073137044906616, "loss": 4.2713, "step": 371 }, { "beta_dpo/beta": 0.29865550994873047, "beta_dpo/beta_margin_grad_mean": -0.34942829608917236, "beta_dpo/beta_margin_grad_std": 0.3169166147708893, "beta_dpo/beta_margin_mean": 30.436782836914062, "beta_dpo/beta_margin_std": 65.03998565673828, "beta_dpo/beta_used": 0.29865550994873047, "beta_dpo/beta_used_raw": 0.2250063121318817, "beta_dpo/gap_mean": 98.745849609375, "beta_dpo/gap_std": 137.76092529296875, "beta_dpo/loss_margin_mean": 105.9774398803711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5462555066079295, "grad_norm": 2330.501953125, "learning_rate": 2.551329606220976e-07, "logits/chosen": -0.4094877243041992, "logits/rejected": -0.3760242462158203, "loss": 5.4678, "step": 372 }, { "beta_dpo/beta": 0.4515746831893921, "beta_dpo/beta_margin_grad_mean": -0.27207887172698975, "beta_dpo/beta_margin_grad_std": 0.2706195116043091, "beta_dpo/beta_margin_mean": 58.79832077026367, "beta_dpo/beta_margin_std": 84.47066497802734, "beta_dpo/beta_used": 0.4515746831893921, "beta_dpo/beta_used_raw": -0.7580370903015137, "beta_dpo/gap_mean": 98.81103515625, "beta_dpo/gap_std": 134.30552673339844, "beta_dpo/loss_margin_mean": 103.87548828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5477239353891337, "grad_norm": 2668.42236328125, "learning_rate": 2.538498388222517e-07, "logits/chosen": -0.4020259380340576, "logits/rejected": -0.3562648296356201, "loss": 3.7343, "step": 373 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4792296886444092, "beta_dpo/beta_margin_grad_std": 0.0275330301374197, "beta_dpo/beta_margin_mean": 0.08352459967136383, "beta_dpo/beta_margin_std": 0.1111961379647255, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.5819969177246094, "beta_dpo/gap_mean": 98.51296997070312, "beta_dpo/gap_std": 129.830322265625, "beta_dpo/loss_margin_mean": 83.52459716796875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5491923641703378, "grad_norm": 25.06356430053711, "learning_rate": 2.525666155755725e-07, "logits/chosen": -0.5303980112075806, "logits/rejected": -0.5011695027351379, "loss": 1.319, "step": 374 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48084428906440735, "beta_dpo/beta_margin_grad_std": 0.025963004678487778, "beta_dpo/beta_margin_mean": 0.07689561694860458, "beta_dpo/beta_margin_std": 0.10439448803663254, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7749541997909546, "beta_dpo/gap_mean": 95.07070922851562, "beta_dpo/gap_std": 124.96221923828125, "beta_dpo/loss_margin_mean": 76.89561462402344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5506607929515418, "grad_norm": 33.27694320678711, "learning_rate": 2.512833246961859e-07, "logits/chosen": -0.46519535779953003, "logits/rejected": -0.4570329785346985, "loss": 1.3091, "step": 375 }, { "beta_dpo/beta": 0.16056698560714722, "beta_dpo/beta_margin_grad_mean": -0.3113880455493927, "beta_dpo/beta_margin_grad_std": 0.28589603304862976, "beta_dpo/beta_margin_mean": 17.529916763305664, "beta_dpo/beta_margin_std": 32.734588623046875, "beta_dpo/beta_used": 0.16056698560714722, "beta_dpo/beta_used_raw": -0.6300212144851685, "beta_dpo/gap_mean": 94.86084747314453, "beta_dpo/gap_std": 124.55882263183594, "beta_dpo/loss_margin_mean": 106.193359375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5521292217327459, "grad_norm": 1583.425048828125, "learning_rate": 2.5e-07, "logits/chosen": -0.4400368332862854, "logits/rejected": -0.42413341999053955, "loss": 2.4697, "step": 376 }, { "beta_dpo/beta": 0.12274540960788727, "beta_dpo/beta_margin_grad_mean": -0.3400813341140747, "beta_dpo/beta_margin_grad_std": 0.3001156449317932, "beta_dpo/beta_margin_mean": 12.869964599609375, "beta_dpo/beta_margin_std": 25.119873046875, "beta_dpo/beta_used": 0.12274540960788727, "beta_dpo/beta_used_raw": -0.9513387680053711, "beta_dpo/gap_mean": 94.40826416015625, "beta_dpo/gap_std": 123.61512756347656, "beta_dpo/loss_margin_mean": 91.47625732421875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.55359765051395, "grad_norm": 474.910400390625, "learning_rate": 2.487166753038141e-07, "logits/chosen": -0.43538355827331543, "logits/rejected": -0.43877607583999634, "loss": 1.7143, "step": 377 }, { "beta_dpo/beta": 0.055305834859609604, "beta_dpo/beta_margin_grad_mean": -0.33113202452659607, "beta_dpo/beta_margin_grad_std": 0.2857387065887451, "beta_dpo/beta_margin_mean": 5.2662177085876465, "beta_dpo/beta_margin_std": 8.644315719604492, "beta_dpo/beta_used": 0.055305834859609604, "beta_dpo/beta_used_raw": -0.05297088995575905, "beta_dpo/gap_mean": 96.25257873535156, "beta_dpo/gap_std": 121.488525390625, "beta_dpo/loss_margin_mean": 101.7795639038086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5550660792951542, "grad_norm": 206.10948181152344, "learning_rate": 2.4743338442442754e-07, "logits/chosen": -0.453810453414917, "logits/rejected": -0.45135384798049927, "loss": 1.2075, "step": 378 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47414979338645935, "beta_dpo/beta_margin_grad_std": 0.03370606154203415, "beta_dpo/beta_margin_mean": 0.10408032685518265, "beta_dpo/beta_margin_std": 0.13608884811401367, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.923268735408783, "beta_dpo/gap_mean": 98.50502014160156, "beta_dpo/gap_std": 122.33307647705078, "beta_dpo/loss_margin_mean": 104.080322265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5565345080763583, "grad_norm": 7.842655658721924, "learning_rate": 2.461501611777483e-07, "logits/chosen": -0.40998560190200806, "logits/rejected": -0.43568363785743713, "loss": 1.3086, "step": 379 }, { "beta_dpo/beta": 0.49257999658584595, "beta_dpo/beta_margin_grad_mean": -0.1504988819360733, "beta_dpo/beta_margin_grad_std": 0.34962981939315796, "beta_dpo/beta_margin_mean": 55.05702590942383, "beta_dpo/beta_margin_std": 68.4500732421875, "beta_dpo/beta_used": 0.49257999658584595, "beta_dpo/beta_used_raw": 0.49257999658584595, "beta_dpo/gap_mean": 99.70042419433594, "beta_dpo/gap_std": 122.06121063232422, "beta_dpo/loss_margin_mean": 114.565673828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5580029368575624, "grad_norm": 1847.671142578125, "learning_rate": 2.4486703937790243e-07, "logits/chosen": -0.4073488712310791, "logits/rejected": -0.4285936951637268, "loss": 3.929, "step": 380 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4786049425601959, "beta_dpo/beta_margin_grad_std": 0.033149346709251404, "beta_dpo/beta_margin_mean": 0.08609545230865479, "beta_dpo/beta_margin_std": 0.13360077142715454, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.517669916152954, "beta_dpo/gap_mean": 99.50776672363281, "beta_dpo/gap_std": 124.66862487792969, "beta_dpo/loss_margin_mean": 86.09545135498047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5594713656387665, "grad_norm": 7.046865463256836, "learning_rate": 2.435840528363426e-07, "logits/chosen": -0.44461020827293396, "logits/rejected": -0.42805731296539307, "loss": 1.3176, "step": 381 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4754858613014221, "beta_dpo/beta_margin_grad_std": 0.0272133257240057, "beta_dpo/beta_margin_mean": 0.09848769009113312, "beta_dpo/beta_margin_std": 0.10959197580814362, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7904385328292847, "beta_dpo/gap_mean": 98.30332946777344, "beta_dpo/gap_std": 123.63853454589844, "beta_dpo/loss_margin_mean": 98.48768615722656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5609397944199707, "grad_norm": 7.702118873596191, "learning_rate": 2.4230123536095745e-07, "logits/chosen": -0.482845664024353, "logits/rejected": -0.47936874628067017, "loss": 1.3066, "step": 382 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47483423352241516, "beta_dpo/beta_margin_grad_std": 0.031567756086587906, "beta_dpo/beta_margin_mean": 0.10119039565324783, "beta_dpo/beta_margin_std": 0.1272357702255249, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9441766142845154, "beta_dpo/gap_mean": 99.17279815673828, "beta_dpo/gap_std": 122.20887756347656, "beta_dpo/loss_margin_mean": 101.19039154052734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5624082232011748, "grad_norm": 7.877129077911377, "learning_rate": 2.4101862075518037e-07, "logits/chosen": -0.4365376830101013, "logits/rejected": -0.4394179880619049, "loss": 1.3082, "step": 383 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4772571325302124, "beta_dpo/beta_margin_grad_std": 0.026376277208328247, "beta_dpo/beta_margin_mean": 0.09137213975191116, "beta_dpo/beta_margin_std": 0.10625956207513809, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6997740268707275, "beta_dpo/gap_mean": 96.98212432861328, "beta_dpo/gap_std": 120.21687316894531, "beta_dpo/loss_margin_mean": 91.37213897705078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5638766519823789, "grad_norm": 6.691796779632568, "learning_rate": 2.397362428170992e-07, "logits/chosen": -0.4357266128063202, "logits/rejected": -0.4359877407550812, "loss": 1.3064, "step": 384 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4784564971923828, "beta_dpo/beta_margin_grad_std": 0.023655114695429802, "beta_dpo/beta_margin_mean": 0.08640988171100616, "beta_dpo/beta_margin_std": 0.09507441520690918, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6672598123550415, "beta_dpo/gap_mean": 95.1080322265625, "beta_dpo/gap_std": 117.16928100585938, "beta_dpo/loss_margin_mean": 86.40987396240234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5653450807635829, "grad_norm": 6.454415798187256, "learning_rate": 2.3845413533856514e-07, "logits/chosen": -0.4430091381072998, "logits/rejected": -0.406819224357605, "loss": 1.3072, "step": 385 }, { "beta_dpo/beta": 0.3350660502910614, "beta_dpo/beta_margin_grad_mean": -0.30638933181762695, "beta_dpo/beta_margin_grad_std": 0.2916352152824402, "beta_dpo/beta_margin_mean": 41.42173385620117, "beta_dpo/beta_margin_std": 72.62975311279297, "beta_dpo/beta_used": 0.3350660502910614, "beta_dpo/beta_used_raw": -0.22976088523864746, "beta_dpo/gap_mean": 95.47817993164062, "beta_dpo/gap_std": 116.49141693115234, "beta_dpo/loss_margin_mean": 104.02435302734375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.566813509544787, "grad_norm": 1257.6240234375, "learning_rate": 2.3717233210430254e-07, "logits/chosen": -0.40499740839004517, "logits/rejected": -0.3944457769393921, "loss": 2.1028, "step": 386 }, { "beta_dpo/beta": 0.04211033880710602, "beta_dpo/beta_margin_grad_mean": -0.362678587436676, "beta_dpo/beta_margin_grad_std": 0.2757696211338043, "beta_dpo/beta_margin_mean": 2.8369340896606445, "beta_dpo/beta_margin_std": 6.981544017791748, "beta_dpo/beta_used": 0.04211033880710602, "beta_dpo/beta_used_raw": -0.41031795740127563, "beta_dpo/gap_mean": 96.32708740234375, "beta_dpo/gap_std": 116.07279968261719, "beta_dpo/loss_margin_mean": 86.49880981445312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5682819383259912, "grad_norm": 153.96595764160156, "learning_rate": 2.3589086689101889e-07, "logits/chosen": -0.46131113171577454, "logits/rejected": -0.4227498173713684, "loss": 0.9943, "step": 387 }, { "beta_dpo/beta": 0.2852635979652405, "beta_dpo/beta_margin_grad_mean": -0.1428508460521698, "beta_dpo/beta_margin_grad_std": 0.3305058777332306, "beta_dpo/beta_margin_mean": 35.86345291137695, "beta_dpo/beta_margin_std": 41.991371154785156, "beta_dpo/beta_used": 0.2852635979652405, "beta_dpo/beta_used_raw": 0.2852635979652405, "beta_dpo/gap_mean": 99.73340606689453, "beta_dpo/gap_std": 122.03709411621094, "beta_dpo/loss_margin_mean": 126.48102569580078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5697503671071953, "grad_norm": 1019.3936767578125, "learning_rate": 2.3460977346651428e-07, "logits/chosen": -0.34111300110816956, "logits/rejected": -0.35984545946121216, "loss": 2.9347, "step": 388 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4744521677494049, "beta_dpo/beta_margin_grad_std": 0.029463116079568863, "beta_dpo/beta_margin_mean": 0.10270687937736511, "beta_dpo/beta_margin_std": 0.1187412440776825, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.477594792842865, "beta_dpo/gap_mean": 101.11051177978516, "beta_dpo/gap_std": 122.17589569091797, "beta_dpo/loss_margin_mean": 102.70687103271484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5712187958883994, "grad_norm": 7.796079158782959, "learning_rate": 2.3332908558879177e-07, "logits/chosen": -0.3674032688140869, "logits/rejected": -0.33893561363220215, "loss": 1.299, "step": 389 }, { "beta_dpo/beta": 0.9373176097869873, "beta_dpo/beta_margin_grad_mean": -0.3490453064441681, "beta_dpo/beta_margin_grad_std": 0.32049307227134705, "beta_dpo/beta_margin_mean": 102.44970703125, "beta_dpo/beta_margin_std": 286.3146667480469, "beta_dpo/beta_used": 0.9373176097869873, "beta_dpo/beta_used_raw": 0.6466106176376343, "beta_dpo/gap_mean": 100.94984436035156, "beta_dpo/gap_std": 128.69989013671875, "beta_dpo/loss_margin_mean": 96.0970687866211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5726872246696035, "grad_norm": 6808.29296875, "learning_rate": 2.320488370051681e-07, "logits/chosen": -0.38758012652397156, "logits/rejected": -0.3825533986091614, "loss": 5.2234, "step": 390 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4825769364833832, "beta_dpo/beta_margin_grad_std": 0.032539550215005875, "beta_dpo/beta_margin_mean": 0.07015617936849594, "beta_dpo/beta_margin_std": 0.1320018768310547, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.8503118753433228, "beta_dpo/gap_mean": 95.19480895996094, "beta_dpo/gap_std": 125.16159057617188, "beta_dpo/loss_margin_mean": 70.15617370605469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5741556534508077, "grad_norm": 7.870649814605713, "learning_rate": 2.3076906145138405e-07, "logits/chosen": -0.4045184850692749, "logits/rejected": -0.40096336603164673, "loss": 1.3264, "step": 391 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4708142578601837, "beta_dpo/beta_margin_grad_std": 0.03216283768415451, "beta_dpo/beta_margin_mean": 0.11758483201265335, "beta_dpo/beta_margin_std": 0.13041386008262634, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6004123687744141, "beta_dpo/gap_mean": 96.84786987304688, "beta_dpo/gap_std": 127.46624755859375, "beta_dpo/loss_margin_mean": 117.58483123779297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5756240822320118, "grad_norm": 7.0149126052856445, "learning_rate": 2.294897926507156e-07, "logits/chosen": -0.4069562554359436, "logits/rejected": -0.4070258140563965, "loss": 1.3047, "step": 392 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47842276096343994, "beta_dpo/beta_margin_grad_std": 0.035466983914375305, "beta_dpo/beta_margin_mean": 0.08704755455255508, "beta_dpo/beta_margin_std": 0.1434432864189148, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.5364588499069214, "beta_dpo/gap_mean": 97.63465881347656, "beta_dpo/gap_std": 131.28060913085938, "beta_dpo/loss_margin_mean": 87.04754638671875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5770925110132159, "grad_norm": 5.620955944061279, "learning_rate": 2.2821106431308543e-07, "logits/chosen": -0.36222386360168457, "logits/rejected": -0.35495465993881226, "loss": 1.3203, "step": 393 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47261518239974976, "beta_dpo/beta_margin_grad_std": 0.03843296319246292, "beta_dpo/beta_margin_mean": 0.11056187748908997, "beta_dpo/beta_margin_std": 0.1556539684534073, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0046627521514893, "beta_dpo/gap_mean": 98.78483581542969, "beta_dpo/gap_std": 134.528564453125, "beta_dpo/loss_margin_mean": 110.56187438964844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.57856093979442, "grad_norm": 6.00560998916626, "learning_rate": 2.2693291013417452e-07, "logits/chosen": -0.38290512561798096, "logits/rejected": -0.37921932339668274, "loss": 1.311, "step": 394 }, { "beta_dpo/beta": 0.2388431876897812, "beta_dpo/beta_margin_grad_mean": -0.36290186643600464, "beta_dpo/beta_margin_grad_std": 0.3192496597766876, "beta_dpo/beta_margin_mean": 26.790372848510742, "beta_dpo/beta_margin_std": 53.6635856628418, "beta_dpo/beta_used": 0.2388431876897812, "beta_dpo/beta_used_raw": -0.2869706153869629, "beta_dpo/gap_mean": 101.26589965820312, "beta_dpo/gap_std": 135.43722534179688, "beta_dpo/loss_margin_mean": 112.6613540649414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.580029368575624, "grad_norm": 1960.75048828125, "learning_rate": 2.2565536379453404e-07, "logits/chosen": -0.41427770256996155, "logits/rejected": -0.4001610577106476, "loss": 4.0929, "step": 395 }, { "beta_dpo/beta": 0.04714573919773102, "beta_dpo/beta_margin_grad_mean": -0.32316020131111145, "beta_dpo/beta_margin_grad_std": 0.29187336564064026, "beta_dpo/beta_margin_mean": 4.048221588134766, "beta_dpo/beta_margin_std": 9.408126831054688, "beta_dpo/beta_used": 0.04714573919773102, "beta_dpo/beta_used_raw": -0.8138464689254761, "beta_dpo/gap_mean": 100.85454559326172, "beta_dpo/gap_std": 133.6050262451172, "beta_dpo/loss_margin_mean": 92.83641815185547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5814977973568282, "grad_norm": 261.0092468261719, "learning_rate": 2.2437845895869825e-07, "logits/chosen": -0.45682457089424133, "logits/rejected": -0.4294641613960266, "loss": 0.7611, "step": 396 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4667245149612427, "beta_dpo/beta_margin_grad_std": 0.03587669879198074, "beta_dpo/beta_margin_mean": 0.1342308074235916, "beta_dpo/beta_margin_std": 0.14559264481067657, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7097823619842529, "beta_dpo/gap_mean": 104.96504211425781, "beta_dpo/gap_std": 135.63267517089844, "beta_dpo/loss_margin_mean": 134.23080444335938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5829662261380323, "grad_norm": 9.609397888183594, "learning_rate": 2.2310222927429716e-07, "logits/chosen": -0.37662869691848755, "logits/rejected": -0.3610289692878723, "loss": 1.2987, "step": 397 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4707203805446625, "beta_dpo/beta_margin_grad_std": 0.03591061383485794, "beta_dpo/beta_margin_mean": 0.11811903864145279, "beta_dpo/beta_margin_std": 0.14565932750701904, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9772100448608398, "beta_dpo/gap_mean": 108.35206604003906, "beta_dpo/gap_std": 138.2628631591797, "beta_dpo/loss_margin_mean": 118.11903381347656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5844346549192364, "grad_norm": 7.986485481262207, "learning_rate": 2.2182670837116972e-07, "logits/chosen": -0.4529736638069153, "logits/rejected": -0.4409021735191345, "loss": 1.3013, "step": 398 }, { "beta_dpo/beta": 0.10209912061691284, "beta_dpo/beta_margin_grad_mean": -0.3368144929409027, "beta_dpo/beta_margin_grad_std": 0.3037874400615692, "beta_dpo/beta_margin_mean": 12.228537559509277, "beta_dpo/beta_margin_std": 24.178831100463867, "beta_dpo/beta_used": 0.10209912061691284, "beta_dpo/beta_used_raw": -0.7041253447532654, "beta_dpo/gap_mean": 109.07110595703125, "beta_dpo/gap_std": 139.66268920898438, "beta_dpo/loss_margin_mean": 107.10200500488281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5859030837004405, "grad_norm": 1133.5469970703125, "learning_rate": 2.2055192986047804e-07, "logits/chosen": -0.3726699948310852, "logits/rejected": -0.31704509258270264, "loss": 3.271, "step": 399 }, { "beta_dpo/beta": 1.6487863063812256, "beta_dpo/beta_margin_grad_mean": -0.1875, "beta_dpo/beta_margin_grad_std": 0.3903123736381531, "beta_dpo/beta_margin_mean": 249.3288116455078, "beta_dpo/beta_margin_std": 262.01593017578125, "beta_dpo/beta_used": 1.6487863063812256, "beta_dpo/beta_used_raw": 1.6487863063812256, "beta_dpo/gap_mean": 115.11041259765625, "beta_dpo/gap_std": 142.930419921875, "beta_dpo/loss_margin_mean": 150.7600555419922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5873715124816447, "grad_norm": 9287.4912109375, "learning_rate": 2.192779273338215e-07, "logits/chosen": -0.4059417247772217, "logits/rejected": -0.40355077385902405, "loss": 18.5063, "step": 400 }, { "epoch": 0.5873715124816447, "eval_beta_dpo/beta": 0.0093453424051404, "eval_beta_dpo/beta_margin_grad_mean": -0.4752621054649353, "eval_beta_dpo/beta_margin_grad_std": 0.04468919709324837, "eval_beta_dpo/beta_margin_mean": 1.0762052536010742, "eval_beta_dpo/beta_margin_std": 1.4304980039596558, "eval_beta_dpo/beta_used": 0.0093453424051404, "eval_beta_dpo/beta_used_raw": -2.859452724456787, "eval_beta_dpo/gap_mean": 116.2162094116211, "eval_beta_dpo/gap_std": 143.88238525390625, "eval_beta_dpo/loss_margin_mean": 66.89200592041016, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.4157075881958008, "eval_logits/rejected": -0.39382484555244446, "eval_loss": 0.7606868147850037, "eval_runtime": 40.2132, "eval_samples_per_second": 58.165, "eval_steps_per_second": 1.84, "step": 400 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4766118824481964, "beta_dpo/beta_margin_grad_std": 0.042866192758083344, "beta_dpo/beta_margin_mean": 0.094448022544384, "beta_dpo/beta_margin_std": 0.17364467680454254, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.313326358795166, "beta_dpo/gap_mean": 113.66698455810547, "beta_dpo/gap_std": 148.7388916015625, "beta_dpo/loss_margin_mean": 94.4480209350586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5888399412628488, "grad_norm": 6.7877278327941895, "learning_rate": 2.1800473436235136e-07, "logits/chosen": -0.35054802894592285, "logits/rejected": -0.3441402316093445, "loss": 1.303, "step": 401 }, { "beta_dpo/beta": 0.6786636710166931, "beta_dpo/beta_margin_grad_mean": -0.10659972578287125, "beta_dpo/beta_margin_grad_std": 0.3003370761871338, "beta_dpo/beta_margin_mean": 108.07469177246094, "beta_dpo/beta_margin_std": 99.87371826171875, "beta_dpo/beta_used": 0.6786636710166931, "beta_dpo/beta_used_raw": 0.6786636710166931, "beta_dpo/gap_mean": 119.07506561279297, "beta_dpo/gap_std": 149.63043212890625, "beta_dpo/loss_margin_mean": 158.593994140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5903083700440529, "grad_norm": 2755.770263671875, "learning_rate": 2.1673238449588665e-07, "logits/chosen": -0.3783169388771057, "logits/rejected": -0.3567967414855957, "loss": 5.1878, "step": 402 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4704398214817047, "beta_dpo/beta_margin_grad_std": 0.03317659720778465, "beta_dpo/beta_margin_mean": 0.11902157217264175, "beta_dpo/beta_margin_std": 0.13397535681724548, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.16943010687828064, "beta_dpo/gap_mean": 120.31027221679688, "beta_dpo/gap_std": 146.5064697265625, "beta_dpo/loss_margin_mean": 119.02156829833984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.591776798825257, "grad_norm": 6.965160369873047, "learning_rate": 2.154609112620295e-07, "logits/chosen": -0.38709545135498047, "logits/rejected": -0.3838120698928833, "loss": 1.2773, "step": 403 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4710405468940735, "beta_dpo/beta_margin_grad_std": 0.04145493730902672, "beta_dpo/beta_margin_mean": 0.11696790158748627, "beta_dpo/beta_margin_std": 0.16781915724277496, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3914072513580322, "beta_dpo/gap_mean": 120.10807800292969, "beta_dpo/gap_std": 150.40188598632812, "beta_dpo/loss_margin_mean": 116.9678955078125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.593245227606461, "grad_norm": 7.6397705078125, "learning_rate": 2.1419034816528218e-07, "logits/chosen": -0.32211601734161377, "logits/rejected": -0.3063517212867737, "loss": 1.2817, "step": 404 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4748651087284088, "beta_dpo/beta_margin_grad_std": 0.0469396598637104, "beta_dpo/beta_margin_mean": 0.10184108465909958, "beta_dpo/beta_margin_std": 0.19063597917556763, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.5856375694274902, "beta_dpo/gap_mean": 115.75703430175781, "beta_dpo/gap_std": 156.31784057617188, "beta_dpo/loss_margin_mean": 101.84107971191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5947136563876652, "grad_norm": 7.145941257476807, "learning_rate": 2.129207286861638e-07, "logits/chosen": -0.3783246874809265, "logits/rejected": -0.35847070813179016, "loss": 1.3212, "step": 405 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46724018454551697, "beta_dpo/beta_margin_grad_std": 0.04072672128677368, "beta_dpo/beta_margin_mean": 0.13246352970600128, "beta_dpo/beta_margin_std": 0.16599087417125702, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2788747549057007, "beta_dpo/gap_mean": 117.24635314941406, "beta_dpo/gap_std": 158.91787719726562, "beta_dpo/loss_margin_mean": 132.46351623535156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5961820851688693, "grad_norm": 7.6379899978637695, "learning_rate": 2.1165208628032861e-07, "logits/chosen": -0.3630604147911072, "logits/rejected": -0.35475897789001465, "loss": 1.2817, "step": 406 }, { "beta_dpo/beta": 0.9161151051521301, "beta_dpo/beta_margin_grad_mean": -0.3084886372089386, "beta_dpo/beta_margin_grad_std": 0.2934010624885559, "beta_dpo/beta_margin_mean": 123.79098510742188, "beta_dpo/beta_margin_std": 235.88023376464844, "beta_dpo/beta_used": 0.9161151051521301, "beta_dpo/beta_used_raw": -0.8416473865509033, "beta_dpo/gap_mean": 115.60337829589844, "beta_dpo/gap_std": 158.64419555664062, "beta_dpo/loss_margin_mean": 100.3062515258789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5976505139500734, "grad_norm": 4827.95361328125, "learning_rate": 2.1038445437768375e-07, "logits/chosen": -0.377028226852417, "logits/rejected": -0.3416253924369812, "loss": 10.9265, "step": 407 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4801346957683563, "beta_dpo/beta_margin_grad_std": 0.03229653090238571, "beta_dpo/beta_margin_mean": 0.07995922118425369, "beta_dpo/beta_margin_std": 0.13016226887702942, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.016765594482422, "beta_dpo/gap_mean": 111.73031616210938, "beta_dpo/gap_std": 154.44381713867188, "beta_dpo/loss_margin_mean": 79.95922088623047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5991189427312775, "grad_norm": 7.195991516113281, "learning_rate": 2.0911786638150872e-07, "logits/chosen": -0.40478670597076416, "logits/rejected": -0.37068575620651245, "loss": 1.3143, "step": 408 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4723385274410248, "beta_dpo/beta_margin_grad_std": 0.03917807340621948, "beta_dpo/beta_margin_mean": 0.11170117557048798, "beta_dpo/beta_margin_std": 0.15871573984622955, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.8204164505004883, "beta_dpo/gap_mean": 109.16544342041016, "beta_dpo/gap_std": 155.03025817871094, "beta_dpo/loss_margin_mean": 111.70116424560547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6005873715124816, "grad_norm": 7.589075565338135, "learning_rate": 2.0785235566757517e-07, "logits/chosen": -0.34394949674606323, "logits/rejected": -0.3319231867790222, "loss": 1.3136, "step": 409 }, { "beta_dpo/beta": 0.5207417011260986, "beta_dpo/beta_margin_grad_mean": -0.3113498389720917, "beta_dpo/beta_margin_grad_std": 0.3010904788970947, "beta_dpo/beta_margin_mean": 55.4542121887207, "beta_dpo/beta_margin_std": 125.90103912353516, "beta_dpo/beta_used": 0.5207417011260986, "beta_dpo/beta_used_raw": -0.16834038496017456, "beta_dpo/gap_mean": 112.37252044677734, "beta_dpo/gap_std": 154.6945343017578, "beta_dpo/loss_margin_mean": 121.03668975830078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6020558002936858, "grad_norm": 1850.3192138671875, "learning_rate": 2.065879555832674e-07, "logits/chosen": -0.3558204472064972, "logits/rejected": -0.34983137249946594, "loss": 1.3845, "step": 410 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4737773537635803, "beta_dpo/beta_margin_grad_std": 0.046403612941503525, "beta_dpo/beta_margin_mean": 0.10650434345006943, "beta_dpo/beta_margin_std": 0.19019237160682678, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.678023338317871, "beta_dpo/gap_mean": 113.07457733154297, "beta_dpo/gap_std": 160.96011352539062, "beta_dpo/loss_margin_mean": 106.50434112548828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6035242290748899, "grad_norm": 10.161256790161133, "learning_rate": 2.0532469944670343e-07, "logits/chosen": -0.293745219707489, "logits/rejected": -0.2922123670578003, "loss": 1.3083, "step": 411 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4713696539402008, "beta_dpo/beta_margin_grad_std": 0.04353627562522888, "beta_dpo/beta_margin_mean": 0.11588773876428604, "beta_dpo/beta_margin_std": 0.17700567841529846, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.7123887538909912, "beta_dpo/gap_mean": 111.26490783691406, "beta_dpo/gap_std": 163.46185302734375, "beta_dpo/loss_margin_mean": 115.88773345947266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.604992657856094, "grad_norm": 7.504628658294678, "learning_rate": 2.0406262054585738e-07, "logits/chosen": -0.3158118724822998, "logits/rejected": -0.32687675952911377, "loss": 1.3101, "step": 412 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4713848829269409, "beta_dpo/beta_margin_grad_std": 0.04106110334396362, "beta_dpo/beta_margin_mean": 0.11544950306415558, "beta_dpo/beta_margin_std": 0.16624687612056732, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9482701420783997, "beta_dpo/gap_mean": 110.88575744628906, "beta_dpo/gap_std": 163.36767578125, "beta_dpo/loss_margin_mean": 115.44949340820312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6064610866372981, "grad_norm": 10.111505508422852, "learning_rate": 2.0280175213768205e-07, "logits/chosen": -0.33399781584739685, "logits/rejected": -0.3200353980064392, "loss": 1.2993, "step": 413 }, { "beta_dpo/beta": 1.1125692129135132, "beta_dpo/beta_margin_grad_mean": -0.3008454442024231, "beta_dpo/beta_margin_grad_std": 0.29388001561164856, "beta_dpo/beta_margin_mean": 208.84002685546875, "beta_dpo/beta_margin_std": 342.9871826171875, "beta_dpo/beta_used": 1.1125692129135132, "beta_dpo/beta_used_raw": 0.5759499669075012, "beta_dpo/gap_mean": 114.5771484375, "beta_dpo/gap_std": 164.32669067382812, "beta_dpo/loss_margin_mean": 140.61228942871094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6079295154185022, "grad_norm": 10158.8984375, "learning_rate": 2.0154212744723247e-07, "logits/chosen": -0.29129675030708313, "logits/rejected": -0.28304004669189453, "loss": 18.094, "step": 414 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47645503282546997, "beta_dpo/beta_margin_grad_std": 0.04023678973317146, "beta_dpo/beta_margin_mean": 0.09504882246255875, "beta_dpo/beta_margin_std": 0.1632952094078064, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.1535353660583496, "beta_dpo/gap_mean": 116.05256652832031, "beta_dpo/gap_std": 165.4222412109375, "beta_dpo/loss_margin_mean": 95.04881286621094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6093979441997063, "grad_norm": 7.246009826660156, "learning_rate": 2.002837796667909e-07, "logits/chosen": -0.3636121153831482, "logits/rejected": -0.35459795594215393, "loss": 1.3127, "step": 415 }, { "beta_dpo/beta": 0.8338208198547363, "beta_dpo/beta_margin_grad_mean": -0.17009158432483673, "beta_dpo/beta_margin_grad_std": 0.35257911682128906, "beta_dpo/beta_margin_mean": 129.33334350585938, "beta_dpo/beta_margin_std": 189.8321990966797, "beta_dpo/beta_used": 0.8338208198547363, "beta_dpo/beta_used_raw": 0.8338208198547363, "beta_dpo/gap_mean": 118.31330108642578, "beta_dpo/gap_std": 161.25177001953125, "beta_dpo/loss_margin_mean": 147.69627380371094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6108663729809104, "grad_norm": 9633.19921875, "learning_rate": 1.990267419549914e-07, "logits/chosen": -0.3395143449306488, "logits/rejected": -0.32628241181373596, "loss": 7.9484, "step": 416 }, { "beta_dpo/beta": 0.3650580644607544, "beta_dpo/beta_margin_grad_mean": -0.3305802643299103, "beta_dpo/beta_margin_grad_std": 0.3116385340690613, "beta_dpo/beta_margin_mean": 45.415931701660156, "beta_dpo/beta_margin_std": 81.82047271728516, "beta_dpo/beta_used": 0.3650580644607544, "beta_dpo/beta_used_raw": 0.1691010594367981, "beta_dpo/gap_mean": 119.18829345703125, "beta_dpo/gap_std": 156.21324157714844, "beta_dpo/loss_margin_mean": 115.47030639648438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6123348017621145, "grad_norm": 3923.328857421875, "learning_rate": 1.9777104743594686e-07, "logits/chosen": -0.27080368995666504, "logits/rejected": -0.22706595063209534, "loss": 6.8179, "step": 417 }, { "beta_dpo/beta": 0.1472662091255188, "beta_dpo/beta_margin_grad_mean": -0.31466129422187805, "beta_dpo/beta_margin_grad_std": 0.29242756962776184, "beta_dpo/beta_margin_mean": 24.18821907043457, "beta_dpo/beta_margin_std": 42.399009704589844, "beta_dpo/beta_used": 0.1472662091255188, "beta_dpo/beta_used_raw": -0.4067423641681671, "beta_dpo/gap_mean": 119.43331909179688, "beta_dpo/gap_std": 159.44818115234375, "beta_dpo/loss_margin_mean": 129.963623046875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6138032305433186, "grad_norm": 956.6565551757812, "learning_rate": 1.965167291983757e-07, "logits/chosen": -0.38150128722190857, "logits/rejected": -0.33936968445777893, "loss": 2.2555, "step": 418 }, { "beta_dpo/beta": 0.04090619087219238, "beta_dpo/beta_margin_grad_mean": -0.3300994336605072, "beta_dpo/beta_margin_grad_std": 0.2953225076198578, "beta_dpo/beta_margin_mean": 5.522484302520752, "beta_dpo/beta_margin_std": 10.368701934814453, "beta_dpo/beta_used": 0.04090619087219238, "beta_dpo/beta_used_raw": -0.6058524250984192, "beta_dpo/gap_mean": 123.40135192871094, "beta_dpo/gap_std": 159.61978149414062, "beta_dpo/loss_margin_mean": 131.7987060546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6152716593245228, "grad_norm": 406.3208312988281, "learning_rate": 1.9526382029472988e-07, "logits/chosen": -0.3685181736946106, "logits/rejected": -0.35807985067367554, "loss": 1.3448, "step": 419 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.479174941778183, "beta_dpo/beta_margin_grad_std": 0.043484870344400406, "beta_dpo/beta_margin_mean": 0.08427228033542633, "beta_dpo/beta_margin_std": 0.17619559168815613, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7748525738716125, "beta_dpo/gap_mean": 117.49530029296875, "beta_dpo/gap_std": 161.63946533203125, "beta_dpo/loss_margin_mean": 84.27227783203125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6167400881057269, "grad_norm": 8.381654739379883, "learning_rate": 1.9401235374032425e-07, "logits/chosen": -0.34530162811279297, "logits/rejected": -0.2882389426231384, "loss": 1.2911, "step": 420 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4776723086833954, "beta_dpo/beta_margin_grad_std": 0.03428473323583603, "beta_dpo/beta_margin_mean": 0.08976796269416809, "beta_dpo/beta_margin_std": 0.1381371021270752, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9457611441612244, "beta_dpo/gap_mean": 111.70301818847656, "beta_dpo/gap_std": 160.45973205566406, "beta_dpo/loss_margin_mean": 89.76795959472656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.618208516886931, "grad_norm": 6.698497772216797, "learning_rate": 1.9276236251246653e-07, "logits/chosen": -0.3635827600955963, "logits/rejected": -0.3487810492515564, "loss": 1.2975, "step": 421 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.476465106010437, "beta_dpo/beta_margin_grad_std": 0.03539099171757698, "beta_dpo/beta_margin_mean": 0.09482631832361221, "beta_dpo/beta_margin_std": 0.14304772019386292, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9157909154891968, "beta_dpo/gap_mean": 108.79386901855469, "beta_dpo/gap_std": 155.77139282226562, "beta_dpo/loss_margin_mean": 94.8263168334961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6196769456681351, "grad_norm": 7.295708179473877, "learning_rate": 1.9151387954958792e-07, "logits/chosen": -0.323574960231781, "logits/rejected": -0.3058650493621826, "loss": 1.3, "step": 422 }, { "beta_dpo/beta": 0.5498670339584351, "beta_dpo/beta_margin_grad_mean": -0.19201448559761047, "beta_dpo/beta_margin_grad_std": 0.38938337564468384, "beta_dpo/beta_margin_mean": 67.63153839111328, "beta_dpo/beta_margin_std": 82.61705017089844, "beta_dpo/beta_used": 0.5498670339584351, "beta_dpo/beta_used_raw": 0.5498670339584351, "beta_dpo/gap_mean": 111.04264831542969, "beta_dpo/gap_std": 153.08340454101562, "beta_dpo/loss_margin_mean": 124.84105682373047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6211453744493393, "grad_norm": 2952.7294921875, "learning_rate": 1.902669377503756e-07, "logits/chosen": -0.29522740840911865, "logits/rejected": -0.2932446002960205, "loss": 5.8705, "step": 423 }, { "beta_dpo/beta": 0.027477234601974487, "beta_dpo/beta_margin_grad_mean": -0.31576114892959595, "beta_dpo/beta_margin_grad_std": 0.28133726119995117, "beta_dpo/beta_margin_mean": 3.6695759296417236, "beta_dpo/beta_margin_std": 6.411843299865723, "beta_dpo/beta_used": 0.027477234601974487, "beta_dpo/beta_used_raw": -1.4149752855300903, "beta_dpo/gap_mean": 112.22328186035156, "beta_dpo/gap_std": 152.5062255859375, "beta_dpo/loss_margin_mean": 109.46617126464844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6226138032305433, "grad_norm": 137.83319091796875, "learning_rate": 1.890215699729057e-07, "logits/chosen": -0.3986721932888031, "logits/rejected": -0.3727181553840637, "loss": 0.9681, "step": 424 }, { "beta_dpo/beta": 0.5835731625556946, "beta_dpo/beta_margin_grad_mean": -0.3327001929283142, "beta_dpo/beta_margin_grad_std": 0.312762588262558, "beta_dpo/beta_margin_mean": 72.92134857177734, "beta_dpo/beta_margin_std": 129.18519592285156, "beta_dpo/beta_used": 0.5835731625556946, "beta_dpo/beta_used_raw": 0.4376869797706604, "beta_dpo/gap_mean": 109.38998413085938, "beta_dpo/gap_std": 150.577880859375, "beta_dpo/loss_margin_mean": 106.90679168701172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6240822320117474, "grad_norm": 6124.79150390625, "learning_rate": 1.8777780903377732e-07, "logits/chosen": -0.3280683159828186, "logits/rejected": -0.32920628786087036, "loss": 6.1878, "step": 425 }, { "beta_dpo/beta": 0.8895680904388428, "beta_dpo/beta_margin_grad_mean": -0.30061760544776917, "beta_dpo/beta_margin_grad_std": 0.29346781969070435, "beta_dpo/beta_margin_mean": 142.06744384765625, "beta_dpo/beta_margin_std": 253.59666442871094, "beta_dpo/beta_used": 0.8895680904388428, "beta_dpo/beta_used_raw": -0.7809062600135803, "beta_dpo/gap_mean": 111.31645965576172, "beta_dpo/gap_std": 149.850341796875, "beta_dpo/loss_margin_mean": 127.76013946533203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6255506607929515, "grad_norm": 5486.13525390625, "learning_rate": 1.8653568770724803e-07, "logits/chosen": -0.37183499336242676, "logits/rejected": -0.31186115741729736, "loss": 8.4638, "step": 426 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4798411726951599, "beta_dpo/beta_margin_grad_std": 0.03274958208203316, "beta_dpo/beta_margin_mean": 0.08117052912712097, "beta_dpo/beta_margin_std": 0.13242076337337494, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.4412474632263184, "beta_dpo/gap_mean": 109.63316345214844, "beta_dpo/gap_std": 148.7486572265625, "beta_dpo/loss_margin_mean": 81.17052459716797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6270190895741556, "grad_norm": 6.825258731842041, "learning_rate": 1.8529523872436977e-07, "logits/chosen": -0.3810279965400696, "logits/rejected": -0.35081952810287476, "loss": 1.306, "step": 427 }, { "beta_dpo/beta": 0.41161054372787476, "beta_dpo/beta_margin_grad_mean": -0.32943397760391235, "beta_dpo/beta_margin_grad_std": 0.30981266498565674, "beta_dpo/beta_margin_mean": 47.70144271850586, "beta_dpo/beta_margin_std": 109.32994842529297, "beta_dpo/beta_used": 0.41161054372787476, "beta_dpo/beta_used_raw": -0.37105491757392883, "beta_dpo/gap_mean": 109.1749267578125, "beta_dpo/gap_std": 149.90882873535156, "beta_dpo/loss_margin_mean": 120.1363296508789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6284875183553598, "grad_norm": 3710.728271484375, "learning_rate": 1.8405649477212697e-07, "logits/chosen": -0.37873727083206177, "logits/rejected": -0.37077072262763977, "loss": 3.3058, "step": 428 }, { "beta_dpo/beta": 0.4344549775123596, "beta_dpo/beta_margin_grad_mean": -0.2335137575864792, "beta_dpo/beta_margin_grad_std": 0.4133719801902771, "beta_dpo/beta_margin_mean": 48.72703552246094, "beta_dpo/beta_margin_std": 64.88159942626953, "beta_dpo/beta_used": 0.4344549775123596, "beta_dpo/beta_used_raw": 0.4344549775123596, "beta_dpo/gap_mean": 107.56082916259766, "beta_dpo/gap_std": 150.14230346679688, "beta_dpo/loss_margin_mean": 103.24775695800781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6299559471365639, "grad_norm": 3259.48974609375, "learning_rate": 1.828194884925749e-07, "logits/chosen": -0.38967394828796387, "logits/rejected": -0.33787745237350464, "loss": 7.8201, "step": 429 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4721178114414215, "beta_dpo/beta_margin_grad_std": 0.03465087339282036, "beta_dpo/beta_margin_mean": 0.11242672055959702, "beta_dpo/beta_margin_std": 0.14071322977542877, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.2862778902053833, "beta_dpo/gap_mean": 109.13970947265625, "beta_dpo/gap_std": 147.79107666015625, "beta_dpo/loss_margin_mean": 112.42671203613281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.631424375917768, "grad_norm": 7.614285945892334, "learning_rate": 1.8158425248197928e-07, "logits/chosen": -0.4028991460800171, "logits/rejected": -0.40245670080184937, "loss": 1.3051, "step": 430 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4677823781967163, "beta_dpo/beta_margin_grad_std": 0.03935808688402176, "beta_dpo/beta_margin_mean": 0.1300922930240631, "beta_dpo/beta_margin_std": 0.15987038612365723, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7770711183547974, "beta_dpo/gap_mean": 113.27009582519531, "beta_dpo/gap_std": 150.56829833984375, "beta_dpo/loss_margin_mean": 130.09228515625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6328928046989721, "grad_norm": 6.900725841522217, "learning_rate": 1.8035081928995788e-07, "logits/chosen": -0.33334821462631226, "logits/rejected": -0.32843929529190063, "loss": 1.2936, "step": 431 }, { "beta_dpo/beta": 0.10785573720932007, "beta_dpo/beta_margin_grad_mean": -0.31203174591064453, "beta_dpo/beta_margin_grad_std": 0.2826971411705017, "beta_dpo/beta_margin_mean": 17.94474220275879, "beta_dpo/beta_margin_std": 30.068361282348633, "beta_dpo/beta_used": 0.10785573720932007, "beta_dpo/beta_used_raw": -0.6099668145179749, "beta_dpo/gap_mean": 113.14790344238281, "beta_dpo/gap_std": 143.69342041015625, "beta_dpo/loss_margin_mean": 124.32618713378906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6343612334801763, "grad_norm": 871.7344970703125, "learning_rate": 1.791192214186223e-07, "logits/chosen": -0.4066033363342285, "logits/rejected": -0.37539827823638916, "loss": 2.6038, "step": 432 }, { "beta_dpo/beta": 0.2765732407569885, "beta_dpo/beta_margin_grad_mean": -0.31871679425239563, "beta_dpo/beta_margin_grad_std": 0.3027940094470978, "beta_dpo/beta_margin_mean": 27.676023483276367, "beta_dpo/beta_margin_std": 58.62560272216797, "beta_dpo/beta_used": 0.2765732407569885, "beta_dpo/beta_used_raw": -0.3640483319759369, "beta_dpo/gap_mean": 112.42630767822266, "beta_dpo/gap_std": 144.95359802246094, "beta_dpo/loss_margin_mean": 93.55750274658203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6358296622613803, "grad_norm": 21.230777740478516, "learning_rate": 1.7788949132172193e-07, "logits/chosen": -0.358863890171051, "logits/rejected": -0.34688135981559753, "loss": 0.6547, "step": 433 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47845569252967834, "beta_dpo/beta_margin_grad_std": 0.030826503410935402, "beta_dpo/beta_margin_mean": 0.0868024155497551, "beta_dpo/beta_margin_std": 0.12513087689876556, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.8638619184494019, "beta_dpo/gap_mean": 108.65299987792969, "beta_dpo/gap_std": 142.203125, "beta_dpo/loss_margin_mean": 86.80241394042969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6372980910425844, "grad_norm": 6.974902629852295, "learning_rate": 1.7666166140378853e-07, "logits/chosen": -0.39474016427993774, "logits/rejected": -0.36454617977142334, "loss": 1.3143, "step": 434 }, { "beta_dpo/beta": 0.4795774221420288, "beta_dpo/beta_margin_grad_mean": -0.12648658454418182, "beta_dpo/beta_margin_grad_std": 0.3134034276008606, "beta_dpo/beta_margin_mean": 54.45040512084961, "beta_dpo/beta_margin_std": 62.09480285644531, "beta_dpo/beta_used": 0.4795774221420288, "beta_dpo/beta_used_raw": 0.4795774221420288, "beta_dpo/gap_mean": 108.19963073730469, "beta_dpo/gap_std": 141.86123657226562, "beta_dpo/loss_margin_mean": 114.60810089111328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6387665198237885, "grad_norm": 1543.473388671875, "learning_rate": 1.7543576401928218e-07, "logits/chosen": -0.3778401017189026, "logits/rejected": -0.35977697372436523, "loss": 2.6214, "step": 435 }, { "beta_dpo/beta": 0.055185671895742416, "beta_dpo/beta_margin_grad_mean": -0.30519527196884155, "beta_dpo/beta_margin_grad_std": 0.2901572585105896, "beta_dpo/beta_margin_mean": 5.6773810386657715, "beta_dpo/beta_margin_std": 10.930699348449707, "beta_dpo/beta_used": 0.055185671895742416, "beta_dpo/beta_used_raw": -1.441216230392456, "beta_dpo/gap_mean": 106.91453552246094, "beta_dpo/gap_std": 138.24383544921875, "beta_dpo/loss_margin_mean": 96.44285583496094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6402349486049926, "grad_norm": 229.00344848632812, "learning_rate": 1.742118314717391e-07, "logits/chosen": -0.40563905239105225, "logits/rejected": -0.3649734854698181, "loss": 1.3673, "step": 436 }, { "beta_dpo/beta": 0.44650039076805115, "beta_dpo/beta_margin_grad_mean": -0.31715255975723267, "beta_dpo/beta_margin_grad_std": 0.30363377928733826, "beta_dpo/beta_margin_mean": 45.99268341064453, "beta_dpo/beta_margin_std": 82.80380249023438, "beta_dpo/beta_used": 0.44650039076805115, "beta_dpo/beta_used_raw": -0.26632630825042725, "beta_dpo/gap_mean": 104.29106140136719, "beta_dpo/gap_std": 136.22210693359375, "beta_dpo/loss_margin_mean": 96.21728515625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6417033773861968, "grad_norm": 1593.89501953125, "learning_rate": 1.7298989601292036e-07, "logits/chosen": -0.3833288848400116, "logits/rejected": -0.3413906693458557, "loss": 5.1701, "step": 437 }, { "beta_dpo/beta": 1.1907906532287598, "beta_dpo/beta_margin_grad_mean": -0.1750006526708603, "beta_dpo/beta_margin_grad_std": 0.37429773807525635, "beta_dpo/beta_margin_mean": 154.26736450195312, "beta_dpo/beta_margin_std": 161.1520538330078, "beta_dpo/beta_used": 1.1907906532287598, "beta_dpo/beta_used_raw": 1.1907906532287598, "beta_dpo/gap_mean": 107.95907592773438, "beta_dpo/gap_std": 133.67709350585938, "beta_dpo/loss_margin_mean": 126.7836685180664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6431718061674009, "grad_norm": 7641.5771484375, "learning_rate": 1.7176998984196144e-07, "logits/chosen": -0.37084126472473145, "logits/rejected": -0.3320963382720947, "loss": 8.9122, "step": 438 }, { "beta_dpo/beta": 0.07319752871990204, "beta_dpo/beta_margin_grad_mean": -0.32128748297691345, "beta_dpo/beta_margin_grad_std": 0.2859705984592438, "beta_dpo/beta_margin_mean": 9.903467178344727, "beta_dpo/beta_margin_std": 17.277389526367188, "beta_dpo/beta_used": 0.07319752871990204, "beta_dpo/beta_used_raw": -1.7231221199035645, "beta_dpo/gap_mean": 107.83575439453125, "beta_dpo/gap_std": 133.11056518554688, "beta_dpo/loss_margin_mean": 90.30883026123047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.644640234948605, "grad_norm": 512.3974609375, "learning_rate": 1.7055214510452458e-07, "logits/chosen": -0.40375328063964844, "logits/rejected": -0.4028066396713257, "loss": 1.4163, "step": 439 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4809180796146393, "beta_dpo/beta_margin_grad_std": 0.031033983454108238, "beta_dpo/beta_margin_mean": 0.0767781138420105, "beta_dpo/beta_margin_std": 0.1251077651977539, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.205794334411621, "beta_dpo/gap_mean": 100.31968688964844, "beta_dpo/gap_std": 130.88662719726562, "beta_dpo/loss_margin_mean": 76.77810668945312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6461086637298091, "grad_norm": 11.201451301574707, "learning_rate": 1.6933639389195134e-07, "logits/chosen": -0.48015761375427246, "logits/rejected": -0.44124317169189453, "loss": 1.3123, "step": 440 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4754822850227356, "beta_dpo/beta_margin_grad_std": 0.036441490054130554, "beta_dpo/beta_margin_mean": 0.09894155710935593, "beta_dpo/beta_margin_std": 0.1482171267271042, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.326048493385315, "beta_dpo/gap_mean": 99.0499267578125, "beta_dpo/gap_std": 131.88418579101562, "beta_dpo/loss_margin_mean": 98.94155883789062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6475770925110133, "grad_norm": 12.307683944702148, "learning_rate": 1.681227682404166e-07, "logits/chosen": -0.4150615930557251, "logits/rejected": -0.4018522799015045, "loss": 1.3149, "step": 441 }, { "beta_dpo/beta": 0.19351361691951752, "beta_dpo/beta_margin_grad_mean": -0.2922385334968567, "beta_dpo/beta_margin_grad_std": 0.2803710997104645, "beta_dpo/beta_margin_mean": 28.820743560791016, "beta_dpo/beta_margin_std": 45.040016174316406, "beta_dpo/beta_used": 0.19351361691951752, "beta_dpo/beta_used_raw": -0.9432244896888733, "beta_dpo/gap_mean": 101.88089752197266, "beta_dpo/gap_std": 133.10354614257812, "beta_dpo/loss_margin_mean": 124.86181640625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6490455212922174, "grad_norm": 920.883056640625, "learning_rate": 1.669113001300851e-07, "logits/chosen": -0.42568036913871765, "logits/rejected": -0.4096643924713135, "loss": 1.2978, "step": 442 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.48161694407463074, "beta_dpo/beta_margin_grad_std": 0.03176787868142128, "beta_dpo/beta_margin_mean": 0.07391852885484695, "beta_dpo/beta_margin_std": 0.12798674404621124, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9595794677734375, "beta_dpo/gap_mean": 100.70872497558594, "beta_dpo/gap_std": 131.86151123046875, "beta_dpo/loss_margin_mean": 73.91852569580078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6505139500734214, "grad_norm": 8.173919677734375, "learning_rate": 1.6570202148426815e-07, "logits/chosen": -0.4086863398551941, "logits/rejected": -0.38320356607437134, "loss": 1.3075, "step": 443 }, { "beta_dpo/beta": 0.13834300637245178, "beta_dpo/beta_margin_grad_mean": -0.29932746291160583, "beta_dpo/beta_margin_grad_std": 0.28772518038749695, "beta_dpo/beta_margin_mean": 17.195384979248047, "beta_dpo/beta_margin_std": 30.380125045776367, "beta_dpo/beta_used": 0.13834300637245178, "beta_dpo/beta_used_raw": -0.6297559142112732, "beta_dpo/gap_mean": 102.39837646484375, "beta_dpo/gap_std": 133.09300231933594, "beta_dpo/loss_margin_mean": 126.64820861816406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6519823788546255, "grad_norm": 1877.506103515625, "learning_rate": 1.6449496416858282e-07, "logits/chosen": -0.38396507501602173, "logits/rejected": -0.3728168308734894, "loss": 2.6417, "step": 444 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4744797348976135, "beta_dpo/beta_margin_grad_std": 0.03470303490757942, "beta_dpo/beta_margin_mean": 0.10285831242799759, "beta_dpo/beta_margin_std": 0.14040379226207733, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7490635514259338, "beta_dpo/gap_mean": 103.6148681640625, "beta_dpo/gap_std": 134.420654296875, "beta_dpo/loss_margin_mean": 102.85830688476562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6534508076358296, "grad_norm": 15.09030818939209, "learning_rate": 1.6329015999011182e-07, "logits/chosen": -0.4086730480194092, "logits/rejected": -0.3865576982498169, "loss": 1.3019, "step": 445 }, { "beta_dpo/beta": 0.28921666741371155, "beta_dpo/beta_margin_grad_mean": -0.27025842666625977, "beta_dpo/beta_margin_grad_std": 0.26976633071899414, "beta_dpo/beta_margin_mean": 44.87013626098633, "beta_dpo/beta_margin_std": 70.5100326538086, "beta_dpo/beta_used": 0.28921666741371155, "beta_dpo/beta_used_raw": -0.14518234133720398, "beta_dpo/gap_mean": 104.75509643554688, "beta_dpo/gap_std": 132.56423950195312, "beta_dpo/loss_margin_mean": 121.7738265991211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6549192364170338, "grad_norm": 680.3995971679688, "learning_rate": 1.6208764069656578e-07, "logits/chosen": -0.42230162024497986, "logits/rejected": -0.42033088207244873, "loss": 1.0661, "step": 446 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4698044955730438, "beta_dpo/beta_margin_grad_std": 0.03518033027648926, "beta_dpo/beta_margin_mean": 0.12202388048171997, "beta_dpo/beta_margin_std": 0.14478100836277008, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.2607978880405426, "beta_dpo/gap_mean": 110.31854248046875, "beta_dpo/gap_std": 135.51388549804688, "beta_dpo/loss_margin_mean": 122.02387237548828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6563876651982379, "grad_norm": 12.893980026245117, "learning_rate": 1.608874379754465e-07, "logits/chosen": -0.4422151446342468, "logits/rejected": -0.45059633255004883, "loss": 1.2871, "step": 447 }, { "beta_dpo/beta": 0.8242188692092896, "beta_dpo/beta_margin_grad_mean": -0.2527080774307251, "beta_dpo/beta_margin_grad_std": 0.254643052816391, "beta_dpo/beta_margin_mean": 125.9197998046875, "beta_dpo/beta_margin_std": 187.5569305419922, "beta_dpo/beta_used": 0.8242188692092896, "beta_dpo/beta_used_raw": -0.4037218689918518, "beta_dpo/gap_mean": 111.61314392089844, "beta_dpo/gap_std": 135.30453491210938, "beta_dpo/loss_margin_mean": 128.5632781982422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.657856093979442, "grad_norm": 4.838625907897949, "learning_rate": 1.5968958345321177e-07, "logits/chosen": -0.3758270740509033, "logits/rejected": -0.3679637312889099, "loss": 0.6614, "step": 448 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4730731248855591, "beta_dpo/beta_margin_grad_std": 0.04196110740303993, "beta_dpo/beta_margin_mean": 0.1088365912437439, "beta_dpo/beta_margin_std": 0.17029906809329987, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4290629029273987, "beta_dpo/gap_mean": 113.01295471191406, "beta_dpo/gap_std": 139.9627685546875, "beta_dpo/loss_margin_mean": 108.83658599853516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6593245227606461, "grad_norm": 8.870427131652832, "learning_rate": 1.584941086944423e-07, "logits/chosen": -0.4272603690624237, "logits/rejected": -0.40170085430145264, "loss": 1.2892, "step": 449 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46986889839172363, "beta_dpo/beta_margin_grad_std": 0.0339692123234272, "beta_dpo/beta_margin_mean": 0.12120691686868668, "beta_dpo/beta_margin_std": 0.13755354285240173, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.313100129365921, "beta_dpo/gap_mean": 113.51698303222656, "beta_dpo/gap_std": 141.5602264404297, "beta_dpo/loss_margin_mean": 121.2069091796875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6607929515418502, "grad_norm": 9.47729206085205, "learning_rate": 1.573010452010098e-07, "logits/chosen": -0.3865205645561218, "logits/rejected": -0.38359227776527405, "loss": 1.2847, "step": 450 }, { "beta_dpo/beta": 0.3940798044204712, "beta_dpo/beta_margin_grad_mean": -0.33411669731140137, "beta_dpo/beta_margin_grad_std": 0.31294018030166626, "beta_dpo/beta_margin_mean": 42.9352912902832, "beta_dpo/beta_margin_std": 87.50625610351562, "beta_dpo/beta_used": 0.3940798044204712, "beta_dpo/beta_used_raw": -0.24944308400154114, "beta_dpo/gap_mean": 110.95838928222656, "beta_dpo/gap_std": 140.57334899902344, "beta_dpo/loss_margin_mean": 94.27608489990234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6622613803230544, "grad_norm": 3518.580078125, "learning_rate": 1.5611042441124687e-07, "logits/chosen": -0.3372274339199066, "logits/rejected": -0.3046179413795471, "loss": 7.5031, "step": 451 }, { "beta_dpo/beta": 0.7567883729934692, "beta_dpo/beta_margin_grad_mean": -0.17167411744594574, "beta_dpo/beta_margin_grad_std": 0.37626853585243225, "beta_dpo/beta_margin_mean": 89.04338073730469, "beta_dpo/beta_margin_std": 102.09488677978516, "beta_dpo/beta_used": 0.7567883729934692, "beta_dpo/beta_used_raw": 0.7567883729934692, "beta_dpo/gap_mean": 111.77011108398438, "beta_dpo/gap_std": 139.58013916015625, "beta_dpo/loss_margin_mean": 117.5452651977539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6637298091042585, "grad_norm": 4123.4677734375, "learning_rate": 1.549222776991186e-07, "logits/chosen": -0.3441423773765564, "logits/rejected": -0.35753265023231506, "loss": 12.507, "step": 452 }, { "beta_dpo/beta": 0.0010159736266359687, "beta_dpo/beta_margin_grad_mean": -0.47485530376434326, "beta_dpo/beta_margin_grad_std": 0.03558202460408211, "beta_dpo/beta_margin_mean": 0.10151873528957367, "beta_dpo/beta_margin_std": 0.14481480419635773, "beta_dpo/beta_used": 0.0010159736266359687, "beta_dpo/beta_used_raw": -1.122417688369751, "beta_dpo/gap_mean": 111.07215881347656, "beta_dpo/gap_std": 140.66952514648438, "beta_dpo/loss_margin_mean": 99.66301727294922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6651982378854625, "grad_norm": 8.228669166564941, "learning_rate": 1.5373663637339584e-07, "logits/chosen": -0.4157373905181885, "logits/rejected": -0.38169363141059875, "loss": 1.2991, "step": 453 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46827903389930725, "beta_dpo/beta_margin_grad_std": 0.03779821842908859, "beta_dpo/beta_margin_mean": 0.1280444711446762, "beta_dpo/beta_margin_std": 0.15373089909553528, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6103986501693726, "beta_dpo/gap_mean": 112.77023315429688, "beta_dpo/gap_std": 141.88412475585938, "beta_dpo/loss_margin_mean": 128.04446411132812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6666666666666666, "grad_norm": 7.617781162261963, "learning_rate": 1.5255353167683017e-07, "logits/chosen": -0.3238060176372528, "logits/rejected": -0.2969810962677002, "loss": 1.2903, "step": 454 }, { "beta_dpo/beta": 0.23283345997333527, "beta_dpo/beta_margin_grad_mean": -0.27318888902664185, "beta_dpo/beta_margin_grad_std": 0.2729749083518982, "beta_dpo/beta_margin_mean": 31.724552154541016, "beta_dpo/beta_margin_std": 55.67319107055664, "beta_dpo/beta_used": 0.23283345997333527, "beta_dpo/beta_used_raw": 0.23084740340709686, "beta_dpo/gap_mean": 119.1419677734375, "beta_dpo/gap_std": 145.837158203125, "beta_dpo/loss_margin_mean": 164.03538513183594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6681350954478708, "grad_norm": 257.9051208496094, "learning_rate": 1.5137299478533064e-07, "logits/chosen": -0.3444521725177765, "logits/rejected": -0.35367467999458313, "loss": 0.7604, "step": 455 }, { "beta_dpo/beta": 0.028770416975021362, "beta_dpo/beta_margin_grad_mean": -0.30041444301605225, "beta_dpo/beta_margin_grad_std": 0.25256428122520447, "beta_dpo/beta_margin_mean": 4.620020866394043, "beta_dpo/beta_margin_std": 7.49506950378418, "beta_dpo/beta_used": 0.028770416975021362, "beta_dpo/beta_used_raw": -0.5959498286247253, "beta_dpo/gap_mean": 127.31085205078125, "beta_dpo/gap_std": 151.3060302734375, "beta_dpo/loss_margin_mean": 154.75982666015625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6696035242290749, "grad_norm": 182.11668395996094, "learning_rate": 1.5019505680714232e-07, "logits/chosen": -0.36195001006126404, "logits/rejected": -0.3621336817741394, "loss": 0.9313, "step": 456 }, { "beta_dpo/beta": 0.9050564765930176, "beta_dpo/beta_margin_grad_mean": -0.3133964538574219, "beta_dpo/beta_margin_grad_std": 0.30206099152565, "beta_dpo/beta_margin_mean": 119.8252182006836, "beta_dpo/beta_margin_std": 193.12596130371094, "beta_dpo/beta_used": 0.9050564765930176, "beta_dpo/beta_used_raw": 0.25765174627304077, "beta_dpo/gap_mean": 128.869873046875, "beta_dpo/gap_std": 148.14273071289062, "beta_dpo/loss_margin_mean": 125.5430908203125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.671071953010279, "grad_norm": 2250.94482421875, "learning_rate": 1.4901974878202627e-07, "logits/chosen": -0.35765865445137024, "logits/rejected": -0.3306649625301361, "loss": 1.9311, "step": 457 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46133655309677124, "beta_dpo/beta_margin_grad_std": 0.03900197148323059, "beta_dpo/beta_margin_mean": 0.15646237134933472, "beta_dpo/beta_margin_std": 0.1608007401227951, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.31320202350616455, "beta_dpo/gap_mean": 133.1038818359375, "beta_dpo/gap_std": 151.08180236816406, "beta_dpo/loss_margin_mean": 156.4623565673828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6725403817914831, "grad_norm": 7.672088146209717, "learning_rate": 1.4784710168044212e-07, "logits/chosen": -0.3376292586326599, "logits/rejected": -0.31968408823013306, "loss": 1.2669, "step": 458 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46891355514526367, "beta_dpo/beta_margin_grad_std": 0.03510946035385132, "beta_dpo/beta_margin_mean": 0.12519963085651398, "beta_dpo/beta_margin_std": 0.14195367693901062, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7991423606872559, "beta_dpo/gap_mean": 132.22055053710938, "beta_dpo/gap_std": 149.7262420654297, "beta_dpo/loss_margin_mean": 125.19963073730469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6740088105726872, "grad_norm": 7.513828754425049, "learning_rate": 1.466771464027316e-07, "logits/chosen": -0.3106893301010132, "logits/rejected": -0.30481159687042236, "loss": 1.2765, "step": 459 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46639198064804077, "beta_dpo/beta_margin_grad_std": 0.03753071278333664, "beta_dpo/beta_margin_mean": 0.13555875420570374, "beta_dpo/beta_margin_std": 0.15229398012161255, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.736950159072876, "beta_dpo/gap_mean": 132.47604370117188, "beta_dpo/gap_std": 149.71617126464844, "beta_dpo/loss_margin_mean": 135.55874633789062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6754772393538914, "grad_norm": 9.385546684265137, "learning_rate": 1.4550991377830423e-07, "logits/chosen": -0.35042130947113037, "logits/rejected": -0.36293381452560425, "loss": 1.2756, "step": 460 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4717380404472351, "beta_dpo/beta_margin_grad_std": 0.03876164183020592, "beta_dpo/beta_margin_mean": 0.11412369459867477, "beta_dpo/beta_margin_std": 0.15732567012310028, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.1008927822113037, "beta_dpo/gap_mean": 128.8672637939453, "beta_dpo/gap_std": 150.39163208007812, "beta_dpo/loss_margin_mean": 114.12368774414062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6769456681350955, "grad_norm": 9.00002670288086, "learning_rate": 1.4434543456482518e-07, "logits/chosen": -0.3559180200099945, "logits/rejected": -0.3427043855190277, "loss": 1.2997, "step": 461 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4784083962440491, "beta_dpo/beta_margin_grad_std": 0.03373510017991066, "beta_dpo/beta_margin_mean": 0.08696634322404861, "beta_dpo/beta_margin_std": 0.1362220048904419, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.4599204063415527, "beta_dpo/gap_mean": 123.946533203125, "beta_dpo/gap_std": 149.71881103515625, "beta_dpo/loss_margin_mean": 86.9663314819336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6784140969162996, "grad_norm": 9.362037658691406, "learning_rate": 1.4318373944740484e-07, "logits/chosen": -0.3212631940841675, "logits/rejected": -0.29980742931365967, "loss": 1.3102, "step": 462 }, { "beta_dpo/beta": 0.2805536985397339, "beta_dpo/beta_margin_grad_mean": -0.3316049575805664, "beta_dpo/beta_margin_grad_std": 0.31257641315460205, "beta_dpo/beta_margin_mean": 38.56230926513672, "beta_dpo/beta_margin_std": 81.53507232666016, "beta_dpo/beta_used": 0.2805536985397339, "beta_dpo/beta_used_raw": -0.2606269419193268, "beta_dpo/gap_mean": 119.78553771972656, "beta_dpo/gap_std": 151.25320434570312, "beta_dpo/loss_margin_mean": 117.82292938232422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6798825256975036, "grad_norm": 3597.947021484375, "learning_rate": 1.4202485903778976e-07, "logits/chosen": -0.3247862458229065, "logits/rejected": -0.3129928708076477, "loss": 7.63, "step": 463 }, { "beta_dpo/beta": 2.1228408813476562, "beta_dpo/beta_margin_grad_mean": -0.12389523535966873, "beta_dpo/beta_margin_grad_std": 0.3279002010822296, "beta_dpo/beta_margin_mean": 394.66033935546875, "beta_dpo/beta_margin_std": 431.92449951171875, "beta_dpo/beta_used": 2.1228408813476562, "beta_dpo/beta_used_raw": 2.1228408813476562, "beta_dpo/gap_mean": 131.84754943847656, "beta_dpo/gap_std": 157.7271728515625, "beta_dpo/loss_margin_mean": 192.6825714111328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6813509544787077, "grad_norm": 10341.1005859375, "learning_rate": 1.4086882387355658e-07, "logits/chosen": -0.3156416118144989, "logits/rejected": -0.3281491696834564, "loss": 29.0936, "step": 464 }, { "beta_dpo/beta": 0.4801773428916931, "beta_dpo/beta_margin_grad_mean": -0.16912737488746643, "beta_dpo/beta_margin_grad_std": 0.37140730023384094, "beta_dpo/beta_margin_mean": 81.35899353027344, "beta_dpo/beta_margin_std": 94.96959686279297, "beta_dpo/beta_used": 0.4801773428916931, "beta_dpo/beta_used_raw": 0.4801773428916931, "beta_dpo/gap_mean": 137.17782592773438, "beta_dpo/gap_std": 158.68795776367188, "beta_dpo/loss_margin_mean": 162.17996215820312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6828193832599119, "grad_norm": 4897.61328125, "learning_rate": 1.3971566441730714e-07, "logits/chosen": -0.31099051237106323, "logits/rejected": -0.305058091878891, "loss": 8.3958, "step": 465 }, { "beta_dpo/beta": 0.22260768711566925, "beta_dpo/beta_margin_grad_mean": -0.2829422950744629, "beta_dpo/beta_margin_grad_std": 0.2813977301120758, "beta_dpo/beta_margin_mean": 33.30300521850586, "beta_dpo/beta_margin_std": 57.53418731689453, "beta_dpo/beta_used": 0.22260768711566925, "beta_dpo/beta_used_raw": -0.26916056871414185, "beta_dpo/gap_mean": 139.38119506835938, "beta_dpo/gap_std": 160.36859130859375, "beta_dpo/loss_margin_mean": 131.21505737304688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.684287812041116, "grad_norm": 1151.1441650390625, "learning_rate": 1.3856541105586545e-07, "logits/chosen": -0.3135194778442383, "logits/rejected": -0.3104793429374695, "loss": 1.2206, "step": 466 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4690595865249634, "beta_dpo/beta_margin_grad_std": 0.05179882049560547, "beta_dpo/beta_margin_mean": 0.12564310431480408, "beta_dpo/beta_margin_std": 0.21079717576503754, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.9833605289459229, "beta_dpo/gap_mean": 137.7141571044922, "beta_dpo/gap_std": 169.05447387695312, "beta_dpo/loss_margin_mean": 125.64309692382812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6857562408223201, "grad_norm": 9.571708679199219, "learning_rate": 1.3741809409947729e-07, "logits/chosen": -0.31644725799560547, "logits/rejected": -0.29425540566444397, "loss": 1.292, "step": 467 }, { "beta_dpo/beta": 0.5442880988121033, "beta_dpo/beta_margin_grad_mean": -0.17244772613048553, "beta_dpo/beta_margin_grad_std": 0.3269096910953522, "beta_dpo/beta_margin_mean": 83.84257507324219, "beta_dpo/beta_margin_std": 139.0602569580078, "beta_dpo/beta_used": 0.5442880988121033, "beta_dpo/beta_used_raw": 0.5442880988121033, "beta_dpo/gap_mean": 136.60678100585938, "beta_dpo/gap_std": 168.23411560058594, "beta_dpo/loss_margin_mean": 139.57421875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6872246696035242, "grad_norm": 2363.861083984375, "learning_rate": 1.362737437810114e-07, "logits/chosen": -0.3771149516105652, "logits/rejected": -0.3516891598701477, "loss": 1.9544, "step": 468 }, { "beta_dpo/beta": 0.5691275596618652, "beta_dpo/beta_margin_grad_mean": -0.2979428172111511, "beta_dpo/beta_margin_grad_std": 0.2913264036178589, "beta_dpo/beta_margin_mean": 104.36015319824219, "beta_dpo/beta_margin_std": 166.2760467529297, "beta_dpo/beta_used": 0.5691275596618652, "beta_dpo/beta_used_raw": 0.13212749361991882, "beta_dpo/gap_mean": 139.35459899902344, "beta_dpo/gap_std": 167.7623291015625, "beta_dpo/loss_margin_mean": 148.57752990722656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6886930983847284, "grad_norm": 4023.0234375, "learning_rate": 1.351323902551631e-07, "logits/chosen": -0.3229216933250427, "logits/rejected": -0.2937919497489929, "loss": 5.6201, "step": 469 }, { "beta_dpo/beta": 0.058329131454229355, "beta_dpo/beta_margin_grad_mean": -0.30150657892227173, "beta_dpo/beta_margin_grad_std": 0.2844862639904022, "beta_dpo/beta_margin_mean": 8.915841102600098, "beta_dpo/beta_margin_std": 17.628265380859375, "beta_dpo/beta_used": 0.058329131454229355, "beta_dpo/beta_used_raw": -0.260947585105896, "beta_dpo/gap_mean": 140.06040954589844, "beta_dpo/gap_std": 169.35638427734375, "beta_dpo/loss_margin_mean": 151.863525390625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6901615271659325, "grad_norm": 635.5731201171875, "learning_rate": 1.339940635976592e-07, "logits/chosen": -0.2994263470172882, "logits/rejected": -0.2865986227989197, "loss": 1.8137, "step": 470 }, { "beta_dpo/beta": 0.14344525337219238, "beta_dpo/beta_margin_grad_mean": -0.32090723514556885, "beta_dpo/beta_margin_grad_std": 0.296132355928421, "beta_dpo/beta_margin_mean": 23.80760955810547, "beta_dpo/beta_margin_std": 40.966461181640625, "beta_dpo/beta_used": 0.14344525337219238, "beta_dpo/beta_used_raw": -1.259301781654358, "beta_dpo/gap_mean": 141.4301300048828, "beta_dpo/gap_std": 166.99551391601562, "beta_dpo/loss_margin_mean": 137.1492462158203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6916299559471366, "grad_norm": 660.4382934570312, "learning_rate": 1.3285879380446563e-07, "logits/chosen": -0.3323206603527069, "logits/rejected": -0.301265686750412, "loss": 1.5718, "step": 471 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46238476037979126, "beta_dpo/beta_margin_grad_std": 0.048712510615587234, "beta_dpo/beta_margin_mean": 0.15300215780735016, "beta_dpo/beta_margin_std": 0.2004023641347885, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.5177662372589111, "beta_dpo/gap_mean": 141.42642211914062, "beta_dpo/gap_std": 171.97683715820312, "beta_dpo/loss_margin_mean": 153.0021514892578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6930983847283406, "grad_norm": 9.515340805053711, "learning_rate": 1.317266107909975e-07, "logits/chosen": -0.46395474672317505, "logits/rejected": -0.4258913993835449, "loss": 1.2787, "step": 472 }, { "beta_dpo/beta": 0.12622235715389252, "beta_dpo/beta_margin_grad_mean": -0.3606536090373993, "beta_dpo/beta_margin_grad_std": 0.32541587948799133, "beta_dpo/beta_margin_mean": 11.348122596740723, "beta_dpo/beta_margin_std": 32.52213668823242, "beta_dpo/beta_used": 0.12622235715389252, "beta_dpo/beta_used_raw": -2.1221091747283936, "beta_dpo/gap_mean": 136.3826141357422, "beta_dpo/gap_std": 172.83595275878906, "beta_dpo/loss_margin_mean": 98.95618438720703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6945668135095447, "grad_norm": 874.2503051757812, "learning_rate": 1.3059754439133002e-07, "logits/chosen": -0.3121126890182495, "logits/rejected": -0.27456527948379517, "loss": 2.9427, "step": 473 }, { "beta_dpo/beta": 0.03025379776954651, "beta_dpo/beta_margin_grad_mean": -0.3477736711502075, "beta_dpo/beta_margin_grad_std": 0.26919984817504883, "beta_dpo/beta_margin_mean": 3.4796054363250732, "beta_dpo/beta_margin_std": 7.700491428375244, "beta_dpo/beta_used": 0.03025379776954651, "beta_dpo/beta_used_raw": -1.1924772262573242, "beta_dpo/gap_mean": 128.73321533203125, "beta_dpo/gap_std": 170.72265625, "beta_dpo/loss_margin_mean": 102.19025421142578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6960352422907489, "grad_norm": 241.4309539794922, "learning_rate": 1.2947162435741277e-07, "logits/chosen": -0.3187577426433563, "logits/rejected": -0.31267520785331726, "loss": 1.019, "step": 474 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4640824496746063, "beta_dpo/beta_margin_grad_std": 0.03909669071435928, "beta_dpo/beta_margin_mean": 0.14515595138072968, "beta_dpo/beta_margin_std": 0.1593308448791504, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.4561372995376587, "beta_dpo/gap_mean": 130.75253295898438, "beta_dpo/gap_std": 168.95263671875, "beta_dpo/loss_margin_mean": 145.15594482421875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.697503671071953, "grad_norm": 7.302783966064453, "learning_rate": 1.2834888035828596e-07, "logits/chosen": -0.3118668496608734, "logits/rejected": -0.32232552766799927, "loss": 1.2878, "step": 475 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4693569839000702, "beta_dpo/beta_margin_grad_std": 0.03457416966557503, "beta_dpo/beta_margin_mean": 0.12346017360687256, "beta_dpo/beta_margin_std": 0.13980108499526978, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6184031367301941, "beta_dpo/gap_mean": 130.04847717285156, "beta_dpo/gap_std": 165.11314392089844, "beta_dpo/loss_margin_mean": 123.46017456054688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6989720998531571, "grad_norm": 10.900651931762695, "learning_rate": 1.2722934197929802e-07, "logits/chosen": -0.32981306314468384, "logits/rejected": -0.3277033567428589, "loss": 1.2757, "step": 476 }, { "beta_dpo/beta": 0.08377163857221603, "beta_dpo/beta_margin_grad_mean": -0.3329217731952667, "beta_dpo/beta_margin_grad_std": 0.2996887266635895, "beta_dpo/beta_margin_mean": 9.991097450256348, "beta_dpo/beta_margin_std": 23.768993377685547, "beta_dpo/beta_used": 0.08377163857221603, "beta_dpo/beta_used_raw": -0.9268441200256348, "beta_dpo/gap_mean": 129.47628784179688, "beta_dpo/gap_std": 165.23104858398438, "beta_dpo/loss_margin_mean": 131.6189727783203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7004405286343612, "grad_norm": 881.2789306640625, "learning_rate": 1.2611303872132631e-07, "logits/chosen": -0.34101468324661255, "logits/rejected": -0.27440470457077026, "loss": 1.9278, "step": 477 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46343475580215454, "beta_dpo/beta_margin_grad_std": 0.039767127484083176, "beta_dpo/beta_margin_mean": 0.14786657691001892, "beta_dpo/beta_margin_std": 0.16245287656784058, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9066869616508484, "beta_dpo/gap_mean": 131.2724609375, "beta_dpo/gap_std": 162.33258056640625, "beta_dpo/loss_margin_mean": 147.8665771484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7019089574155654, "grad_norm": 8.391778945922852, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -0.3239785432815552, "logits/rejected": -0.3198069930076599, "loss": 1.2777, "step": 478 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4676341712474823, "beta_dpo/beta_margin_grad_std": 0.04058250039815903, "beta_dpo/beta_margin_mean": 0.13071373105049133, "beta_dpo/beta_margin_std": 0.16454558074474335, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6241159439086914, "beta_dpo/gap_mean": 130.87498474121094, "beta_dpo/gap_std": 161.7484893798828, "beta_dpo/loss_margin_mean": 130.71371459960938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7033773861967695, "grad_norm": 9.221752166748047, "learning_rate": 1.2389025514492456e-07, "logits/chosen": -0.3099960684776306, "logits/rejected": -0.3118622601032257, "loss": 1.2909, "step": 479 }, { "beta_dpo/beta": 0.13506542146205902, "beta_dpo/beta_margin_grad_mean": -0.3352108895778656, "beta_dpo/beta_margin_grad_std": 0.31329280138015747, "beta_dpo/beta_margin_mean": 20.978227615356445, "beta_dpo/beta_margin_std": 46.153724670410156, "beta_dpo/beta_used": 0.13506542146205902, "beta_dpo/beta_used_raw": -1.191691517829895, "beta_dpo/gap_mean": 128.6205596923828, "beta_dpo/gap_std": 162.02749633789062, "beta_dpo/loss_margin_mean": 111.26964569091797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7048458149779736, "grad_norm": 1865.645751953125, "learning_rate": 1.227838333989088e-07, "logits/chosen": -0.2958667278289795, "logits/rejected": -0.261913537979126, "loss": 4.3921, "step": 480 }, { "beta_dpo/beta": 0.4420124888420105, "beta_dpo/beta_margin_grad_mean": -0.32656970620155334, "beta_dpo/beta_margin_grad_std": 0.3070107102394104, "beta_dpo/beta_margin_mean": 69.89620208740234, "beta_dpo/beta_margin_std": 127.26205444335938, "beta_dpo/beta_used": 0.4420124888420105, "beta_dpo/beta_used_raw": -0.41111305356025696, "beta_dpo/gap_mean": 132.35614013671875, "beta_dpo/gap_std": 165.59747314453125, "beta_dpo/loss_margin_mean": 140.51625061035156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7063142437591777, "grad_norm": 6225.22705078125, "learning_rate": 1.2168076391719489e-07, "logits/chosen": -0.29309454560279846, "logits/rejected": -0.2821449935436249, "loss": 10.966, "step": 481 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47538548707962036, "beta_dpo/beta_margin_grad_std": 0.0424528568983078, "beta_dpo/beta_margin_mean": 0.09938764572143555, "beta_dpo/beta_margin_std": 0.172020822763443, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.7888857126235962, "beta_dpo/gap_mean": 127.62977600097656, "beta_dpo/gap_std": 167.57472229003906, "beta_dpo/loss_margin_mean": 99.38764190673828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7077826725403817, "grad_norm": 7.6943440437316895, "learning_rate": 1.2058107576668938e-07, "logits/chosen": -0.33630889654159546, "logits/rejected": -0.3210619390010834, "loss": 1.2958, "step": 482 }, { "beta_dpo/beta": 0.26435208320617676, "beta_dpo/beta_margin_grad_mean": -0.2773337662220001, "beta_dpo/beta_margin_grad_std": 0.2783583700656891, "beta_dpo/beta_margin_mean": 39.302825927734375, "beta_dpo/beta_margin_std": 61.75477981567383, "beta_dpo/beta_used": 0.26435208320617676, "beta_dpo/beta_used_raw": 0.08599334955215454, "beta_dpo/gap_mean": 130.0849151611328, "beta_dpo/gap_std": 171.31443786621094, "beta_dpo/loss_margin_mean": 154.75323486328125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7092511013215859, "grad_norm": 1278.6922607421875, "learning_rate": 1.194847979251979e-07, "logits/chosen": -0.3326480984687805, "logits/rejected": -0.2999170124530792, "loss": 1.3205, "step": 483 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46763938665390015, "beta_dpo/beta_margin_grad_std": 0.035770609974861145, "beta_dpo/beta_margin_mean": 0.13057366013526917, "beta_dpo/beta_margin_std": 0.1456281840801239, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3041430711746216, "beta_dpo/gap_mean": 129.75552368164062, "beta_dpo/gap_std": 164.25143432617188, "beta_dpo/loss_margin_mean": 130.5736541748047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.71071953010279, "grad_norm": 10.355823516845703, "learning_rate": 1.1839195928066101e-07, "logits/chosen": -0.3226221203804016, "logits/rejected": -0.2984588146209717, "loss": 1.2859, "step": 484 }, { "beta_dpo/beta": 0.5772560238838196, "beta_dpo/beta_margin_grad_mean": -0.33905330300331116, "beta_dpo/beta_margin_grad_std": 0.3175105154514313, "beta_dpo/beta_margin_mean": 89.47730255126953, "beta_dpo/beta_margin_std": 172.3997344970703, "beta_dpo/beta_used": 0.5772560238838196, "beta_dpo/beta_used_raw": 0.09787964820861816, "beta_dpo/gap_mean": 135.1558837890625, "beta_dpo/gap_std": 167.03604125976562, "beta_dpo/loss_margin_mean": 159.67459106445312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7121879588839941, "grad_norm": 9029.59765625, "learning_rate": 1.1730258863039347e-07, "logits/chosen": -0.3165690302848816, "logits/rejected": -0.30851900577545166, "loss": 17.9925, "step": 485 }, { "beta_dpo/beta": 0.5253121852874756, "beta_dpo/beta_margin_grad_mean": -0.3582148551940918, "beta_dpo/beta_margin_grad_std": 0.32531389594078064, "beta_dpo/beta_margin_mean": 79.52362823486328, "beta_dpo/beta_margin_std": 165.96304321289062, "beta_dpo/beta_used": 0.5253121852874756, "beta_dpo/beta_used_raw": 0.35201627016067505, "beta_dpo/gap_mean": 137.73037719726562, "beta_dpo/gap_std": 171.21456909179688, "beta_dpo/loss_margin_mean": 145.63682556152344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7136563876651982, "grad_norm": 8708.306640625, "learning_rate": 1.1621671468032493e-07, "logits/chosen": -0.3039623498916626, "logits/rejected": -0.28515172004699707, "loss": 24.9742, "step": 486 }, { "beta_dpo/beta": 0.6316623091697693, "beta_dpo/beta_margin_grad_mean": -0.15689758956432343, "beta_dpo/beta_margin_grad_std": 0.36151018738746643, "beta_dpo/beta_margin_mean": 86.09791564941406, "beta_dpo/beta_margin_std": 80.9069595336914, "beta_dpo/beta_used": 0.6316623091697693, "beta_dpo/beta_used_raw": 0.6316623091697693, "beta_dpo/gap_mean": 138.84857177734375, "beta_dpo/gap_std": 166.0025634765625, "beta_dpo/loss_margin_mean": 136.31451416015625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7151248164464024, "grad_norm": 4539.7001953125, "learning_rate": 1.1513436604424378e-07, "logits/chosen": -0.3175516128540039, "logits/rejected": -0.30147281289100647, "loss": 7.4951, "step": 487 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4705146551132202, "beta_dpo/beta_margin_grad_std": 0.025216443464159966, "beta_dpo/beta_margin_mean": 0.11842110008001328, "beta_dpo/beta_margin_std": 0.10160267353057861, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.245683193206787, "beta_dpo/gap_mean": 134.59036254882812, "beta_dpo/gap_std": 155.66152954101562, "beta_dpo/loss_margin_mean": 118.42108917236328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7165932452276065, "grad_norm": 10.171424865722656, "learning_rate": 1.1405557124304335e-07, "logits/chosen": -0.3197871446609497, "logits/rejected": -0.2931329607963562, "loss": 1.2797, "step": 488 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47052738070487976, "beta_dpo/beta_margin_grad_std": 0.03435816988348961, "beta_dpo/beta_margin_mean": 0.11888797581195831, "beta_dpo/beta_margin_std": 0.13958628475666046, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.862081527709961, "beta_dpo/gap_mean": 132.25436401367188, "beta_dpo/gap_std": 150.97909545898438, "beta_dpo/loss_margin_mean": 118.88796997070312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7180616740088106, "grad_norm": 7.416528701782227, "learning_rate": 1.1298035870396985e-07, "logits/chosen": -0.39010077714920044, "logits/rejected": -0.36551567912101746, "loss": 1.3069, "step": 489 }, { "beta_dpo/beta": 0.058361634612083435, "beta_dpo/beta_margin_grad_mean": -0.35114118456840515, "beta_dpo/beta_margin_grad_std": 0.3123593032360077, "beta_dpo/beta_margin_mean": 8.510327339172363, "beta_dpo/beta_margin_std": 16.38105583190918, "beta_dpo/beta_used": 0.058361634612083435, "beta_dpo/beta_used_raw": -0.9930161833763123, "beta_dpo/gap_mean": 131.10269165039062, "beta_dpo/gap_std": 152.6240692138672, "beta_dpo/loss_margin_mean": 133.6477813720703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7195301027900147, "grad_norm": 638.021728515625, "learning_rate": 1.1190875675987355e-07, "logits/chosen": -0.40140801668167114, "logits/rejected": -0.4072290062904358, "loss": 2.6273, "step": 490 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47306498885154724, "beta_dpo/beta_margin_grad_std": 0.03465822711586952, "beta_dpo/beta_margin_mean": 0.10852167010307312, "beta_dpo/beta_margin_std": 0.14018140733242035, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.122982144355774, "beta_dpo/gap_mean": 128.78497314453125, "beta_dpo/gap_std": 152.2926025390625, "beta_dpo/loss_margin_mean": 108.52165985107422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7209985315712188, "grad_norm": 8.015692710876465, "learning_rate": 1.1084079364846241e-07, "logits/chosen": -0.3382050395011902, "logits/rejected": -0.30560484528541565, "loss": 1.2841, "step": 491 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4801286458969116, "beta_dpo/beta_margin_grad_std": 0.03697565570473671, "beta_dpo/beta_margin_mean": 0.07998443394899368, "beta_dpo/beta_margin_std": 0.14913584291934967, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.9428200721740723, "beta_dpo/gap_mean": 120.65419006347656, "beta_dpo/gap_std": 151.2496337890625, "beta_dpo/loss_margin_mean": 79.98442840576172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7224669603524229, "grad_norm": 7.962594509124756, "learning_rate": 1.097764975115576e-07, "logits/chosen": -0.32057705521583557, "logits/rejected": -0.30018332600593567, "loss": 1.3048, "step": 492 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4754677712917328, "beta_dpo/beta_margin_grad_std": 0.03316526114940643, "beta_dpo/beta_margin_mean": 0.09875541180372238, "beta_dpo/beta_margin_std": 0.13388586044311523, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.9641090631484985, "beta_dpo/gap_mean": 116.27113342285156, "beta_dpo/gap_std": 149.367431640625, "beta_dpo/loss_margin_mean": 98.75540924072266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.723935389133627, "grad_norm": 8.332205772399902, "learning_rate": 1.0871589639435203e-07, "logits/chosen": -0.3866749703884125, "logits/rejected": -0.3490540385246277, "loss": 1.3088, "step": 493 }, { "beta_dpo/beta": 1.4735260009765625, "beta_dpo/beta_margin_grad_mean": -0.15571968257427216, "beta_dpo/beta_margin_grad_std": 0.3583217263221741, "beta_dpo/beta_margin_mean": 210.98004150390625, "beta_dpo/beta_margin_std": 204.13458251953125, "beta_dpo/beta_used": 1.4735260009765625, "beta_dpo/beta_used_raw": 1.4735260009765625, "beta_dpo/gap_mean": 119.46544647216797, "beta_dpo/gap_std": 148.60195922851562, "beta_dpo/loss_margin_mean": 145.59498596191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7254038179148311, "grad_norm": 6623.4462890625, "learning_rate": 1.0765901824467166e-07, "logits/chosen": -0.2984636425971985, "logits/rejected": -0.31005731225013733, "loss": 4.6034, "step": 494 }, { "beta_dpo/beta": 0.5986773371696472, "beta_dpo/beta_margin_grad_mean": -0.2792108356952667, "beta_dpo/beta_margin_grad_std": 0.2721221148967743, "beta_dpo/beta_margin_mean": 80.61207580566406, "beta_dpo/beta_margin_std": 141.1808624267578, "beta_dpo/beta_used": 0.5986773371696472, "beta_dpo/beta_used_raw": -0.09175539016723633, "beta_dpo/gap_mean": 119.32475280761719, "beta_dpo/gap_std": 148.88406372070312, "beta_dpo/loss_margin_mean": 118.74334716796875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7268722466960352, "grad_norm": 1850.2857666015625, "learning_rate": 1.0660589091223854e-07, "logits/chosen": -0.3925628662109375, "logits/rejected": -0.37049469351768494, "loss": 0.6957, "step": 495 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4757267236709595, "beta_dpo/beta_margin_grad_std": 0.030057376250624657, "beta_dpo/beta_margin_mean": 0.09760633856058121, "beta_dpo/beta_margin_std": 0.12117937952280045, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.945371389389038, "beta_dpo/gap_mean": 116.95680236816406, "beta_dpo/gap_std": 145.31634521484375, "beta_dpo/loss_margin_mean": 97.60633850097656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7283406754772394, "grad_norm": 7.529769420623779, "learning_rate": 1.0555654214793722e-07, "logits/chosen": -0.3815876245498657, "logits/rejected": -0.34360769391059875, "loss": 1.3074, "step": 496 }, { "beta_dpo/beta": 0.8181713223457336, "beta_dpo/beta_margin_grad_mean": -0.27269458770751953, "beta_dpo/beta_margin_grad_std": 0.2720523774623871, "beta_dpo/beta_margin_mean": 115.75753021240234, "beta_dpo/beta_margin_std": 177.8175506591797, "beta_dpo/beta_used": 0.8181713223457336, "beta_dpo/beta_used_raw": -0.907131552696228, "beta_dpo/gap_mean": 115.927490234375, "beta_dpo/gap_std": 140.37762451171875, "beta_dpo/loss_margin_mean": 105.21829986572266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7298091042584435, "grad_norm": 2982.553955078125, "learning_rate": 1.0451099960308374e-07, "logits/chosen": -0.3127893805503845, "logits/rejected": -0.2815262973308563, "loss": 2.3877, "step": 497 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4737287759780884, "beta_dpo/beta_margin_grad_std": 0.037132780998945236, "beta_dpo/beta_margin_mean": 0.10587074607610703, "beta_dpo/beta_margin_std": 0.15012362599372864, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5454678535461426, "beta_dpo/gap_mean": 111.99593353271484, "beta_dpo/gap_std": 142.203369140625, "beta_dpo/loss_margin_mean": 105.87074279785156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7312775330396476, "grad_norm": 8.269208908081055, "learning_rate": 1.0346929082869641e-07, "logits/chosen": -0.3560227155685425, "logits/rejected": -0.323871910572052, "loss": 1.2907, "step": 498 }, { "beta_dpo/beta": 0.6870215535163879, "beta_dpo/beta_margin_grad_mean": -0.1571728140115738, "beta_dpo/beta_margin_grad_std": 0.35055309534072876, "beta_dpo/beta_margin_mean": 93.55929565429688, "beta_dpo/beta_margin_std": 131.30792236328125, "beta_dpo/beta_used": 0.6870215535163879, "beta_dpo/beta_used_raw": 0.6870215535163879, "beta_dpo/gap_mean": 114.74722290039062, "beta_dpo/gap_std": 141.5767822265625, "beta_dpo/loss_margin_mean": 125.64728546142578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7327459618208517, "grad_norm": 825.9117431640625, "learning_rate": 1.0243144327477013e-07, "logits/chosen": -0.30797550082206726, "logits/rejected": -0.313708633184433, "loss": 1.0329, "step": 499 }, { "beta_dpo/beta": 0.7255595922470093, "beta_dpo/beta_margin_grad_mean": -0.3108097314834595, "beta_dpo/beta_margin_grad_std": 0.3008542060852051, "beta_dpo/beta_margin_mean": 94.36482238769531, "beta_dpo/beta_margin_std": 166.26669311523438, "beta_dpo/beta_used": 0.7255595922470093, "beta_dpo/beta_used_raw": 0.49765706062316895, "beta_dpo/gap_mean": 117.69755554199219, "beta_dpo/gap_std": 142.67498779296875, "beta_dpo/loss_margin_mean": 134.94979858398438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7342143906020558, "grad_norm": 940.185546875, "learning_rate": 1.0139748428955333e-07, "logits/chosen": -0.30566155910491943, "logits/rejected": -0.30621030926704407, "loss": 1.1587, "step": 500 }, { "epoch": 0.7342143906020558, "eval_beta_dpo/beta": 0.0540650337934494, "eval_beta_dpo/beta_margin_grad_mean": -0.4557286202907562, "eval_beta_dpo/beta_margin_grad_std": 0.06789226830005646, "eval_beta_dpo/beta_margin_mean": 7.248837947845459, "eval_beta_dpo/beta_margin_std": 9.07664680480957, "eval_beta_dpo/beta_used": 0.0540650337934494, "eval_beta_dpo/beta_used_raw": -2.3147406578063965, "eval_beta_dpo/gap_mean": 118.34776306152344, "eval_beta_dpo/gap_std": 142.3097686767578, "eval_beta_dpo/loss_margin_mean": 78.1020736694336, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.35900095105171204, "eval_logits/rejected": -0.3353206515312195, "eval_loss": 1.3023549318313599, "eval_runtime": 40.1743, "eval_samples_per_second": 58.221, "eval_steps_per_second": 1.842, "step": 500 }, { "beta_dpo/beta": 0.08518475294113159, "beta_dpo/beta_margin_grad_mean": -0.31711265444755554, "beta_dpo/beta_margin_grad_std": 0.289605975151062, "beta_dpo/beta_margin_mean": 13.289090156555176, "beta_dpo/beta_margin_std": 21.948522567749023, "beta_dpo/beta_used": 0.08518475294113159, "beta_dpo/beta_used_raw": -1.208457112312317, "beta_dpo/gap_mean": 119.19757843017578, "beta_dpo/gap_std": 144.50363159179688, "beta_dpo/loss_margin_mean": 131.8070068359375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.73568281938326, "grad_norm": 516.4696044921875, "learning_rate": 1.0036744111882672e-07, "logits/chosen": -0.2940847873687744, "logits/rejected": -0.2648616433143616, "loss": 1.1495, "step": 501 }, { "beta_dpo/beta": 0.3071337640285492, "beta_dpo/beta_margin_grad_mean": -0.239473357796669, "beta_dpo/beta_margin_grad_std": 0.23620876669883728, "beta_dpo/beta_margin_mean": 44.45820617675781, "beta_dpo/beta_margin_std": 64.65328979492188, "beta_dpo/beta_used": 0.3071337640285492, "beta_dpo/beta_used_raw": -0.02544143795967102, "beta_dpo/gap_mean": 120.6997299194336, "beta_dpo/gap_std": 140.1085205078125, "beta_dpo/loss_margin_mean": 124.3865966796875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.737151248164464, "grad_norm": 124.2741928100586, "learning_rate": 9.934134090518592e-08, "logits/chosen": -0.34868794679641724, "logits/rejected": -0.3092746138572693, "loss": 0.6492, "step": 502 }, { "beta_dpo/beta": 0.408588171005249, "beta_dpo/beta_margin_grad_mean": -0.28420010209083557, "beta_dpo/beta_margin_grad_std": 0.28162574768066406, "beta_dpo/beta_margin_mean": 52.1716423034668, "beta_dpo/beta_margin_std": 84.77537536621094, "beta_dpo/beta_used": 0.408588171005249, "beta_dpo/beta_used_raw": -0.3279840648174286, "beta_dpo/gap_mean": 121.5184326171875, "beta_dpo/gap_std": 136.5009765625, "beta_dpo/loss_margin_mean": 114.87471008300781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7386196769456681, "grad_norm": 1218.5517578125, "learning_rate": 9.831921068732571e-08, "logits/chosen": -0.2874418795108795, "logits/rejected": -0.2468714714050293, "loss": 1.7133, "step": 503 }, { "beta_dpo/beta": 1.1715275049209595, "beta_dpo/beta_margin_grad_mean": -0.27964290976524353, "beta_dpo/beta_margin_grad_std": 0.27907973527908325, "beta_dpo/beta_margin_mean": 199.27561950683594, "beta_dpo/beta_margin_std": 273.30157470703125, "beta_dpo/beta_used": 1.1715275049209595, "beta_dpo/beta_used_raw": 0.7455565929412842, "beta_dpo/gap_mean": 126.13829040527344, "beta_dpo/gap_std": 136.19711303710938, "beta_dpo/loss_margin_mean": 154.6195526123047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7400881057268722, "grad_norm": 6307.01123046875, "learning_rate": 9.730107739932805e-08, "logits/chosen": -0.31520044803619385, "logits/rejected": -0.31102991104125977, "loss": 3.7457, "step": 504 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47535502910614014, "beta_dpo/beta_margin_grad_std": 0.03676827251911163, "beta_dpo/beta_margin_mean": 0.09925924986600876, "beta_dpo/beta_margin_std": 0.1482783406972885, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.7212300300598145, "beta_dpo/gap_mean": 122.90274047851562, "beta_dpo/gap_std": 139.72247314453125, "beta_dpo/loss_margin_mean": 99.25924682617188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7415565345080763, "grad_norm": 8.723043441772461, "learning_rate": 9.628696786995188e-08, "logits/chosen": -0.3765321969985962, "logits/rejected": -0.3430270552635193, "loss": 1.3002, "step": 505 }, { "beta_dpo/beta": 0.41072434186935425, "beta_dpo/beta_margin_grad_mean": -0.2921498417854309, "beta_dpo/beta_margin_grad_std": 0.27914443612098694, "beta_dpo/beta_margin_mean": 61.24283218383789, "beta_dpo/beta_margin_std": 103.88602447509766, "beta_dpo/beta_used": 0.41072434186935425, "beta_dpo/beta_used_raw": 0.20597346127033234, "beta_dpo/gap_mean": 124.59944915771484, "beta_dpo/gap_std": 140.55581665039062, "beta_dpo/loss_margin_mean": 139.48028564453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7430249632892805, "grad_norm": 916.702880859375, "learning_rate": 9.527690882192635e-08, "logits/chosen": -0.3146125376224518, "logits/rejected": -0.2966008186340332, "loss": 4.1001, "step": 506 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47250908613204956, "beta_dpo/beta_margin_grad_std": 0.03613479807972908, "beta_dpo/beta_margin_mean": 0.11091171205043793, "beta_dpo/beta_margin_std": 0.14667391777038574, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.4757391214370728, "beta_dpo/gap_mean": 123.23077392578125, "beta_dpo/gap_std": 141.94122314453125, "beta_dpo/loss_margin_mean": 110.91170501708984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7444933920704846, "grad_norm": 11.176294326782227, "learning_rate": 9.427092687124691e-08, "logits/chosen": -0.3175603449344635, "logits/rejected": -0.2914998531341553, "loss": 1.2946, "step": 507 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4737025201320648, "beta_dpo/beta_margin_grad_std": 0.038725487887859344, "beta_dpo/beta_margin_mean": 0.10615622252225876, "beta_dpo/beta_margin_std": 0.15725988149642944, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.256690502166748, "beta_dpo/gap_mean": 119.46562957763672, "beta_dpo/gap_std": 144.94464111328125, "beta_dpo/loss_margin_mean": 106.15621948242188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7459618208516887, "grad_norm": 8.980006217956543, "learning_rate": 9.326904852647344e-08, "logits/chosen": -0.3461516499519348, "logits/rejected": -0.3254041373729706, "loss": 1.3101, "step": 508 }, { "beta_dpo/beta": 0.034919556230306625, "beta_dpo/beta_margin_grad_mean": -0.29076310992240906, "beta_dpo/beta_margin_grad_std": 0.26489248871803284, "beta_dpo/beta_margin_mean": 4.45152473449707, "beta_dpo/beta_margin_std": 6.8051252365112305, "beta_dpo/beta_used": 0.034919556230306625, "beta_dpo/beta_used_raw": -0.7437249422073364, "beta_dpo/gap_mean": 117.4862060546875, "beta_dpo/gap_std": 143.86575317382812, "beta_dpo/loss_margin_mean": 112.33943176269531, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7474302496328928, "grad_norm": 281.12945556640625, "learning_rate": 9.227130018803195e-08, "logits/chosen": -0.3934116065502167, "logits/rejected": -0.3660675883293152, "loss": 0.8276, "step": 509 }, { "beta_dpo/beta": 0.6322641372680664, "beta_dpo/beta_margin_grad_mean": -0.32574018836021423, "beta_dpo/beta_margin_grad_std": 0.3087122440338135, "beta_dpo/beta_margin_mean": 86.17361450195312, "beta_dpo/beta_margin_std": 161.67727661132812, "beta_dpo/beta_used": 0.6322641372680664, "beta_dpo/beta_used_raw": 0.5919451713562012, "beta_dpo/gap_mean": 120.94603729248047, "beta_dpo/gap_std": 143.10879516601562, "beta_dpo/loss_margin_mean": 138.92527770996094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.748898678414097, "grad_norm": 7391.86083984375, "learning_rate": 9.127770814751932e-08, "logits/chosen": -0.3361413776874542, "logits/rejected": -0.3392980992794037, "loss": 5.2172, "step": 510 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4689362347126007, "beta_dpo/beta_margin_grad_std": 0.041758738458156586, "beta_dpo/beta_margin_mean": 0.1254274845123291, "beta_dpo/beta_margin_std": 0.16929617524147034, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9141647815704346, "beta_dpo/gap_mean": 122.11959838867188, "beta_dpo/gap_std": 146.7066192626953, "beta_dpo/loss_margin_mean": 125.42748260498047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.750367107195301, "grad_norm": 14.153034210205078, "learning_rate": 9.028829858700973e-08, "logits/chosen": -0.3057625889778137, "logits/rejected": -0.2901447117328644, "loss": 1.287, "step": 511 }, { "beta_dpo/beta": 0.12954731285572052, "beta_dpo/beta_margin_grad_mean": -0.32700619101524353, "beta_dpo/beta_margin_grad_std": 0.3073154091835022, "beta_dpo/beta_margin_mean": 18.631834030151367, "beta_dpo/beta_margin_std": 31.87902069091797, "beta_dpo/beta_used": 0.12954731285572052, "beta_dpo/beta_used_raw": -0.11738580465316772, "beta_dpo/gap_mean": 125.70597839355469, "beta_dpo/gap_std": 147.9683837890625, "beta_dpo/loss_margin_mean": 142.68536376953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7518355359765051, "grad_norm": 1204.3978271484375, "learning_rate": 8.930309757836516e-08, "logits/chosen": -0.2575787305831909, "logits/rejected": -0.2278975248336792, "loss": 2.0045, "step": 512 }, { "beta_dpo/beta": 0.044924668967723846, "beta_dpo/beta_margin_grad_mean": -0.32202252745628357, "beta_dpo/beta_margin_grad_std": 0.2846587002277374, "beta_dpo/beta_margin_mean": 6.217226982116699, "beta_dpo/beta_margin_std": 10.354048728942871, "beta_dpo/beta_used": 0.044924668967723846, "beta_dpo/beta_used_raw": -1.4216735363006592, "beta_dpo/gap_mean": 124.45533752441406, "beta_dpo/gap_std": 146.31292724609375, "beta_dpo/loss_margin_mean": 99.97219848632812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7533039647577092, "grad_norm": 278.9576416015625, "learning_rate": 8.832213108254863e-08, "logits/chosen": -0.28937166929244995, "logits/rejected": -0.23899608850479126, "loss": 1.4199, "step": 513 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47301986813545227, "beta_dpo/beta_margin_grad_std": 0.04115281254053116, "beta_dpo/beta_margin_mean": 0.10918539017438889, "beta_dpo/beta_margin_std": 0.1675841063261032, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3583461046218872, "beta_dpo/gap_mean": 119.46575927734375, "beta_dpo/gap_std": 149.45828247070312, "beta_dpo/loss_margin_mean": 109.18538665771484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7547723935389133, "grad_norm": 8.001137733459473, "learning_rate": 8.734542494893954e-08, "logits/chosen": -0.33942490816116333, "logits/rejected": -0.3081890940666199, "loss": 1.2968, "step": 514 }, { "beta_dpo/beta": 0.7366660833358765, "beta_dpo/beta_margin_grad_mean": -0.2994893789291382, "beta_dpo/beta_margin_grad_std": 0.2928798794746399, "beta_dpo/beta_margin_mean": 86.19298553466797, "beta_dpo/beta_margin_std": 163.50563049316406, "beta_dpo/beta_used": 0.7366660833358765, "beta_dpo/beta_used_raw": 0.6376501321792603, "beta_dpo/gap_mean": 117.94536590576172, "beta_dpo/gap_std": 149.05044555664062, "beta_dpo/loss_margin_mean": 110.93781280517578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7562408223201175, "grad_norm": 2712.74951171875, "learning_rate": 8.637300491465272e-08, "logits/chosen": -0.33565250039100647, "logits/rejected": -0.32709378004074097, "loss": 2.2661, "step": 515 }, { "beta_dpo/beta": 0.3256189227104187, "beta_dpo/beta_margin_grad_mean": -0.17041105031967163, "beta_dpo/beta_margin_grad_std": 0.32598721981048584, "beta_dpo/beta_margin_mean": 44.762569427490234, "beta_dpo/beta_margin_std": 72.75740814208984, "beta_dpo/beta_used": 0.3256189227104187, "beta_dpo/beta_used_raw": 0.3256189227104187, "beta_dpo/gap_mean": 120.83750915527344, "beta_dpo/gap_std": 148.33566284179688, "beta_dpo/loss_margin_mean": 144.36268615722656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7577092511013216, "grad_norm": 2482.775634765625, "learning_rate": 8.540489660386064e-08, "logits/chosen": -0.30348920822143555, "logits/rejected": -0.30661916732788086, "loss": 3.9909, "step": 516 }, { "beta_dpo/beta": 0.3787996470928192, "beta_dpo/beta_margin_grad_mean": -0.3142106235027313, "beta_dpo/beta_margin_grad_std": 0.3018515706062317, "beta_dpo/beta_margin_mean": 53.798763275146484, "beta_dpo/beta_margin_std": 91.0955581665039, "beta_dpo/beta_used": 0.3787996470928192, "beta_dpo/beta_used_raw": -0.8146347403526306, "beta_dpo/gap_mean": 124.02101135253906, "beta_dpo/gap_std": 147.56996154785156, "beta_dpo/loss_margin_mean": 129.0400848388672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7591776798825257, "grad_norm": 4063.2880859375, "learning_rate": 8.444112552711752e-08, "logits/chosen": -0.34874552488327026, "logits/rejected": -0.3211863934993744, "loss": 9.232, "step": 517 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47028571367263794, "beta_dpo/beta_margin_grad_std": 0.03980425372719765, "beta_dpo/beta_margin_mean": 0.11989691108465195, "beta_dpo/beta_margin_std": 0.16114358603954315, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.2156920433044434, "beta_dpo/gap_mean": 123.86222839355469, "beta_dpo/gap_std": 149.8852081298828, "beta_dpo/loss_margin_mean": 119.89690399169922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7606461086637298, "grad_norm": 9.67983627319336, "learning_rate": 8.348171708068747e-08, "logits/chosen": -0.33817270398139954, "logits/rejected": -0.3306800425052643, "loss": 1.2903, "step": 518 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4726635813713074, "beta_dpo/beta_margin_grad_std": 0.035860326141119, "beta_dpo/beta_margin_mean": 0.11023343354463577, "beta_dpo/beta_margin_std": 0.14520837366580963, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.990078091621399, "beta_dpo/gap_mean": 120.50178527832031, "beta_dpo/gap_std": 149.90951538085938, "beta_dpo/loss_margin_mean": 110.23342895507812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.762114537444934, "grad_norm": 11.328512191772461, "learning_rate": 8.25266965458755e-08, "logits/chosen": -0.38129687309265137, "logits/rejected": -0.3619292974472046, "loss": 1.3054, "step": 519 }, { "beta_dpo/beta": 0.3380447328090668, "beta_dpo/beta_margin_grad_mean": -0.2715797424316406, "beta_dpo/beta_margin_grad_std": 0.26895225048065186, "beta_dpo/beta_margin_mean": 49.52477264404297, "beta_dpo/beta_margin_std": 87.04480743408203, "beta_dpo/beta_used": 0.3380447328090668, "beta_dpo/beta_used_raw": -1.0785763263702393, "beta_dpo/gap_mean": 121.2685775756836, "beta_dpo/gap_std": 148.63670349121094, "beta_dpo/loss_margin_mean": 116.98388671875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7635829662261381, "grad_norm": 3035.5087890625, "learning_rate": 8.15760890883607e-08, "logits/chosen": -0.3086138069629669, "logits/rejected": -0.29337257146835327, "loss": 3.5396, "step": 520 }, { "beta_dpo/beta": 0.7759643793106079, "beta_dpo/beta_margin_grad_mean": -0.30295756459236145, "beta_dpo/beta_margin_grad_std": 0.2947865128517151, "beta_dpo/beta_margin_mean": 120.77637481689453, "beta_dpo/beta_margin_std": 199.4180450439453, "beta_dpo/beta_used": 0.7759643793106079, "beta_dpo/beta_used_raw": -0.11128360033035278, "beta_dpo/gap_mean": 117.24072265625, "beta_dpo/gap_std": 145.8902587890625, "beta_dpo/loss_margin_mean": 116.25064086914062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7650513950073421, "grad_norm": 2916.091552734375, "learning_rate": 8.062991975753378e-08, "logits/chosen": -0.26582300662994385, "logits/rejected": -0.24365702271461487, "loss": 5.6064, "step": 521 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4751797318458557, "beta_dpo/beta_margin_grad_std": 0.03499903902411461, "beta_dpo/beta_margin_mean": 0.09980867058038712, "beta_dpo/beta_margin_std": 0.1410750299692154, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.123004674911499, "beta_dpo/gap_mean": 115.89306640625, "beta_dpo/gap_std": 145.66278076171875, "beta_dpo/loss_margin_mean": 99.80866241455078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7665198237885462, "grad_norm": 11.408084869384766, "learning_rate": 7.968821348583643e-08, "logits/chosen": -0.26223623752593994, "logits/rejected": -0.2491573542356491, "loss": 1.2971, "step": 522 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4724850058555603, "beta_dpo/beta_margin_grad_std": 0.03419146686792374, "beta_dpo/beta_margin_mean": 0.11072482913732529, "beta_dpo/beta_margin_std": 0.13797280192375183, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7056801319122314, "beta_dpo/gap_mean": 114.94084167480469, "beta_dpo/gap_std": 144.96939086914062, "beta_dpo/loss_margin_mean": 110.72482299804688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7679882525697503, "grad_norm": 12.761443138122559, "learning_rate": 7.875099508810484e-08, "logits/chosen": -0.35182422399520874, "logits/rejected": -0.3160788416862488, "loss": 1.2905, "step": 523 }, { "beta_dpo/beta": 0.34091123938560486, "beta_dpo/beta_margin_grad_mean": -0.28588905930519104, "beta_dpo/beta_margin_grad_std": 0.26944512128829956, "beta_dpo/beta_margin_mean": 39.1301383972168, "beta_dpo/beta_margin_std": 78.40217590332031, "beta_dpo/beta_used": 0.34091123938560486, "beta_dpo/beta_used_raw": -0.35857832431793213, "beta_dpo/gap_mean": 113.64476013183594, "beta_dpo/gap_std": 142.83682250976562, "beta_dpo/loss_margin_mean": 103.63865661621094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7694566813509545, "grad_norm": 1415.0672607421875, "learning_rate": 7.781828926091535e-08, "logits/chosen": -0.3773775100708008, "logits/rejected": -0.3307211399078369, "loss": 0.7133, "step": 524 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46599993109703064, "beta_dpo/beta_margin_grad_std": 0.03062298335134983, "beta_dpo/beta_margin_mean": 0.13683471083641052, "beta_dpo/beta_margin_std": 0.12389042973518372, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7509552240371704, "beta_dpo/gap_mean": 116.8314208984375, "beta_dpo/gap_std": 139.02029418945312, "beta_dpo/loss_margin_mean": 136.83470153808594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7709251101321586, "grad_norm": 11.353320121765137, "learning_rate": 7.689012058193384e-08, "logits/chosen": -0.29079174995422363, "logits/rejected": -0.29601868987083435, "loss": 1.2884, "step": 525 }, { "beta_dpo/beta": 1.2091269493103027, "beta_dpo/beta_margin_grad_mean": -0.2980670630931854, "beta_dpo/beta_margin_grad_std": 0.2918414771556854, "beta_dpo/beta_margin_mean": 188.9692840576172, "beta_dpo/beta_margin_std": 288.71917724609375, "beta_dpo/beta_used": 1.2091269493103027, "beta_dpo/beta_used_raw": 0.724345326423645, "beta_dpo/gap_mean": 118.87196350097656, "beta_dpo/gap_std": 137.54379272460938, "beta_dpo/loss_margin_mean": 136.27288818359375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7723935389133627, "grad_norm": 10179.40234375, "learning_rate": 7.596651350926836e-08, "logits/chosen": -0.37073665857315063, "logits/rejected": -0.33339107036590576, "loss": 4.632, "step": 526 }, { "beta_dpo/beta": 0.31047749519348145, "beta_dpo/beta_margin_grad_mean": -0.31394025683403015, "beta_dpo/beta_margin_grad_std": 0.2968938946723938, "beta_dpo/beta_margin_mean": 37.45048522949219, "beta_dpo/beta_margin_std": 69.46243286132812, "beta_dpo/beta_used": 0.31047749519348145, "beta_dpo/beta_used_raw": -0.9867266416549683, "beta_dpo/gap_mean": 119.21248626708984, "beta_dpo/gap_std": 135.69989013671875, "beta_dpo/loss_margin_mean": 104.62269592285156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7738619676945668, "grad_norm": 997.8250122070312, "learning_rate": 7.504749238082414e-08, "logits/chosen": -0.32467547059059143, "logits/rejected": -0.28431421518325806, "loss": 0.8418, "step": 527 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47215956449508667, "beta_dpo/beta_margin_grad_std": 0.03576524555683136, "beta_dpo/beta_margin_mean": 0.11228987574577332, "beta_dpo/beta_margin_std": 0.1448754370212555, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.420975685119629, "beta_dpo/gap_mean": 117.36293029785156, "beta_dpo/gap_std": 136.00509643554688, "beta_dpo/loss_margin_mean": 112.28987121582031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.775330396475771, "grad_norm": 9.15918254852295, "learning_rate": 7.413308141366254e-08, "logits/chosen": -0.34611016511917114, "logits/rejected": -0.3225988745689392, "loss": 1.2992, "step": 528 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47805625200271606, "beta_dpo/beta_margin_grad_std": 0.02993660233914852, "beta_dpo/beta_margin_mean": 0.08821769058704376, "beta_dpo/beta_margin_std": 0.12073423713445663, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3918501138687134, "beta_dpo/gap_mean": 112.62429809570312, "beta_dpo/gap_std": 133.21206665039062, "beta_dpo/loss_margin_mean": 88.21768951416016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7767988252569751, "grad_norm": 8.978301048278809, "learning_rate": 7.322330470336313e-08, "logits/chosen": -0.29925012588500977, "logits/rejected": -0.2968965470790863, "loss": 1.3025, "step": 529 }, { "beta_dpo/beta": 0.8380435109138489, "beta_dpo/beta_margin_grad_mean": -0.29539409279823303, "beta_dpo/beta_margin_grad_std": 0.29032719135284424, "beta_dpo/beta_margin_mean": 130.1285858154297, "beta_dpo/beta_margin_std": 249.0910186767578, "beta_dpo/beta_used": 0.8380435109138489, "beta_dpo/beta_used_raw": 0.8118060231208801, "beta_dpo/gap_mean": 116.62379455566406, "beta_dpo/gap_std": 139.05780029296875, "beta_dpo/loss_margin_mean": 146.89210510253906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7782672540381792, "grad_norm": 5670.451171875, "learning_rate": 7.231818622338822e-08, "logits/chosen": -0.29417866468429565, "logits/rejected": -0.28473860025405884, "loss": 6.8965, "step": 530 }, { "beta_dpo/beta": 0.3860895335674286, "beta_dpo/beta_margin_grad_mean": -0.3058871030807495, "beta_dpo/beta_margin_grad_std": 0.29022565484046936, "beta_dpo/beta_margin_mean": 48.6815071105957, "beta_dpo/beta_margin_std": 90.59869384765625, "beta_dpo/beta_used": 0.3860895335674286, "beta_dpo/beta_used_raw": -0.38052642345428467, "beta_dpo/gap_mean": 118.85505676269531, "beta_dpo/gap_std": 137.6885986328125, "beta_dpo/loss_margin_mean": 125.42870330810547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7797356828193832, "grad_norm": 3328.2119140625, "learning_rate": 7.141774982445147e-08, "logits/chosen": -0.30007681250572205, "logits/rejected": -0.2692079246044159, "loss": 4.4606, "step": 531 }, { "beta_dpo/beta": 0.25227928161621094, "beta_dpo/beta_margin_grad_mean": -0.3480188250541687, "beta_dpo/beta_margin_grad_std": 0.31572601199150085, "beta_dpo/beta_margin_mean": 46.9578742980957, "beta_dpo/beta_margin_std": 80.62285614013672, "beta_dpo/beta_used": 0.25227928161621094, "beta_dpo/beta_used_raw": -0.626258373260498, "beta_dpo/gap_mean": 122.90603637695312, "beta_dpo/gap_std": 142.55856323242188, "beta_dpo/loss_margin_mean": 125.72514343261719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7812041116005873, "grad_norm": 6387.31787109375, "learning_rate": 7.052201923388953e-08, "logits/chosen": -0.3237287104129791, "logits/rejected": -0.28502288460731506, "loss": 6.5028, "step": 532 }, { "beta_dpo/beta": 0.10688778758049011, "beta_dpo/beta_margin_grad_mean": -0.36453330516815186, "beta_dpo/beta_margin_grad_std": 0.31307417154312134, "beta_dpo/beta_margin_mean": 14.438727378845215, "beta_dpo/beta_margin_std": 29.17506217956543, "beta_dpo/beta_used": 0.10688778758049011, "beta_dpo/beta_used_raw": -1.7720496654510498, "beta_dpo/gap_mean": 116.83836364746094, "beta_dpo/gap_std": 140.88243103027344, "beta_dpo/loss_margin_mean": 107.61312103271484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7826725403817915, "grad_norm": 854.921630859375, "learning_rate": 6.963101805503646e-08, "logits/chosen": -0.32803478837013245, "logits/rejected": -0.2940494418144226, "loss": 2.1474, "step": 533 }, { "beta_dpo/beta": 0.4838470220565796, "beta_dpo/beta_margin_grad_mean": -0.3121793866157532, "beta_dpo/beta_margin_grad_std": 0.30149754881858826, "beta_dpo/beta_margin_mean": 61.1616096496582, "beta_dpo/beta_margin_std": 110.03469848632812, "beta_dpo/beta_used": 0.4838470220565796, "beta_dpo/beta_used_raw": -0.25012335181236267, "beta_dpo/gap_mean": 119.16416931152344, "beta_dpo/gap_std": 142.51844787597656, "beta_dpo/loss_margin_mean": 127.46477508544922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7841409691629956, "grad_norm": 2596.990478515625, "learning_rate": 6.874476976660184e-08, "logits/chosen": -0.3111898899078369, "logits/rejected": -0.2915695905685425, "loss": 7.1215, "step": 534 }, { "beta_dpo/beta": 0.4798532724380493, "beta_dpo/beta_margin_grad_mean": -0.25442755222320557, "beta_dpo/beta_margin_grad_std": 0.2559193968772888, "beta_dpo/beta_margin_mean": 74.49298858642578, "beta_dpo/beta_margin_std": 107.83834075927734, "beta_dpo/beta_used": 0.4798532724380493, "beta_dpo/beta_used_raw": -0.8804515600204468, "beta_dpo/gap_mean": 118.63661193847656, "beta_dpo/gap_std": 141.1715087890625, "beta_dpo/loss_margin_mean": 122.73117065429688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7856093979441997, "grad_norm": 5.533578395843506, "learning_rate": 6.786329772205246e-08, "logits/chosen": -0.34731101989746094, "logits/rejected": -0.340278685092926, "loss": 0.6606, "step": 535 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46096980571746826, "beta_dpo/beta_margin_grad_std": 0.03988654166460037, "beta_dpo/beta_margin_mean": 0.1579839587211609, "beta_dpo/beta_margin_std": 0.16417579352855682, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.219361811876297, "beta_dpo/gap_mean": 126.47843933105469, "beta_dpo/gap_std": 143.17359924316406, "beta_dpo/loss_margin_mean": 157.9839630126953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7870778267254038, "grad_norm": 7.822637557983398, "learning_rate": 6.698662514899638e-08, "logits/chosen": -0.28798243403434753, "logits/rejected": -0.27560853958129883, "loss": 1.2712, "step": 536 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4682691991329193, "beta_dpo/beta_margin_grad_std": 0.03821183741092682, "beta_dpo/beta_margin_mean": 0.12798717617988586, "beta_dpo/beta_margin_std": 0.15467973053455353, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.374413013458252, "beta_dpo/gap_mean": 127.0035400390625, "beta_dpo/gap_std": 146.7835693359375, "beta_dpo/loss_margin_mean": 127.98716735839844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.788546255506608, "grad_norm": 11.444367408752441, "learning_rate": 6.611477514857114e-08, "logits/chosen": -0.24693317711353302, "logits/rejected": -0.20856288075447083, "loss": 1.2903, "step": 537 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46676576137542725, "beta_dpo/beta_margin_grad_std": 0.03483714163303375, "beta_dpo/beta_margin_mean": 0.1339321732521057, "beta_dpo/beta_margin_std": 0.1410028487443924, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.4291920065879822, "beta_dpo/gap_mean": 128.6876983642578, "beta_dpo/gap_std": 145.996826171875, "beta_dpo/loss_margin_mean": 133.93215942382812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7900146842878121, "grad_norm": 8.882081031799316, "learning_rate": 6.524777069483525e-08, "logits/chosen": -0.34462568163871765, "logits/rejected": -0.31714940071105957, "loss": 1.2737, "step": 538 }, { "beta_dpo/beta": 0.25301647186279297, "beta_dpo/beta_margin_grad_mean": -0.3016367256641388, "beta_dpo/beta_margin_grad_std": 0.29386886954307556, "beta_dpo/beta_margin_mean": 32.2935676574707, "beta_dpo/beta_margin_std": 58.6886100769043, "beta_dpo/beta_used": 0.25301647186279297, "beta_dpo/beta_used_raw": -1.4830609560012817, "beta_dpo/gap_mean": 126.75646209716797, "beta_dpo/gap_std": 144.74050903320312, "beta_dpo/loss_margin_mean": 107.55413818359375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7914831130690162, "grad_norm": 1186.32177734375, "learning_rate": 6.438563463416221e-08, "logits/chosen": -0.30251675844192505, "logits/rejected": -0.269988089799881, "loss": 2.8284, "step": 539 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4608391523361206, "beta_dpo/beta_margin_grad_std": 0.041996635496616364, "beta_dpo/beta_margin_mean": 0.15832501649856567, "beta_dpo/beta_margin_std": 0.17094068229198456, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.767475962638855, "beta_dpo/gap_mean": 128.5769805908203, "beta_dpo/gap_std": 145.3531036376953, "beta_dpo/loss_margin_mean": 158.32501220703125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7929515418502202, "grad_norm": 11.150030136108398, "learning_rate": 6.352838968463919e-08, "logits/chosen": -0.32399916648864746, "logits/rejected": -0.3120352029800415, "loss": 1.2796, "step": 540 }, { "beta_dpo/beta": 0.4254739284515381, "beta_dpo/beta_margin_grad_mean": -0.27390703558921814, "beta_dpo/beta_margin_grad_std": 0.2731405198574066, "beta_dpo/beta_margin_mean": 67.95819091796875, "beta_dpo/beta_margin_std": 111.18570709228516, "beta_dpo/beta_used": 0.4254739284515381, "beta_dpo/beta_used_raw": -1.5122350454330444, "beta_dpo/gap_mean": 130.40518188476562, "beta_dpo/gap_std": 148.4336395263672, "beta_dpo/loss_margin_mean": 109.54701232910156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7944199706314243, "grad_norm": 7.361756324768066, "learning_rate": 6.267605843546767e-08, "logits/chosen": -0.34003913402557373, "logits/rejected": -0.32215964794158936, "loss": 0.6651, "step": 541 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46578720211982727, "beta_dpo/beta_margin_grad_std": 0.04261607676744461, "beta_dpo/beta_margin_mean": 0.13842153549194336, "beta_dpo/beta_margin_std": 0.17371715605258942, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.1535425186157227, "beta_dpo/gap_mean": 129.5877227783203, "beta_dpo/gap_std": 152.70767211914062, "beta_dpo/loss_margin_mean": 138.42153930664062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7958883994126285, "grad_norm": 8.798036575317383, "learning_rate": 6.182866334636888e-08, "logits/chosen": -0.2917734980583191, "logits/rejected": -0.28750523924827576, "loss": 1.3005, "step": 542 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47245243191719055, "beta_dpo/beta_margin_grad_std": 0.0398247130215168, "beta_dpo/beta_margin_mean": 0.1112518459558487, "beta_dpo/beta_margin_std": 0.1615283042192459, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.7853881120681763, "beta_dpo/gap_mean": 126.10411071777344, "beta_dpo/gap_std": 155.04043579101562, "beta_dpo/loss_margin_mean": 111.25183868408203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7973568281938326, "grad_norm": 10.619677543640137, "learning_rate": 6.098622674699147e-08, "logits/chosen": -0.3245403468608856, "logits/rejected": -0.3284730315208435, "loss": 1.2969, "step": 543 }, { "beta_dpo/beta": 0.042499665170907974, "beta_dpo/beta_margin_grad_mean": -0.3261357545852661, "beta_dpo/beta_margin_grad_std": 0.27486762404441833, "beta_dpo/beta_margin_mean": 6.365813732147217, "beta_dpo/beta_margin_std": 10.63759708404541, "beta_dpo/beta_used": 0.042499665170907974, "beta_dpo/beta_used_raw": -0.0006970278918743134, "beta_dpo/gap_mean": 126.75240325927734, "beta_dpo/gap_std": 154.43374633789062, "beta_dpo/loss_margin_mean": 138.70376586914062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7988252569750367, "grad_norm": 337.2016906738281, "learning_rate": 6.01487708363232e-08, "logits/chosen": -0.2916638255119324, "logits/rejected": -0.29204195737838745, "loss": 0.9386, "step": 544 }, { "beta_dpo/beta": 0.06862561404705048, "beta_dpo/beta_margin_grad_mean": -0.29947012662887573, "beta_dpo/beta_margin_grad_std": 0.2657318115234375, "beta_dpo/beta_margin_mean": 11.304689407348633, "beta_dpo/beta_margin_std": 18.24564552307129, "beta_dpo/beta_used": 0.06862561404705048, "beta_dpo/beta_used_raw": -0.3488979637622833, "beta_dpo/gap_mean": 130.86599731445312, "beta_dpo/gap_std": 152.92401123046875, "beta_dpo/loss_margin_mean": 151.92410278320312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8002936857562408, "grad_norm": 781.7689819335938, "learning_rate": 5.9316317682106294e-08, "logits/chosen": -0.2935147285461426, "logits/rejected": -0.27058857679367065, "loss": 1.6842, "step": 545 }, { "beta_dpo/beta": 0.188106968998909, "beta_dpo/beta_margin_grad_mean": -0.28278061747550964, "beta_dpo/beta_margin_grad_std": 0.28060972690582275, "beta_dpo/beta_margin_mean": 26.020763397216797, "beta_dpo/beta_margin_std": 42.951210021972656, "beta_dpo/beta_used": 0.188106968998909, "beta_dpo/beta_used_raw": -0.1429443508386612, "beta_dpo/gap_mean": 132.23533630371094, "beta_dpo/gap_std": 149.86732482910156, "beta_dpo/loss_margin_mean": 125.84686279296875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.801762114537445, "grad_norm": 752.8720092773438, "learning_rate": 5.848888922025552e-08, "logits/chosen": -0.27959996461868286, "logits/rejected": -0.2727757394313812, "loss": 1.079, "step": 546 }, { "beta_dpo/beta": 0.6438117027282715, "beta_dpo/beta_margin_grad_mean": -0.33132269978523254, "beta_dpo/beta_margin_grad_std": 0.31213411688804626, "beta_dpo/beta_margin_mean": 104.74890899658203, "beta_dpo/beta_margin_std": 195.10377502441406, "beta_dpo/beta_used": 0.6438117027282715, "beta_dpo/beta_used_raw": 0.1203995943069458, "beta_dpo/gap_mean": 129.84857177734375, "beta_dpo/gap_std": 149.78875732421875, "beta_dpo/loss_margin_mean": 131.3858642578125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8032305433186491, "grad_norm": 6136.345703125, "learning_rate": 5.7666507254280265e-08, "logits/chosen": -0.3318541646003723, "logits/rejected": -0.30599403381347656, "loss": 8.3158, "step": 547 }, { "beta_dpo/beta": 0.5348808765411377, "beta_dpo/beta_margin_grad_mean": -0.2953696846961975, "beta_dpo/beta_margin_grad_std": 0.29101452231407166, "beta_dpo/beta_margin_mean": 72.7407455444336, "beta_dpo/beta_margin_std": 127.447021484375, "beta_dpo/beta_used": 0.5348808765411377, "beta_dpo/beta_used_raw": -0.02570188045501709, "beta_dpo/gap_mean": 132.4664764404297, "beta_dpo/gap_std": 154.11122131347656, "beta_dpo/loss_margin_mean": 137.2895965576172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8046989720998532, "grad_norm": 5.817610263824463, "learning_rate": 5.684919345471029e-08, "logits/chosen": -0.3109471797943115, "logits/rejected": -0.2878919839859009, "loss": 0.6402, "step": 548 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4734934866428375, "beta_dpo/beta_margin_grad_std": 0.03661532700061798, "beta_dpo/beta_margin_mean": 0.10683414340019226, "beta_dpo/beta_margin_std": 0.14824533462524414, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.812859058380127, "beta_dpo/gap_mean": 128.56686401367188, "beta_dpo/gap_std": 153.61985778808594, "beta_dpo/loss_margin_mean": 106.83413696289062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8061674008810573, "grad_norm": 8.842658996582031, "learning_rate": 5.603696935852426e-08, "logits/chosen": -0.2656526565551758, "logits/rejected": -0.245744526386261, "loss": 1.2953, "step": 549 }, { "beta_dpo/beta": 0.5795989036560059, "beta_dpo/beta_margin_grad_mean": -0.31652987003326416, "beta_dpo/beta_margin_grad_std": 0.3032745122909546, "beta_dpo/beta_margin_mean": 79.51753234863281, "beta_dpo/beta_margin_std": 137.07252502441406, "beta_dpo/beta_used": 0.5795989036560059, "beta_dpo/beta_used_raw": -0.1910473108291626, "beta_dpo/gap_mean": 126.98031616210938, "beta_dpo/gap_std": 149.06398010253906, "beta_dpo/loss_margin_mean": 115.1662368774414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8076358296622613, "grad_norm": 4275.45068359375, "learning_rate": 5.5229856368582376e-08, "logits/chosen": -0.2921378016471863, "logits/rejected": -0.2875661849975586, "loss": 8.9316, "step": 550 }, { "beta_dpo/beta": 1.3962814807891846, "beta_dpo/beta_margin_grad_mean": -0.18695083260536194, "beta_dpo/beta_margin_grad_std": 0.38917961716651917, "beta_dpo/beta_margin_mean": 232.1625518798828, "beta_dpo/beta_margin_std": 253.87550354003906, "beta_dpo/beta_used": 1.3962814807891846, "beta_dpo/beta_used_raw": 1.3962814807891846, "beta_dpo/gap_mean": 131.1296844482422, "beta_dpo/gap_std": 147.8602752685547, "beta_dpo/loss_margin_mean": 165.77349853515625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8091042584434655, "grad_norm": 8295.5263671875, "learning_rate": 5.4427875753062734e-08, "logits/chosen": -0.30160123109817505, "logits/rejected": -0.3009389638900757, "loss": 11.5516, "step": 551 }, { "beta_dpo/beta": 0.030131345614790916, "beta_dpo/beta_margin_grad_mean": -0.26047882437705994, "beta_dpo/beta_margin_grad_std": 0.2447003722190857, "beta_dpo/beta_margin_mean": 6.02596378326416, "beta_dpo/beta_margin_std": 9.554372787475586, "beta_dpo/beta_used": 0.030131345614790916, "beta_dpo/beta_used_raw": -0.81267249584198, "beta_dpo/gap_mean": 139.46234130859375, "beta_dpo/gap_std": 153.2427520751953, "beta_dpo/loss_margin_mean": 167.8729705810547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8105726872246696, "grad_norm": 132.4352264404297, "learning_rate": 5.363104864490034e-08, "logits/chosen": -0.2840738296508789, "logits/rejected": -0.2644941210746765, "loss": 0.9032, "step": 552 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4713681638240814, "beta_dpo/beta_margin_grad_std": 0.03616320341825485, "beta_dpo/beta_margin_mean": 0.11548375338315964, "beta_dpo/beta_margin_std": 0.1468585580587387, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.732727289199829, "beta_dpo/gap_mean": 136.5676727294922, "beta_dpo/gap_std": 154.75587463378906, "beta_dpo/loss_margin_mean": 115.4837417602539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8120411160058737, "grad_norm": 12.119832038879395, "learning_rate": 5.2839396041230415e-08, "logits/chosen": -0.2949014902114868, "logits/rejected": -0.28429996967315674, "loss": 1.2862, "step": 553 }, { "beta_dpo/beta": 1.4587900638580322, "beta_dpo/beta_margin_grad_mean": -0.14538182318210602, "beta_dpo/beta_margin_grad_std": 0.3477603495121002, "beta_dpo/beta_margin_mean": 238.0844268798828, "beta_dpo/beta_margin_std": 281.8099365234375, "beta_dpo/beta_used": 1.4587900638580322, "beta_dpo/beta_used_raw": 1.4587900638580322, "beta_dpo/gap_mean": 135.99404907226562, "beta_dpo/gap_std": 153.7836456298828, "beta_dpo/loss_margin_mean": 152.52484130859375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8135095447870778, "grad_norm": 4559.46533203125, "learning_rate": 5.205293880283551e-08, "logits/chosen": -0.3366745710372925, "logits/rejected": -0.2806839942932129, "loss": 5.6062, "step": 554 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46083858609199524, "beta_dpo/beta_margin_grad_std": 0.04272112995386124, "beta_dpo/beta_margin_mean": 0.15844394266605377, "beta_dpo/beta_margin_std": 0.17401549220085144, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0361934900283813, "beta_dpo/gap_mean": 141.6253662109375, "beta_dpo/gap_std": 156.78480529785156, "beta_dpo/loss_margin_mean": 158.44393920898438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8149779735682819, "grad_norm": 9.786598205566406, "learning_rate": 5.127169765359515e-08, "logits/chosen": -0.32283300161361694, "logits/rejected": -0.32556623220443726, "loss": 1.2716, "step": 555 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47160035371780396, "beta_dpo/beta_margin_grad_std": 0.03592273220419884, "beta_dpo/beta_margin_mean": 0.11455066502094269, "beta_dpo/beta_margin_std": 0.1455988883972168, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.443986415863037, "beta_dpo/gap_mean": 137.80215454101562, "beta_dpo/gap_std": 157.4153594970703, "beta_dpo/loss_margin_mean": 114.5506591796875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8164464023494861, "grad_norm": 9.365325927734375, "learning_rate": 5.049569317994012e-08, "logits/chosen": -0.26281827688217163, "logits/rejected": -0.25179579854011536, "loss": 1.2816, "step": 556 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4667251408100128, "beta_dpo/beta_margin_grad_std": 0.03912574052810669, "beta_dpo/beta_margin_mean": 0.1342703402042389, "beta_dpo/beta_margin_std": 0.15857142210006714, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.001009464263916, "beta_dpo/gap_mean": 137.3007049560547, "beta_dpo/gap_std": 157.2840576171875, "beta_dpo/loss_margin_mean": 134.2703399658203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8179148311306902, "grad_norm": 9.669354438781738, "learning_rate": 4.9724945830310144e-08, "logits/chosen": -0.337306410074234, "logits/rejected": -0.3294256329536438, "loss": 1.2898, "step": 557 }, { "beta_dpo/beta": 1.0059072971343994, "beta_dpo/beta_margin_grad_mean": -0.2754041254520416, "beta_dpo/beta_margin_grad_std": 0.2766437232494354, "beta_dpo/beta_margin_mean": 189.39646911621094, "beta_dpo/beta_margin_std": 301.6690673828125, "beta_dpo/beta_used": 1.0059072971343994, "beta_dpo/beta_used_raw": 0.4606805443763733, "beta_dpo/gap_mean": 142.946044921875, "beta_dpo/gap_std": 159.32034301757812, "beta_dpo/loss_margin_mean": 181.12159729003906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8193832599118943, "grad_norm": 3461.24609375, "learning_rate": 4.8959475914614554e-08, "logits/chosen": -0.3064291179180145, "logits/rejected": -0.2867761254310608, "loss": 1.8804, "step": 558 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46763336658477783, "beta_dpo/beta_margin_grad_std": 0.037108905613422394, "beta_dpo/beta_margin_mean": 0.13055618107318878, "beta_dpo/beta_margin_std": 0.15036651492118835, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.5490376949310303, "beta_dpo/gap_mean": 142.8798828125, "beta_dpo/gap_std": 159.66058349609375, "beta_dpo/loss_margin_mean": 130.55618286132812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8208516886930984, "grad_norm": 10.119109153747559, "learning_rate": 4.8199303603697614e-08, "logits/chosen": -0.29613497853279114, "logits/rejected": -0.264508992433548, "loss": 1.2773, "step": 559 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4762340188026428, "beta_dpo/beta_margin_grad_std": 0.02926693856716156, "beta_dpo/beta_margin_mean": 0.09551002085208893, "beta_dpo/beta_margin_std": 0.11784511804580688, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.5403027534484863, "beta_dpo/gap_mean": 135.69216918945312, "beta_dpo/gap_std": 153.98773193359375, "beta_dpo/loss_margin_mean": 95.51001739501953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8223201174743024, "grad_norm": 7.162990093231201, "learning_rate": 4.7444448928806615e-08, "logits/chosen": -0.3266592025756836, "logits/rejected": -0.29840749502182007, "loss": 1.3, "step": 560 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4732115864753723, "beta_dpo/beta_margin_grad_std": 0.03771474212408066, "beta_dpo/beta_margin_mean": 0.10795173794031143, "beta_dpo/beta_margin_std": 0.15263213217258453, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3117592334747314, "beta_dpo/gap_mean": 128.0847930908203, "beta_dpo/gap_std": 150.51815795898438, "beta_dpo/loss_margin_mean": 107.95172882080078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8237885462555066, "grad_norm": 9.79651165008545, "learning_rate": 4.669493178106432e-08, "logits/chosen": -0.25924211740493774, "logits/rejected": -0.26707911491394043, "loss": 1.2876, "step": 561 }, { "beta_dpo/beta": 0.3996525704860687, "beta_dpo/beta_margin_grad_mean": -0.18731488287448883, "beta_dpo/beta_margin_grad_std": 0.24937215447425842, "beta_dpo/beta_margin_mean": 71.11713409423828, "beta_dpo/beta_margin_std": 115.91793060302734, "beta_dpo/beta_used": 0.3996525704860687, "beta_dpo/beta_used_raw": 0.3996525704860687, "beta_dpo/gap_mean": 133.90435791015625, "beta_dpo/gap_std": 153.45608520507812, "beta_dpo/loss_margin_mean": 160.7019500732422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8252569750367107, "grad_norm": 1593.489501953125, "learning_rate": 4.5950771910944596e-08, "logits/chosen": -0.2836867570877075, "logits/rejected": -0.2583543062210083, "loss": 0.5564, "step": 562 }, { "beta_dpo/beta": 0.6970747709274292, "beta_dpo/beta_margin_grad_mean": -0.2882736921310425, "beta_dpo/beta_margin_grad_std": 0.28458070755004883, "beta_dpo/beta_margin_mean": 97.39839935302734, "beta_dpo/beta_margin_std": 165.26593017578125, "beta_dpo/beta_used": 0.6970747709274292, "beta_dpo/beta_used_raw": -1.030278205871582, "beta_dpo/gap_mean": 128.47000122070312, "beta_dpo/gap_std": 152.47921752929688, "beta_dpo/loss_margin_mean": 104.50337219238281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8267254038179148, "grad_norm": 5.384544372558594, "learning_rate": 4.521198892775202e-08, "logits/chosen": -0.2287236452102661, "logits/rejected": -0.2236756980419159, "loss": 0.6609, "step": 563 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.468271940946579, "beta_dpo/beta_margin_grad_std": 0.026733947917819023, "beta_dpo/beta_margin_mean": 0.12744946777820587, "beta_dpo/beta_margin_std": 0.10762052237987518, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.3562297523021698, "beta_dpo/gap_mean": 128.43161010742188, "beta_dpo/gap_std": 145.12420654296875, "beta_dpo/loss_margin_mean": 127.449462890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8281938325991189, "grad_norm": 10.955151557922363, "learning_rate": 4.447860229910544e-08, "logits/chosen": -0.3264349102973938, "logits/rejected": -0.27898818254470825, "loss": 1.2715, "step": 564 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46887075901031494, "beta_dpo/beta_margin_grad_std": 0.04425227269530296, "beta_dpo/beta_margin_mean": 0.12599852681159973, "beta_dpo/beta_margin_std": 0.1797182410955429, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.8274688720703125, "beta_dpo/gap_mean": 129.21328735351562, "beta_dpo/gap_std": 149.37860107421875, "beta_dpo/loss_margin_mean": 125.99852752685547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8296622613803231, "grad_norm": 9.339284896850586, "learning_rate": 4.375063135042445e-08, "logits/chosen": -0.32135599851608276, "logits/rejected": -0.29336240887641907, "loss": 1.2962, "step": 565 }, { "beta_dpo/beta": 0.8117600679397583, "beta_dpo/beta_margin_grad_mean": -0.31733959913253784, "beta_dpo/beta_margin_grad_std": 0.30447834730148315, "beta_dpo/beta_margin_mean": 146.53407287597656, "beta_dpo/beta_margin_std": 228.20166015625, "beta_dpo/beta_used": 0.8117600679397583, "beta_dpo/beta_used_raw": 0.07706618309020996, "beta_dpo/gap_mean": 126.85293579101562, "beta_dpo/gap_std": 152.6265411376953, "beta_dpo/loss_margin_mean": 133.81838989257812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8311306901615272, "grad_norm": 6508.62109375, "learning_rate": 4.3028095264420525e-08, "logits/chosen": -0.32367947697639465, "logits/rejected": -0.31794965267181396, "loss": 15.8724, "step": 566 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46915555000305176, "beta_dpo/beta_margin_grad_std": 0.037161875516176224, "beta_dpo/beta_margin_mean": 0.12432525306940079, "beta_dpo/beta_margin_std": 0.15016567707061768, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7240467667579651, "beta_dpo/gap_mean": 129.08819580078125, "beta_dpo/gap_std": 152.85943603515625, "beta_dpo/loss_margin_mean": 124.32524871826172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8325991189427313, "grad_norm": 9.504931449890137, "learning_rate": 4.231101308059165e-08, "logits/chosen": -0.23528262972831726, "logits/rejected": -0.2039298713207245, "loss": 1.278, "step": 567 }, { "beta_dpo/beta": 0.8461459279060364, "beta_dpo/beta_margin_grad_mean": -0.2526528537273407, "beta_dpo/beta_margin_grad_std": 0.24908038973808289, "beta_dpo/beta_margin_mean": 147.88540649414062, "beta_dpo/beta_margin_std": 209.6600341796875, "beta_dpo/beta_used": 0.8461459279060364, "beta_dpo/beta_used_raw": 0.6107033491134644, "beta_dpo/gap_mean": 133.88278198242188, "beta_dpo/gap_std": 151.78158569335938, "beta_dpo/loss_margin_mean": 160.45494079589844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8340675477239354, "grad_norm": 6.678622245788574, "learning_rate": 4.1599403694720145e-08, "logits/chosen": -0.23205448687076569, "logits/rejected": -0.23499351739883423, "loss": 0.6345, "step": 568 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4686329960823059, "beta_dpo/beta_margin_grad_std": 0.042182739824056625, "beta_dpo/beta_margin_mean": 0.1267063468694687, "beta_dpo/beta_margin_std": 0.1721249222755432, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.5694082379341125, "beta_dpo/gap_mean": 132.76881408691406, "beta_dpo/gap_std": 157.49737548828125, "beta_dpo/loss_margin_mean": 126.70633697509766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8355359765051396, "grad_norm": 8.88830280303955, "learning_rate": 4.089328585837512e-08, "logits/chosen": -0.2558819651603699, "logits/rejected": -0.23038721084594727, "loss": 1.2718, "step": 569 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4649701714515686, "beta_dpo/beta_margin_grad_std": 0.03612607344985008, "beta_dpo/beta_margin_mean": 0.1412174552679062, "beta_dpo/beta_margin_std": 0.1463191956281662, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3033900260925293, "beta_dpo/gap_mean": 135.02297973632812, "beta_dpo/gap_std": 156.3493194580078, "beta_dpo/loss_margin_mean": 141.2174530029297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8370044052863436, "grad_norm": 9.188750267028809, "learning_rate": 4.019267817841834e-08, "logits/chosen": -0.2918507754802704, "logits/rejected": -0.25753656029701233, "loss": 1.2812, "step": 570 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4650152325630188, "beta_dpo/beta_margin_grad_std": 0.04063018783926964, "beta_dpo/beta_margin_mean": 0.141206756234169, "beta_dpo/beta_margin_std": 0.16472414135932922, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18992829322814941, "beta_dpo/gap_mean": 136.13604736328125, "beta_dpo/gap_std": 156.74822998046875, "beta_dpo/loss_margin_mean": 141.20675659179688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8384728340675477, "grad_norm": 9.97313117980957, "learning_rate": 3.9497599116513705e-08, "logits/chosen": -0.25397494435310364, "logits/rejected": -0.2496742308139801, "loss": 1.2632, "step": 571 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46506214141845703, "beta_dpo/beta_margin_grad_std": 0.04904274642467499, "beta_dpo/beta_margin_mean": 0.1416582465171814, "beta_dpo/beta_margin_std": 0.19958563148975372, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1367369890213013, "beta_dpo/gap_mean": 137.13278198242188, "beta_dpo/gap_std": 162.1319122314453, "beta_dpo/loss_margin_mean": 141.65823364257812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8399412628487518, "grad_norm": 8.473297119140625, "learning_rate": 3.880806698864086e-08, "logits/chosen": -0.1344674527645111, "logits/rejected": -0.1297120749950409, "loss": 1.2785, "step": 572 }, { "beta_dpo/beta": 0.5657570362091064, "beta_dpo/beta_margin_grad_mean": -0.31479325890541077, "beta_dpo/beta_margin_grad_std": 0.3017220199108124, "beta_dpo/beta_margin_mean": 95.1263656616211, "beta_dpo/beta_margin_std": 160.51971435546875, "beta_dpo/beta_used": 0.5657570362091064, "beta_dpo/beta_used_raw": -0.6155003905296326, "beta_dpo/gap_mean": 134.93524169921875, "beta_dpo/gap_std": 163.98435974121094, "beta_dpo/loss_margin_mean": 134.92376708984375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8414096916299559, "grad_norm": 2860.332763671875, "learning_rate": 3.812409996461275e-08, "logits/chosen": -0.21585188806056976, "logits/rejected": -0.1981552243232727, "loss": 1.0498, "step": 573 }, { "beta_dpo/beta": 0.42727431654930115, "beta_dpo/beta_margin_grad_mean": -0.29670077562332153, "beta_dpo/beta_margin_grad_std": 0.2917650043964386, "beta_dpo/beta_margin_mean": 74.77179718017578, "beta_dpo/beta_margin_std": 117.22281646728516, "beta_dpo/beta_used": 0.42727431654930115, "beta_dpo/beta_used_raw": -0.5097041130065918, "beta_dpo/gap_mean": 137.75469970703125, "beta_dpo/gap_std": 164.98902893066406, "beta_dpo/loss_margin_mean": 151.39781188964844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8428781204111601, "grad_norm": 4061.1669921875, "learning_rate": 3.74457160675965e-08, "logits/chosen": -0.2794973850250244, "logits/rejected": -0.27186524868011475, "loss": 7.3757, "step": 574 }, { "beta_dpo/beta": 0.7542173862457275, "beta_dpo/beta_margin_grad_mean": -0.28391233086586, "beta_dpo/beta_margin_grad_std": 0.2826778292655945, "beta_dpo/beta_margin_mean": 144.06639099121094, "beta_dpo/beta_margin_std": 216.29620361328125, "beta_dpo/beta_used": 0.7542173862457275, "beta_dpo/beta_used_raw": -1.0610246658325195, "beta_dpo/gap_mean": 142.86614990234375, "beta_dpo/gap_std": 165.47073364257812, "beta_dpo/loss_margin_mean": 148.09268188476562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8443465491923642, "grad_norm": 5152.19970703125, "learning_rate": 3.677293317363864e-08, "logits/chosen": -0.23463058471679688, "logits/rejected": -0.22157064080238342, "loss": 10.9297, "step": 575 }, { "beta_dpo/beta": 0.7337521910667419, "beta_dpo/beta_margin_grad_mean": -0.33054977655410767, "beta_dpo/beta_margin_grad_std": 0.3121558427810669, "beta_dpo/beta_margin_mean": 134.6069793701172, "beta_dpo/beta_margin_std": 246.19821166992188, "beta_dpo/beta_used": 0.7337521910667419, "beta_dpo/beta_used_raw": -0.325950026512146, "beta_dpo/gap_mean": 143.2208251953125, "beta_dpo/gap_std": 170.84510803222656, "beta_dpo/loss_margin_mean": 145.1151885986328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8458149779735683, "grad_norm": 8114.90380859375, "learning_rate": 3.6105769011194224e-08, "logits/chosen": -0.19612964987754822, "logits/rejected": -0.20850840210914612, "loss": 15.0778, "step": 576 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46494874358177185, "beta_dpo/beta_margin_grad_std": 0.03526080772280693, "beta_dpo/beta_margin_mean": 0.14119017124176025, "beta_dpo/beta_margin_std": 0.1427299827337265, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.49266529083251953, "beta_dpo/gap_mean": 141.38392639160156, "beta_dpo/gap_std": 167.66371154785156, "beta_dpo/loss_margin_mean": 141.19017028808594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8472834067547724, "grad_norm": 10.440290451049805, "learning_rate": 3.5444241160659304e-08, "logits/chosen": -0.23968470096588135, "logits/rejected": -0.20959413051605225, "loss": 1.2622, "step": 577 }, { "beta_dpo/beta": 0.7611909508705139, "beta_dpo/beta_margin_grad_mean": -0.2679000794887543, "beta_dpo/beta_margin_grad_std": 0.26821058988571167, "beta_dpo/beta_margin_mean": 142.5281219482422, "beta_dpo/beta_margin_std": 197.11514282226562, "beta_dpo/beta_used": 0.7611909508705139, "beta_dpo/beta_used_raw": 0.006027281284332275, "beta_dpo/gap_mean": 143.93539428710938, "beta_dpo/gap_std": 159.52902221679688, "beta_dpo/loss_margin_mean": 147.3227996826172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8487518355359766, "grad_norm": 6583.7763671875, "learning_rate": 3.478836705390808e-08, "logits/chosen": -0.22640444338321686, "logits/rejected": -0.2157905399799347, "loss": 11.5305, "step": 578 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46830984950065613, "beta_dpo/beta_margin_grad_std": 0.035833120346069336, "beta_dpo/beta_margin_mean": 0.1277673989534378, "beta_dpo/beta_margin_std": 0.14521077275276184, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.2088857889175415, "beta_dpo/gap_mean": 139.8126220703125, "beta_dpo/gap_std": 156.47616577148438, "beta_dpo/loss_margin_mean": 127.76739501953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8502202643171806, "grad_norm": 9.89820384979248, "learning_rate": 3.41381639738331e-08, "logits/chosen": -0.2302914261817932, "logits/rejected": -0.2305675745010376, "loss": 1.2754, "step": 579 }, { "beta_dpo/beta": 0.6327630877494812, "beta_dpo/beta_margin_grad_mean": -0.294393390417099, "beta_dpo/beta_margin_grad_std": 0.2901197075843811, "beta_dpo/beta_margin_mean": 124.6340560913086, "beta_dpo/beta_margin_std": 214.6474151611328, "beta_dpo/beta_used": 0.6327630877494812, "beta_dpo/beta_used_raw": 0.005323469638824463, "beta_dpo/gap_mean": 145.12841796875, "beta_dpo/gap_std": 160.601318359375, "beta_dpo/loss_margin_mean": 171.3323516845703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8516886930983847, "grad_norm": 4675.8388671875, "learning_rate": 3.349364905389032e-08, "logits/chosen": -0.2667354345321655, "logits/rejected": -0.2596646249294281, "loss": 3.4703, "step": 580 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4696502089500427, "beta_dpo/beta_margin_grad_std": 0.037816308438777924, "beta_dpo/beta_margin_mean": 0.12241779267787933, "beta_dpo/beta_margin_std": 0.1532379686832428, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.4444794654846191, "beta_dpo/gap_mean": 142.22879028320312, "beta_dpo/gap_std": 161.37136840820312, "beta_dpo/loss_margin_mean": 122.41778564453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8531571218795888, "grad_norm": 11.051158905029297, "learning_rate": 3.285483927764726e-08, "logits/chosen": -0.2130197286605835, "logits/rejected": -0.19874969124794006, "loss": 1.2772, "step": 581 }, { "beta_dpo/beta": 0.8017593622207642, "beta_dpo/beta_margin_grad_mean": -0.3157171308994293, "beta_dpo/beta_margin_grad_std": 0.30175650119781494, "beta_dpo/beta_margin_mean": 149.09133911132812, "beta_dpo/beta_margin_std": 235.7801971435547, "beta_dpo/beta_used": 0.8017593622207642, "beta_dpo/beta_used_raw": 0.31641441583633423, "beta_dpo/gap_mean": 140.53729248046875, "beta_dpo/gap_std": 163.76708984375, "beta_dpo/loss_margin_mean": 150.3306884765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8546255506607929, "grad_norm": 6046.9873046875, "learning_rate": 3.222175147833556e-08, "logits/chosen": -0.2035096287727356, "logits/rejected": -0.21539103984832764, "loss": 3.3185, "step": 582 }, { "beta_dpo/beta": 0.9682586193084717, "beta_dpo/beta_margin_grad_mean": -0.30453696846961975, "beta_dpo/beta_margin_grad_std": 0.2952696681022644, "beta_dpo/beta_margin_mean": 162.30221557617188, "beta_dpo/beta_margin_std": 269.2271423339844, "beta_dpo/beta_used": 0.9682586193084717, "beta_dpo/beta_used_raw": -1.43953537940979, "beta_dpo/gap_mean": 141.73876953125, "beta_dpo/gap_std": 164.75506591796875, "beta_dpo/loss_margin_mean": 119.88349151611328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.856093979441997, "grad_norm": 3786.101318359375, "learning_rate": 3.159440233840763e-08, "logits/chosen": -0.24737709760665894, "logits/rejected": -0.2320498675107956, "loss": 1.185, "step": 583 }, { "beta_dpo/beta": 1.6438066959381104, "beta_dpo/beta_margin_grad_mean": -0.09067382663488388, "beta_dpo/beta_margin_grad_std": 0.27994534373283386, "beta_dpo/beta_margin_mean": 301.57366943359375, "beta_dpo/beta_margin_std": 240.6201934814453, "beta_dpo/beta_used": 1.6438066959381104, "beta_dpo/beta_used_raw": 1.6438066959381104, "beta_dpo/gap_mean": 145.17355346679688, "beta_dpo/gap_std": 163.69281005859375, "beta_dpo/loss_margin_mean": 184.53834533691406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8575624082232012, "grad_norm": 8176.68701171875, "learning_rate": 3.0972808389096635e-08, "logits/chosen": -0.2157582938671112, "logits/rejected": -0.18717166781425476, "loss": 4.5156, "step": 584 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4652203321456909, "beta_dpo/beta_margin_grad_std": 0.04472190886735916, "beta_dpo/beta_margin_mean": 0.14087313413619995, "beta_dpo/beta_margin_std": 0.1823493242263794, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.062327265739441, "beta_dpo/gap_mean": 146.427490234375, "beta_dpo/gap_std": 167.09033203125, "beta_dpo/loss_margin_mean": 140.87313842773438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8590308370044053, "grad_norm": 10.569422721862793, "learning_rate": 3.035698600998121e-08, "logits/chosen": -0.22004346549510956, "logits/rejected": -0.19343584775924683, "loss": 1.2685, "step": 585 }, { "beta_dpo/beta": 0.6197296380996704, "beta_dpo/beta_margin_grad_mean": -0.34489572048187256, "beta_dpo/beta_margin_grad_std": 0.31850454211235046, "beta_dpo/beta_margin_mean": 77.84491729736328, "beta_dpo/beta_margin_std": 179.86990356445312, "beta_dpo/beta_used": 0.6197296380996704, "beta_dpo/beta_used_raw": 0.3582208454608917, "beta_dpo/gap_mean": 142.56971740722656, "beta_dpo/gap_std": 166.39047241210938, "beta_dpo/loss_margin_mean": 124.97702026367188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8604992657856094, "grad_norm": 6330.98046875, "learning_rate": 2.974695142855388e-08, "logits/chosen": -0.15243850648403168, "logits/rejected": -0.1464701145887375, "loss": 20.036, "step": 586 }, { "beta_dpo/beta": 0.1769709438085556, "beta_dpo/beta_margin_grad_mean": -0.31770533323287964, "beta_dpo/beta_margin_grad_std": 0.2991076707839966, "beta_dpo/beta_margin_mean": 28.48146629333496, "beta_dpo/beta_margin_std": 53.35295104980469, "beta_dpo/beta_used": 0.1769709438085556, "beta_dpo/beta_used_raw": 0.002979278564453125, "beta_dpo/gap_mean": 141.173828125, "beta_dpo/gap_std": 167.37734985351562, "beta_dpo/loss_margin_mean": 143.61300659179688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8619676945668135, "grad_norm": 1486.959716796875, "learning_rate": 2.9142720719793122e-08, "logits/chosen": -0.21711990237236023, "logits/rejected": -0.22026541829109192, "loss": 1.6782, "step": 587 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46673768758773804, "beta_dpo/beta_margin_grad_std": 0.040590547025203705, "beta_dpo/beta_margin_mean": 0.1343628466129303, "beta_dpo/beta_margin_std": 0.16470497846603394, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.646202802658081, "beta_dpo/gap_mean": 142.1791534423828, "beta_dpo/gap_std": 167.677001953125, "beta_dpo/loss_margin_mean": 134.3628387451172, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8634361233480177, "grad_norm": 10.127455711364746, "learning_rate": 2.8544309805740018e-08, "logits/chosen": -0.20089392364025116, "logits/rejected": -0.21640396118164062, "loss": 1.2803, "step": 588 }, { "beta_dpo/beta": 0.7172443270683289, "beta_dpo/beta_margin_grad_mean": -0.2917996644973755, "beta_dpo/beta_margin_grad_std": 0.288737952709198, "beta_dpo/beta_margin_mean": 129.02499389648438, "beta_dpo/beta_margin_std": 218.08363342285156, "beta_dpo/beta_used": 0.7172443270683289, "beta_dpo/beta_used_raw": 0.6308818459510803, "beta_dpo/gap_mean": 145.3118438720703, "beta_dpo/gap_std": 171.17291259765625, "beta_dpo/loss_margin_mean": 175.2359619140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8649045521292217, "grad_norm": 4978.62353515625, "learning_rate": 2.7951734455078786e-08, "logits/chosen": -0.21389494836330414, "logits/rejected": -0.21070238947868347, "loss": 9.0009, "step": 589 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46174660325050354, "beta_dpo/beta_margin_grad_std": 0.04324490576982498, "beta_dpo/beta_margin_mean": 0.15463007986545563, "beta_dpo/beta_margin_std": 0.1757480353116989, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1024678945541382, "beta_dpo/gap_mean": 148.32342529296875, "beta_dpo/gap_std": 171.41741943359375, "beta_dpo/loss_margin_mean": 154.6300811767578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8663729809104258, "grad_norm": 9.892497062683105, "learning_rate": 2.736501028272095e-08, "logits/chosen": -0.27166497707366943, "logits/rejected": -0.2824591398239136, "loss": 1.2672, "step": 590 }, { "beta_dpo/beta": 0.8430722951889038, "beta_dpo/beta_margin_grad_mean": -0.1681036800146103, "beta_dpo/beta_margin_grad_std": 0.3638584315776825, "beta_dpo/beta_margin_mean": 119.6875228881836, "beta_dpo/beta_margin_std": 166.5166778564453, "beta_dpo/beta_used": 0.8430722951889038, "beta_dpo/beta_used_raw": 0.8430722951889038, "beta_dpo/gap_mean": 147.86041259765625, "beta_dpo/gap_std": 171.3108367919922, "beta_dpo/loss_margin_mean": 143.2025604248047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8678414096916299, "grad_norm": 6685.375, "learning_rate": 2.678415274939408e-08, "logits/chosen": -0.18227970600128174, "logits/rejected": -0.1490815132856369, "loss": 8.5331, "step": 591 }, { "beta_dpo/beta": 0.3010109066963196, "beta_dpo/beta_margin_grad_mean": -0.17112194001674652, "beta_dpo/beta_margin_grad_std": 0.3744083046913147, "beta_dpo/beta_margin_mean": 41.5831298828125, "beta_dpo/beta_margin_std": 63.77037048339844, "beta_dpo/beta_used": 0.3010109066963196, "beta_dpo/beta_used_raw": 0.3010109066963196, "beta_dpo/gap_mean": 147.7420654296875, "beta_dpo/gap_std": 173.76480102539062, "beta_dpo/loss_margin_mean": 142.8642120361328, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.869309838472834, "grad_norm": 1482.502197265625, "learning_rate": 2.6209177161234442e-08, "logits/chosen": -0.1433703750371933, "logits/rejected": -0.13005104660987854, "loss": 4.0589, "step": 592 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47312307357788086, "beta_dpo/beta_margin_grad_std": 0.037345997989177704, "beta_dpo/beta_margin_mean": 0.10826075077056885, "beta_dpo/beta_margin_std": 0.15056025981903076, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6807360649108887, "beta_dpo/gap_mean": 140.90626525878906, "beta_dpo/gap_std": 171.15963745117188, "beta_dpo/loss_margin_mean": 108.26074981689453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8707782672540382, "grad_norm": 9.439023971557617, "learning_rate": 2.564009866938349e-08, "logits/chosen": -0.21954259276390076, "logits/rejected": -0.20099984109401703, "loss": 1.2829, "step": 593 }, { "beta_dpo/beta": 0.978081226348877, "beta_dpo/beta_margin_grad_mean": -0.30155855417251587, "beta_dpo/beta_margin_grad_std": 0.29437771439552307, "beta_dpo/beta_margin_mean": 191.28977966308594, "beta_dpo/beta_margin_std": 318.696533203125, "beta_dpo/beta_used": 0.978081226348877, "beta_dpo/beta_used_raw": -0.07855743169784546, "beta_dpo/gap_mean": 137.13877868652344, "beta_dpo/gap_std": 171.59979248046875, "beta_dpo/loss_margin_mean": 141.89723205566406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8722466960352423, "grad_norm": 8501.2041015625, "learning_rate": 2.5076932269588708e-08, "logits/chosen": -0.24125239253044128, "logits/rejected": -0.22039398550987244, "loss": 5.7956, "step": 594 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46594342589378357, "beta_dpo/beta_margin_grad_std": 0.04041733592748642, "beta_dpo/beta_margin_mean": 0.13782164454460144, "beta_dpo/beta_margin_std": 0.16540595889091492, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.5019283294677734, "beta_dpo/gap_mean": 140.37985229492188, "beta_dpo/gap_std": 170.879638671875, "beta_dpo/loss_margin_mean": 137.82164001464844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8737151248164464, "grad_norm": 8.183320999145508, "learning_rate": 2.451969280180849e-08, "logits/chosen": -0.22626781463623047, "logits/rejected": -0.209863543510437, "loss": 1.2789, "step": 595 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4717291593551636, "beta_dpo/beta_margin_grad_std": 0.041060976684093475, "beta_dpo/beta_margin_mean": 0.11397657543420792, "beta_dpo/beta_margin_std": 0.1660909652709961, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.0142910480499268, "beta_dpo/gap_mean": 135.27981567382812, "beta_dpo/gap_std": 171.58035278320312, "beta_dpo/loss_margin_mean": 113.97657012939453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8751835535976505, "grad_norm": 9.269095420837402, "learning_rate": 2.396839494982103e-08, "logits/chosen": -0.2307220697402954, "logits/rejected": -0.1973237693309784, "loss": 1.2921, "step": 596 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46336647868156433, "beta_dpo/beta_margin_grad_std": 0.04198829457163811, "beta_dpo/beta_margin_mean": 0.14806897938251495, "beta_dpo/beta_margin_std": 0.1707853525876999, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7436294555664062, "beta_dpo/gap_mean": 138.75473022460938, "beta_dpo/gap_std": 170.81509399414062, "beta_dpo/loss_margin_mean": 148.0689697265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8766519823788547, "grad_norm": 8.808784484863281, "learning_rate": 2.3423053240837514e-08, "logits/chosen": -0.25807347893714905, "logits/rejected": -0.271928995847702, "loss": 1.2696, "step": 597 }, { "beta_dpo/beta": 1.4840975999832153, "beta_dpo/beta_margin_grad_mean": -0.1805470734834671, "beta_dpo/beta_margin_grad_std": 0.3769548535346985, "beta_dpo/beta_margin_mean": 230.9344024658203, "beta_dpo/beta_margin_std": 346.7568054199219, "beta_dpo/beta_used": 1.4840975999832153, "beta_dpo/beta_used_raw": 1.4840975999832153, "beta_dpo/gap_mean": 137.17803955078125, "beta_dpo/gap_std": 168.94393920898438, "beta_dpo/loss_margin_mean": 126.81389617919922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8781204111600588, "grad_norm": 8300.21875, "learning_rate": 2.2883682045119062e-08, "logits/chosen": -0.15673092007637024, "logits/rejected": -0.1377362161874771, "loss": 0.7417, "step": 598 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47013577818870544, "beta_dpo/beta_margin_grad_std": 0.032581571489572525, "beta_dpo/beta_margin_mean": 0.12014342844486237, "beta_dpo/beta_margin_std": 0.1313522458076477, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.891679584980011, "beta_dpo/gap_mean": 133.7099609375, "beta_dpo/gap_std": 163.5647430419922, "beta_dpo/loss_margin_mean": 120.14342498779297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8795888399412628, "grad_norm": 8.623753547668457, "learning_rate": 2.2350295575598367e-08, "logits/chosen": -0.23145388066768646, "logits/rejected": -0.22919651865959167, "loss": 1.2758, "step": 599 }, { "beta_dpo/beta": 0.5338709950447083, "beta_dpo/beta_margin_grad_mean": -0.3133900761604309, "beta_dpo/beta_margin_grad_std": 0.30288076400756836, "beta_dpo/beta_margin_mean": 67.7778091430664, "beta_dpo/beta_margin_std": 132.87322998046875, "beta_dpo/beta_used": 0.5338709950447083, "beta_dpo/beta_used_raw": -0.27364301681518555, "beta_dpo/gap_mean": 130.60569763183594, "beta_dpo/gap_std": 163.328125, "beta_dpo/loss_margin_mean": 123.1740951538086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8810572687224669, "grad_norm": 6785.72021484375, "learning_rate": 2.1822907887504932e-08, "logits/chosen": -0.25038397312164307, "logits/rejected": -0.22991827130317688, "loss": 4.9835, "step": 600 }, { "epoch": 0.8810572687224669, "eval_beta_dpo/beta": 0.06907455623149872, "eval_beta_dpo/beta_margin_grad_mean": -0.45501866936683655, "eval_beta_dpo/beta_margin_grad_std": 0.07437112927436829, "eval_beta_dpo/beta_margin_mean": 10.027384757995605, "eval_beta_dpo/beta_margin_std": 12.811722755432129, "eval_beta_dpo/beta_used": 0.06907455623149872, "eval_beta_dpo/beta_used_raw": -2.489274740219116, "eval_beta_dpo/gap_mean": 130.01516723632812, "eval_beta_dpo/gap_std": 165.05413818359375, "eval_beta_dpo/loss_margin_mean": 86.860595703125, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.2788536548614502, "eval_logits/rejected": -0.2574594020843506, "eval_loss": 1.7101179361343384, "eval_runtime": 40.1564, "eval_samples_per_second": 58.247, "eval_steps_per_second": 1.843, "step": 600 }, { "beta_dpo/beta": 0.8004127740859985, "beta_dpo/beta_margin_grad_mean": -0.1567797064781189, "beta_dpo/beta_margin_grad_std": 0.36213722825050354, "beta_dpo/beta_margin_mean": 111.2850570678711, "beta_dpo/beta_margin_std": 138.40003967285156, "beta_dpo/beta_used": 0.8004127740859985, "beta_dpo/beta_used_raw": 0.8004127740859985, "beta_dpo/gap_mean": 132.40260314941406, "beta_dpo/gap_std": 165.82818603515625, "beta_dpo/loss_margin_mean": 142.81410217285156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.882525697503671, "grad_norm": 8184.26904296875, "learning_rate": 2.1301532877994742e-08, "logits/chosen": -0.22523418068885803, "logits/rejected": -0.21112903952598572, "loss": 17.4177, "step": 601 }, { "beta_dpo/beta": 0.405770868062973, "beta_dpo/beta_margin_grad_mean": -0.3060374855995178, "beta_dpo/beta_margin_grad_std": 0.2988956570625305, "beta_dpo/beta_margin_mean": 59.608760833740234, "beta_dpo/beta_margin_std": 99.93406677246094, "beta_dpo/beta_used": 0.405770868062973, "beta_dpo/beta_used_raw": -0.4077162742614746, "beta_dpo/gap_mean": 136.0496826171875, "beta_dpo/gap_std": 164.3628387451172, "beta_dpo/loss_margin_mean": 162.997314453125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8839941262848752, "grad_norm": 3134.716064453125, "learning_rate": 2.0786184285784298e-08, "logits/chosen": -0.2434152215719223, "logits/rejected": -0.23451802134513855, "loss": 3.5737, "step": 602 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47068238258361816, "beta_dpo/beta_margin_grad_std": 0.03925681486725807, "beta_dpo/beta_margin_mean": 0.11826837062835693, "beta_dpo/beta_margin_std": 0.15868444740772247, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.2860097885131836, "beta_dpo/gap_mean": 135.49624633789062, "beta_dpo/gap_std": 164.59576416015625, "beta_dpo/loss_margin_mean": 118.26836395263672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8854625550660793, "grad_norm": 8.003498077392578, "learning_rate": 2.0276875690788204e-08, "logits/chosen": -0.30411213636398315, "logits/rejected": -0.28685271739959717, "loss": 1.281, "step": 603 }, { "beta_dpo/beta": 0.6322586536407471, "beta_dpo/beta_margin_grad_mean": -0.27876517176628113, "beta_dpo/beta_margin_grad_std": 0.2794075906276703, "beta_dpo/beta_margin_mean": 95.61994934082031, "beta_dpo/beta_margin_std": 150.78732299804688, "beta_dpo/beta_used": 0.6322586536407471, "beta_dpo/beta_used_raw": 0.08082294464111328, "beta_dpo/gap_mean": 136.69830322265625, "beta_dpo/gap_std": 164.337158203125, "beta_dpo/loss_margin_mean": 149.04656982421875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8869309838472834, "grad_norm": 724.6542358398438, "learning_rate": 1.977362051376158e-08, "logits/chosen": -0.2538166642189026, "logits/rejected": -0.25749316811561584, "loss": 0.8912, "step": 604 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4670778810977936, "beta_dpo/beta_margin_grad_std": 0.044755224138498306, "beta_dpo/beta_margin_mean": 0.13371996581554413, "beta_dpo/beta_margin_std": 0.18470925092697144, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.015389084815979, "beta_dpo/gap_mean": 137.9195098876953, "beta_dpo/gap_std": 170.83059692382812, "beta_dpo/loss_margin_mean": 133.719970703125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8883994126284875, "grad_norm": 12.376964569091797, "learning_rate": 1.9276432015946446e-08, "logits/chosen": -0.2761760950088501, "logits/rejected": -0.2704794406890869, "loss": 1.2738, "step": 605 }, { "beta_dpo/beta": 1.080771803855896, "beta_dpo/beta_margin_grad_mean": -0.28311601281166077, "beta_dpo/beta_margin_grad_std": 0.2813016474246979, "beta_dpo/beta_margin_mean": 202.94053649902344, "beta_dpo/beta_margin_std": 319.66082763671875, "beta_dpo/beta_used": 1.080771803855896, "beta_dpo/beta_used_raw": 0.5771820545196533, "beta_dpo/gap_mean": 136.48269653320312, "beta_dpo/gap_std": 169.08889770507812, "beta_dpo/loss_margin_mean": 149.46290588378906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8898678414096917, "grad_norm": 1692.56103515625, "learning_rate": 1.8785323298722093e-08, "logits/chosen": -0.20563073456287384, "logits/rejected": -0.20558518171310425, "loss": 1.7791, "step": 606 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47003647685050964, "beta_dpo/beta_margin_grad_std": 0.03521895408630371, "beta_dpo/beta_margin_mean": 0.12072371691465378, "beta_dpo/beta_margin_std": 0.14232668280601501, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6541626453399658, "beta_dpo/gap_mean": 136.1642303466797, "beta_dpo/gap_std": 165.0216522216797, "beta_dpo/loss_margin_mean": 120.72370910644531, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8913362701908958, "grad_norm": 8.623156547546387, "learning_rate": 1.8300307303259904e-08, "logits/chosen": -0.28075528144836426, "logits/rejected": -0.26314833760261536, "loss": 1.2853, "step": 607 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46741145849227905, "beta_dpo/beta_margin_grad_std": 0.03372717648744583, "beta_dpo/beta_margin_mean": 0.13120831549167633, "beta_dpo/beta_margin_std": 0.13629145920276642, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6566117405891418, "beta_dpo/gap_mean": 134.62435913085938, "beta_dpo/gap_std": 160.134521484375, "beta_dpo/loss_margin_mean": 131.20831298828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8928046989720999, "grad_norm": 8.3565673828125, "learning_rate": 1.7821396810182437e-08, "logits/chosen": -0.30109351873397827, "logits/rejected": -0.28483152389526367, "loss": 1.2709, "step": 608 }, { "beta_dpo/beta": 0.5683431029319763, "beta_dpo/beta_margin_grad_mean": -0.11586936563253403, "beta_dpo/beta_margin_grad_std": 0.3091588318347931, "beta_dpo/beta_margin_mean": 91.26676177978516, "beta_dpo/beta_margin_std": 144.23231506347656, "beta_dpo/beta_used": 0.5683431029319763, "beta_dpo/beta_used_raw": 0.5683431029319763, "beta_dpo/gap_mean": 137.36264038085938, "beta_dpo/gap_std": 161.44122314453125, "beta_dpo/loss_margin_mean": 153.95826721191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8942731277533039, "grad_norm": 3484.029052734375, "learning_rate": 1.7348604439226617e-08, "logits/chosen": -0.26210033893585205, "logits/rejected": -0.24275103211402893, "loss": 0.6493, "step": 609 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4685831665992737, "beta_dpo/beta_margin_grad_std": 0.04099490866065025, "beta_dpo/beta_margin_mean": 0.1268150508403778, "beta_dpo/beta_margin_std": 0.16618604958057404, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.988802433013916, "beta_dpo/gap_mean": 136.2181854248047, "beta_dpo/gap_std": 160.43869018554688, "beta_dpo/loss_margin_mean": 126.81504821777344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.895741556534508, "grad_norm": 9.066965103149414, "learning_rate": 1.6881942648911074e-08, "logits/chosen": -0.25646454095840454, "logits/rejected": -0.22565940022468567, "loss": 1.2757, "step": 610 }, { "beta_dpo/beta": 0.7692165374755859, "beta_dpo/beta_margin_grad_mean": -0.3122340738773346, "beta_dpo/beta_margin_grad_std": 0.3016832768917084, "beta_dpo/beta_margin_mean": 129.48629760742188, "beta_dpo/beta_margin_std": 206.50274658203125, "beta_dpo/beta_used": 0.7692165374755859, "beta_dpo/beta_used_raw": 0.4401324391365051, "beta_dpo/gap_mean": 137.39132690429688, "beta_dpo/gap_std": 162.03436279296875, "beta_dpo/loss_margin_mean": 148.3230438232422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8972099853157122, "grad_norm": 7008.0810546875, "learning_rate": 1.6421423736208e-08, "logits/chosen": -0.20377308130264282, "logits/rejected": -0.19680052995681763, "loss": 8.377, "step": 611 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4589446187019348, "beta_dpo/beta_margin_grad_std": 0.039632294327020645, "beta_dpo/beta_margin_mean": 0.16578657925128937, "beta_dpo/beta_margin_std": 0.16075921058654785, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.4418590068817139, "beta_dpo/gap_mean": 142.43182373046875, "beta_dpo/gap_std": 161.67913818359375, "beta_dpo/loss_margin_mean": 165.7865753173828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8986784140969163, "grad_norm": 13.170220375061035, "learning_rate": 1.5967059836219042e-08, "logits/chosen": -0.2226446568965912, "logits/rejected": -0.18076658248901367, "loss": 1.2771, "step": 612 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46369874477386475, "beta_dpo/beta_margin_grad_std": 0.034297436475753784, "beta_dpo/beta_margin_mean": 0.14624443650245667, "beta_dpo/beta_margin_std": 0.13885696232318878, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.0288455486297607, "beta_dpo/gap_mean": 144.05943298339844, "beta_dpo/gap_std": 158.86074829101562, "beta_dpo/loss_margin_mean": 146.2444305419922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9001468428781204, "grad_norm": 8.972193717956543, "learning_rate": 1.551886292185553e-08, "logits/chosen": -0.2800806760787964, "logits/rejected": -0.29024672508239746, "loss": 1.2679, "step": 613 }, { "beta_dpo/beta": 0.06990689039230347, "beta_dpo/beta_margin_grad_mean": -0.3197058439254761, "beta_dpo/beta_margin_grad_std": 0.2986561954021454, "beta_dpo/beta_margin_mean": 12.157843589782715, "beta_dpo/beta_margin_std": 20.12245750427246, "beta_dpo/beta_used": 0.06990689039230347, "beta_dpo/beta_used_raw": -0.06399475783109665, "beta_dpo/gap_mean": 148.22630310058594, "beta_dpo/gap_std": 159.02099609375, "beta_dpo/loss_margin_mean": 170.205810546875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9016152716593245, "grad_norm": 895.0585327148438, "learning_rate": 1.507684480352292e-08, "logits/chosen": -0.20398010313510895, "logits/rejected": -0.21416090428829193, "loss": 2.7127, "step": 614 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4713370203971863, "beta_dpo/beta_margin_grad_std": 0.036130066961050034, "beta_dpo/beta_margin_mean": 0.1156582459807396, "beta_dpo/beta_margin_std": 0.14664776623249054, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6342370510101318, "beta_dpo/gap_mean": 143.90347290039062, "beta_dpo/gap_std": 156.93869018554688, "beta_dpo/loss_margin_mean": 115.65824127197266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9030837004405287, "grad_norm": 8.794045448303223, "learning_rate": 1.4641017128809801e-08, "logits/chosen": -0.2878304719924927, "logits/rejected": -0.2756372094154358, "loss": 1.2775, "step": 615 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4739888608455658, "beta_dpo/beta_margin_grad_std": 0.03806653246283531, "beta_dpo/beta_margin_mean": 0.10492546856403351, "beta_dpo/beta_margin_std": 0.15405791997909546, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -3.256364107131958, "beta_dpo/gap_mean": 137.07403564453125, "beta_dpo/gap_std": 155.02120971679688, "beta_dpo/loss_margin_mean": 104.92546844482422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9045521292217328, "grad_norm": 9.077305793762207, "learning_rate": 1.4211391382180637e-08, "logits/chosen": -0.2529584467411041, "logits/rejected": -0.2234017550945282, "loss": 1.3106, "step": 616 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47770678997039795, "beta_dpo/beta_margin_grad_std": 0.03402474522590637, "beta_dpo/beta_margin_mean": 0.0897776335477829, "beta_dpo/beta_margin_std": 0.13734619319438934, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.259263753890991, "beta_dpo/gap_mean": 129.06411743164062, "beta_dpo/gap_std": 153.8069305419922, "beta_dpo/loss_margin_mean": 89.77762603759766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9060205580029369, "grad_norm": 8.899731636047363, "learning_rate": 1.378797888467345e-08, "logits/chosen": -0.23405620455741882, "logits/rejected": -0.19954687356948853, "loss": 1.3011, "step": 617 }, { "beta_dpo/beta": 0.22516019642353058, "beta_dpo/beta_margin_grad_mean": -0.30100154876708984, "beta_dpo/beta_margin_grad_std": 0.2931227684020996, "beta_dpo/beta_margin_mean": 36.04357147216797, "beta_dpo/beta_margin_std": 58.656856536865234, "beta_dpo/beta_used": 0.22516019642353058, "beta_dpo/beta_used_raw": -0.7018966674804688, "beta_dpo/gap_mean": 126.19082641601562, "beta_dpo/gap_std": 157.0688018798828, "beta_dpo/loss_margin_mean": 128.67428588867188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9074889867841409, "grad_norm": 2414.7275390625, "learning_rate": 1.3370790793601371e-08, "logits/chosen": -0.28824859857559204, "logits/rejected": -0.25596606731414795, "loss": 3.6018, "step": 618 }, { "beta_dpo/beta": 0.32898879051208496, "beta_dpo/beta_margin_grad_mean": -0.32226526737213135, "beta_dpo/beta_margin_grad_std": 0.3011726140975952, "beta_dpo/beta_margin_mean": 47.31397247314453, "beta_dpo/beta_margin_std": 92.59415435791016, "beta_dpo/beta_used": 0.32898879051208496, "beta_dpo/beta_used_raw": -0.5022631883621216, "beta_dpo/gap_mean": 127.52127075195312, "beta_dpo/gap_std": 159.29910278320312, "beta_dpo/loss_margin_mean": 129.31863403320312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.908957415565345, "grad_norm": 1730.38427734375, "learning_rate": 1.2959838102258535e-08, "logits/chosen": -0.2695918679237366, "logits/rejected": -0.25438401103019714, "loss": 2.3727, "step": 619 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46883711218833923, "beta_dpo/beta_margin_grad_std": 0.04180603846907616, "beta_dpo/beta_margin_mean": 0.1258520781993866, "beta_dpo/beta_margin_std": 0.16934403777122498, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7810671329498291, "beta_dpo/gap_mean": 128.8234405517578, "beta_dpo/gap_std": 161.2275390625, "beta_dpo/loss_margin_mean": 125.8520736694336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9104258443465492, "grad_norm": 8.195945739746094, "learning_rate": 1.2555131639630567e-08, "logits/chosen": -0.27168160676956177, "logits/rejected": -0.24854370951652527, "loss": 1.2793, "step": 620 }, { "beta_dpo/beta": 0.315225213766098, "beta_dpo/beta_margin_grad_mean": -0.28073248267173767, "beta_dpo/beta_margin_grad_std": 0.27754899859428406, "beta_dpo/beta_margin_mean": 61.944881439208984, "beta_dpo/beta_margin_std": 95.92522430419922, "beta_dpo/beta_used": 0.315225213766098, "beta_dpo/beta_used_raw": -0.001695185899734497, "beta_dpo/gap_mean": 131.76333618164062, "beta_dpo/gap_std": 162.11734008789062, "beta_dpo/loss_margin_mean": 165.3124237060547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9118942731277533, "grad_norm": 2288.819091796875, "learning_rate": 1.2156682070109086e-08, "logits/chosen": -0.18687333166599274, "logits/rejected": -0.1780368983745575, "loss": 2.0444, "step": 621 }, { "beta_dpo/beta": 0.2183779627084732, "beta_dpo/beta_margin_grad_mean": -0.3208658993244171, "beta_dpo/beta_margin_grad_std": 0.29182958602905273, "beta_dpo/beta_margin_mean": 35.79158401489258, "beta_dpo/beta_margin_std": 66.24662017822266, "beta_dpo/beta_used": 0.2183779627084732, "beta_dpo/beta_used_raw": -1.115787386894226, "beta_dpo/gap_mean": 136.07073974609375, "beta_dpo/gap_std": 164.10821533203125, "beta_dpo/loss_margin_mean": 131.12855529785156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9133627019089574, "grad_norm": 1909.772705078125, "learning_rate": 1.1764499893210878e-08, "logits/chosen": -0.2630102336406708, "logits/rejected": -0.24436010420322418, "loss": 2.2051, "step": 622 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4722324013710022, "beta_dpo/beta_margin_grad_std": 0.04376749321818352, "beta_dpo/beta_margin_mean": 0.11244507133960724, "beta_dpo/beta_margin_std": 0.1788908988237381, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.597635269165039, "beta_dpo/gap_mean": 131.22195434570312, "beta_dpo/gap_std": 165.27459716796875, "beta_dpo/loss_margin_mean": 112.445068359375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9148311306901615, "grad_norm": 8.092933654785156, "learning_rate": 1.1378595443300998e-08, "logits/chosen": -0.2123861014842987, "logits/rejected": -0.18733005225658417, "loss": 1.305, "step": 623 }, { "beta_dpo/beta": 1.4514429569244385, "beta_dpo/beta_margin_grad_mean": -0.1736312210559845, "beta_dpo/beta_margin_grad_std": 0.3766280710697174, "beta_dpo/beta_margin_mean": 235.16859436035156, "beta_dpo/beta_margin_std": 305.9576416015625, "beta_dpo/beta_used": 1.4514429569244385, "beta_dpo/beta_used_raw": 1.4514429569244385, "beta_dpo/gap_mean": 134.68902587890625, "beta_dpo/gap_std": 172.1035614013672, "beta_dpo/loss_margin_mean": 160.6850128173828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9162995594713657, "grad_norm": 14112.7099609375, "learning_rate": 1.0998978889320582e-08, "logits/chosen": -0.31213879585266113, "logits/rejected": -0.2707129120826721, "loss": 18.6323, "step": 624 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46674269437789917, "beta_dpo/beta_margin_grad_std": 0.03943945840001106, "beta_dpo/beta_margin_mean": 0.13447730243206024, "beta_dpo/beta_margin_std": 0.16113615036010742, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.086260437965393, "beta_dpo/gap_mean": 135.93350219726562, "beta_dpo/gap_std": 170.4825439453125, "beta_dpo/loss_margin_mean": 134.477294921875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9177679882525698, "grad_norm": 8.834936141967773, "learning_rate": 1.0625660234518913e-08, "logits/chosen": -0.24899110198020935, "logits/rejected": -0.22103792428970337, "loss": 1.277, "step": 625 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47088930010795593, "beta_dpo/beta_margin_grad_std": 0.03784249722957611, "beta_dpo/beta_margin_mean": 0.11733278632164001, "beta_dpo/beta_margin_std": 0.15290819108486176, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6771858930587769, "beta_dpo/gap_mean": 132.06570434570312, "beta_dpo/gap_std": 165.1246337890625, "beta_dpo/loss_margin_mean": 117.33277893066406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9192364170337739, "grad_norm": 11.363311767578125, "learning_rate": 1.0258649316189721e-08, "logits/chosen": -0.30383527278900146, "logits/rejected": -0.27899685502052307, "loss": 1.2908, "step": 626 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4600542187690735, "beta_dpo/beta_margin_grad_std": 0.04917608201503754, "beta_dpo/beta_margin_mean": 0.16240194439888, "beta_dpo/beta_margin_std": 0.2026146799325943, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.715671956539154, "beta_dpo/gap_mean": 135.79798889160156, "beta_dpo/gap_std": 170.36813354492188, "beta_dpo/loss_margin_mean": 162.4019317626953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.920704845814978, "grad_norm": 10.255217552185059, "learning_rate": 9.897955805412e-09, "logits/chosen": -0.2576707601547241, "logits/rejected": -0.27673864364624023, "loss": 1.2721, "step": 627 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46119076013565063, "beta_dpo/beta_margin_grad_std": 0.04088958352804184, "beta_dpo/beta_margin_mean": 0.15677191317081451, "beta_dpo/beta_margin_std": 0.16587892174720764, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7566049098968506, "beta_dpo/gap_mean": 141.70660400390625, "beta_dpo/gap_std": 172.304931640625, "beta_dpo/loss_margin_mean": 156.77191162109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.922173274596182, "grad_norm": 9.771873474121094, "learning_rate": 9.543589206795238e-09, "logits/chosen": -0.25853201746940613, "logits/rejected": -0.2484220564365387, "loss": 1.2686, "step": 628 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4680294096469879, "beta_dpo/beta_margin_grad_std": 0.033298566937446594, "beta_dpo/beta_margin_mean": 0.1288066953420639, "beta_dpo/beta_margin_std": 0.13501150906085968, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1783255338668823, "beta_dpo/gap_mean": 140.23866271972656, "beta_dpo/gap_std": 167.48165893554688, "beta_dpo/loss_margin_mean": 128.8066864013672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9236417033773862, "grad_norm": 13.822155952453613, "learning_rate": 9.19555885822887e-09, "logits/chosen": -0.2648368775844574, "logits/rejected": -0.2452375888824463, "loss": 1.274, "step": 629 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47568345069885254, "beta_dpo/beta_margin_grad_std": 0.03724653273820877, "beta_dpo/beta_margin_mean": 0.09809713065624237, "beta_dpo/beta_margin_std": 0.1510220766067505, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.620537281036377, "beta_dpo/gap_mean": 132.54100036621094, "beta_dpo/gap_std": 162.70718383789062, "beta_dpo/loss_margin_mean": 98.09712219238281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9251101321585903, "grad_norm": 8.615431785583496, "learning_rate": 8.85387393063622e-09, "logits/chosen": -0.3369476795196533, "logits/rejected": -0.3151329755783081, "loss": 1.3037, "step": 630 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.469342440366745, "beta_dpo/beta_margin_grad_std": 0.04454941302537918, "beta_dpo/beta_margin_mean": 0.12401168048381805, "beta_dpo/beta_margin_std": 0.18085241317749023, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -2.252204656600952, "beta_dpo/gap_mean": 129.70608520507812, "beta_dpo/gap_std": 164.6175079345703, "beta_dpo/loss_margin_mean": 124.01167297363281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9265785609397944, "grad_norm": 10.43221378326416, "learning_rate": 8.518543427732949e-09, "logits/chosen": -0.19672399759292603, "logits/rejected": -0.16939029097557068, "loss": 1.3022, "step": 631 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4707336127758026, "beta_dpo/beta_margin_grad_std": 0.04446292296051979, "beta_dpo/beta_margin_mean": 0.11808396875858307, "beta_dpo/beta_margin_std": 0.17979924380779266, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.9367992877960205, "beta_dpo/gap_mean": 129.06605529785156, "beta_dpo/gap_std": 169.87759399414062, "beta_dpo/loss_margin_mean": 118.0839614868164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9280469897209985, "grad_norm": 8.912779808044434, "learning_rate": 8.189576185789637e-09, "logits/chosen": -0.2137627899646759, "logits/rejected": -0.1909235715866089, "loss": 1.2832, "step": 632 }, { "beta_dpo/beta": 0.1498415768146515, "beta_dpo/beta_margin_grad_mean": -0.3400387465953827, "beta_dpo/beta_margin_grad_std": 0.31042587757110596, "beta_dpo/beta_margin_mean": 17.67989158630371, "beta_dpo/beta_margin_std": 41.04912567138672, "beta_dpo/beta_used": 0.1498415768146515, "beta_dpo/beta_used_raw": -1.1626986265182495, "beta_dpo/gap_mean": 122.80825805664062, "beta_dpo/gap_std": 166.48403930664062, "beta_dpo/loss_margin_mean": 100.83395385742188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9295154185022027, "grad_norm": 2468.25341796875, "learning_rate": 7.866980873399015e-09, "logits/chosen": -0.262068510055542, "logits/rejected": -0.2606055736541748, "loss": 4.4345, "step": 633 }, { "beta_dpo/beta": 0.2891407012939453, "beta_dpo/beta_margin_grad_mean": -0.3603072762489319, "beta_dpo/beta_margin_grad_std": 0.3205583393573761, "beta_dpo/beta_margin_mean": 44.27980041503906, "beta_dpo/beta_margin_std": 89.58101654052734, "beta_dpo/beta_used": 0.2891407012939453, "beta_dpo/beta_used_raw": -1.0204623937606812, "beta_dpo/gap_mean": 123.09707641601562, "beta_dpo/gap_std": 168.86935424804688, "beta_dpo/loss_margin_mean": 114.10114288330078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9309838472834068, "grad_norm": 4420.4560546875, "learning_rate": 7.550765991247654e-09, "logits/chosen": -0.2516968548297882, "logits/rejected": -0.2492125928401947, "loss": 8.1633, "step": 634 }, { "beta_dpo/beta": 0.8167719841003418, "beta_dpo/beta_margin_grad_mean": -0.3317233920097351, "beta_dpo/beta_margin_grad_std": 0.3114463686943054, "beta_dpo/beta_margin_mean": 93.9231948852539, "beta_dpo/beta_margin_std": 184.6671905517578, "beta_dpo/beta_used": 0.8167719841003418, "beta_dpo/beta_used_raw": 0.44367918372154236, "beta_dpo/gap_mean": 119.10769653320312, "beta_dpo/gap_std": 164.04827880859375, "beta_dpo/loss_margin_mean": 108.52445983886719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9324522760646109, "grad_norm": 4297.1875, "learning_rate": 7.240939871891699e-09, "logits/chosen": -0.3063223958015442, "logits/rejected": -0.25702351331710815, "loss": 12.3188, "step": 635 }, { "beta_dpo/beta": 0.21374358236789703, "beta_dpo/beta_margin_grad_mean": -0.29352760314941406, "beta_dpo/beta_margin_grad_std": 0.28238052129745483, "beta_dpo/beta_margin_mean": 32.544044494628906, "beta_dpo/beta_margin_std": 50.19921112060547, "beta_dpo/beta_used": 0.21374358236789703, "beta_dpo/beta_used_raw": -0.49636417627334595, "beta_dpo/gap_mean": 119.43673706054688, "beta_dpo/gap_std": 161.71958923339844, "beta_dpo/loss_margin_mean": 132.7529754638672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.933920704845815, "grad_norm": 1658.96923828125, "learning_rate": 6.937510679537628e-09, "logits/chosen": -0.2624373733997345, "logits/rejected": -0.23375412821769714, "loss": 2.1742, "step": 636 }, { "beta_dpo/beta": 0.3223646879196167, "beta_dpo/beta_margin_grad_mean": -0.32306286692619324, "beta_dpo/beta_margin_grad_std": 0.30376118421554565, "beta_dpo/beta_margin_mean": 42.55961608886719, "beta_dpo/beta_margin_std": 81.67517852783203, "beta_dpo/beta_used": 0.3223646879196167, "beta_dpo/beta_used_raw": 0.3115572929382324, "beta_dpo/gap_mean": 124.16712951660156, "beta_dpo/gap_std": 161.0850372314453, "beta_dpo/loss_margin_mean": 139.51402282714844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9353891336270191, "grad_norm": 4178.92724609375, "learning_rate": 6.640486409826785e-09, "logits/chosen": -0.22674018144607544, "logits/rejected": -0.22383208572864532, "loss": 3.3524, "step": 637 }, { "beta_dpo/beta": 1.4511369466781616, "beta_dpo/beta_margin_grad_mean": -0.3164081573486328, "beta_dpo/beta_margin_grad_std": 0.30334481596946716, "beta_dpo/beta_margin_mean": 266.310791015625, "beta_dpo/beta_margin_std": 417.8957214355469, "beta_dpo/beta_used": 1.4511369466781616, "beta_dpo/beta_used_raw": -0.3003849983215332, "beta_dpo/gap_mean": 124.66742706298828, "beta_dpo/gap_std": 157.39694213867188, "beta_dpo/loss_margin_mean": 139.00933837890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9368575624082232, "grad_norm": 9381.5517578125, "learning_rate": 6.349874889624962e-09, "logits/chosen": -0.2576182782649994, "logits/rejected": -0.23263539373874664, "loss": 8.0532, "step": 638 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47581177949905396, "beta_dpo/beta_margin_grad_std": 0.03869582340121269, "beta_dpo/beta_margin_mean": 0.09742747247219086, "beta_dpo/beta_margin_std": 0.1560250073671341, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.47708529233932495, "beta_dpo/gap_mean": 122.42938995361328, "beta_dpo/gap_std": 157.66665649414062, "beta_dpo/loss_margin_mean": 97.4274673461914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9383259911894273, "grad_norm": 11.267277717590332, "learning_rate": 6.065683776815933e-09, "logits/chosen": -0.2489241063594818, "logits/rejected": -0.20080968737602234, "loss": 1.2811, "step": 639 }, { "beta_dpo/beta": 1.0406347513198853, "beta_dpo/beta_margin_grad_mean": -0.10319266468286514, "beta_dpo/beta_margin_grad_std": 0.23703627288341522, "beta_dpo/beta_margin_mean": 186.98306274414062, "beta_dpo/beta_margin_std": 294.89520263671875, "beta_dpo/beta_used": 1.0406347513198853, "beta_dpo/beta_used_raw": 1.0406347513198853, "beta_dpo/gap_mean": 126.0462875366211, "beta_dpo/gap_std": 156.94723510742188, "beta_dpo/loss_margin_mean": 156.25440979003906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9397944199706314, "grad_norm": 2567.301025390625, "learning_rate": 5.7879205600998296e-09, "logits/chosen": -0.2669011354446411, "logits/rejected": -0.2516845762729645, "loss": 0.5288, "step": 640 }, { "beta_dpo/beta": 0.6511551141738892, "beta_dpo/beta_margin_grad_mean": -0.2987769544124603, "beta_dpo/beta_margin_grad_std": 0.29313045740127563, "beta_dpo/beta_margin_mean": 84.13956451416016, "beta_dpo/beta_margin_std": 165.199462890625, "beta_dpo/beta_used": 0.6511551141738892, "beta_dpo/beta_used_raw": 0.3140296936035156, "beta_dpo/gap_mean": 128.0950164794922, "beta_dpo/gap_std": 159.058837890625, "beta_dpo/loss_margin_mean": 120.50196075439453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9412628487518355, "grad_norm": 3385.51611328125, "learning_rate": 5.516592558795746e-09, "logits/chosen": -0.2616059482097626, "logits/rejected": -0.23641052842140198, "loss": 10.8266, "step": 641 }, { "beta_dpo/beta": 0.7909172177314758, "beta_dpo/beta_margin_grad_mean": -0.26804837584495544, "beta_dpo/beta_margin_grad_std": 0.27035075426101685, "beta_dpo/beta_margin_mean": 147.3969268798828, "beta_dpo/beta_margin_std": 221.18307495117188, "beta_dpo/beta_used": 0.7909172177314758, "beta_dpo/beta_used_raw": -0.6517113447189331, "beta_dpo/gap_mean": 128.44711303710938, "beta_dpo/gap_std": 167.51364135742188, "beta_dpo/loss_margin_mean": 147.625244140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9427312775330396, "grad_norm": 6544.80078125, "learning_rate": 5.251706922648868e-09, "logits/chosen": -0.27374494075775146, "logits/rejected": -0.26332151889801025, "loss": 7.0951, "step": 642 }, { "beta_dpo/beta": 0.5000445246696472, "beta_dpo/beta_margin_grad_mean": -0.2856932282447815, "beta_dpo/beta_margin_grad_std": 0.28263115882873535, "beta_dpo/beta_margin_mean": 75.19145965576172, "beta_dpo/beta_margin_std": 120.19136047363281, "beta_dpo/beta_used": 0.5000445246696472, "beta_dpo/beta_used_raw": -0.6685765981674194, "beta_dpo/gap_mean": 131.22329711914062, "beta_dpo/gap_std": 162.10546875, "beta_dpo/loss_margin_mean": 120.23302459716797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9441997063142438, "grad_norm": 4131.7802734375, "learning_rate": 4.993270631642038e-09, "logits/chosen": -0.24260678887367249, "logits/rejected": -0.24370941519737244, "loss": 3.7361, "step": 643 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4704153537750244, "beta_dpo/beta_margin_grad_std": 0.044792983680963516, "beta_dpo/beta_margin_mean": 0.11960872262716293, "beta_dpo/beta_margin_std": 0.18185746669769287, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.3600785732269287, "beta_dpo/gap_mean": 127.92471313476562, "beta_dpo/gap_std": 164.80690002441406, "beta_dpo/loss_margin_mean": 119.60871887207031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9456681350954479, "grad_norm": 9.257484436035156, "learning_rate": 4.741290495811873e-09, "logits/chosen": -0.29417717456817627, "logits/rejected": -0.2829264998435974, "loss": 1.2896, "step": 644 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47504597902297974, "beta_dpo/beta_margin_grad_std": 0.04654289036989212, "beta_dpo/beta_margin_mean": 0.10111980140209198, "beta_dpo/beta_margin_std": 0.1889955848455429, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6929526329040527, "beta_dpo/gap_mean": 125.04953002929688, "beta_dpo/gap_std": 169.11019897460938, "beta_dpo/loss_margin_mean": 101.11979675292969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.947136563876652, "grad_norm": 11.280401229858398, "learning_rate": 4.495773155069299e-09, "logits/chosen": -0.26835355162620544, "logits/rejected": -0.2733767330646515, "loss": 1.2982, "step": 645 }, { "beta_dpo/beta": 0.9947884678840637, "beta_dpo/beta_margin_grad_mean": -0.3158058226108551, "beta_dpo/beta_margin_grad_std": 0.3032316267490387, "beta_dpo/beta_margin_mean": 126.421630859375, "beta_dpo/beta_margin_std": 230.53216552734375, "beta_dpo/beta_used": 0.9947884678840637, "beta_dpo/beta_used_raw": 0.016669809818267822, "beta_dpo/gap_mean": 121.25621032714844, "beta_dpo/gap_std": 164.90869140625, "beta_dpo/loss_margin_mean": 113.20999145507812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9486049926578561, "grad_norm": 7780.4990234375, "learning_rate": 4.256725079024553e-09, "logits/chosen": -0.21456298232078552, "logits/rejected": -0.19140079617500305, "loss": 3.2758, "step": 646 }, { "beta_dpo/beta": 0.4405333995819092, "beta_dpo/beta_margin_grad_mean": -0.16993050277233124, "beta_dpo/beta_margin_grad_std": 0.3702445924282074, "beta_dpo/beta_margin_mean": 50.956336975097656, "beta_dpo/beta_margin_std": 66.18246459960938, "beta_dpo/beta_used": 0.4405333995819092, "beta_dpo/beta_used_raw": 0.4405333995819092, "beta_dpo/gap_mean": 119.49800109863281, "beta_dpo/gap_std": 160.93655395507812, "beta_dpo/loss_margin_mean": 118.18896484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9500734214390602, "grad_norm": 3096.896240234375, "learning_rate": 4.024152566816791e-09, "logits/chosen": -0.23497043550014496, "logits/rejected": -0.23454715311527252, "loss": 8.0903, "step": 647 }, { "beta_dpo/beta": 0.21638301014900208, "beta_dpo/beta_margin_grad_mean": -0.28432542085647583, "beta_dpo/beta_margin_grad_std": 0.2745562791824341, "beta_dpo/beta_margin_mean": 37.98030090332031, "beta_dpo/beta_margin_std": 73.11116027832031, "beta_dpo/beta_used": 0.21638301014900208, "beta_dpo/beta_used_raw": 0.027231574058532715, "beta_dpo/gap_mean": 127.08036804199219, "beta_dpo/gap_std": 167.84896850585938, "beta_dpo/loss_margin_mean": 167.418212890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9515418502202643, "grad_norm": 1881.7218017578125, "learning_rate": 3.798061746947995e-09, "logits/chosen": -0.2324717938899994, "logits/rejected": -0.23608848452568054, "loss": 3.7315, "step": 648 }, { "beta_dpo/beta": 0.17022213339805603, "beta_dpo/beta_margin_grad_mean": -0.37708210945129395, "beta_dpo/beta_margin_grad_std": 0.333068311214447, "beta_dpo/beta_margin_mean": 15.761299133300781, "beta_dpo/beta_margin_std": 38.01227569580078, "beta_dpo/beta_used": 0.17022213339805603, "beta_dpo/beta_used_raw": -0.06394051015377045, "beta_dpo/gap_mean": 124.45140075683594, "beta_dpo/gap_std": 167.86746215820312, "beta_dpo/loss_margin_mean": 101.8471450805664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9530102790014684, "grad_norm": 2891.095703125, "learning_rate": 3.5784585771215235e-09, "logits/chosen": -0.3063885569572449, "logits/rejected": -0.2801710069179535, "loss": 3.869, "step": 649 }, { "beta_dpo/beta": 1.3667818307876587, "beta_dpo/beta_margin_grad_mean": -0.2347412258386612, "beta_dpo/beta_margin_grad_std": 0.42016705870628357, "beta_dpo/beta_margin_mean": 236.2583770751953, "beta_dpo/beta_margin_std": 431.2769470214844, "beta_dpo/beta_used": 1.3667818307876587, "beta_dpo/beta_used_raw": 1.3667818307876587, "beta_dpo/gap_mean": 129.84597778320312, "beta_dpo/gap_std": 173.6107635498047, "beta_dpo/loss_margin_mean": 161.89785766601562, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9544787077826725, "grad_norm": 20882.701171875, "learning_rate": 3.3653488440851253e-09, "logits/chosen": -0.22325937449932098, "logits/rejected": -0.22227120399475098, "loss": 43.9246, "step": 650 }, { "beta_dpo/beta": 0.7648828029632568, "beta_dpo/beta_margin_grad_mean": -0.32739847898483276, "beta_dpo/beta_margin_grad_std": 0.3100513815879822, "beta_dpo/beta_margin_mean": 129.44383239746094, "beta_dpo/beta_margin_std": 225.9346466064453, "beta_dpo/beta_used": 0.7648828029632568, "beta_dpo/beta_used_raw": 0.4660683274269104, "beta_dpo/gap_mean": 134.56472778320312, "beta_dpo/gap_std": 172.713623046875, "beta_dpo/loss_margin_mean": 150.5056610107422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9559471365638766, "grad_norm": 7399.314453125, "learning_rate": 3.158738163478475e-09, "logits/chosen": -0.29069170355796814, "logits/rejected": -0.3059248924255371, "loss": 8.9479, "step": 651 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46688932180404663, "beta_dpo/beta_margin_grad_std": 0.04138989374041557, "beta_dpo/beta_margin_mean": 0.1338927298784256, "beta_dpo/beta_margin_std": 0.1681978404521942, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.46489205956459045, "beta_dpo/gap_mean": 133.96636962890625, "beta_dpo/gap_std": 171.03175354003906, "beta_dpo/loss_margin_mean": 133.89273071289062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9574155653450808, "grad_norm": 13.33399772644043, "learning_rate": 2.9586319796851555e-09, "logits/chosen": -0.2815973162651062, "logits/rejected": -0.2725764214992523, "loss": 1.2702, "step": 652 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4622488021850586, "beta_dpo/beta_margin_grad_std": 0.0421992689371109, "beta_dpo/beta_margin_mean": 0.15270715951919556, "beta_dpo/beta_margin_std": 0.1718183010816574, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.37464144825935364, "beta_dpo/gap_mean": 136.72564697265625, "beta_dpo/gap_std": 170.6292724609375, "beta_dpo/loss_margin_mean": 152.7071533203125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9588839941262849, "grad_norm": 9.623185157775879, "learning_rate": 2.7650355656892166e-09, "logits/chosen": -0.26191675662994385, "logits/rejected": -0.26024746894836426, "loss": 1.2661, "step": 653 }, { "beta_dpo/beta": 0.12737774848937988, "beta_dpo/beta_margin_grad_mean": -0.3520982265472412, "beta_dpo/beta_margin_grad_std": 0.311506450176239, "beta_dpo/beta_margin_mean": 14.189286231994629, "beta_dpo/beta_margin_std": 31.74391746520996, "beta_dpo/beta_used": 0.12737774848937988, "beta_dpo/beta_used_raw": -0.6519217491149902, "beta_dpo/gap_mean": 135.6177978515625, "beta_dpo/gap_std": 171.04434204101562, "beta_dpo/loss_margin_mean": 119.36299896240234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.960352422907489, "grad_norm": 1289.0914306640625, "learning_rate": 2.577954022936174e-09, "logits/chosen": -0.285967618227005, "logits/rejected": -0.2813323140144348, "loss": 1.479, "step": 654 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4685191512107849, "beta_dpo/beta_margin_grad_std": 0.04493279755115509, "beta_dpo/beta_margin_mean": 0.12751449644565582, "beta_dpo/beta_margin_std": 0.1827131062746048, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.6912943124771118, "beta_dpo/gap_mean": 134.3379364013672, "beta_dpo/gap_std": 172.51646423339844, "beta_dpo/loss_margin_mean": 127.51449584960938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9618208516886931, "grad_norm": 11.29627513885498, "learning_rate": 2.397392281198729e-09, "logits/chosen": -0.21164986491203308, "logits/rejected": -0.22321152687072754, "loss": 1.2895, "step": 655 }, { "beta_dpo/beta": 1.46394944190979, "beta_dpo/beta_margin_grad_mean": -0.09375060349702835, "beta_dpo/beta_margin_grad_std": 0.2914803922176361, "beta_dpo/beta_margin_mean": 281.1544494628906, "beta_dpo/beta_margin_std": 236.0167694091797, "beta_dpo/beta_used": 1.46394944190979, "beta_dpo/beta_used_raw": 1.46394944190979, "beta_dpo/gap_mean": 140.21481323242188, "beta_dpo/gap_std": 170.7769775390625, "beta_dpo/loss_margin_mean": 189.06527709960938, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9632892804698973, "grad_norm": 4871.01171875, "learning_rate": 2.223355098446622e-09, "logits/chosen": -0.20203420519828796, "logits/rejected": -0.2107037454843521, "loss": 10.8002, "step": 656 }, { "beta_dpo/beta": 0.7246884703636169, "beta_dpo/beta_margin_grad_mean": -0.24664191901683807, "beta_dpo/beta_margin_grad_std": 0.24966345727443695, "beta_dpo/beta_margin_mean": 136.55160522460938, "beta_dpo/beta_margin_std": 201.0517578125, "beta_dpo/beta_used": 0.7246884703636169, "beta_dpo/beta_used_raw": -0.031182467937469482, "beta_dpo/gap_mean": 148.42965698242188, "beta_dpo/gap_std": 167.33609008789062, "beta_dpo/loss_margin_mean": 171.03836059570312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9647577092511013, "grad_norm": 5.878337860107422, "learning_rate": 2.055847060721566e-09, "logits/chosen": -0.2323456108570099, "logits/rejected": -0.23794196546077728, "loss": 0.6362, "step": 657 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.466478168964386, "beta_dpo/beta_margin_grad_std": 0.03783747926354408, "beta_dpo/beta_margin_mean": 0.13517163693904877, "beta_dpo/beta_margin_std": 0.15315905213356018, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.7150457501411438, "beta_dpo/gap_mean": 145.939208984375, "beta_dpo/gap_std": 164.26235961914062, "beta_dpo/loss_margin_mean": 135.171630859375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9662261380323054, "grad_norm": 9.239810943603516, "learning_rate": 1.8948725820160662e-09, "logits/chosen": -0.23999705910682678, "logits/rejected": -0.2215622067451477, "loss": 1.2622, "step": 658 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46911635994911194, "beta_dpo/beta_margin_grad_std": 0.0383928045630455, "beta_dpo/beta_margin_mean": 0.12460412085056305, "beta_dpo/beta_margin_std": 0.15533404052257538, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6675459146499634, "beta_dpo/gap_mean": 143.0897216796875, "beta_dpo/gap_std": 163.14138793945312, "beta_dpo/loss_margin_mean": 124.60411071777344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9676945668135095, "grad_norm": 13.10746955871582, "learning_rate": 1.7404359041573723e-09, "logits/chosen": -0.3149293065071106, "logits/rejected": -0.26698166131973267, "loss": 1.2654, "step": 659 }, { "beta_dpo/beta": 0.336564302444458, "beta_dpo/beta_margin_grad_mean": -0.2779940366744995, "beta_dpo/beta_margin_grad_std": 0.27703657746315, "beta_dpo/beta_margin_mean": 57.831546783447266, "beta_dpo/beta_margin_std": 95.76539611816406, "beta_dpo/beta_used": 0.336564302444458, "beta_dpo/beta_used_raw": 0.0521998405456543, "beta_dpo/gap_mean": 144.819091796875, "beta_dpo/gap_std": 160.9578857421875, "beta_dpo/loss_margin_mean": 162.99786376953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9691629955947136, "grad_norm": 1521.5159912109375, "learning_rate": 1.592541096695571e-09, "logits/chosen": -0.18781328201293945, "logits/rejected": -0.15785738825798035, "loss": 2.3556, "step": 660 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4634128510951996, "beta_dpo/beta_margin_grad_std": 0.04297526925802231, "beta_dpo/beta_margin_mean": 0.1479966789484024, "beta_dpo/beta_margin_std": 0.1754070222377777, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1518099308013916, "beta_dpo/gap_mean": 144.63906860351562, "beta_dpo/gap_std": 161.95355224609375, "beta_dpo/loss_margin_mean": 147.99667358398438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9706314243759178, "grad_norm": 8.182291030883789, "learning_rate": 1.4511920567963908e-09, "logits/chosen": -0.2719656527042389, "logits/rejected": -0.2467373013496399, "loss": 1.2689, "step": 661 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4679609537124634, "beta_dpo/beta_margin_grad_std": 0.04259883239865303, "beta_dpo/beta_margin_mean": 0.1293320655822754, "beta_dpo/beta_margin_std": 0.17222696542739868, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.6409615278244019, "beta_dpo/gap_mean": 144.40728759765625, "beta_dpo/gap_std": 164.30880737304688, "beta_dpo/loss_margin_mean": 129.33206176757812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9720998531571219, "grad_norm": 10.364067077636719, "learning_rate": 1.3163925091384532e-09, "logits/chosen": -0.3215191066265106, "logits/rejected": -0.2895079255104065, "loss": 1.2631, "step": 662 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4659326374530792, "beta_dpo/beta_margin_grad_std": 0.041490860283374786, "beta_dpo/beta_margin_mean": 0.1374731808900833, "beta_dpo/beta_margin_std": 0.1681915521621704, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.430047631263733, "beta_dpo/gap_mean": 142.96701049804688, "beta_dpo/gap_std": 167.32403564453125, "beta_dpo/loss_margin_mean": 137.47317504882812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.973568281938326, "grad_norm": 7.655603885650635, "learning_rate": 1.1881460058152382e-09, "logits/chosen": -0.31214457750320435, "logits/rejected": -0.310594379901886, "loss": 1.2757, "step": 663 }, { "beta_dpo/beta": 1.081035852432251, "beta_dpo/beta_margin_grad_mean": -0.3265109956264496, "beta_dpo/beta_margin_grad_std": 0.31032606959342957, "beta_dpo/beta_margin_mean": 193.7392120361328, "beta_dpo/beta_margin_std": 372.88427734375, "beta_dpo/beta_used": 1.081035852432251, "beta_dpo/beta_used_raw": 0.9985529780387878, "beta_dpo/gap_mean": 145.85546875, "beta_dpo/gap_std": 171.21942138671875, "beta_dpo/loss_margin_mean": 160.23175048828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9750367107195301, "grad_norm": 14736.9189453125, "learning_rate": 1.066455926241383e-09, "logits/chosen": -0.23802334070205688, "logits/rejected": -0.23446832597255707, "loss": 22.277, "step": 664 }, { "beta_dpo/beta": 0.17351345717906952, "beta_dpo/beta_margin_grad_mean": -0.30906784534454346, "beta_dpo/beta_margin_grad_std": 0.29436877369880676, "beta_dpo/beta_margin_mean": 24.549057006835938, "beta_dpo/beta_margin_std": 46.99803924560547, "beta_dpo/beta_used": 0.17351345717906952, "beta_dpo/beta_used_raw": 0.05960509926080704, "beta_dpo/gap_mean": 143.3297882080078, "beta_dpo/gap_std": 168.05531311035156, "beta_dpo/loss_margin_mean": 132.63438415527344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9765051395007343, "grad_norm": 950.77587890625, "learning_rate": 9.513254770636137e-10, "logits/chosen": -0.2172248661518097, "logits/rejected": -0.18482929468154907, "loss": 1.9778, "step": 665 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4638909697532654, "beta_dpo/beta_margin_grad_std": 0.034985702484846115, "beta_dpo/beta_margin_mean": 0.14538602530956268, "beta_dpo/beta_margin_std": 0.14133024215698242, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8510459661483765, "beta_dpo/gap_mean": 144.62229919433594, "beta_dpo/gap_std": 164.13558959960938, "beta_dpo/loss_margin_mean": 145.38601684570312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9779735682819384, "grad_norm": 10.848896026611328, "learning_rate": 8.427576920763956e-10, "logits/chosen": -0.26341164112091064, "logits/rejected": -0.24032096564769745, "loss": 1.2653, "step": 666 }, { "beta_dpo/beta": 0.555698573589325, "beta_dpo/beta_margin_grad_mean": -0.32695654034614563, "beta_dpo/beta_margin_grad_std": 0.3104262053966522, "beta_dpo/beta_margin_mean": 75.98949432373047, "beta_dpo/beta_margin_std": 132.38754272460938, "beta_dpo/beta_used": 0.555698573589325, "beta_dpo/beta_used_raw": 0.0477980375289917, "beta_dpo/gap_mean": 142.28619384765625, "beta_dpo/gap_std": 162.02328491210938, "beta_dpo/loss_margin_mean": 135.94004821777344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9794419970631424, "grad_norm": 7423.337890625, "learning_rate": 7.407554321417764e-10, "logits/chosen": -0.24232017993927002, "logits/rejected": -0.21042859554290771, "loss": 3.6685, "step": 667 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.47267022728919983, "beta_dpo/beta_margin_grad_std": 0.04184536263346672, "beta_dpo/beta_margin_mean": 0.11026235669851303, "beta_dpo/beta_margin_std": 0.16938358545303345, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.565541386604309, "beta_dpo/gap_mean": 135.5725555419922, "beta_dpo/gap_std": 161.8687744140625, "beta_dpo/loss_margin_mean": 110.26235961914062, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9809104258443465, "grad_norm": 15.746362686157227, "learning_rate": 6.453213851142225e-10, "logits/chosen": -0.26321089267730713, "logits/rejected": -0.2517067492008209, "loss": 1.2854, "step": 668 }, { "beta_dpo/beta": 0.49764859676361084, "beta_dpo/beta_margin_grad_mean": -0.2775057852268219, "beta_dpo/beta_margin_grad_std": 0.27767181396484375, "beta_dpo/beta_margin_mean": 76.75032043457031, "beta_dpo/beta_margin_std": 137.6516876220703, "beta_dpo/beta_used": 0.49764859676361084, "beta_dpo/beta_used_raw": 0.24128052592277527, "beta_dpo/gap_mean": 139.15911865234375, "beta_dpo/gap_std": 162.9943084716797, "beta_dpo/loss_margin_mean": 155.27737426757812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9823788546255506, "grad_norm": 2606.953125, "learning_rate": 5.564580657695939e-10, "logits/chosen": -0.239346444606781, "logits/rejected": -0.21844345331192017, "loss": 3.9008, "step": 669 }, { "beta_dpo/beta": 0.5937625169754028, "beta_dpo/beta_margin_grad_mean": -0.32673099637031555, "beta_dpo/beta_margin_grad_std": 0.3107914626598358, "beta_dpo/beta_margin_mean": 102.75801086425781, "beta_dpo/beta_margin_std": 171.8385009765625, "beta_dpo/beta_used": 0.5937625169754028, "beta_dpo/beta_used_raw": -0.3109077215194702, "beta_dpo/gap_mean": 141.39236450195312, "beta_dpo/gap_std": 165.60235595703125, "beta_dpo/loss_margin_mean": 155.38189697265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9838472834067548, "grad_norm": 7477.4453125, "learning_rate": 4.741678157389739e-10, "logits/chosen": -0.25414931774139404, "logits/rejected": -0.23977619409561157, "loss": 13.5793, "step": 670 }, { "beta_dpo/beta": 1.3876622915267944, "beta_dpo/beta_margin_grad_mean": -0.2369070202112198, "beta_dpo/beta_margin_grad_std": 0.42259082198143005, "beta_dpo/beta_margin_mean": 201.6892547607422, "beta_dpo/beta_margin_std": 243.80215454101562, "beta_dpo/beta_used": 1.3876622915267944, "beta_dpo/beta_used_raw": 1.3876622915267944, "beta_dpo/gap_mean": 142.10791015625, "beta_dpo/gap_std": 166.866943359375, "beta_dpo/loss_margin_mean": 143.68527221679688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9853157121879589, "grad_norm": 13217.642578125, "learning_rate": 3.9845280344705245e-10, "logits/chosen": -0.21202997863292694, "logits/rejected": -0.20390699803829193, "loss": 15.1475, "step": 671 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.469896525144577, "beta_dpo/beta_margin_grad_std": 0.043766915798187256, "beta_dpo/beta_margin_mean": 0.12151134014129639, "beta_dpo/beta_margin_std": 0.1770341694355011, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.8151004910469055, "beta_dpo/gap_mean": 139.9226531982422, "beta_dpo/gap_std": 167.88650512695312, "beta_dpo/loss_margin_mean": 121.51133728027344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.986784140969163, "grad_norm": 10.910394668579102, "learning_rate": 3.293150240547549e-10, "logits/chosen": -0.24089352786540985, "logits/rejected": -0.22517436742782593, "loss": 1.2722, "step": 672 }, { "beta_dpo/beta": 0.39367401599884033, "beta_dpo/beta_margin_grad_mean": -0.27898791432380676, "beta_dpo/beta_margin_grad_std": 0.2772313356399536, "beta_dpo/beta_margin_mean": 63.47161102294922, "beta_dpo/beta_margin_std": 101.09577178955078, "beta_dpo/beta_used": 0.39367401599884033, "beta_dpo/beta_used_raw": 0.13607317209243774, "beta_dpo/gap_mean": 141.91412353515625, "beta_dpo/gap_std": 166.22857666015625, "beta_dpo/loss_margin_mean": 155.3968048095703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9882525697503671, "grad_norm": 4248.92431640625, "learning_rate": 2.6675629940689504e-10, "logits/chosen": -0.21331897377967834, "logits/rejected": -0.20891378819942474, "loss": 7.6929, "step": 673 }, { "beta_dpo/beta": 0.3700469732284546, "beta_dpo/beta_margin_grad_mean": -0.31875723600387573, "beta_dpo/beta_margin_grad_std": 0.2990269958972931, "beta_dpo/beta_margin_mean": 60.167579650878906, "beta_dpo/beta_margin_std": 115.83226776123047, "beta_dpo/beta_used": 0.3700469732284546, "beta_dpo/beta_used_raw": 0.35867586731910706, "beta_dpo/gap_mean": 145.38265991210938, "beta_dpo/gap_std": 166.84365844726562, "beta_dpo/loss_margin_mean": 163.85891723632812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9897209985315712, "grad_norm": 3347.8056640625, "learning_rate": 2.1077827798404725e-10, "logits/chosen": -0.22968342900276184, "logits/rejected": -0.21133801341056824, "loss": 3.5724, "step": 674 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4584572911262512, "beta_dpo/beta_margin_grad_std": 0.04273706302046776, "beta_dpo/beta_margin_mean": 0.1682644486427307, "beta_dpo/beta_margin_std": 0.17532816529273987, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.1393800973892212, "beta_dpo/gap_mean": 149.49859619140625, "beta_dpo/gap_std": 167.7472381591797, "beta_dpo/loss_margin_mean": 168.26443481445312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9911894273127754, "grad_norm": 10.684988021850586, "learning_rate": 1.6138243485910863e-10, "logits/chosen": -0.2344612330198288, "logits/rejected": -0.22431063652038574, "loss": 1.2649, "step": 675 }, { "beta_dpo/beta": 0.3458569049835205, "beta_dpo/beta_margin_grad_mean": -0.2846805453300476, "beta_dpo/beta_margin_grad_std": 0.2793225646018982, "beta_dpo/beta_margin_mean": 58.578914642333984, "beta_dpo/beta_margin_std": 92.11776733398438, "beta_dpo/beta_used": 0.3458569049835205, "beta_dpo/beta_used_raw": -0.8106540441513062, "beta_dpo/gap_mean": 150.6968994140625, "beta_dpo/gap_std": 166.34634399414062, "beta_dpo/loss_margin_mean": 143.155029296875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9926578560939795, "grad_norm": 3344.320068359375, "learning_rate": 1.1857007165852472e-10, "logits/chosen": -0.314957857131958, "logits/rejected": -0.2842877507209778, "loss": 5.0433, "step": 676 }, { "beta_dpo/beta": 0.9840426445007324, "beta_dpo/beta_margin_grad_mean": -0.2947867214679718, "beta_dpo/beta_margin_grad_std": 0.29029718041419983, "beta_dpo/beta_margin_mean": 163.275146484375, "beta_dpo/beta_margin_std": 241.04299926757812, "beta_dpo/beta_used": 0.9840426445007324, "beta_dpo/beta_used_raw": 0.5463694334030151, "beta_dpo/gap_mean": 149.2086181640625, "beta_dpo/gap_std": 164.42991638183594, "beta_dpo/loss_margin_mean": 154.48655700683594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9941262848751835, "grad_norm": 9669.5361328125, "learning_rate": 8.23423165278725e-11, "logits/chosen": -0.22851765155792236, "logits/rejected": -0.20020201802253723, "loss": 12.5035, "step": 677 }, { "beta_dpo/beta": 0.5137372016906738, "beta_dpo/beta_margin_grad_mean": -0.33945244550704956, "beta_dpo/beta_margin_grad_std": 0.3146733343601227, "beta_dpo/beta_margin_mean": 91.00566101074219, "beta_dpo/beta_margin_std": 150.59832763671875, "beta_dpo/beta_used": 0.5137372016906738, "beta_dpo/beta_used_raw": 0.4158139228820801, "beta_dpo/gap_mean": 150.82748413085938, "beta_dpo/gap_std": 165.1314697265625, "beta_dpo/loss_margin_mean": 160.3917999267578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9955947136563876, "grad_norm": 6134.18310546875, "learning_rate": 5.270012410216185e-11, "logits/chosen": -0.24406661093235016, "logits/rejected": -0.23352187871932983, "loss": 5.5623, "step": 678 }, { "beta_dpo/beta": 0.4399394392967224, "beta_dpo/beta_margin_grad_mean": -0.202021986246109, "beta_dpo/beta_margin_grad_std": 0.3905799984931946, "beta_dpo/beta_margin_mean": 58.7913932800293, "beta_dpo/beta_margin_std": 76.95616149902344, "beta_dpo/beta_used": 0.4399394392967224, "beta_dpo/beta_used_raw": 0.4399394392967224, "beta_dpo/gap_mean": 149.608642578125, "beta_dpo/gap_std": 166.2967529296875, "beta_dpo/loss_margin_mean": 132.75094604492188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9970631424375918, "grad_norm": 2949.92333984375, "learning_rate": 2.9644275480772416e-11, "logits/chosen": -0.24739307165145874, "logits/rejected": -0.2278253436088562, "loss": 4.2081, "step": 679 }, { "beta_dpo/beta": 0.16887128353118896, "beta_dpo/beta_margin_grad_mean": -0.2913900911808014, "beta_dpo/beta_margin_grad_std": 0.28668370842933655, "beta_dpo/beta_margin_mean": 24.72771453857422, "beta_dpo/beta_margin_std": 45.65426254272461, "beta_dpo/beta_used": 0.16887128353118896, "beta_dpo/beta_used_raw": -0.949596643447876, "beta_dpo/gap_mean": 149.79739379882812, "beta_dpo/gap_std": 168.91465759277344, "beta_dpo/loss_margin_mean": 154.96934509277344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9985315712187959, "grad_norm": 773.09716796875, "learning_rate": 1.31753782067201e-11, "logits/chosen": -0.282899409532547, "logits/rejected": -0.2579476833343506, "loss": 1.4902, "step": 680 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.46664658188819885, "beta_dpo/beta_margin_grad_std": 0.041838180273771286, "beta_dpo/beta_margin_mean": 0.13485069572925568, "beta_dpo/beta_margin_std": 0.17000959813594818, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -1.753014087677002, "beta_dpo/gap_mean": 145.9384002685547, "beta_dpo/gap_std": 166.8389892578125, "beta_dpo/loss_margin_mean": 134.85069274902344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 1.0, "grad_norm": 11.882765769958496, "learning_rate": 3.2938662507808745e-12, "logits/chosen": -0.26762282848358154, "logits/rejected": -0.25434818863868713, "loss": 1.2798, "step": 681 }, { "epoch": 1.0, "step": 681, "total_flos": 0.0, "train_loss": 2.627565469291942, "train_runtime": 3177.7378, "train_samples_per_second": 13.72, "train_steps_per_second": 0.214 } ], "logging_steps": 1, "max_steps": 681, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }