{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta_used": 0.010316052474081516, "beta_dpo/beta_used_raw": 0.010316052474081516, "beta_dpo/gap_mean": -0.0030604612547904253, "beta_dpo/gap_std": 0.273499995470047, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.0020942408376963353, "grad_norm": 15.496143341064453, "learning_rate": 0.0, "logits/chosen": 2.203179359436035, "logits/rejected": 2.035616397857666, "loss": 5.5428, "step": 1 }, { "beta_dpo/beta_used": 0.009904756210744381, "beta_dpo/beta_used_raw": 0.009904756210744381, "beta_dpo/gap_mean": 0.0473581925034523, "beta_dpo/gap_std": 0.6410814523696899, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.004188481675392671, "grad_norm": 15.881836891174316, "learning_rate": 1.0416666666666666e-08, "logits/chosen": 2.1704792976379395, "logits/rejected": 2.0754430294036865, "loss": 5.5442, "step": 2 }, { "beta_dpo/beta_used": 0.010276634246110916, "beta_dpo/beta_used_raw": 0.010276634246110916, "beta_dpo/gap_mean": 0.040970198810100555, "beta_dpo/gap_std": 0.7673041224479675, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.0062827225130890054, "grad_norm": 16.63137435913086, "learning_rate": 2.083333333333333e-08, "logits/chosen": 2.4686079025268555, "logits/rejected": 2.464277505874634, "loss": 5.5428, "step": 3 }, { "beta_dpo/beta_used": 0.01017595175653696, "beta_dpo/beta_used_raw": 0.01017595175653696, "beta_dpo/gap_mean": 0.06479164212942123, "beta_dpo/gap_std": 0.8090450763702393, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.008376963350785341, "grad_norm": 19.53766632080078, "learning_rate": 3.125e-08, "logits/chosen": 1.7211281061172485, "logits/rejected": 1.5812376737594604, "loss": 5.5403, "step": 4 }, { "beta_dpo/beta_used": 0.009877461940050125, "beta_dpo/beta_used_raw": 0.009877461940050125, "beta_dpo/gap_mean": 0.03874587640166283, "beta_dpo/gap_std": 0.8403902649879456, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.010471204188481676, "grad_norm": 17.47425651550293, "learning_rate": 4.166666666666666e-08, "logits/chosen": 1.8391205072402954, "logits/rejected": 1.8945659399032593, "loss": 5.5435, "step": 5 }, { "beta_dpo/beta_used": 0.009602357633411884, "beta_dpo/beta_used_raw": 0.009602357633411884, "beta_dpo/gap_mean": 0.013125958852469921, "beta_dpo/gap_std": 0.8970670700073242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.012565445026178011, "grad_norm": 17.965578079223633, "learning_rate": 5.208333333333333e-08, "logits/chosen": 1.8753392696380615, "logits/rejected": 1.806428074836731, "loss": 5.546, "step": 6 }, { "beta_dpo/beta_used": 0.010046536102890968, "beta_dpo/beta_used_raw": 0.010046536102890968, "beta_dpo/gap_mean": 0.00752235297113657, "beta_dpo/gap_std": 0.9090036153793335, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.014659685863874346, "grad_norm": 18.481788635253906, "learning_rate": 6.25e-08, "logits/chosen": 2.1977810859680176, "logits/rejected": 2.027773141860962, "loss": 5.543, "step": 7 }, { "beta_dpo/beta_used": 0.009285343810915947, "beta_dpo/beta_used_raw": 0.009285343810915947, "beta_dpo/gap_mean": -0.0737709105014801, "beta_dpo/gap_std": 0.9767862558364868, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.016753926701570682, "grad_norm": 17.283451080322266, "learning_rate": 7.291666666666667e-08, "logits/chosen": 2.3551371097564697, "logits/rejected": 2.089672088623047, "loss": 5.5522, "step": 8 }, { "beta_dpo/beta_used": 0.010606064461171627, "beta_dpo/beta_used_raw": 0.010606064461171627, "beta_dpo/gap_mean": -0.04680243134498596, "beta_dpo/gap_std": 0.9687216281890869, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.018848167539267015, "grad_norm": 16.163658142089844, "learning_rate": 8.333333333333333e-08, "logits/chosen": 2.1110918521881104, "logits/rejected": 2.0067708492279053, "loss": 5.5433, "step": 9 }, { "beta_dpo/beta_used": 0.00987918209284544, "beta_dpo/beta_used_raw": 0.00987918209284544, "beta_dpo/gap_mean": -0.03316927328705788, "beta_dpo/gap_std": 0.8964071273803711, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.020942408376963352, "grad_norm": 15.014591217041016, "learning_rate": 9.375e-08, "logits/chosen": 1.858559012413025, "logits/rejected": 2.0337729454040527, "loss": 5.5481, "step": 10 }, { "beta_dpo/beta_used": 0.010337094776332378, "beta_dpo/beta_used_raw": 0.010337094776332378, "beta_dpo/gap_mean": 0.03589403256773949, "beta_dpo/gap_std": 0.8406289219856262, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.023036649214659685, "grad_norm": 18.00157356262207, "learning_rate": 1.0416666666666667e-07, "logits/chosen": 1.893631100654602, "logits/rejected": 1.8213893175125122, "loss": 5.5413, "step": 11 }, { "beta_dpo/beta_used": 0.009809032082557678, "beta_dpo/beta_used_raw": 0.009809032082557678, "beta_dpo/gap_mean": 0.031110307201743126, "beta_dpo/gap_std": 0.8743820190429688, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.025130890052356022, "grad_norm": 16.61766815185547, "learning_rate": 1.1458333333333332e-07, "logits/chosen": 1.5167274475097656, "logits/rejected": 1.6536264419555664, "loss": 5.5438, "step": 12 }, { "beta_dpo/beta_used": 0.009467006660997868, "beta_dpo/beta_used_raw": 0.009467006660997868, "beta_dpo/gap_mean": -9.547406807541847e-05, "beta_dpo/gap_std": 0.9159330725669861, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.027225130890052355, "grad_norm": 18.662208557128906, "learning_rate": 1.25e-07, "logits/chosen": 1.8461039066314697, "logits/rejected": 1.8939508199691772, "loss": 5.5481, "step": 13 }, { "beta_dpo/beta_used": 0.009789557196199894, "beta_dpo/beta_used_raw": 0.009789557196199894, "beta_dpo/gap_mean": -0.035510119050741196, "beta_dpo/gap_std": 0.8479209542274475, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.02931937172774869, "grad_norm": 15.506324768066406, "learning_rate": 1.3541666666666666e-07, "logits/chosen": 1.8386187553405762, "logits/rejected": 1.5979816913604736, "loss": 5.5477, "step": 14 }, { "beta_dpo/beta_used": 0.010104680433869362, "beta_dpo/beta_used_raw": 0.010104680433869362, "beta_dpo/gap_mean": -0.05601261928677559, "beta_dpo/gap_std": 0.8992904424667358, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.031413612565445025, "grad_norm": 17.449304580688477, "learning_rate": 1.4583333333333335e-07, "logits/chosen": 1.9075326919555664, "logits/rejected": 1.7650988101959229, "loss": 5.5445, "step": 15 }, { "beta_dpo/beta_used": 0.010083270259201527, "beta_dpo/beta_used_raw": 0.010083270259201527, "beta_dpo/gap_mean": -0.037581950426101685, "beta_dpo/gap_std": 0.9426290988922119, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.033507853403141365, "grad_norm": 18.769243240356445, "learning_rate": 1.5624999999999999e-07, "logits/chosen": 2.0930874347686768, "logits/rejected": 1.8253268003463745, "loss": 5.5458, "step": 16 }, { "beta_dpo/beta_used": 0.009928649291396141, "beta_dpo/beta_used_raw": 0.009928649291396141, "beta_dpo/gap_mean": -0.03386215493083, "beta_dpo/gap_std": 0.9212523102760315, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.0356020942408377, "grad_norm": 20.794923782348633, "learning_rate": 1.6666666666666665e-07, "logits/chosen": 1.769667387008667, "logits/rejected": 1.7814725637435913, "loss": 5.5484, "step": 17 }, { "beta_dpo/beta_used": 0.01007060892879963, "beta_dpo/beta_used_raw": 0.01007060892879963, "beta_dpo/gap_mean": -0.01796822063624859, "beta_dpo/gap_std": 0.8694018721580505, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.03769633507853403, "grad_norm": 16.827281951904297, "learning_rate": 1.7708333333333334e-07, "logits/chosen": 1.7808014154434204, "logits/rejected": 1.7646872997283936, "loss": 5.5437, "step": 18 }, { "beta_dpo/beta_used": 0.009850156493484974, "beta_dpo/beta_used_raw": 0.009850156493484974, "beta_dpo/gap_mean": -0.04470803216099739, "beta_dpo/gap_std": 0.8516724705696106, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.039790575916230364, "grad_norm": 16.883514404296875, "learning_rate": 1.875e-07, "logits/chosen": 2.054273843765259, "logits/rejected": 2.0647222995758057, "loss": 5.5483, "step": 19 }, { "beta_dpo/beta_used": 0.009869220666587353, "beta_dpo/beta_used_raw": 0.009869220666587353, "beta_dpo/gap_mean": -0.02124340645968914, "beta_dpo/gap_std": 0.8342310190200806, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.041884816753926704, "grad_norm": 17.35634994506836, "learning_rate": 1.9791666666666664e-07, "logits/chosen": 2.368907928466797, "logits/rejected": 2.167264223098755, "loss": 5.5473, "step": 20 }, { "beta_dpo/beta_used": 0.009426544420421124, "beta_dpo/beta_used_raw": 0.009426544420421124, "beta_dpo/gap_mean": -0.017612561583518982, "beta_dpo/gap_std": 0.8350470066070557, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.04397905759162304, "grad_norm": 15.612009048461914, "learning_rate": 2.0833333333333333e-07, "logits/chosen": 2.1447153091430664, "logits/rejected": 2.121504545211792, "loss": 5.5489, "step": 21 }, { "beta_dpo/beta_used": 0.01062285527586937, "beta_dpo/beta_used_raw": 0.01062285527586937, "beta_dpo/gap_mean": 0.06357374787330627, "beta_dpo/gap_std": 0.8492311835289001, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04607329842931937, "grad_norm": 17.105073928833008, "learning_rate": 2.1875e-07, "logits/chosen": 1.6775203943252563, "logits/rejected": 1.841507911682129, "loss": 5.5386, "step": 22 }, { "beta_dpo/beta_used": 0.009609552100300789, "beta_dpo/beta_used_raw": 0.009609552100300789, "beta_dpo/gap_mean": 0.09488284587860107, "beta_dpo/gap_std": 0.7845069169998169, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.048167539267015703, "grad_norm": 17.074167251586914, "learning_rate": 2.2916666666666663e-07, "logits/chosen": 2.0019335746765137, "logits/rejected": 1.876702070236206, "loss": 5.5427, "step": 23 }, { "beta_dpo/beta_used": 0.009548784233629704, "beta_dpo/beta_used_raw": 0.009548784233629704, "beta_dpo/gap_mean": 0.01768093928694725, "beta_dpo/gap_std": 0.821352481842041, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.050261780104712044, "grad_norm": 16.67466163635254, "learning_rate": 2.3958333333333335e-07, "logits/chosen": 2.0418663024902344, "logits/rejected": 1.9522861242294312, "loss": 5.5466, "step": 24 }, { "beta_dpo/beta_used": 0.010621692053973675, "beta_dpo/beta_used_raw": 0.010621692053973675, "beta_dpo/gap_mean": 0.02274535596370697, "beta_dpo/gap_std": 0.7953328490257263, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.05235602094240838, "grad_norm": 18.33420753479004, "learning_rate": 2.5e-07, "logits/chosen": 1.807928204536438, "logits/rejected": 1.8295968770980835, "loss": 5.5401, "step": 25 }, { "beta_dpo/beta_used": 0.009963510558009148, "beta_dpo/beta_used_raw": 0.009963510558009148, "beta_dpo/gap_mean": 0.053856804966926575, "beta_dpo/gap_std": 0.7753854990005493, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05445026178010471, "grad_norm": 17.823503494262695, "learning_rate": 2.604166666666667e-07, "logits/chosen": 1.6102561950683594, "logits/rejected": 1.5492463111877441, "loss": 5.5438, "step": 26 }, { "beta_dpo/beta_used": 0.009892760775983334, "beta_dpo/beta_used_raw": 0.009892760775983334, "beta_dpo/gap_mean": 0.035262782126665115, "beta_dpo/gap_std": 0.7987048625946045, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.05654450261780105, "grad_norm": 17.028757095336914, "learning_rate": 2.708333333333333e-07, "logits/chosen": 2.1599764823913574, "logits/rejected": 1.9214812517166138, "loss": 5.5447, "step": 27 }, { "beta_dpo/beta_used": 0.010526652447879314, "beta_dpo/beta_used_raw": 0.010526652447879314, "beta_dpo/gap_mean": 0.05413653701543808, "beta_dpo/gap_std": 0.794916033744812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05863874345549738, "grad_norm": 19.700441360473633, "learning_rate": 2.8125e-07, "logits/chosen": 1.9106848239898682, "logits/rejected": 2.0312745571136475, "loss": 5.5371, "step": 28 }, { "beta_dpo/beta_used": 0.010448331013321877, "beta_dpo/beta_used_raw": 0.010448331013321877, "beta_dpo/gap_mean": 0.02559659071266651, "beta_dpo/gap_std": 0.8567264080047607, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.060732984293193716, "grad_norm": 16.468107223510742, "learning_rate": 2.916666666666667e-07, "logits/chosen": 2.2274394035339355, "logits/rejected": 1.952311635017395, "loss": 5.5416, "step": 29 }, { "beta_dpo/beta_used": 0.009916335344314575, "beta_dpo/beta_used_raw": 0.009916335344314575, "beta_dpo/gap_mean": 0.04508252441883087, "beta_dpo/gap_std": 0.8601223826408386, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.06282722513089005, "grad_norm": 16.325408935546875, "learning_rate": 3.020833333333333e-07, "logits/chosen": 1.463683843612671, "logits/rejected": 1.4335768222808838, "loss": 5.5426, "step": 30 }, { "beta_dpo/beta_used": 0.010172335430979729, "beta_dpo/beta_used_raw": 0.010172335430979729, "beta_dpo/gap_mean": 0.06362677365541458, "beta_dpo/gap_std": 0.7783647775650024, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.06492146596858639, "grad_norm": 15.478079795837402, "learning_rate": 3.1249999999999997e-07, "logits/chosen": 1.877584457397461, "logits/rejected": 1.7691612243652344, "loss": 5.5409, "step": 31 }, { "beta_dpo/beta_used": 0.010151976719498634, "beta_dpo/beta_used_raw": 0.010151976719498634, "beta_dpo/gap_mean": 0.06375724077224731, "beta_dpo/gap_std": 0.8205698728561401, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.06701570680628273, "grad_norm": 16.919126510620117, "learning_rate": 3.2291666666666666e-07, "logits/chosen": 1.713607668876648, "logits/rejected": 1.5853075981140137, "loss": 5.5403, "step": 32 }, { "beta_dpo/beta_used": 0.010386324487626553, "beta_dpo/beta_used_raw": 0.010386324487626553, "beta_dpo/gap_mean": 0.08595895767211914, "beta_dpo/gap_std": 0.9470534324645996, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.06910994764397906, "grad_norm": 18.542863845825195, "learning_rate": 3.333333333333333e-07, "logits/chosen": 1.8243309259414673, "logits/rejected": 1.729980230331421, "loss": 5.5374, "step": 33 }, { "beta_dpo/beta_used": 0.009925332851707935, "beta_dpo/beta_used_raw": 0.009925332851707935, "beta_dpo/gap_mean": 0.09634880721569061, "beta_dpo/gap_std": 0.9391544461250305, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.0712041884816754, "grad_norm": 20.447566986083984, "learning_rate": 3.4375e-07, "logits/chosen": 2.0654332637786865, "logits/rejected": 2.0050528049468994, "loss": 5.5405, "step": 34 }, { "beta_dpo/beta_used": 0.009798412211239338, "beta_dpo/beta_used_raw": 0.009798412211239338, "beta_dpo/gap_mean": 0.09882716089487076, "beta_dpo/gap_std": 0.9505617022514343, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.07329842931937172, "grad_norm": 15.859660148620605, "learning_rate": 3.541666666666667e-07, "logits/chosen": 1.4941397905349731, "logits/rejected": 1.6851754188537598, "loss": 5.5409, "step": 35 }, { "beta_dpo/beta_used": 0.010313436388969421, "beta_dpo/beta_used_raw": 0.010313436388969421, "beta_dpo/gap_mean": 0.12937475740909576, "beta_dpo/gap_std": 0.9316422939300537, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07539267015706806, "grad_norm": 17.933530807495117, "learning_rate": 3.645833333333333e-07, "logits/chosen": 1.7557207345962524, "logits/rejected": 1.8125189542770386, "loss": 5.5377, "step": 36 }, { "beta_dpo/beta_used": 0.009959274902939796, "beta_dpo/beta_used_raw": 0.009959274902939796, "beta_dpo/gap_mean": 0.13312453031539917, "beta_dpo/gap_std": 0.9395788908004761, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.0774869109947644, "grad_norm": 18.94852638244629, "learning_rate": 3.75e-07, "logits/chosen": 2.1051876544952393, "logits/rejected": 2.0780932903289795, "loss": 5.5388, "step": 37 }, { "beta_dpo/beta_used": 0.009908566251397133, "beta_dpo/beta_used_raw": 0.009908566251397133, "beta_dpo/gap_mean": 0.16690538823604584, "beta_dpo/gap_std": 0.9445586800575256, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07958115183246073, "grad_norm": 16.41166114807129, "learning_rate": 3.8541666666666665e-07, "logits/chosen": 2.1622610092163086, "logits/rejected": 2.414966344833374, "loss": 5.5385, "step": 38 }, { "beta_dpo/beta_used": 0.009442973881959915, "beta_dpo/beta_used_raw": 0.009442973881959915, "beta_dpo/gap_mean": 0.2755042314529419, "beta_dpo/gap_std": 0.9882732629776001, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.08167539267015707, "grad_norm": 16.65612030029297, "learning_rate": 3.958333333333333e-07, "logits/chosen": 2.00819730758667, "logits/rejected": 2.0810117721557617, "loss": 5.5383, "step": 39 }, { "beta_dpo/beta_used": 0.00932924635708332, "beta_dpo/beta_used_raw": 0.00932924635708332, "beta_dpo/gap_mean": 0.2719506323337555, "beta_dpo/gap_std": 1.0504027605056763, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.08376963350785341, "grad_norm": 14.621367454528809, "learning_rate": 4.0625e-07, "logits/chosen": 1.8936258554458618, "logits/rejected": 1.895420789718628, "loss": 5.5403, "step": 40 }, { "beta_dpo/beta_used": 0.009584764949977398, "beta_dpo/beta_used_raw": 0.009584764949977398, "beta_dpo/gap_mean": 0.19441170990467072, "beta_dpo/gap_std": 1.045138955116272, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08586387434554973, "grad_norm": 19.228687286376953, "learning_rate": 4.1666666666666667e-07, "logits/chosen": 1.925986647605896, "logits/rejected": 1.7834522724151611, "loss": 5.539, "step": 41 }, { "beta_dpo/beta_used": 0.01015196181833744, "beta_dpo/beta_used_raw": 0.01015196181833744, "beta_dpo/gap_mean": 0.273733526468277, "beta_dpo/gap_std": 1.0639562606811523, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.08795811518324607, "grad_norm": 16.421497344970703, "learning_rate": 4.270833333333333e-07, "logits/chosen": 2.446347236633301, "logits/rejected": 2.493040084838867, "loss": 5.533, "step": 42 }, { "beta_dpo/beta_used": 0.010610947385430336, "beta_dpo/beta_used_raw": 0.010610947385430336, "beta_dpo/gap_mean": 0.32640647888183594, "beta_dpo/gap_std": 1.1364136934280396, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.09005235602094241, "grad_norm": 17.893566131591797, "learning_rate": 4.375e-07, "logits/chosen": 1.920936107635498, "logits/rejected": 1.9038302898406982, "loss": 5.5271, "step": 43 }, { "beta_dpo/beta_used": 0.01078065950423479, "beta_dpo/beta_used_raw": 0.01078065950423479, "beta_dpo/gap_mean": 0.3758638799190521, "beta_dpo/gap_std": 1.1031302213668823, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.09214659685863874, "grad_norm": 20.40181541442871, "learning_rate": 4.479166666666667e-07, "logits/chosen": 1.7042187452316284, "logits/rejected": 1.6264781951904297, "loss": 5.5232, "step": 44 }, { "beta_dpo/beta_used": 0.009485357441008091, "beta_dpo/beta_used_raw": 0.009485357441008091, "beta_dpo/gap_mean": 0.4286791682243347, "beta_dpo/gap_std": 1.1151459217071533, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.09424083769633508, "grad_norm": 18.97907829284668, "learning_rate": 4.5833333333333327e-07, "logits/chosen": 2.0053882598876953, "logits/rejected": 1.8914456367492676, "loss": 5.5308, "step": 45 }, { "beta_dpo/beta_used": 0.009595800191164017, "beta_dpo/beta_used_raw": 0.009595800191164017, "beta_dpo/gap_mean": 0.4576748311519623, "beta_dpo/gap_std": 1.219599723815918, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.09633507853403141, "grad_norm": 18.820371627807617, "learning_rate": 4.6874999999999996e-07, "logits/chosen": 1.9573893547058105, "logits/rejected": 2.0128352642059326, "loss": 5.5285, "step": 46 }, { "beta_dpo/beta_used": 0.00906536914408207, "beta_dpo/beta_used_raw": 0.00906536914408207, "beta_dpo/gap_mean": 0.4006018042564392, "beta_dpo/gap_std": 1.2177817821502686, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.09842931937172775, "grad_norm": 14.672569274902344, "learning_rate": 4.791666666666667e-07, "logits/chosen": 1.6562869548797607, "logits/rejected": 2.0256872177124023, "loss": 5.5352, "step": 47 }, { "beta_dpo/beta_used": 0.009402711875736713, "beta_dpo/beta_used_raw": 0.009402711875736713, "beta_dpo/gap_mean": 0.41205257177352905, "beta_dpo/gap_std": 1.2531991004943848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10052356020942409, "grad_norm": 14.840012550354004, "learning_rate": 4.895833333333333e-07, "logits/chosen": 2.0920979976654053, "logits/rejected": 2.0639383792877197, "loss": 5.531, "step": 48 }, { "beta_dpo/beta_used": 0.008774153888225555, "beta_dpo/beta_used_raw": 0.008774153888225555, "beta_dpo/gap_mean": 0.4781131148338318, "beta_dpo/gap_std": 1.356748342514038, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.10261780104712041, "grad_norm": 13.876286506652832, "learning_rate": 5e-07, "logits/chosen": 1.855541706085205, "logits/rejected": 1.988050937652588, "loss": 5.5333, "step": 49 }, { "beta_dpo/beta_used": 0.009108037687838078, "beta_dpo/beta_used_raw": 0.009108037687838078, "beta_dpo/gap_mean": 0.38943564891815186, "beta_dpo/gap_std": 1.4389784336090088, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.10471204188481675, "grad_norm": 15.5634126663208, "learning_rate": 4.999932966293553e-07, "logits/chosen": 2.0256078243255615, "logits/rejected": 2.1688108444213867, "loss": 5.5343, "step": 50 }, { "beta_dpo/beta_used": 0.01051395758986473, "beta_dpo/beta_used_raw": 0.01051395758986473, "beta_dpo/gap_mean": 0.49393463134765625, "beta_dpo/gap_std": 1.5790597200393677, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1068062827225131, "grad_norm": 20.477724075317383, "learning_rate": 4.999731868769026e-07, "logits/chosen": 1.561262607574463, "logits/rejected": 1.7091399431228638, "loss": 5.5166, "step": 51 }, { "beta_dpo/beta_used": 0.010623252019286156, "beta_dpo/beta_used_raw": 0.010623252019286156, "beta_dpo/gap_mean": 0.6119964122772217, "beta_dpo/gap_std": 1.613837480545044, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.10890052356020942, "grad_norm": 21.817190170288086, "learning_rate": 4.99939671821067e-07, "logits/chosen": 2.082730531692505, "logits/rejected": 2.247464656829834, "loss": 5.5081, "step": 52 }, { "beta_dpo/beta_used": 0.01156248152256012, "beta_dpo/beta_used_raw": 0.01156248152256012, "beta_dpo/gap_mean": 0.5816015601158142, "beta_dpo/gap_std": 1.6215416193008423, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.11099476439790576, "grad_norm": 22.167213439941406, "learning_rate": 4.998927532591591e-07, "logits/chosen": 2.082489013671875, "logits/rejected": 2.072319984436035, "loss": 5.497, "step": 53 }, { "beta_dpo/beta_used": 0.008477726019918919, "beta_dpo/beta_used_raw": 0.008477726019918919, "beta_dpo/gap_mean": 0.7841604948043823, "beta_dpo/gap_std": 1.7853457927703857, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1130890052356021, "grad_norm": 14.047541618347168, "learning_rate": 4.998324337072792e-07, "logits/chosen": 1.5058391094207764, "logits/rejected": 1.5753705501556396, "loss": 5.5252, "step": 54 }, { "beta_dpo/beta_used": 0.009478636085987091, "beta_dpo/beta_used_raw": 0.009478636085987091, "beta_dpo/gap_mean": 0.5571960210800171, "beta_dpo/gap_std": 1.6621750593185425, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11518324607329843, "grad_norm": 14.583319664001465, "learning_rate": 4.997587164001815e-07, "logits/chosen": 2.003282308578491, "logits/rejected": 2.013611316680908, "loss": 5.5249, "step": 55 }, { "beta_dpo/beta_used": 0.009290758520364761, "beta_dpo/beta_used_raw": 0.009290758520364761, "beta_dpo/gap_mean": 0.638902485370636, "beta_dpo/gap_std": 1.8342792987823486, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.11727748691099477, "grad_norm": 15.21347713470459, "learning_rate": 4.996716052911017e-07, "logits/chosen": 2.15181565284729, "logits/rejected": 2.135338306427002, "loss": 5.5226, "step": 56 }, { "beta_dpo/beta_used": 0.009111498482525349, "beta_dpo/beta_used_raw": 0.009111498482525349, "beta_dpo/gap_mean": 0.9660211801528931, "beta_dpo/gap_std": 1.9951261281967163, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.1193717277486911, "grad_norm": 16.580799102783203, "learning_rate": 4.99571105051544e-07, "logits/chosen": 2.130098581314087, "logits/rejected": 1.8486499786376953, "loss": 5.5134, "step": 57 }, { "beta_dpo/beta_used": 0.008915345184504986, "beta_dpo/beta_used_raw": 0.008915345184504986, "beta_dpo/gap_mean": 0.9618982076644897, "beta_dpo/gap_std": 1.7987135648727417, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.12146596858638743, "grad_norm": 15.42608642578125, "learning_rate": 4.994572210710314e-07, "logits/chosen": 1.6894437074661255, "logits/rejected": 1.699744462966919, "loss": 5.5158, "step": 58 }, { "beta_dpo/beta_used": 0.009204288944602013, "beta_dpo/beta_used_raw": 0.009204288944602013, "beta_dpo/gap_mean": 0.8019428253173828, "beta_dpo/gap_std": 2.0088188648223877, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.12356020942408377, "grad_norm": 15.577202796936035, "learning_rate": 4.993299594568162e-07, "logits/chosen": 1.5538208484649658, "logits/rejected": 1.6072800159454346, "loss": 5.52, "step": 59 }, { "beta_dpo/beta_used": 0.009918388910591602, "beta_dpo/beta_used_raw": 0.009918388910591602, "beta_dpo/gap_mean": 0.847707986831665, "beta_dpo/gap_std": 2.123305320739746, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.1256544502617801, "grad_norm": 14.793850898742676, "learning_rate": 4.991893270335525e-07, "logits/chosen": 2.0483858585357666, "logits/rejected": 1.8020352125167847, "loss": 5.5111, "step": 60 }, { "beta_dpo/beta_used": 0.009820302948355675, "beta_dpo/beta_used_raw": 0.009820302948355675, "beta_dpo/gap_mean": 0.9802277684211731, "beta_dpo/gap_std": 2.0959830284118652, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.12774869109947645, "grad_norm": 16.083724975585938, "learning_rate": 4.990353313429303e-07, "logits/chosen": 1.9473985433578491, "logits/rejected": 1.9882135391235352, "loss": 5.5041, "step": 61 }, { "beta_dpo/beta_used": 0.010419272817671299, "beta_dpo/beta_used_raw": 0.010419272817671299, "beta_dpo/gap_mean": 0.979004442691803, "beta_dpo/gap_std": 2.1615118980407715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12984293193717278, "grad_norm": 18.826759338378906, "learning_rate": 4.988679806432711e-07, "logits/chosen": 1.872680902481079, "logits/rejected": 1.8009073734283447, "loss": 5.5007, "step": 62 }, { "beta_dpo/beta_used": 0.00935581885278225, "beta_dpo/beta_used_raw": 0.00935581885278225, "beta_dpo/gap_mean": 1.0244998931884766, "beta_dpo/gap_std": 2.4170455932617188, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.1319371727748691, "grad_norm": 15.110966682434082, "learning_rate": 4.986872839090852e-07, "logits/chosen": 1.9980614185333252, "logits/rejected": 2.105093002319336, "loss": 5.5107, "step": 63 }, { "beta_dpo/beta_used": 0.010298279114067554, "beta_dpo/beta_used_raw": 0.010298279114067554, "beta_dpo/gap_mean": 1.1149272918701172, "beta_dpo/gap_std": 2.4519460201263428, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.13403141361256546, "grad_norm": 20.477684020996094, "learning_rate": 4.9849325083059e-07, "logits/chosen": 1.7054760456085205, "logits/rejected": 1.951492428779602, "loss": 5.4844, "step": 64 }, { "beta_dpo/beta_used": 0.009701458737254143, "beta_dpo/beta_used_raw": 0.009701458737254143, "beta_dpo/gap_mean": 1.1075406074523926, "beta_dpo/gap_std": 2.5126233100891113, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.13612565445026178, "grad_norm": 16.23882484436035, "learning_rate": 4.982858918131906e-07, "logits/chosen": 1.9961862564086914, "logits/rejected": 2.0398294925689697, "loss": 5.502, "step": 65 }, { "beta_dpo/beta_used": 0.010468224063515663, "beta_dpo/beta_used_raw": 0.010468224063515663, "beta_dpo/gap_mean": 1.0450140237808228, "beta_dpo/gap_std": 2.6909701824188232, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1382198952879581, "grad_norm": 18.444570541381836, "learning_rate": 4.980652179769217e-07, "logits/chosen": 1.6719987392425537, "logits/rejected": 1.881594181060791, "loss": 5.4931, "step": 66 }, { "beta_dpo/beta_used": 0.010425317101180553, "beta_dpo/beta_used_raw": 0.010425317101180553, "beta_dpo/gap_mean": 1.015570878982544, "beta_dpo/gap_std": 2.8450400829315186, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.14031413612565444, "grad_norm": 17.675512313842773, "learning_rate": 4.978312411558517e-07, "logits/chosen": 2.0440990924835205, "logits/rejected": 2.0636091232299805, "loss": 5.4964, "step": 67 }, { "beta_dpo/beta_used": 0.009568197652697563, "beta_dpo/beta_used_raw": 0.009568197652697563, "beta_dpo/gap_mean": 1.0808396339416504, "beta_dpo/gap_std": 3.0677380561828613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1424083769633508, "grad_norm": 15.759063720703125, "learning_rate": 4.975839738974473e-07, "logits/chosen": 1.5441210269927979, "logits/rejected": 1.3784618377685547, "loss": 5.5019, "step": 68 }, { "beta_dpo/beta_used": 0.011599601246416569, "beta_dpo/beta_used_raw": 0.011599601246416569, "beta_dpo/gap_mean": 1.4021799564361572, "beta_dpo/gap_std": 3.188746213912964, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.14450261780104712, "grad_norm": 22.499710083007812, "learning_rate": 4.97323429461901e-07, "logits/chosen": 1.9849984645843506, "logits/rejected": 1.8482412099838257, "loss": 5.4511, "step": 69 }, { "beta_dpo/beta_used": 0.009116853587329388, "beta_dpo/beta_used_raw": 0.009116853587329388, "beta_dpo/gap_mean": 1.547209620475769, "beta_dpo/gap_std": 3.23995304107666, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.14659685863874344, "grad_norm": 15.340811729431152, "learning_rate": 4.970496218214204e-07, "logits/chosen": 2.295590400695801, "logits/rejected": 2.3875482082366943, "loss": 5.4922, "step": 70 }, { "beta_dpo/beta_used": 0.012471513822674751, "beta_dpo/beta_used_raw": 0.012471513822674751, "beta_dpo/gap_mean": 1.582148551940918, "beta_dpo/gap_std": 3.453483819961548, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1486910994764398, "grad_norm": 22.772903442382812, "learning_rate": 4.967625656594781e-07, "logits/chosen": 1.8594659566879272, "logits/rejected": 1.9157780408859253, "loss": 5.4328, "step": 71 }, { "beta_dpo/beta_used": 0.010649541392922401, "beta_dpo/beta_used_raw": 0.009485064074397087, "beta_dpo/gap_mean": 1.6831897497177124, "beta_dpo/gap_std": 3.4016518592834473, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.15078534031413612, "grad_norm": 19.1998348236084, "learning_rate": 4.964622763700252e-07, "logits/chosen": 1.8293884992599487, "logits/rejected": 1.892337679862976, "loss": 5.4513, "step": 72 }, { "beta_dpo/beta_used": 0.011073922738432884, "beta_dpo/beta_used_raw": 0.010616803541779518, "beta_dpo/gap_mean": 1.6742221117019653, "beta_dpo/gap_std": 3.5703773498535156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15287958115183245, "grad_norm": 20.304622650146484, "learning_rate": 4.961487700566646e-07, "logits/chosen": 2.2375855445861816, "logits/rejected": 2.2253012657165527, "loss": 5.4517, "step": 73 }, { "beta_dpo/beta_used": 0.009733829647302628, "beta_dpo/beta_used_raw": 0.009427759796380997, "beta_dpo/gap_mean": 1.560795783996582, "beta_dpo/gap_std": 3.745507001876831, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.1549738219895288, "grad_norm": 20.791471481323242, "learning_rate": 4.958220635317885e-07, "logits/chosen": 1.8168758153915405, "logits/rejected": 1.7319445610046387, "loss": 5.4672, "step": 74 }, { "beta_dpo/beta_used": 0.013009906746447086, "beta_dpo/beta_used_raw": 0.013009906746447086, "beta_dpo/gap_mean": 1.750954031944275, "beta_dpo/gap_std": 3.6934804916381836, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.15706806282722513, "grad_norm": 23.435768127441406, "learning_rate": 4.954821743156767e-07, "logits/chosen": 1.8880510330200195, "logits/rejected": 1.9295786619186401, "loss": 5.4219, "step": 75 }, { "beta_dpo/beta_used": 0.007087262813001871, "beta_dpo/beta_used_raw": 0.007036793977022171, "beta_dpo/gap_mean": 2.2242462635040283, "beta_dpo/gap_std": 3.95930814743042, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.15916230366492146, "grad_norm": 17.705944061279297, "learning_rate": 4.951291206355559e-07, "logits/chosen": 2.0245938301086426, "logits/rejected": 1.793765902519226, "loss": 5.4853, "step": 76 }, { "beta_dpo/beta_used": 0.008353885263204575, "beta_dpo/beta_used_raw": 0.008143781684339046, "beta_dpo/gap_mean": 2.021268844604492, "beta_dpo/gap_std": 4.135770797729492, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.1612565445026178, "grad_norm": 16.503738403320312, "learning_rate": 4.947629214246236e-07, "logits/chosen": 2.1751253604888916, "logits/rejected": 2.1524720191955566, "loss": 5.4772, "step": 77 }, { "beta_dpo/beta_used": 0.011361459270119667, "beta_dpo/beta_used_raw": 0.011361459270119667, "beta_dpo/gap_mean": 2.2091753482818604, "beta_dpo/gap_std": 4.447847843170166, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.16335078534031414, "grad_norm": 19.320999145507812, "learning_rate": 4.943835963210323e-07, "logits/chosen": 1.7212610244750977, "logits/rejected": 1.7153496742248535, "loss": 5.4244, "step": 78 }, { "beta_dpo/beta_used": 0.009612835012376308, "beta_dpo/beta_used_raw": 0.00958208180963993, "beta_dpo/gap_mean": 2.3663156032562256, "beta_dpo/gap_std": 4.715466022491455, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.16544502617801046, "grad_norm": 19.817529678344727, "learning_rate": 4.939911656668361e-07, "logits/chosen": 1.9014170169830322, "logits/rejected": 2.1795027256011963, "loss": 5.4218, "step": 79 }, { "beta_dpo/beta_used": 0.009250715374946594, "beta_dpo/beta_used_raw": 0.008980360813438892, "beta_dpo/gap_mean": 2.0934667587280273, "beta_dpo/gap_std": 5.130978584289551, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16753926701570682, "grad_norm": 18.116151809692383, "learning_rate": 4.935856505068998e-07, "logits/chosen": 1.5658341646194458, "logits/rejected": 1.7265154123306274, "loss": 5.4494, "step": 80 }, { "beta_dpo/beta_used": 0.009883089922368526, "beta_dpo/beta_used_raw": 0.009712887927889824, "beta_dpo/gap_mean": 2.5941665172576904, "beta_dpo/gap_std": 5.163574695587158, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.16963350785340314, "grad_norm": 22.016693115234375, "learning_rate": 4.93167072587771e-07, "logits/chosen": 1.742193579673767, "logits/rejected": 1.9251035451889038, "loss": 5.2421, "step": 81 }, { "beta_dpo/beta_used": 0.009547875262796879, "beta_dpo/beta_used_raw": 0.009547875262796879, "beta_dpo/gap_mean": 2.4227218627929688, "beta_dpo/gap_std": 5.073668956756592, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.17172774869109947, "grad_norm": 17.96396255493164, "learning_rate": 4.92735454356513e-07, "logits/chosen": 1.9680440425872803, "logits/rejected": 1.9148989915847778, "loss": 5.4469, "step": 82 }, { "beta_dpo/beta_used": 0.010678245685994625, "beta_dpo/beta_used_raw": 0.009905948303639889, "beta_dpo/gap_mean": 2.5397074222564697, "beta_dpo/gap_std": 5.242867469787598, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.17382198952879582, "grad_norm": 23.018129348754883, "learning_rate": 4.922908189595017e-07, "logits/chosen": 1.5621941089630127, "logits/rejected": 1.5305424928665161, "loss": 5.3852, "step": 83 }, { "beta_dpo/beta_used": 0.006417885888367891, "beta_dpo/beta_used_raw": 0.006086358800530434, "beta_dpo/gap_mean": 2.7024130821228027, "beta_dpo/gap_std": 5.565805435180664, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.17591623036649215, "grad_norm": 13.125260353088379, "learning_rate": 4.918331902411841e-07, "logits/chosen": 2.024345874786377, "logits/rejected": 1.9076447486877441, "loss": 5.4801, "step": 84 }, { "beta_dpo/beta_used": 0.009424247778952122, "beta_dpo/beta_used_raw": 0.008895869366824627, "beta_dpo/gap_mean": 2.2540838718414307, "beta_dpo/gap_std": 5.414524555206299, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.17801047120418848, "grad_norm": 18.945358276367188, "learning_rate": 4.913625927427995e-07, "logits/chosen": 1.51369047164917, "logits/rejected": 1.6780593395233154, "loss": 5.4333, "step": 85 }, { "beta_dpo/beta_used": 0.013801836408674717, "beta_dpo/beta_used_raw": 0.013801836408674717, "beta_dpo/gap_mean": 2.4163331985473633, "beta_dpo/gap_std": 5.740031719207764, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.18010471204188483, "grad_norm": 25.516857147216797, "learning_rate": 4.908790517010636e-07, "logits/chosen": 1.8556016683578491, "logits/rejected": 1.872323751449585, "loss": 5.3655, "step": 86 }, { "beta_dpo/beta_used": 0.008744290098547935, "beta_dpo/beta_used_raw": 0.008744290098547935, "beta_dpo/gap_mean": 2.9491662979125977, "beta_dpo/gap_std": 5.92836856842041, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.18219895287958116, "grad_norm": 20.971223831176758, "learning_rate": 4.903825930468148e-07, "logits/chosen": 1.6977579593658447, "logits/rejected": 1.6770415306091309, "loss": 5.4258, "step": 87 }, { "beta_dpo/beta_used": 0.007864508777856827, "beta_dpo/beta_used_raw": 0.007664060685783625, "beta_dpo/gap_mean": 3.0257012844085693, "beta_dpo/gap_std": 5.952022552490234, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.18429319371727748, "grad_norm": 15.7445068359375, "learning_rate": 4.898732434036243e-07, "logits/chosen": 1.5104684829711914, "logits/rejected": 1.357150912284851, "loss": 5.4513, "step": 88 }, { "beta_dpo/beta_used": 0.010421731509268284, "beta_dpo/beta_used_raw": 0.01035550981760025, "beta_dpo/gap_mean": 2.823183536529541, "beta_dpo/gap_std": 6.035218238830566, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.18638743455497384, "grad_norm": 21.113414764404297, "learning_rate": 4.893510300863676e-07, "logits/chosen": 1.9621143341064453, "logits/rejected": 1.8874907493591309, "loss": 5.402, "step": 89 }, { "beta_dpo/beta_used": 0.012045778334140778, "beta_dpo/beta_used_raw": 0.010188662447035313, "beta_dpo/gap_mean": 2.964503288269043, "beta_dpo/gap_std": 5.843700408935547, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.18848167539267016, "grad_norm": 30.074321746826172, "learning_rate": 4.8881598109976e-07, "logits/chosen": 2.1660492420196533, "logits/rejected": 2.0563719272613525, "loss": 5.3301, "step": 90 }, { "beta_dpo/beta_used": 0.007549135014414787, "beta_dpo/beta_used_raw": 0.005311334040015936, "beta_dpo/gap_mean": 2.952354669570923, "beta_dpo/gap_std": 6.251888751983643, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1905759162303665, "grad_norm": 17.551513671875, "learning_rate": 4.882681251368548e-07, "logits/chosen": 1.2380826473236084, "logits/rejected": 1.557425618171692, "loss": 5.2785, "step": 91 }, { "beta_dpo/beta_used": 0.015128381550312042, "beta_dpo/beta_used_raw": 0.014704037457704544, "beta_dpo/gap_mean": 2.930189847946167, "beta_dpo/gap_std": 6.301963806152344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19267015706806281, "grad_norm": 30.674835205078125, "learning_rate": 4.877074915775048e-07, "logits/chosen": 1.6860748529434204, "logits/rejected": 1.4988112449645996, "loss": 5.2723, "step": 92 }, { "beta_dpo/beta_used": 0.006954543758183718, "beta_dpo/beta_used_raw": 0.0063597094267606735, "beta_dpo/gap_mean": 3.009707450866699, "beta_dpo/gap_std": 6.455717086791992, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.19476439790575917, "grad_norm": 12.83521556854248, "learning_rate": 4.871341104867864e-07, "logits/chosen": 1.9297364950180054, "logits/rejected": 1.8627700805664062, "loss": 5.4614, "step": 93 }, { "beta_dpo/beta_used": 0.0072138672694563866, "beta_dpo/beta_used_raw": 0.005733566824346781, "beta_dpo/gap_mean": 3.3237226009368896, "beta_dpo/gap_std": 6.866450786590576, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.1968586387434555, "grad_norm": 15.222475051879883, "learning_rate": 4.865480126133871e-07, "logits/chosen": 1.5820927619934082, "logits/rejected": 1.6416268348693848, "loss": 5.444, "step": 94 }, { "beta_dpo/beta_used": 0.008435830473899841, "beta_dpo/beta_used_raw": 0.007779551669955254, "beta_dpo/gap_mean": 3.4265336990356445, "beta_dpo/gap_std": 7.192251205444336, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.19895287958115182, "grad_norm": 17.31826400756836, "learning_rate": 4.859492293879573e-07, "logits/chosen": 1.7770836353302002, "logits/rejected": 1.5319178104400635, "loss": 5.4109, "step": 95 }, { "beta_dpo/beta_used": 0.010932082310318947, "beta_dpo/beta_used_raw": 0.00794284138828516, "beta_dpo/gap_mean": 3.5308783054351807, "beta_dpo/gap_std": 7.482184886932373, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20104712041884817, "grad_norm": 24.832975387573242, "learning_rate": 4.853377929214243e-07, "logits/chosen": 1.4598766565322876, "logits/rejected": 1.3611279726028442, "loss": 5.3563, "step": 96 }, { "beta_dpo/beta_used": 0.010159716010093689, "beta_dpo/beta_used_raw": 0.010018959641456604, "beta_dpo/gap_mean": 3.793192148208618, "beta_dpo/gap_std": 7.78098201751709, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2031413612565445, "grad_norm": 22.265670776367188, "learning_rate": 4.847137360032699e-07, "logits/chosen": 1.5520637035369873, "logits/rejected": 1.644052505493164, "loss": 5.3533, "step": 97 }, { "beta_dpo/beta_used": 0.01027124933898449, "beta_dpo/beta_used_raw": 0.009908015839755535, "beta_dpo/gap_mean": 3.9612808227539062, "beta_dpo/gap_std": 7.822225093841553, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20523560209424083, "grad_norm": 21.846027374267578, "learning_rate": 4.84077092099773e-07, "logits/chosen": 2.0662131309509277, "logits/rejected": 2.265798807144165, "loss": 5.3616, "step": 98 }, { "beta_dpo/beta_used": 0.01488437969237566, "beta_dpo/beta_used_raw": 0.01488437969237566, "beta_dpo/gap_mean": 3.7299928665161133, "beta_dpo/gap_std": 8.350497245788574, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.20732984293193718, "grad_norm": 27.384540557861328, "learning_rate": 4.834278953522137e-07, "logits/chosen": 1.9069733619689941, "logits/rejected": 1.8735466003417969, "loss": 5.251, "step": 99 }, { "beta_dpo/beta_used": 0.005023906007409096, "beta_dpo/beta_used_raw": 0.002925662323832512, "beta_dpo/gap_mean": 4.102505207061768, "beta_dpo/gap_std": 8.151671409606934, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.2094240837696335, "grad_norm": 16.64201545715332, "learning_rate": 4.827661805750437e-07, "logits/chosen": 1.9069080352783203, "logits/rejected": 1.840613842010498, "loss": 5.4547, "step": 100 }, { "beta_dpo/beta_used": 0.013062715530395508, "beta_dpo/beta_used_raw": 0.013062715530395508, "beta_dpo/gap_mean": 3.8761510848999023, "beta_dpo/gap_std": 8.57790756225586, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.21151832460732983, "grad_norm": 25.09943389892578, "learning_rate": 4.820919832540181e-07, "logits/chosen": 1.3813724517822266, "logits/rejected": 1.6055908203125, "loss": 5.3, "step": 101 }, { "beta_dpo/beta_used": 0.014250491745769978, "beta_dpo/beta_used_raw": 0.011926423758268356, "beta_dpo/gap_mean": 4.320952892303467, "beta_dpo/gap_std": 8.283108711242676, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2136125654450262, "grad_norm": 24.078140258789062, "learning_rate": 4.814053395442932e-07, "logits/chosen": 1.7069716453552246, "logits/rejected": 1.822311520576477, "loss": 5.2401, "step": 102 }, { "beta_dpo/beta_used": 0.006543359719216824, "beta_dpo/beta_used_raw": 0.003442541928961873, "beta_dpo/gap_mean": 4.461350917816162, "beta_dpo/gap_std": 8.508588790893555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2157068062827225, "grad_norm": 14.553688049316406, "learning_rate": 4.807062862684873e-07, "logits/chosen": 2.264915943145752, "logits/rejected": 2.3848659992218018, "loss": 5.4299, "step": 103 }, { "beta_dpo/beta_used": 0.007411661557853222, "beta_dpo/beta_used_raw": 0.006676441989839077, "beta_dpo/gap_mean": 3.8371684551239014, "beta_dpo/gap_std": 9.153058052062988, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.21780104712041884, "grad_norm": 15.451645851135254, "learning_rate": 4.799948609147061e-07, "logits/chosen": 1.8409569263458252, "logits/rejected": 1.6925066709518433, "loss": 5.4174, "step": 104 }, { "beta_dpo/beta_used": 0.01699206791818142, "beta_dpo/beta_used_raw": 0.016010824590921402, "beta_dpo/gap_mean": 4.9135966300964355, "beta_dpo/gap_std": 8.913808822631836, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2198952879581152, "grad_norm": 37.052093505859375, "learning_rate": 4.792711016345321e-07, "logits/chosen": 1.8707003593444824, "logits/rejected": 1.7518517971038818, "loss": 5.0469, "step": 105 }, { "beta_dpo/beta_used": 0.00902323704212904, "beta_dpo/beta_used_raw": 0.007059420458972454, "beta_dpo/gap_mean": 4.627331256866455, "beta_dpo/gap_std": 9.46343994140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22198952879581152, "grad_norm": 24.8957576751709, "learning_rate": 4.785350472409791e-07, "logits/chosen": 1.8109657764434814, "logits/rejected": 2.01666522026062, "loss": 5.3041, "step": 106 }, { "beta_dpo/beta_used": 0.009695657528936863, "beta_dpo/beta_used_raw": 0.007410034071654081, "beta_dpo/gap_mean": 5.23702335357666, "beta_dpo/gap_std": 9.842565536499023, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.22408376963350785, "grad_norm": 23.953954696655273, "learning_rate": 4.777867372064105e-07, "logits/chosen": 1.8471797704696655, "logits/rejected": 1.797261357307434, "loss": 5.2947, "step": 107 }, { "beta_dpo/beta_used": 0.01452508196234703, "beta_dpo/beta_used_raw": 0.014417744241654873, "beta_dpo/gap_mean": 5.950323581695557, "beta_dpo/gap_std": 9.602670669555664, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2261780104712042, "grad_norm": 28.78997039794922, "learning_rate": 4.770262116604223e-07, "logits/chosen": 1.781799077987671, "logits/rejected": 1.9572784900665283, "loss": 5.1167, "step": 108 }, { "beta_dpo/beta_used": 0.0066536241210997105, "beta_dpo/beta_used_raw": 0.004698293283581734, "beta_dpo/gap_mean": 6.354887962341309, "beta_dpo/gap_std": 10.01487922668457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22827225130890053, "grad_norm": 13.27180004119873, "learning_rate": 4.7625351138769166e-07, "logits/chosen": 1.9066269397735596, "logits/rejected": 1.9160687923431396, "loss": 5.3954, "step": 109 }, { "beta_dpo/beta_used": 0.010541049763560295, "beta_dpo/beta_used_raw": 0.009356118738651276, "beta_dpo/gap_mean": 6.276027202606201, "beta_dpo/gap_std": 11.113080978393555, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.23036649214659685, "grad_norm": 22.47654151916504, "learning_rate": 4.75468677825789e-07, "logits/chosen": 1.6488604545593262, "logits/rejected": 1.6686369180679321, "loss": 5.2445, "step": 110 }, { "beta_dpo/beta_used": 0.010099717415869236, "beta_dpo/beta_used_raw": 0.01006684172898531, "beta_dpo/gap_mean": 6.5077595710754395, "beta_dpo/gap_std": 11.19198989868164, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.2324607329842932, "grad_norm": 23.18780517578125, "learning_rate": 4.7467175306295647e-07, "logits/chosen": 1.820462942123413, "logits/rejected": 1.9046530723571777, "loss": 5.2581, "step": 111 }, { "beta_dpo/beta_used": 0.008607706055045128, "beta_dpo/beta_used_raw": 0.003359769470989704, "beta_dpo/gap_mean": 5.609295845031738, "beta_dpo/gap_std": 11.112923622131348, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.23455497382198953, "grad_norm": 20.108413696289062, "learning_rate": 4.7386277983585053e-07, "logits/chosen": 1.7908120155334473, "logits/rejected": 1.8937515020370483, "loss": 5.3333, "step": 112 }, { "beta_dpo/beta_used": 0.016104042530059814, "beta_dpo/beta_used_raw": 0.013628358021378517, "beta_dpo/gap_mean": 6.332000255584717, "beta_dpo/gap_std": 11.891839981079102, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.23664921465968586, "grad_norm": 32.54283142089844, "learning_rate": 4.7304180152725024e-07, "logits/chosen": 1.49177885055542, "logits/rejected": 1.6306943893432617, "loss": 4.845, "step": 113 }, { "beta_dpo/beta_used": 0.007905099540948868, "beta_dpo/beta_used_raw": 0.0018536364659667015, "beta_dpo/gap_mean": 5.829183101654053, "beta_dpo/gap_std": 12.018501281738281, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2387434554973822, "grad_norm": 22.07866859436035, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 1.4004794359207153, "logits/rejected": 1.3008323907852173, "loss": 5.3357, "step": 114 }, { "beta_dpo/beta_used": 0.009187846444547176, "beta_dpo/beta_used_raw": 0.003049051621928811, "beta_dpo/gap_mean": 4.952703952789307, "beta_dpo/gap_std": 11.791646957397461, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24083769633507854, "grad_norm": 19.946712493896484, "learning_rate": 4.7136400641330245e-07, "logits/chosen": 1.9727150201797485, "logits/rejected": 1.7037996053695679, "loss": 5.325, "step": 115 }, { "beta_dpo/beta_used": 0.011235121637582779, "beta_dpo/beta_used_raw": 0.010504303500056267, "beta_dpo/gap_mean": 5.199014663696289, "beta_dpo/gap_std": 11.840551376342773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24293193717277486, "grad_norm": 25.633787155151367, "learning_rate": 4.70507279583015e-07, "logits/chosen": 1.7236762046813965, "logits/rejected": 1.8275989294052124, "loss": 5.2519, "step": 116 }, { "beta_dpo/beta_used": 0.015477584674954414, "beta_dpo/beta_used_raw": 0.011517820879817009, "beta_dpo/gap_mean": 5.6072611808776855, "beta_dpo/gap_std": 11.469279289245605, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2450261780104712, "grad_norm": 34.2017936706543, "learning_rate": 4.6963872761652834e-07, "logits/chosen": 1.724921703338623, "logits/rejected": 1.5013651847839355, "loss": 5.0554, "step": 117 }, { "beta_dpo/beta_used": 0.01235922146588564, "beta_dpo/beta_used_raw": 0.007801849860697985, "beta_dpo/gap_mean": 6.555847644805908, "beta_dpo/gap_std": 11.524944305419922, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.24712041884816754, "grad_norm": 32.83587646484375, "learning_rate": 4.687583970916486e-07, "logits/chosen": 1.7096357345581055, "logits/rejected": 1.7614951133728027, "loss": 5.103, "step": 118 }, { "beta_dpo/beta_used": 0.007942959666252136, "beta_dpo/beta_used_raw": 0.0018032464431598783, "beta_dpo/gap_mean": 6.371241569519043, "beta_dpo/gap_std": 12.957239151000977, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24921465968586387, "grad_norm": 19.372495651245117, "learning_rate": 4.6786633521783005e-07, "logits/chosen": 1.8338923454284668, "logits/rejected": 1.9390045404434204, "loss": 5.3143, "step": 119 }, { "beta_dpo/beta_used": 0.009077337570488453, "beta_dpo/beta_used_raw": 0.006467485800385475, "beta_dpo/gap_mean": 6.747334003448486, "beta_dpo/gap_std": 13.51995849609375, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2513089005235602, "grad_norm": 25.99736785888672, "learning_rate": 4.669625898336438e-07, "logits/chosen": 1.904350757598877, "logits/rejected": 1.7881104946136475, "loss": 5.2818, "step": 120 }, { "beta_dpo/beta_used": 0.004188536666333675, "beta_dpo/beta_used_raw": -0.0010998877696692944, "beta_dpo/gap_mean": 5.929210662841797, "beta_dpo/gap_std": 12.944700241088867, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2534031413612565, "grad_norm": 11.07016372680664, "learning_rate": 4.6604720940421207e-07, "logits/chosen": 1.178752064704895, "logits/rejected": 1.4918150901794434, "loss": 5.4539, "step": 121 }, { "beta_dpo/beta_used": 0.013262229040265083, "beta_dpo/beta_used_raw": 0.010668408125638962, "beta_dpo/gap_mean": 6.120506286621094, "beta_dpo/gap_std": 13.898996353149414, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.2554973821989529, "grad_norm": 27.32390785217285, "learning_rate": 4.651202430186092e-07, "logits/chosen": 1.6907187700271606, "logits/rejected": 2.047647714614868, "loss": 5.1692, "step": 122 }, { "beta_dpo/beta_used": 0.02133483625948429, "beta_dpo/beta_used_raw": 0.01663101837038994, "beta_dpo/gap_mean": 6.646225929260254, "beta_dpo/gap_std": 14.434886932373047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25759162303664923, "grad_norm": 43.08592987060547, "learning_rate": 4.6418174038722924e-07, "logits/chosen": 1.7687194347381592, "logits/rejected": 1.6279195547103882, "loss": 4.832, "step": 123 }, { "beta_dpo/beta_used": 0.010348731651902199, "beta_dpo/beta_used_raw": 0.00513859186321497, "beta_dpo/gap_mean": 7.770158290863037, "beta_dpo/gap_std": 14.987278938293457, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.25968586387434556, "grad_norm": 27.494583129882812, "learning_rate": 4.6323175183912023e-07, "logits/chosen": 1.4107732772827148, "logits/rejected": 1.5245213508605957, "loss": 5.1637, "step": 124 }, { "beta_dpo/beta_used": 0.01517592091113329, "beta_dpo/beta_used_raw": 0.009712353348731995, "beta_dpo/gap_mean": 7.219732284545898, "beta_dpo/gap_std": 14.99057388305664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2617801047120419, "grad_norm": 35.12456130981445, "learning_rate": 4.6227032831928483e-07, "logits/chosen": 1.578749418258667, "logits/rejected": 1.5831409692764282, "loss": 5.0641, "step": 125 }, { "beta_dpo/beta_used": 0.014101858250796795, "beta_dpo/beta_used_raw": 0.006124613806605339, "beta_dpo/gap_mean": 8.286654472351074, "beta_dpo/gap_std": 15.358405113220215, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2638743455497382, "grad_norm": 30.269481658935547, "learning_rate": 4.612975213859487e-07, "logits/chosen": 2.050217866897583, "logits/rejected": 2.2472779750823975, "loss": 4.999, "step": 126 }, { "beta_dpo/beta_used": 0.01756615750491619, "beta_dpo/beta_used_raw": 0.016226252540946007, "beta_dpo/gap_mean": 8.151988983154297, "beta_dpo/gap_std": 15.966252326965332, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.26596858638743454, "grad_norm": 35.137901306152344, "learning_rate": 4.603133832077953e-07, "logits/chosen": 1.3663495779037476, "logits/rejected": 1.3178493976593018, "loss": 4.8628, "step": 127 }, { "beta_dpo/beta_used": 0.012780067510902882, "beta_dpo/beta_used_raw": 0.012262159027159214, "beta_dpo/gap_mean": 9.522705078125, "beta_dpo/gap_std": 15.977328300476074, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2680628272251309, "grad_norm": 26.821195602416992, "learning_rate": 4.5931796656116837e-07, "logits/chosen": 1.4073151350021362, "logits/rejected": 1.3889837265014648, "loss": 5.0196, "step": 128 }, { "beta_dpo/beta_used": 0.013786004856228828, "beta_dpo/beta_used_raw": 0.006445377133786678, "beta_dpo/gap_mean": 9.859175682067871, "beta_dpo/gap_std": 16.836477279663086, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.27015706806282724, "grad_norm": 30.80266761779785, "learning_rate": 4.5831132482724193e-07, "logits/chosen": 1.4743335247039795, "logits/rejected": 1.6113927364349365, "loss": 4.9661, "step": 129 }, { "beta_dpo/beta_used": 0.013385320082306862, "beta_dpo/beta_used_raw": 0.007840610109269619, "beta_dpo/gap_mean": 9.868795394897461, "beta_dpo/gap_std": 16.45522117614746, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.27225130890052357, "grad_norm": 31.25225067138672, "learning_rate": 4.5729351198915705e-07, "logits/chosen": 1.6542197465896606, "logits/rejected": 1.9003280401229858, "loss": 4.9968, "step": 130 }, { "beta_dpo/beta_used": 0.011940027587115765, "beta_dpo/beta_used_raw": 0.007653850130736828, "beta_dpo/gap_mean": 8.79969596862793, "beta_dpo/gap_std": 16.86931037902832, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.2743455497382199, "grad_norm": 26.647390365600586, "learning_rate": 4.5626458262912735e-07, "logits/chosen": 1.2588789463043213, "logits/rejected": 1.1883281469345093, "loss": 5.1746, "step": 131 }, { "beta_dpo/beta_used": 0.020324911922216415, "beta_dpo/beta_used_raw": 0.009501131251454353, "beta_dpo/gap_mean": 8.400039672851562, "beta_dpo/gap_std": 17.63036346435547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2764397905759162, "grad_norm": 42.30400085449219, "learning_rate": 4.5522459192551166e-07, "logits/chosen": 1.688079833984375, "logits/rejected": 1.7301361560821533, "loss": 4.7249, "step": 132 }, { "beta_dpo/beta_used": 0.027873020619153976, "beta_dpo/beta_used_raw": 0.021997135132551193, "beta_dpo/gap_mean": 10.159527778625488, "beta_dpo/gap_std": 18.784109115600586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.27853403141361255, "grad_norm": 44.038734436035156, "learning_rate": 4.541735956498554e-07, "logits/chosen": 1.7651947736740112, "logits/rejected": 1.7092256546020508, "loss": 4.3015, "step": 133 }, { "beta_dpo/beta_used": 0.009234755299985409, "beta_dpo/beta_used_raw": 0.00529387965798378, "beta_dpo/gap_mean": 9.394585609436035, "beta_dpo/gap_std": 17.975656509399414, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.2806282722513089, "grad_norm": 26.31687355041504, "learning_rate": 4.5311165016389914e-07, "logits/chosen": 2.0336687564849854, "logits/rejected": 2.0945892333984375, "loss": 5.1635, "step": 134 }, { "beta_dpo/beta_used": 0.018918566405773163, "beta_dpo/beta_used_raw": 0.01316812727600336, "beta_dpo/gap_mean": 9.845601081848145, "beta_dpo/gap_std": 16.85881805419922, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.28272251308900526, "grad_norm": 37.51054382324219, "learning_rate": 4.520388124165564e-07, "logits/chosen": 1.188499927520752, "logits/rejected": 0.9699570536613464, "loss": 4.7303, "step": 135 }, { "beta_dpo/beta_used": 0.009488210082054138, "beta_dpo/beta_used_raw": 0.004158595576882362, "beta_dpo/gap_mean": 10.281312942504883, "beta_dpo/gap_std": 17.496814727783203, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2848167539267016, "grad_norm": 22.803377151489258, "learning_rate": 4.5095513994085974e-07, "logits/chosen": 1.2178832292556763, "logits/rejected": 1.4434417486190796, "loss": 5.1349, "step": 136 }, { "beta_dpo/beta_used": 0.01097769383341074, "beta_dpo/beta_used_raw": 0.00757699366658926, "beta_dpo/gap_mean": 9.332605361938477, "beta_dpo/gap_std": 18.028961181640625, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2869109947643979, "grad_norm": 26.255542755126953, "learning_rate": 4.498606908508753e-07, "logits/chosen": 1.823258876800537, "logits/rejected": 1.6405431032180786, "loss": 5.0908, "step": 137 }, { "beta_dpo/beta_used": 0.008300930261611938, "beta_dpo/beta_used_raw": -0.002543874317780137, "beta_dpo/gap_mean": 9.568643569946289, "beta_dpo/gap_std": 18.384599685668945, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.28900523560209423, "grad_norm": 22.109146118164062, "learning_rate": 4.487555238385862e-07, "logits/chosen": 1.5952801704406738, "logits/rejected": 1.5838592052459717, "loss": 5.217, "step": 138 }, { "beta_dpo/beta_used": 0.011962666176259518, "beta_dpo/beta_used_raw": 0.004802809562534094, "beta_dpo/gap_mean": 7.955426216125488, "beta_dpo/gap_std": 19.22389793395996, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.29109947643979056, "grad_norm": 25.960317611694336, "learning_rate": 4.476396981707453e-07, "logits/chosen": 1.4421442747116089, "logits/rejected": 1.5927166938781738, "loss": 5.1662, "step": 139 }, { "beta_dpo/beta_used": 0.02191855013370514, "beta_dpo/beta_used_raw": 0.017846662551164627, "beta_dpo/gap_mean": 8.63882064819336, "beta_dpo/gap_std": 19.29082679748535, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2931937172774869, "grad_norm": 49.216670989990234, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 1.5183682441711426, "logits/rejected": 1.5717380046844482, "loss": 4.6103, "step": 140 }, { "beta_dpo/beta_used": 0.014003738760948181, "beta_dpo/beta_used_raw": 0.006822553928941488, "beta_dpo/gap_mean": 10.601947784423828, "beta_dpo/gap_std": 19.063888549804688, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.29528795811518327, "grad_norm": 43.43609619140625, "learning_rate": 4.453763107901675e-07, "logits/chosen": 1.4985511302947998, "logits/rejected": 1.5825482606887817, "loss": 4.9577, "step": 141 }, { "beta_dpo/beta_used": 0.022530585527420044, "beta_dpo/beta_used_raw": 0.0178590789437294, "beta_dpo/gap_mean": 10.459101676940918, "beta_dpo/gap_std": 20.326278686523438, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2973821989528796, "grad_norm": 39.85797882080078, "learning_rate": 4.4422887045602674e-07, "logits/chosen": 2.03916335105896, "logits/rejected": 1.778942584991455, "loss": 4.5715, "step": 142 }, { "beta_dpo/beta_used": 0.016412286087870598, "beta_dpo/beta_used_raw": 0.009996837005019188, "beta_dpo/gap_mean": 11.491534233093262, "beta_dpo/gap_std": 21.220121383666992, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2994764397905759, "grad_norm": 45.14045333862305, "learning_rate": 4.4307101421701755e-07, "logits/chosen": 1.4854329824447632, "logits/rejected": 1.3263810873031616, "loss": 4.7507, "step": 143 }, { "beta_dpo/beta_used": 0.009764298796653748, "beta_dpo/beta_used_raw": -0.003037895541638136, "beta_dpo/gap_mean": 12.049786567687988, "beta_dpo/gap_std": 21.212291717529297, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.30157068062827225, "grad_norm": 27.531478881835938, "learning_rate": 4.419028041654559e-07, "logits/chosen": 1.4089610576629639, "logits/rejected": 1.3612356185913086, "loss": 5.076, "step": 144 }, { "beta_dpo/beta_used": 0.015810877084732056, "beta_dpo/beta_used_raw": 0.0027779447846114635, "beta_dpo/gap_mean": 12.608784675598145, "beta_dpo/gap_std": 21.368688583374023, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3036649214659686, "grad_norm": 30.809558868408203, "learning_rate": 4.4072430294890166e-07, "logits/chosen": 1.900479793548584, "logits/rejected": 1.9564039707183838, "loss": 4.7396, "step": 145 }, { "beta_dpo/beta_used": 0.005745714530348778, "beta_dpo/beta_used_raw": -0.005364367738366127, "beta_dpo/gap_mean": 11.638813018798828, "beta_dpo/gap_std": 20.36126708984375, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3057591623036649, "grad_norm": 19.06623077392578, "learning_rate": 4.395355737667985e-07, "logits/chosen": 1.4604260921478271, "logits/rejected": 1.6670466661453247, "loss": 5.2938, "step": 146 }, { "beta_dpo/beta_used": 0.01017170213162899, "beta_dpo/beta_used_raw": -0.009537998586893082, "beta_dpo/gap_mean": 10.316466331481934, "beta_dpo/gap_std": 20.63652992248535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3078534031413613, "grad_norm": 28.077390670776367, "learning_rate": 4.3833668036708483e-07, "logits/chosen": 1.601604700088501, "logits/rejected": 1.5656179189682007, "loss": 5.1406, "step": 147 }, { "beta_dpo/beta_used": 0.013707359321415424, "beta_dpo/beta_used_raw": 0.004445759579539299, "beta_dpo/gap_mean": 10.2113618850708, "beta_dpo/gap_std": 21.985990524291992, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3099476439790576, "grad_norm": 33.60511016845703, "learning_rate": 4.3712768704277524e-07, "logits/chosen": 1.3828201293945312, "logits/rejected": 1.3478338718414307, "loss": 5.0395, "step": 148 }, { "beta_dpo/beta_used": 0.008657879196107388, "beta_dpo/beta_used_raw": 0.0010744923492893577, "beta_dpo/gap_mean": 11.151147842407227, "beta_dpo/gap_std": 20.73192024230957, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.31204188481675393, "grad_norm": 28.912668228149414, "learning_rate": 4.3590865862851263e-07, "logits/chosen": 2.108185291290283, "logits/rejected": 1.9332281351089478, "loss": 5.2272, "step": 149 }, { "beta_dpo/beta_used": 0.01538037694990635, "beta_dpo/beta_used_raw": 0.010280387476086617, "beta_dpo/gap_mean": 11.221325874328613, "beta_dpo/gap_std": 20.35310173034668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31413612565445026, "grad_norm": 42.4512939453125, "learning_rate": 4.346796604970912e-07, "logits/chosen": 1.8120979070663452, "logits/rejected": 1.7387409210205078, "loss": 4.8116, "step": 150 }, { "beta_dpo/beta_used": 0.028699517250061035, "beta_dpo/beta_used_raw": 0.02786320261657238, "beta_dpo/gap_mean": 12.776216506958008, "beta_dpo/gap_std": 21.87693977355957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3162303664921466, "grad_norm": 46.40315628051758, "learning_rate": 4.3344075855595097e-07, "logits/chosen": 1.5828508138656616, "logits/rejected": 1.6035374402999878, "loss": 4.197, "step": 151 }, { "beta_dpo/beta_used": 0.014542932622134686, "beta_dpo/beta_used_raw": 9.965314529836178e-05, "beta_dpo/gap_mean": 13.169672966003418, "beta_dpo/gap_std": 21.826007843017578, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3183246073298429, "grad_norm": 33.306884765625, "learning_rate": 4.3219201924364323e-07, "logits/chosen": 1.3182780742645264, "logits/rejected": 1.7138738632202148, "loss": 4.8325, "step": 152 }, { "beta_dpo/beta_used": 0.02487529069185257, "beta_dpo/beta_used_raw": 0.022432954981923103, "beta_dpo/gap_mean": 15.099176406860352, "beta_dpo/gap_std": 21.7235050201416, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.3204188481675393, "grad_norm": 43.83867263793945, "learning_rate": 4.309335095262675e-07, "logits/chosen": 1.5923478603363037, "logits/rejected": 1.5436244010925293, "loss": 4.2459, "step": 153 }, { "beta_dpo/beta_used": 0.019335608929395676, "beta_dpo/beta_used_raw": 0.007598421536386013, "beta_dpo/gap_mean": 15.192681312561035, "beta_dpo/gap_std": 23.77366828918457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3225130890052356, "grad_norm": 33.363792419433594, "learning_rate": 4.2966529689388064e-07, "logits/chosen": 1.4466509819030762, "logits/rejected": 1.4517470598220825, "loss": 4.5467, "step": 154 }, { "beta_dpo/beta_used": 0.018129050731658936, "beta_dpo/beta_used_raw": 0.009811091236770153, "beta_dpo/gap_mean": 13.158918380737305, "beta_dpo/gap_std": 22.92918586730957, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.32460732984293195, "grad_norm": 34.454673767089844, "learning_rate": 4.2838744935687716e-07, "logits/chosen": 1.3940773010253906, "logits/rejected": 1.3722490072250366, "loss": 4.5952, "step": 155 }, { "beta_dpo/beta_used": 0.022653408348560333, "beta_dpo/beta_used_raw": 0.017730802297592163, "beta_dpo/gap_mean": 13.508572578430176, "beta_dpo/gap_std": 24.86406135559082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3267015706806283, "grad_norm": 49.66926574707031, "learning_rate": 4.271000354423425e-07, "logits/chosen": 1.7816330194473267, "logits/rejected": 1.626936912536621, "loss": 4.4733, "step": 156 }, { "beta_dpo/beta_used": 0.00911460816860199, "beta_dpo/beta_used_raw": -0.00761047936975956, "beta_dpo/gap_mean": 13.641767501831055, "beta_dpo/gap_std": 25.110754013061523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3287958115183246, "grad_norm": 28.902727127075195, "learning_rate": 4.258031241903777e-07, "logits/chosen": 1.8037209510803223, "logits/rejected": 1.9432283639907837, "loss": 5.0592, "step": 157 }, { "beta_dpo/beta_used": 0.022043395787477493, "beta_dpo/beta_used_raw": 0.013519931584596634, "beta_dpo/gap_mean": 12.658366203308105, "beta_dpo/gap_std": 24.050304412841797, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.3308900523560209, "grad_norm": 49.30216979980469, "learning_rate": 4.2449678515039743e-07, "logits/chosen": 1.9826464653015137, "logits/rejected": 2.0838711261749268, "loss": 4.5538, "step": 158 }, { "beta_dpo/beta_used": 0.013070004992187023, "beta_dpo/beta_used_raw": 0.00028916902374476194, "beta_dpo/gap_mean": 11.834725379943848, "beta_dpo/gap_std": 25.340810775756836, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.33298429319371725, "grad_norm": 54.22214126586914, "learning_rate": 4.2318108837739986e-07, "logits/chosen": 1.4999477863311768, "logits/rejected": 1.369155764579773, "loss": 5.1162, "step": 159 }, { "beta_dpo/beta_used": 0.024159716442227364, "beta_dpo/beta_used_raw": 0.009181090630590916, "beta_dpo/gap_mean": 13.555554389953613, "beta_dpo/gap_std": 24.396202087402344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.33507853403141363, "grad_norm": 49.655757904052734, "learning_rate": 4.218561044282098e-07, "logits/chosen": 1.9025671482086182, "logits/rejected": 1.5475167036056519, "loss": 4.3937, "step": 160 }, { "beta_dpo/beta_used": 0.026369977742433548, "beta_dpo/beta_used_raw": 0.018488148227334023, "beta_dpo/gap_mean": 14.321226119995117, "beta_dpo/gap_std": 25.79440689086914, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.33717277486910996, "grad_norm": 56.462730407714844, "learning_rate": 4.2052190435769554e-07, "logits/chosen": 1.4102540016174316, "logits/rejected": 1.2628462314605713, "loss": 4.4445, "step": 161 }, { "beta_dpo/beta_used": 0.026266392320394516, "beta_dpo/beta_used_raw": 0.02344740927219391, "beta_dpo/gap_mean": 15.67480182647705, "beta_dpo/gap_std": 26.169410705566406, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.3392670157068063, "grad_norm": 64.92961120605469, "learning_rate": 4.1917855971495763e-07, "logits/chosen": 1.5759161710739136, "logits/rejected": 1.4259589910507202, "loss": 4.3731, "step": 162 }, { "beta_dpo/beta_used": 0.010873702354729176, "beta_dpo/beta_used_raw": -0.004741042852401733, "beta_dpo/gap_mean": 15.373876571655273, "beta_dpo/gap_std": 24.578004837036133, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3413612565445026, "grad_norm": 35.616493225097656, "learning_rate": 4.1782614253949255e-07, "logits/chosen": 1.7332031726837158, "logits/rejected": 1.7425578832626343, "loss": 4.9473, "step": 163 }, { "beta_dpo/beta_used": 0.025458887219429016, "beta_dpo/beta_used_raw": 0.015234654769301414, "beta_dpo/gap_mean": 14.888280868530273, "beta_dpo/gap_std": 24.105310440063477, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34345549738219894, "grad_norm": 43.64400863647461, "learning_rate": 4.164647253573289e-07, "logits/chosen": 1.4580204486846924, "logits/rejected": 1.6339696645736694, "loss": 4.1504, "step": 164 }, { "beta_dpo/beta_used": 0.010639484040439129, "beta_dpo/beta_used_raw": -0.005685774143785238, "beta_dpo/gap_mean": 14.408177375793457, "beta_dpo/gap_std": 23.938827514648438, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.34554973821989526, "grad_norm": 29.951501846313477, "learning_rate": 4.1509438117713863e-07, "logits/chosen": 2.0571203231811523, "logits/rejected": 2.0520873069763184, "loss": 4.9575, "step": 165 }, { "beta_dpo/beta_used": 0.013327265158295631, "beta_dpo/beta_used_raw": 0.0013559209182858467, "beta_dpo/gap_mean": 12.96614933013916, "beta_dpo/gap_std": 25.120412826538086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34764397905759165, "grad_norm": 36.66180419921875, "learning_rate": 4.137151834863213e-07, "logits/chosen": 1.6311808824539185, "logits/rejected": 1.59664785861969, "loss": 4.864, "step": 166 }, { "beta_dpo/beta_used": 0.03245996683835983, "beta_dpo/beta_used_raw": 0.031837042421102524, "beta_dpo/gap_mean": 12.544686317443848, "beta_dpo/gap_std": 25.848405838012695, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.34973821989528797, "grad_norm": 92.97169494628906, "learning_rate": 4.123272062470633e-07, "logits/chosen": 1.7561887502670288, "logits/rejected": 1.5244758129119873, "loss": 4.5144, "step": 167 }, { "beta_dpo/beta_used": 0.029823748394846916, "beta_dpo/beta_used_raw": 0.022257408127188683, "beta_dpo/gap_mean": 15.493486404418945, "beta_dpo/gap_std": 25.659543991088867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3518324607329843, "grad_norm": 77.38569641113281, "learning_rate": 4.1093052389237174e-07, "logits/chosen": 1.3179136514663696, "logits/rejected": 1.1715956926345825, "loss": 4.0093, "step": 168 }, { "beta_dpo/beta_used": 0.01944730058312416, "beta_dpo/beta_used_raw": 0.01372382789850235, "beta_dpo/gap_mean": 16.43326187133789, "beta_dpo/gap_std": 25.575986862182617, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3539267015706806, "grad_norm": 51.683170318603516, "learning_rate": 4.0952521132208267e-07, "logits/chosen": 1.7002696990966797, "logits/rejected": 1.8345009088516235, "loss": 4.4362, "step": 169 }, { "beta_dpo/beta_used": 0.003313018474727869, "beta_dpo/beta_used_raw": -0.008983142673969269, "beta_dpo/gap_mean": 18.35196304321289, "beta_dpo/gap_std": 25.07719612121582, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.35602094240837695, "grad_norm": 15.952840805053711, "learning_rate": 4.081113438988443e-07, "logits/chosen": 1.7776952981948853, "logits/rejected": 1.684997797012329, "loss": 5.3302, "step": 170 }, { "beta_dpo/beta_used": 0.015446186996996403, "beta_dpo/beta_used_raw": -0.0005397915374487638, "beta_dpo/gap_mean": 17.90646743774414, "beta_dpo/gap_std": 25.070568084716797, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.3581151832460733, "grad_norm": 43.073421478271484, "learning_rate": 4.0668899744407567e-07, "logits/chosen": 1.6446658372879028, "logits/rejected": 1.5069741010665894, "loss": 4.6088, "step": 171 }, { "beta_dpo/beta_used": 0.009129172191023827, "beta_dpo/beta_used_raw": -0.007493500132113695, "beta_dpo/gap_mean": 15.301614761352539, "beta_dpo/gap_std": 25.80316925048828, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.36020942408376966, "grad_norm": 37.880577087402344, "learning_rate": 4.0525824823390043e-07, "logits/chosen": 1.5476915836334229, "logits/rejected": 1.720083236694336, "loss": 5.1515, "step": 172 }, { "beta_dpo/beta_used": 0.022744204849004745, "beta_dpo/beta_used_raw": 0.012280027382075787, "beta_dpo/gap_mean": 14.178143501281738, "beta_dpo/gap_std": 26.050079345703125, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.362303664921466, "grad_norm": 45.3817024230957, "learning_rate": 4.0381917299505686e-07, "logits/chosen": 1.6695926189422607, "logits/rejected": 1.337355136871338, "loss": 4.4431, "step": 173 }, { "beta_dpo/beta_used": 0.0274057500064373, "beta_dpo/beta_used_raw": 0.015672199428081512, "beta_dpo/gap_mean": 16.109161376953125, "beta_dpo/gap_std": 25.606597900390625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3643979057591623, "grad_norm": 50.70249557495117, "learning_rate": 4.0237184890078243e-07, "logits/chosen": 2.1374263763427734, "logits/rejected": 1.9051423072814941, "loss": 4.1047, "step": 174 }, { "beta_dpo/beta_used": 0.022395484149456024, "beta_dpo/beta_used_raw": 0.020498108118772507, "beta_dpo/gap_mean": 16.25571632385254, "beta_dpo/gap_std": 25.667404174804688, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.36649214659685864, "grad_norm": 41.90084457397461, "learning_rate": 4.00916353566676e-07, "logits/chosen": 1.5944123268127441, "logits/rejected": 1.6246697902679443, "loss": 4.3686, "step": 175 }, { "beta_dpo/beta_used": 0.0224157627671957, "beta_dpo/beta_used_raw": 0.011731607839465141, "beta_dpo/gap_mean": 13.99099349975586, "beta_dpo/gap_std": 27.471248626708984, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.36858638743455496, "grad_norm": 63.70330047607422, "learning_rate": 3.994527650465352e-07, "logits/chosen": 1.1375683546066284, "logits/rejected": 1.2096847295761108, "loss": 4.6342, "step": 176 }, { "beta_dpo/beta_used": 0.01675129495561123, "beta_dpo/beta_used_raw": 0.0006211861036717892, "beta_dpo/gap_mean": 11.935150146484375, "beta_dpo/gap_std": 28.26276397705078, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3706806282722513, "grad_norm": 45.043968200683594, "learning_rate": 3.979811618281705e-07, "logits/chosen": 1.7941234111785889, "logits/rejected": 1.5880272388458252, "loss": 4.8643, "step": 177 }, { "beta_dpo/beta_used": 0.025348788127303123, "beta_dpo/beta_used_raw": 0.017674200236797333, "beta_dpo/gap_mean": 14.811019897460938, "beta_dpo/gap_std": 28.847448348999023, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.37277486910994767, "grad_norm": 72.1207046508789, "learning_rate": 3.9650162282919654e-07, "logits/chosen": 1.5051298141479492, "logits/rejected": 1.527164101600647, "loss": 4.3474, "step": 178 }, { "beta_dpo/beta_used": 0.024870071560144424, "beta_dpo/beta_used_raw": 0.0016013816930353642, "beta_dpo/gap_mean": 15.476740837097168, "beta_dpo/gap_std": 27.874025344848633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.374869109947644, "grad_norm": 51.20316696166992, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 1.2467797994613647, "logits/rejected": 1.2580769062042236, "loss": 4.336, "step": 179 }, { "beta_dpo/beta_used": 0.03025144338607788, "beta_dpo/beta_used_raw": 0.0288880355656147, "beta_dpo/gap_mean": 15.403278350830078, "beta_dpo/gap_std": 27.956090927124023, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.3769633507853403, "grad_norm": 47.122596740722656, "learning_rate": 3.935190552834828e-07, "logits/chosen": 1.592002034187317, "logits/rejected": 1.4925694465637207, "loss": 4.0441, "step": 180 }, { "beta_dpo/beta_used": 0.020815353840589523, "beta_dpo/beta_used_raw": 0.016023779287934303, "beta_dpo/gap_mean": 16.58497428894043, "beta_dpo/gap_std": 27.86528205871582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37905759162303665, "grad_norm": 44.79503631591797, "learning_rate": 3.920161866827889e-07, "logits/chosen": 1.3529762029647827, "logits/rejected": 1.3037437200546265, "loss": 4.3222, "step": 181 }, { "beta_dpo/beta_used": 0.023737944662570953, "beta_dpo/beta_used_raw": 0.017001213505864143, "beta_dpo/gap_mean": 17.020750045776367, "beta_dpo/gap_std": 27.084413528442383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.381151832460733, "grad_norm": 51.928287506103516, "learning_rate": 3.90505702185e-07, "logits/chosen": 1.4569286108016968, "logits/rejected": 1.4212331771850586, "loss": 4.1784, "step": 182 }, { "beta_dpo/beta_used": 0.01689002849161625, "beta_dpo/beta_used_raw": 0.011375264264643192, "beta_dpo/gap_mean": 18.06576919555664, "beta_dpo/gap_std": 28.06887435913086, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.3832460732984293, "grad_norm": 32.73753356933594, "learning_rate": 3.889876827928156e-07, "logits/chosen": 1.1345239877700806, "logits/rejected": 1.2237826585769653, "loss": 4.4976, "step": 183 }, { "beta_dpo/beta_used": 0.02527700364589691, "beta_dpo/beta_used_raw": 0.02064402773976326, "beta_dpo/gap_mean": 20.417850494384766, "beta_dpo/gap_std": 29.51577377319336, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.38534031413612563, "grad_norm": 52.284393310546875, "learning_rate": 3.874622099130087e-07, "logits/chosen": 1.6561375856399536, "logits/rejected": 1.639233946800232, "loss": 4.2447, "step": 184 }, { "beta_dpo/beta_used": 0.006265235599130392, "beta_dpo/beta_used_raw": -0.01659151166677475, "beta_dpo/gap_mean": 20.119701385498047, "beta_dpo/gap_std": 30.129091262817383, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.387434554973822, "grad_norm": 34.30731201171875, "learning_rate": 3.859293653520604e-07, "logits/chosen": 1.819935917854309, "logits/rejected": 1.873971939086914, "loss": 5.1, "step": 185 }, { "beta_dpo/beta_used": 0.016711510717868805, "beta_dpo/beta_used_raw": 0.00014704966451972723, "beta_dpo/gap_mean": 17.954086303710938, "beta_dpo/gap_std": 29.141178131103516, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.38952879581151834, "grad_norm": 43.99291229248047, "learning_rate": 3.8438923131177237e-07, "logits/chosen": 1.7304484844207764, "logits/rejected": 1.6357572078704834, "loss": 4.5823, "step": 186 }, { "beta_dpo/beta_used": 0.010872665792703629, "beta_dpo/beta_used_raw": -0.0031126337125897408, "beta_dpo/gap_mean": 16.949188232421875, "beta_dpo/gap_std": 30.313583374023438, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.39162303664921466, "grad_norm": 29.05012321472168, "learning_rate": 3.828418903848593e-07, "logits/chosen": 1.5062894821166992, "logits/rejected": 1.626598834991455, "loss": 4.9126, "step": 187 }, { "beta_dpo/beta_used": 0.021171947941184044, "beta_dpo/beta_used_raw": 0.010028712451457977, "beta_dpo/gap_mean": 16.50074577331543, "beta_dpo/gap_std": 30.938051223754883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.393717277486911, "grad_norm": 45.898658752441406, "learning_rate": 3.812874255505191e-07, "logits/chosen": 1.5269906520843506, "logits/rejected": 1.3458209037780762, "loss": 4.5338, "step": 188 }, { "beta_dpo/beta_used": 0.03484039008617401, "beta_dpo/beta_used_raw": 0.022049371153116226, "beta_dpo/gap_mean": 17.477540969848633, "beta_dpo/gap_std": 29.527908325195312, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3958115183246073, "grad_norm": 70.90847778320312, "learning_rate": 3.797259201699833e-07, "logits/chosen": 1.5551743507385254, "logits/rejected": 1.6014527082443237, "loss": 3.8358, "step": 189 }, { "beta_dpo/beta_used": 0.0200703926384449, "beta_dpo/beta_used_raw": 0.012458120472729206, "beta_dpo/gap_mean": 18.339256286621094, "beta_dpo/gap_std": 28.938512802124023, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.39790575916230364, "grad_norm": 56.81261444091797, "learning_rate": 3.781574579820464e-07, "logits/chosen": 0.9362454414367676, "logits/rejected": 0.9899096488952637, "loss": 4.414, "step": 190 }, { "beta_dpo/beta_used": 0.01700519025325775, "beta_dpo/beta_used_raw": -0.005064443219453096, "beta_dpo/gap_mean": 18.290935516357422, "beta_dpo/gap_std": 30.99585723876953, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.4, "grad_norm": 56.54753494262695, "learning_rate": 3.765821230985757e-07, "logits/chosen": 1.404714822769165, "logits/rejected": 1.5215625762939453, "loss": 4.8064, "step": 191 }, { "beta_dpo/beta_used": 0.031894296407699585, "beta_dpo/beta_used_raw": 0.020558489486575127, "beta_dpo/gap_mean": 16.527379989624023, "beta_dpo/gap_std": 31.373319625854492, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.40209424083769635, "grad_norm": 70.80916595458984, "learning_rate": 3.75e-07, "logits/chosen": 1.5337512493133545, "logits/rejected": 1.7164283990859985, "loss": 4.1842, "step": 192 }, { "beta_dpo/beta_used": 0.01636136882007122, "beta_dpo/beta_used_raw": 0.005046369507908821, "beta_dpo/gap_mean": 15.377167701721191, "beta_dpo/gap_std": 31.938879013061523, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4041884816753927, "grad_norm": 55.80176544189453, "learning_rate": 3.734111735307796e-07, "logits/chosen": 1.7271709442138672, "logits/rejected": 1.558451533317566, "loss": 4.6877, "step": 193 }, { "beta_dpo/beta_used": 0.007611277513206005, "beta_dpo/beta_used_raw": -0.01890621893107891, "beta_dpo/gap_mean": 14.705482482910156, "beta_dpo/gap_std": 30.904098510742188, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.406282722513089, "grad_norm": 22.856273651123047, "learning_rate": 3.7181572889485623e-07, "logits/chosen": 1.3973853588104248, "logits/rejected": 1.4764728546142578, "loss": 5.1599, "step": 194 }, { "beta_dpo/beta_used": 0.031484756618738174, "beta_dpo/beta_used_raw": 0.0071922894567251205, "beta_dpo/gap_mean": 13.331430435180664, "beta_dpo/gap_std": 30.900182723999023, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4083769633507853, "grad_norm": 66.02396392822266, "learning_rate": 3.7021375165108377e-07, "logits/chosen": 1.272679328918457, "logits/rejected": 1.2566474676132202, "loss": 4.3933, "step": 195 }, { "beta_dpo/beta_used": 0.03322502225637436, "beta_dpo/beta_used_raw": 0.02791755273938179, "beta_dpo/gap_mean": 14.905118942260742, "beta_dpo/gap_std": 30.485837936401367, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.41047120418848165, "grad_norm": 79.28156280517578, "learning_rate": 3.6860532770864005e-07, "logits/chosen": 1.275534749031067, "logits/rejected": 1.4435292482376099, "loss": 4.0546, "step": 196 }, { "beta_dpo/beta_used": 0.04939180985093117, "beta_dpo/beta_used_raw": 0.043553970754146576, "beta_dpo/gap_mean": 18.400535583496094, "beta_dpo/gap_std": 30.686927795410156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41256544502617803, "grad_norm": 67.8173828125, "learning_rate": 3.6699054332241985e-07, "logits/chosen": 1.38494873046875, "logits/rejected": 1.254716157913208, "loss": 3.338, "step": 197 }, { "beta_dpo/beta_used": 0.022166196256875992, "beta_dpo/beta_used_raw": 0.007883399724960327, "beta_dpo/gap_mean": 20.444957733154297, "beta_dpo/gap_std": 32.35297393798828, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.41465968586387436, "grad_norm": 60.04436492919922, "learning_rate": 3.653694850884091e-07, "logits/chosen": 1.9333720207214355, "logits/rejected": 2.020900011062622, "loss": 4.4855, "step": 198 }, { "beta_dpo/beta_used": 0.02409629337489605, "beta_dpo/beta_used_raw": 0.021340614184737206, "beta_dpo/gap_mean": 19.7289981842041, "beta_dpo/gap_std": 33.021812438964844, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4167539267015707, "grad_norm": 47.563514709472656, "learning_rate": 3.6374223993904124e-07, "logits/chosen": 0.8853669762611389, "logits/rejected": 0.8789573907852173, "loss": 4.2058, "step": 199 }, { "beta_dpo/beta_used": 0.019850242882966995, "beta_dpo/beta_used_raw": 0.0021575437858700752, "beta_dpo/gap_mean": 18.26460075378418, "beta_dpo/gap_std": 35.18665313720703, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.418848167539267, "grad_norm": 75.98006439208984, "learning_rate": 3.621088951385353e-07, "logits/chosen": 1.4998607635498047, "logits/rejected": 1.4999333620071411, "loss": 4.6811, "step": 200 }, { "epoch": 0.418848167539267, "eval_beta_dpo/beta_used": 0.027261212468147278, "eval_beta_dpo/beta_used_raw": 0.011497409082949162, "eval_beta_dpo/gap_mean": 17.349489212036133, "eval_beta_dpo/gap_std": 36.29584884643555, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.4600857496261597, "eval_logits/rejected": 1.4735403060913086, "eval_loss": 0.582222044467926, "eval_runtime": 93.942, "eval_samples_per_second": 21.29, "eval_steps_per_second": 1.331, "step": 200 }, { "beta_dpo/beta_used": 0.015480000525712967, "beta_dpo/beta_used_raw": 0.0017268508672714233, "beta_dpo/gap_mean": 16.916603088378906, "beta_dpo/gap_std": 34.051475524902344, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42094240837696334, "grad_norm": 41.899105072021484, "learning_rate": 3.604695382782159e-07, "logits/chosen": 1.3517783880233765, "logits/rejected": 1.4856456518173218, "loss": 4.8087, "step": 201 }, { "beta_dpo/beta_used": 0.03667040914297104, "beta_dpo/beta_used_raw": 0.02494371309876442, "beta_dpo/gap_mean": 18.696678161621094, "beta_dpo/gap_std": 34.44628143310547, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42303664921465967, "grad_norm": 94.09333038330078, "learning_rate": 3.588242572718162e-07, "logits/chosen": 1.9142837524414062, "logits/rejected": 1.8261678218841553, "loss": 4.2233, "step": 202 }, { "beta_dpo/beta_used": 0.017151907086372375, "beta_dpo/beta_used_raw": 0.00911116972565651, "beta_dpo/gap_mean": 16.54568862915039, "beta_dpo/gap_std": 32.38970184326172, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.42513089005235605, "grad_norm": 44.56381607055664, "learning_rate": 3.571731403507635e-07, "logits/chosen": 1.4302637577056885, "logits/rejected": 1.2982755899429321, "loss": 4.5763, "step": 203 }, { "beta_dpo/beta_used": 0.034039054065942764, "beta_dpo/beta_used_raw": 0.0323847234249115, "beta_dpo/gap_mean": 18.076196670532227, "beta_dpo/gap_std": 31.370433807373047, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4272251308900524, "grad_norm": 71.95513153076172, "learning_rate": 3.5551627605944746e-07, "logits/chosen": 2.1505026817321777, "logits/rejected": 2.025639772415161, "loss": 3.8071, "step": 204 }, { "beta_dpo/beta_used": 0.027348071336746216, "beta_dpo/beta_used_raw": 0.006836746819317341, "beta_dpo/gap_mean": 18.946754455566406, "beta_dpo/gap_std": 31.32244110107422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4293193717277487, "grad_norm": 45.75480651855469, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 1.419930100440979, "logits/rejected": 1.7142930030822754, "loss": 4.1724, "step": 205 }, { "beta_dpo/beta_used": 0.016552381217479706, "beta_dpo/beta_used_raw": -0.0049156793393194675, "beta_dpo/gap_mean": 19.863826751708984, "beta_dpo/gap_std": 30.71218490600586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.431413612565445, "grad_norm": 41.705875396728516, "learning_rate": 3.5218566107988867e-07, "logits/chosen": 1.124336838722229, "logits/rejected": 1.3756214380264282, "loss": 4.77, "step": 206 }, { "beta_dpo/beta_used": 0.015663469210267067, "beta_dpo/beta_used_raw": 0.0052419002167880535, "beta_dpo/gap_mean": 17.88925552368164, "beta_dpo/gap_std": 31.518335342407227, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.43350785340314135, "grad_norm": 69.29541015625, "learning_rate": 3.505120890024195e-07, "logits/chosen": 1.4753804206848145, "logits/rejected": 1.621216058731079, "loss": 4.8643, "step": 207 }, { "beta_dpo/beta_used": 0.02139691449701786, "beta_dpo/beta_used_raw": 0.005481313914060593, "beta_dpo/gap_mean": 16.749000549316406, "beta_dpo/gap_std": 32.0452880859375, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4356020942408377, "grad_norm": 42.13248825073242, "learning_rate": 3.4883312676665534e-07, "logits/chosen": 1.683328628540039, "logits/rejected": 1.6666276454925537, "loss": 4.4627, "step": 208 }, { "beta_dpo/beta_used": 0.022991986945271492, "beta_dpo/beta_used_raw": 0.0012511502718552947, "beta_dpo/gap_mean": 16.64447784423828, "beta_dpo/gap_std": 31.43779945373535, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.437696335078534, "grad_norm": 47.651954650878906, "learning_rate": 3.4714886441024573e-07, "logits/chosen": 1.4982630014419556, "logits/rejected": 1.2422916889190674, "loss": 4.65, "step": 209 }, { "beta_dpo/beta_used": 0.023505035787820816, "beta_dpo/beta_used_raw": 0.01351526565849781, "beta_dpo/gap_mean": 16.755630493164062, "beta_dpo/gap_std": 30.364093780517578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4397905759162304, "grad_norm": 40.224185943603516, "learning_rate": 3.454593922550693e-07, "logits/chosen": 1.622258186340332, "logits/rejected": 1.7734078168869019, "loss": 4.4717, "step": 210 }, { "beta_dpo/beta_used": 0.017476221546530724, "beta_dpo/beta_used_raw": 0.009140146896243095, "beta_dpo/gap_mean": 18.972339630126953, "beta_dpo/gap_std": 29.722959518432617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4418848167539267, "grad_norm": 31.73094940185547, "learning_rate": 3.4376480090239047e-07, "logits/chosen": 1.476683497428894, "logits/rejected": 1.5253487825393677, "loss": 4.7, "step": 211 }, { "beta_dpo/beta_used": 0.019394179806113243, "beta_dpo/beta_used_raw": 0.015454288572072983, "beta_dpo/gap_mean": 19.75035858154297, "beta_dpo/gap_std": 29.714906692504883, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.44397905759162304, "grad_norm": 41.0455322265625, "learning_rate": 3.4206518122800055e-07, "logits/chosen": 1.2970361709594727, "logits/rejected": 1.37529456615448, "loss": 4.3472, "step": 212 }, { "beta_dpo/beta_used": 0.011834348551928997, "beta_dpo/beta_used_raw": -0.017926108092069626, "beta_dpo/gap_mean": 17.426942825317383, "beta_dpo/gap_std": 29.695297241210938, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.44607329842931936, "grad_norm": 33.91777038574219, "learning_rate": 3.403606243773448e-07, "logits/chosen": 1.5579262971878052, "logits/rejected": 1.68187415599823, "loss": 4.9313, "step": 213 }, { "beta_dpo/beta_used": 0.01894894242286682, "beta_dpo/beta_used_raw": 0.013838745653629303, "beta_dpo/gap_mean": 15.725707054138184, "beta_dpo/gap_std": 30.105939865112305, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.4481675392670157, "grad_norm": 39.40108871459961, "learning_rate": 3.3865122176063385e-07, "logits/chosen": 1.7685400247573853, "logits/rejected": 1.8661746978759766, "loss": 4.5791, "step": 214 }, { "beta_dpo/beta_used": 0.010954681783914566, "beta_dpo/beta_used_raw": -0.014796811155974865, "beta_dpo/gap_mean": 16.314573287963867, "beta_dpo/gap_std": 32.43828201293945, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.450261780104712, "grad_norm": 35.527313232421875, "learning_rate": 3.3693706504794243e-07, "logits/chosen": 2.244570732116699, "logits/rejected": 2.2803215980529785, "loss": 4.9752, "step": 215 }, { "beta_dpo/beta_used": 0.03374152258038521, "beta_dpo/beta_used_raw": 0.027103085070848465, "beta_dpo/gap_mean": 17.088348388671875, "beta_dpo/gap_std": 31.838451385498047, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.4523560209424084, "grad_norm": 109.9549331665039, "learning_rate": 3.3521824616429284e-07, "logits/chosen": 1.6181087493896484, "logits/rejected": 1.51048743724823, "loss": 4.0202, "step": 216 }, { "beta_dpo/beta_used": 0.02223392203450203, "beta_dpo/beta_used_raw": 0.01797131821513176, "beta_dpo/gap_mean": 18.80224609375, "beta_dpo/gap_std": 33.52192306518555, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4544502617801047, "grad_norm": 46.53620910644531, "learning_rate": 3.334948572847253e-07, "logits/chosen": 1.5461317300796509, "logits/rejected": 1.6692814826965332, "loss": 4.1595, "step": 217 }, { "beta_dpo/beta_used": 0.0231946911662817, "beta_dpo/beta_used_raw": 0.0011494825594127178, "beta_dpo/gap_mean": 20.61969757080078, "beta_dpo/gap_std": 33.30976486206055, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.45654450261780105, "grad_norm": 93.0323715209961, "learning_rate": 3.317669908293554e-07, "logits/chosen": 1.7362779378890991, "logits/rejected": 1.9851727485656738, "loss": 4.5681, "step": 218 }, { "beta_dpo/beta_used": 0.01022251509130001, "beta_dpo/beta_used_raw": -0.0047258916310966015, "beta_dpo/gap_mean": 20.800567626953125, "beta_dpo/gap_std": 31.367717742919922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4586387434554974, "grad_norm": 25.090171813964844, "learning_rate": 3.300347394584172e-07, "logits/chosen": 1.3783564567565918, "logits/rejected": 1.4508250951766968, "loss": 4.8685, "step": 219 }, { "beta_dpo/beta_used": 0.010799276642501354, "beta_dpo/beta_used_raw": -0.0034070992842316628, "beta_dpo/gap_mean": 21.666975021362305, "beta_dpo/gap_std": 31.608016967773438, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4607329842931937, "grad_norm": 27.500185012817383, "learning_rate": 3.2829819606729477e-07, "logits/chosen": 2.0254147052764893, "logits/rejected": 1.8281564712524414, "loss": 4.886, "step": 220 }, { "beta_dpo/beta_used": 0.014519060961902142, "beta_dpo/beta_used_raw": -0.009499384090304375, "beta_dpo/gap_mean": 19.58493423461914, "beta_dpo/gap_std": 32.41563415527344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46282722513089003, "grad_norm": 48.36823272705078, "learning_rate": 3.265574537815398e-07, "logits/chosen": 1.2991694211959839, "logits/rejected": 1.4876360893249512, "loss": 4.8269, "step": 221 }, { "beta_dpo/beta_used": 0.015203127637505531, "beta_dpo/beta_used_raw": -0.0034404161851853132, "beta_dpo/gap_mean": 19.56608009338379, "beta_dpo/gap_std": 32.176658630371094, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.4649214659685864, "grad_norm": 48.740413665771484, "learning_rate": 3.248126059518784e-07, "logits/chosen": 1.5680516958236694, "logits/rejected": 1.4788140058517456, "loss": 4.7036, "step": 222 }, { "beta_dpo/beta_used": 0.04362927004694939, "beta_dpo/beta_used_raw": 0.04143592342734337, "beta_dpo/gap_mean": 20.544513702392578, "beta_dpo/gap_std": 32.305206298828125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46701570680628274, "grad_norm": 91.45673370361328, "learning_rate": 3.230637461492043e-07, "logits/chosen": 1.3730167150497437, "logits/rejected": 1.3536475896835327, "loss": 3.6045, "step": 223 }, { "beta_dpo/beta_used": 0.027421563863754272, "beta_dpo/beta_used_raw": 0.016309306025505066, "beta_dpo/gap_mean": 20.78533935546875, "beta_dpo/gap_std": 32.98493957519531, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.46910994764397906, "grad_norm": 50.12267303466797, "learning_rate": 3.213109681595612e-07, "logits/chosen": 1.4133144617080688, "logits/rejected": 1.5317778587341309, "loss": 4.1259, "step": 224 }, { "beta_dpo/beta_used": 0.01402560155838728, "beta_dpo/beta_used_raw": -0.016284221783280373, "beta_dpo/gap_mean": 21.371601104736328, "beta_dpo/gap_std": 34.09131622314453, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4712041884816754, "grad_norm": 37.89866256713867, "learning_rate": 3.1955436597911315e-07, "logits/chosen": 1.8815144300460815, "logits/rejected": 1.992702603340149, "loss": 4.6298, "step": 225 }, { "beta_dpo/beta_used": 0.022889500483870506, "beta_dpo/beta_used_raw": 0.017042387276887894, "beta_dpo/gap_mean": 17.320327758789062, "beta_dpo/gap_std": 35.05849075317383, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4732984293193717, "grad_norm": 60.14391326904297, "learning_rate": 3.1779403380910425e-07, "logits/chosen": 1.0302306413650513, "logits/rejected": 1.2303485870361328, "loss": 4.3797, "step": 226 }, { "beta_dpo/beta_used": 0.04840033873915672, "beta_dpo/beta_used_raw": 0.047016169875860214, "beta_dpo/gap_mean": 18.82254409790039, "beta_dpo/gap_std": 34.905059814453125, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.47539267015706804, "grad_norm": 90.10204315185547, "learning_rate": 3.160300660508064e-07, "logits/chosen": 1.6820147037506104, "logits/rejected": 1.8873445987701416, "loss": 3.4083, "step": 227 }, { "beta_dpo/beta_used": 0.02424338273704052, "beta_dpo/beta_used_raw": 0.009696955792605877, "beta_dpo/gap_mean": 21.77010726928711, "beta_dpo/gap_std": 34.2744140625, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4774869109947644, "grad_norm": 56.25212860107422, "learning_rate": 3.1426255730045695e-07, "logits/chosen": 1.5530939102172852, "logits/rejected": 1.6357148885726929, "loss": 4.4004, "step": 228 }, { "beta_dpo/beta_used": 0.03371588513255119, "beta_dpo/beta_used_raw": 0.027584807947278023, "beta_dpo/gap_mean": 25.558032989501953, "beta_dpo/gap_std": 33.908870697021484, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.47958115183246075, "grad_norm": 96.84803771972656, "learning_rate": 3.1249160234418644e-07, "logits/chosen": 1.348872184753418, "logits/rejected": 1.2927398681640625, "loss": 3.7788, "step": 229 }, { "beta_dpo/beta_used": 0.008082384243607521, "beta_dpo/beta_used_raw": -0.00950661115348339, "beta_dpo/gap_mean": 25.10620880126953, "beta_dpo/gap_std": 34.92431640625, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4816753926701571, "grad_norm": 20.94957160949707, "learning_rate": 3.1071729615293424e-07, "logits/chosen": 1.1827516555786133, "logits/rejected": 1.1730360984802246, "loss": 4.9364, "step": 230 }, { "beta_dpo/beta_used": 0.005764795932918787, "beta_dpo/beta_used_raw": -0.024570820853114128, "beta_dpo/gap_mean": 22.6708927154541, "beta_dpo/gap_std": 34.03562927246094, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.4837696335078534, "grad_norm": 38.76413345336914, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 1.2684296369552612, "logits/rejected": 1.329715609550476, "loss": 5.1608, "step": 231 }, { "beta_dpo/beta_used": 0.021905038505792618, "beta_dpo/beta_used_raw": -0.017752759158611298, "beta_dpo/gap_mean": 20.692659378051758, "beta_dpo/gap_std": 33.874855041503906, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.48586387434554973, "grad_norm": 112.87725067138672, "learning_rate": 3.071590108427243e-07, "logits/chosen": 1.426222562789917, "logits/rejected": 1.5956566333770752, "loss": 4.6128, "step": 232 }, { "beta_dpo/beta_used": 0.039701350033283234, "beta_dpo/beta_used_raw": 0.035932619124650955, "beta_dpo/gap_mean": 21.13761329650879, "beta_dpo/gap_std": 34.44068908691406, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.48795811518324606, "grad_norm": 89.65320587158203, "learning_rate": 3.05375222543809e-07, "logits/chosen": 1.137376070022583, "logits/rejected": 1.239527940750122, "loss": 3.6241, "step": 233 }, { "beta_dpo/beta_used": 0.020853759720921516, "beta_dpo/beta_used_raw": 0.006785106845200062, "beta_dpo/gap_mean": 22.310590744018555, "beta_dpo/gap_std": 36.559181213378906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4900523560209424, "grad_norm": 152.07374572753906, "learning_rate": 3.035884646397637e-07, "logits/chosen": 1.3747183084487915, "logits/rejected": 1.4081201553344727, "loss": 4.5591, "step": 234 }, { "beta_dpo/beta_used": 0.022356968373060226, "beta_dpo/beta_used_raw": 0.017687149345874786, "beta_dpo/gap_mean": 21.469078063964844, "beta_dpo/gap_std": 38.99213790893555, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.49214659685863876, "grad_norm": 49.92569351196289, "learning_rate": 3.017988329489923e-07, "logits/chosen": 1.6978657245635986, "logits/rejected": 1.6188864707946777, "loss": 4.497, "step": 235 }, { "beta_dpo/beta_used": 0.027202440425753593, "beta_dpo/beta_used_raw": 0.013716357760131359, "beta_dpo/gap_mean": 21.86897087097168, "beta_dpo/gap_std": 38.970787048339844, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4942408376963351, "grad_norm": 51.87431335449219, "learning_rate": 3.000064234440111e-07, "logits/chosen": 1.4140355587005615, "logits/rejected": 1.421186923980713, "loss": 4.3147, "step": 236 }, { "beta_dpo/beta_used": 0.026827599853277206, "beta_dpo/beta_used_raw": 0.002097531221807003, "beta_dpo/gap_mean": 21.94005584716797, "beta_dpo/gap_std": 36.81498718261719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4963350785340314, "grad_norm": 54.699974060058594, "learning_rate": 2.9821133224630223e-07, "logits/chosen": 1.4084728956222534, "logits/rejected": 1.6357187032699585, "loss": 4.0251, "step": 237 }, { "beta_dpo/beta_used": 0.013866505585610867, "beta_dpo/beta_used_raw": -0.01890200935304165, "beta_dpo/gap_mean": 23.559459686279297, "beta_dpo/gap_std": 35.92485427856445, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.49842931937172774, "grad_norm": 52.4506950378418, "learning_rate": 2.964136556211588e-07, "logits/chosen": 1.2949869632720947, "logits/rejected": 1.249887228012085, "loss": 4.7275, "step": 238 }, { "beta_dpo/beta_used": 0.02205376699566841, "beta_dpo/beta_used_raw": 0.0070870416238904, "beta_dpo/gap_mean": 21.201807022094727, "beta_dpo/gap_std": 37.64961624145508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5005235602094241, "grad_norm": 65.6231918334961, "learning_rate": 2.946134899725226e-07, "logits/chosen": 1.542831540107727, "logits/rejected": 1.6906412839889526, "loss": 4.8225, "step": 239 }, { "beta_dpo/beta_used": 0.016875216737389565, "beta_dpo/beta_used_raw": 0.013624901883304119, "beta_dpo/gap_mean": 22.050025939941406, "beta_dpo/gap_std": 35.68221664428711, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.5026178010471204, "grad_norm": 71.22918701171875, "learning_rate": 2.9281093183781403e-07, "logits/chosen": 1.3054808378219604, "logits/rejected": 1.2251484394073486, "loss": 4.3994, "step": 240 }, { "beta_dpo/beta_used": 0.010428352281451225, "beta_dpo/beta_used_raw": -0.009664381854236126, "beta_dpo/gap_mean": 20.70039176940918, "beta_dpo/gap_std": 36.04539108276367, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5047120418848168, "grad_norm": 35.78901290893555, "learning_rate": 2.910060778827554e-07, "logits/chosen": 1.4216902256011963, "logits/rejected": 1.5455743074417114, "loss": 5.0779, "step": 241 }, { "beta_dpo/beta_used": 0.03037761151790619, "beta_dpo/beta_used_raw": 0.01391815859824419, "beta_dpo/gap_mean": 21.673847198486328, "beta_dpo/gap_std": 35.858516693115234, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.506806282722513, "grad_norm": 76.41270446777344, "learning_rate": 2.891990248961871e-07, "logits/chosen": 1.8587148189544678, "logits/rejected": 1.6864495277404785, "loss": 4.2734, "step": 242 }, { "beta_dpo/beta_used": 0.035171203315258026, "beta_dpo/beta_used_raw": 0.02459963783621788, "beta_dpo/gap_mean": 23.178098678588867, "beta_dpo/gap_std": 35.096439361572266, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5089005235602094, "grad_norm": 77.48847198486328, "learning_rate": 2.873898697848762e-07, "logits/chosen": 1.6573126316070557, "logits/rejected": 1.6771302223205566, "loss": 3.6758, "step": 243 }, { "beta_dpo/beta_used": 0.02781713753938675, "beta_dpo/beta_used_raw": 0.016341306269168854, "beta_dpo/gap_mean": 26.562307357788086, "beta_dpo/gap_std": 36.088531494140625, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5109947643979058, "grad_norm": 90.8469009399414, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 1.3104796409606934, "logits/rejected": 1.1022838354110718, "loss": 4.1908, "step": 244 }, { "beta_dpo/beta_used": 0.023681480437517166, "beta_dpo/beta_used_raw": 0.007075564004480839, "beta_dpo/gap_mean": 26.648090362548828, "beta_dpo/gap_std": 35.5743522644043, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5130890052356021, "grad_norm": 49.45195007324219, "learning_rate": 2.837656413735479e-07, "logits/chosen": 1.8954524993896484, "logits/rejected": 1.5859884023666382, "loss": 4.2197, "step": 245 }, { "beta_dpo/beta_used": 0.007673209998756647, "beta_dpo/beta_used_raw": -0.03442414849996567, "beta_dpo/gap_mean": 23.275249481201172, "beta_dpo/gap_std": 37.69624328613281, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5151832460732985, "grad_norm": 34.83370590209961, "learning_rate": 2.8195076242990116e-07, "logits/chosen": 1.075661301612854, "logits/rejected": 0.9320878982543945, "loss": 5.0958, "step": 246 }, { "beta_dpo/beta_used": 0.02569355070590973, "beta_dpo/beta_used_raw": 0.011104905046522617, "beta_dpo/gap_mean": 21.302507400512695, "beta_dpo/gap_std": 37.52021789550781, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.5172774869109947, "grad_norm": 60.37564468383789, "learning_rate": 2.801341700638307e-07, "logits/chosen": 1.1491472721099854, "logits/rejected": 0.9154660105705261, "loss": 4.1681, "step": 247 }, { "beta_dpo/beta_used": 0.0077388836070895195, "beta_dpo/beta_used_raw": -0.01996331661939621, "beta_dpo/gap_mean": 21.50804328918457, "beta_dpo/gap_std": 37.68701934814453, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5193717277486911, "grad_norm": 42.47214126586914, "learning_rate": 2.7831596169367227e-07, "logits/chosen": 1.0854613780975342, "logits/rejected": 1.1457273960113525, "loss": 5.0609, "step": 248 }, { "beta_dpo/beta_used": 0.00828784704208374, "beta_dpo/beta_used_raw": -0.007833743467926979, "beta_dpo/gap_mean": 20.011716842651367, "beta_dpo/gap_std": 37.14725875854492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5214659685863874, "grad_norm": 41.61552810668945, "learning_rate": 2.7649623482442274e-07, "logits/chosen": 1.2434636354446411, "logits/rejected": 1.2950477600097656, "loss": 5.0897, "step": 249 }, { "beta_dpo/beta_used": 0.028967518359422684, "beta_dpo/beta_used_raw": 0.017602279782295227, "beta_dpo/gap_mean": 21.15532112121582, "beta_dpo/gap_std": 36.99894714355469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5235602094240838, "grad_norm": 111.2298583984375, "learning_rate": 2.7467508704251135e-07, "logits/chosen": 1.5354533195495605, "logits/rejected": 1.6301560401916504, "loss": 4.2794, "step": 250 }, { "beta_dpo/beta_used": 0.02736206352710724, "beta_dpo/beta_used_raw": 0.006099463440477848, "beta_dpo/gap_mean": 20.01749038696289, "beta_dpo/gap_std": 37.12480926513672, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5256544502617801, "grad_norm": 79.339599609375, "learning_rate": 2.7285261601056697e-07, "logits/chosen": 1.3763610124588013, "logits/rejected": 1.155696988105774, "loss": 4.4696, "step": 251 }, { "beta_dpo/beta_used": 0.03472306579351425, "beta_dpo/beta_used_raw": 0.02851836569607258, "beta_dpo/gap_mean": 22.56066131591797, "beta_dpo/gap_std": 38.38005065917969, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5277486910994764, "grad_norm": 57.71303939819336, "learning_rate": 2.7102891946217994e-07, "logits/chosen": 1.829942226409912, "logits/rejected": 1.845513105392456, "loss": 3.7725, "step": 252 }, { "beta_dpo/beta_used": 0.030697450041770935, "beta_dpo/beta_used_raw": 0.01660301722586155, "beta_dpo/gap_mean": 19.772396087646484, "beta_dpo/gap_std": 39.422203063964844, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5298429319371728, "grad_norm": 70.57060241699219, "learning_rate": 2.692040951966617e-07, "logits/chosen": 1.419633388519287, "logits/rejected": 1.3010826110839844, "loss": 4.158, "step": 253 }, { "beta_dpo/beta_used": 0.03239889442920685, "beta_dpo/beta_used_raw": 0.021261408925056458, "beta_dpo/gap_mean": 19.49216079711914, "beta_dpo/gap_std": 36.011436462402344, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5319371727748691, "grad_norm": 85.16039276123047, "learning_rate": 2.6737824107379947e-07, "logits/chosen": 1.652917504310608, "logits/rejected": 1.5930885076522827, "loss": 4.1323, "step": 254 }, { "beta_dpo/beta_used": 0.07058847695589066, "beta_dpo/beta_used_raw": 0.0682307779788971, "beta_dpo/gap_mean": 22.544225692749023, "beta_dpo/gap_std": 38.23542022705078, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5340314136125655, "grad_norm": 126.1849365234375, "learning_rate": 2.655514550086086e-07, "logits/chosen": 1.4259027242660522, "logits/rejected": 1.4180747270584106, "loss": 2.8543, "step": 255 }, { "beta_dpo/beta_used": 0.035115234553813934, "beta_dpo/beta_used_raw": 0.01770986244082451, "beta_dpo/gap_mean": 25.101337432861328, "beta_dpo/gap_std": 40.27662658691406, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5361256544502618, "grad_norm": 74.76777648925781, "learning_rate": 2.6372383496608186e-07, "logits/chosen": 1.584543228149414, "logits/rejected": 1.6146832704544067, "loss": 4.0922, "step": 256 }, { "beta_dpo/beta_used": 0.02713741734623909, "beta_dpo/beta_used_raw": 0.0023514775093644857, "beta_dpo/gap_mean": 26.48859977722168, "beta_dpo/gap_std": 40.16349792480469, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5382198952879581, "grad_norm": 87.41287231445312, "learning_rate": 2.618954789559356e-07, "logits/chosen": 1.334143042564392, "logits/rejected": 1.4390063285827637, "loss": 4.1405, "step": 257 }, { "beta_dpo/beta_used": 0.024670587852597237, "beta_dpo/beta_used_raw": 0.006600758992135525, "beta_dpo/gap_mean": 24.859146118164062, "beta_dpo/gap_std": 38.38996505737305, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5403141361256545, "grad_norm": 78.93328094482422, "learning_rate": 2.600664850273538e-07, "logits/chosen": 1.2462736368179321, "logits/rejected": 1.4119253158569336, "loss": 4.1682, "step": 258 }, { "beta_dpo/beta_used": 0.026468459516763687, "beta_dpo/beta_used_raw": 0.009973703883588314, "beta_dpo/gap_mean": 22.97103500366211, "beta_dpo/gap_std": 37.827335357666016, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5424083769633508, "grad_norm": 62.47282409667969, "learning_rate": 2.582369512637302e-07, "logits/chosen": 1.400333285331726, "logits/rejected": 1.3363168239593506, "loss": 4.2019, "step": 259 }, { "beta_dpo/beta_used": 0.0057443841360509396, "beta_dpo/beta_used_raw": -0.03509850427508354, "beta_dpo/gap_mean": 19.301353454589844, "beta_dpo/gap_std": 37.98316192626953, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5445026178010471, "grad_norm": 29.450904846191406, "learning_rate": 2.5640697577740815e-07, "logits/chosen": 1.2627638578414917, "logits/rejected": 1.3713899850845337, "loss": 5.2443, "step": 260 }, { "beta_dpo/beta_used": 0.02370859682559967, "beta_dpo/beta_used_raw": 0.009769135154783726, "beta_dpo/gap_mean": 17.073835372924805, "beta_dpo/gap_std": 38.706729888916016, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5465968586387434, "grad_norm": 119.15771484375, "learning_rate": 2.5457665670441937e-07, "logits/chosen": 0.9551135301589966, "logits/rejected": 0.7918010354042053, "loss": 4.8051, "step": 261 }, { "beta_dpo/beta_used": 0.01725778356194496, "beta_dpo/beta_used_raw": 0.007985102012753487, "beta_dpo/gap_mean": 19.15559959411621, "beta_dpo/gap_std": 37.25046920776367, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5486910994764398, "grad_norm": 42.16154479980469, "learning_rate": 2.527460921992209e-07, "logits/chosen": 1.7428507804870605, "logits/rejected": 1.745199203491211, "loss": 4.7292, "step": 262 }, { "beta_dpo/beta_used": 0.027581116184592247, "beta_dpo/beta_used_raw": 0.0017390409484505653, "beta_dpo/gap_mean": 21.374671936035156, "beta_dpo/gap_std": 36.47187805175781, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5507853403141362, "grad_norm": 72.0134506225586, "learning_rate": 2.509153804294318e-07, "logits/chosen": 1.3248748779296875, "logits/rejected": 1.480365514755249, "loss": 4.2062, "step": 263 }, { "beta_dpo/beta_used": 0.015040460973978043, "beta_dpo/beta_used_raw": -0.002720870077610016, "beta_dpo/gap_mean": 22.537841796875, "beta_dpo/gap_std": 36.9581298828125, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5528795811518324, "grad_norm": 53.91576385498047, "learning_rate": 2.4908461957056825e-07, "logits/chosen": 1.3922407627105713, "logits/rejected": 1.1616618633270264, "loss": 4.7735, "step": 264 }, { "beta_dpo/beta_used": 0.04024341329932213, "beta_dpo/beta_used_raw": 0.02337898127734661, "beta_dpo/gap_mean": 23.94507598876953, "beta_dpo/gap_std": 36.818138122558594, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5549738219895288, "grad_norm": 190.59609985351562, "learning_rate": 2.4725390780077905e-07, "logits/chosen": 1.6322290897369385, "logits/rejected": 1.6508582830429077, "loss": 4.2363, "step": 265 }, { "beta_dpo/beta_used": 0.026812460273504257, "beta_dpo/beta_used_raw": 0.015981679782271385, "beta_dpo/gap_mean": 23.17593002319336, "beta_dpo/gap_std": 35.23807907104492, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5570680628272251, "grad_norm": 115.75420379638672, "learning_rate": 2.454233432955807e-07, "logits/chosen": 1.3934905529022217, "logits/rejected": 1.4551239013671875, "loss": 4.156, "step": 266 }, { "beta_dpo/beta_used": 0.014945639297366142, "beta_dpo/beta_used_raw": -0.003206442343071103, "beta_dpo/gap_mean": 22.777759552001953, "beta_dpo/gap_std": 35.72869873046875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5591623036649215, "grad_norm": 42.64310073852539, "learning_rate": 2.435930242225919e-07, "logits/chosen": 1.5525813102722168, "logits/rejected": 1.673789143562317, "loss": 4.8052, "step": 267 }, { "beta_dpo/beta_used": 0.030046723783016205, "beta_dpo/beta_used_raw": 0.024244606494903564, "beta_dpo/gap_mean": 21.284276962280273, "beta_dpo/gap_std": 36.792415618896484, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5612565445026177, "grad_norm": 85.80408477783203, "learning_rate": 2.4176304873626984e-07, "logits/chosen": 1.1172372102737427, "logits/rejected": 1.1572062969207764, "loss": 4.0405, "step": 268 }, { "beta_dpo/beta_used": 0.016361307352781296, "beta_dpo/beta_used_raw": -0.008380460552871227, "beta_dpo/gap_mean": 21.142919540405273, "beta_dpo/gap_std": 36.69437789916992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5633507853403141, "grad_norm": 30.00682258605957, "learning_rate": 2.399335149726463e-07, "logits/chosen": 1.3953180313110352, "logits/rejected": 1.582595944404602, "loss": 4.8939, "step": 269 }, { "beta_dpo/beta_used": 0.024136360734701157, "beta_dpo/beta_used_raw": 0.01455269567668438, "beta_dpo/gap_mean": 20.730382919311523, "beta_dpo/gap_std": 38.18457794189453, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5654450261780105, "grad_norm": 104.796630859375, "learning_rate": 2.381045210440644e-07, "logits/chosen": 1.706362009048462, "logits/rejected": 1.9905970096588135, "loss": 4.8619, "step": 270 }, { "beta_dpo/beta_used": 0.015366212464869022, "beta_dpo/beta_used_raw": -0.010098990984261036, "beta_dpo/gap_mean": 20.525156021118164, "beta_dpo/gap_std": 36.195465087890625, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5675392670157068, "grad_norm": 27.481109619140625, "learning_rate": 2.3627616503391812e-07, "logits/chosen": 1.2522549629211426, "logits/rejected": 1.3000314235687256, "loss": 4.6612, "step": 271 }, { "beta_dpo/beta_used": 0.02246049977838993, "beta_dpo/beta_used_raw": 0.002398681826889515, "beta_dpo/gap_mean": 20.4349365234375, "beta_dpo/gap_std": 35.98146438598633, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5696335078534032, "grad_norm": 223.59896850585938, "learning_rate": 2.344485449913914e-07, "logits/chosen": 1.606691837310791, "logits/rejected": 1.451743483543396, "loss": 4.7041, "step": 272 }, { "beta_dpo/beta_used": 0.025656994432210922, "beta_dpo/beta_used_raw": 0.001691313460469246, "beta_dpo/gap_mean": 21.252532958984375, "beta_dpo/gap_std": 34.84130096435547, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5717277486910994, "grad_norm": 60.19879913330078, "learning_rate": 2.3262175892620062e-07, "logits/chosen": 1.5752846002578735, "logits/rejected": 1.6109840869903564, "loss": 4.3398, "step": 273 }, { "beta_dpo/beta_used": 0.024387702345848083, "beta_dpo/beta_used_raw": 0.01869470439851284, "beta_dpo/gap_mean": 22.542556762695312, "beta_dpo/gap_std": 35.69194030761719, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.5738219895287958, "grad_norm": 37.60686492919922, "learning_rate": 2.3079590480333827e-07, "logits/chosen": 1.6102871894836426, "logits/rejected": 1.7174773216247559, "loss": 4.1491, "step": 274 }, { "beta_dpo/beta_used": 0.043057817965745926, "beta_dpo/beta_used_raw": 0.04157021641731262, "beta_dpo/gap_mean": 24.984006881713867, "beta_dpo/gap_std": 35.83733367919922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5759162303664922, "grad_norm": 96.29705810546875, "learning_rate": 2.2897108053782e-07, "logits/chosen": 1.1287708282470703, "logits/rejected": 1.208784818649292, "loss": 3.0424, "step": 275 }, { "beta_dpo/beta_used": 0.0038480497896671295, "beta_dpo/beta_used_raw": -0.015348054468631744, "beta_dpo/gap_mean": 25.66550064086914, "beta_dpo/gap_std": 33.74402618408203, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5780104712041885, "grad_norm": 24.424198150634766, "learning_rate": 2.2714738398943308e-07, "logits/chosen": 1.8258295059204102, "logits/rejected": 1.6733819246292114, "loss": 5.2051, "step": 276 }, { "beta_dpo/beta_used": 0.017688903957605362, "beta_dpo/beta_used_raw": -0.011028681881725788, "beta_dpo/gap_mean": 22.55120086669922, "beta_dpo/gap_std": 35.05712890625, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5801047120418849, "grad_norm": 33.83370590209961, "learning_rate": 2.2532491295748865e-07, "logits/chosen": 1.1561347246170044, "logits/rejected": 1.3503713607788086, "loss": 4.532, "step": 277 }, { "beta_dpo/beta_used": 0.019777359440922737, "beta_dpo/beta_used_raw": -0.004533551167696714, "beta_dpo/gap_mean": 19.028533935546875, "beta_dpo/gap_std": 36.112735748291016, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5821989528795811, "grad_norm": 42.212650299072266, "learning_rate": 2.2350376517557726e-07, "logits/chosen": 1.0686261653900146, "logits/rejected": 1.0221307277679443, "loss": 4.6354, "step": 278 }, { "beta_dpo/beta_used": 0.02981048822402954, "beta_dpo/beta_used_raw": 0.028192678466439247, "beta_dpo/gap_mean": 19.808574676513672, "beta_dpo/gap_std": 35.35283660888672, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5842931937172775, "grad_norm": 53.312747955322266, "learning_rate": 2.2168403830632769e-07, "logits/chosen": 1.2553820610046387, "logits/rejected": 1.2719086408615112, "loss": 3.9651, "step": 279 }, { "beta_dpo/beta_used": 0.0026543322019279003, "beta_dpo/beta_used_raw": -0.015082788653671741, "beta_dpo/gap_mean": 21.008886337280273, "beta_dpo/gap_std": 34.17639923095703, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.5863874345549738, "grad_norm": 11.17526912689209, "learning_rate": 2.1986582993616925e-07, "logits/chosen": 1.5121065378189087, "logits/rejected": 1.5147109031677246, "loss": 5.2115, "step": 280 }, { "beta_dpo/beta_used": 0.015546365641057491, "beta_dpo/beta_used_raw": -0.014291130006313324, "beta_dpo/gap_mean": 20.403629302978516, "beta_dpo/gap_std": 34.77376174926758, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5884816753926702, "grad_norm": 57.08203125, "learning_rate": 2.1804923757009882e-07, "logits/chosen": 1.4907077550888062, "logits/rejected": 1.448096513748169, "loss": 4.8509, "step": 281 }, { "beta_dpo/beta_used": 0.013758410699665546, "beta_dpo/beta_used_raw": -0.0017688155639916658, "beta_dpo/gap_mean": 20.669015884399414, "beta_dpo/gap_std": 35.69584274291992, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5905759162303665, "grad_norm": 28.517318725585938, "learning_rate": 2.1623435862645205e-07, "logits/chosen": 1.7699363231658936, "logits/rejected": 1.8309452533721924, "loss": 5.0077, "step": 282 }, { "beta_dpo/beta_used": 0.028719400987029076, "beta_dpo/beta_used_raw": 0.018162164837121964, "beta_dpo/gap_mean": 20.43427276611328, "beta_dpo/gap_std": 35.05901336669922, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5926701570680628, "grad_norm": 74.51838684082031, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 1.243952751159668, "logits/rejected": 1.4681645631790161, "loss": 4.1383, "step": 283 }, { "beta_dpo/beta_used": 0.022418132051825523, "beta_dpo/beta_used_raw": -0.00897371955215931, "beta_dpo/gap_mean": 20.829967498779297, "beta_dpo/gap_std": 37.05330276489258, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5947643979057592, "grad_norm": 53.890785217285156, "learning_rate": 2.1261013021512378e-07, "logits/chosen": 1.3836698532104492, "logits/rejected": 1.3280866146087646, "loss": 4.7208, "step": 284 }, { "beta_dpo/beta_used": 0.0186537504196167, "beta_dpo/beta_used_raw": -0.003015751950442791, "beta_dpo/gap_mean": 18.022796630859375, "beta_dpo/gap_std": 36.89912414550781, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5968586387434555, "grad_norm": 28.00040626525879, "learning_rate": 2.1080097510381294e-07, "logits/chosen": 1.706050157546997, "logits/rejected": 1.584727168083191, "loss": 4.632, "step": 285 }, { "beta_dpo/beta_used": 0.013129707425832748, "beta_dpo/beta_used_raw": -0.00048280227929353714, "beta_dpo/gap_mean": 19.448501586914062, "beta_dpo/gap_std": 36.36820983886719, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.5989528795811518, "grad_norm": 51.00930404663086, "learning_rate": 2.089939221172446e-07, "logits/chosen": 1.2181655168533325, "logits/rejected": 1.2918510437011719, "loss": 4.8983, "step": 286 }, { "beta_dpo/beta_used": 0.0334862619638443, "beta_dpo/beta_used_raw": 0.031023263931274414, "beta_dpo/gap_mean": 20.484294891357422, "beta_dpo/gap_std": 38.072418212890625, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6010471204188481, "grad_norm": 68.44963073730469, "learning_rate": 2.0718906816218595e-07, "logits/chosen": 1.4797168970108032, "logits/rejected": 1.5804214477539062, "loss": 4.3089, "step": 287 }, { "beta_dpo/beta_used": 0.031299516558647156, "beta_dpo/beta_used_raw": 0.020958131179213524, "beta_dpo/gap_mean": 19.536659240722656, "beta_dpo/gap_std": 37.194252014160156, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6031413612565445, "grad_norm": 125.42591857910156, "learning_rate": 2.053865100274774e-07, "logits/chosen": 1.6277129650115967, "logits/rejected": 1.4404486417770386, "loss": 4.2485, "step": 288 }, { "beta_dpo/beta_used": 0.013463410548865795, "beta_dpo/beta_used_raw": -0.002038992242887616, "beta_dpo/gap_mean": 18.123918533325195, "beta_dpo/gap_std": 37.70576477050781, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6052356020942409, "grad_norm": 50.54543685913086, "learning_rate": 2.035863443788411e-07, "logits/chosen": 1.6278074979782104, "logits/rejected": 1.5724064111709595, "loss": 4.813, "step": 289 }, { "beta_dpo/beta_used": 0.013063677586615086, "beta_dpo/beta_used_raw": -0.02197786420583725, "beta_dpo/gap_mean": 19.04131317138672, "beta_dpo/gap_std": 35.90309524536133, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6073298429319371, "grad_norm": 41.749141693115234, "learning_rate": 2.0178866775369774e-07, "logits/chosen": 1.3940989971160889, "logits/rejected": 1.3121880292892456, "loss": 4.8478, "step": 290 }, { "beta_dpo/beta_used": 0.03433792293071747, "beta_dpo/beta_used_raw": 0.0014921380206942558, "beta_dpo/gap_mean": 20.723804473876953, "beta_dpo/gap_std": 36.17911148071289, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6094240837696335, "grad_norm": 95.32479095458984, "learning_rate": 1.9999357655598891e-07, "logits/chosen": 1.084555983543396, "logits/rejected": 1.1702072620391846, "loss": 4.7487, "step": 291 }, { "beta_dpo/beta_used": 0.03228276968002319, "beta_dpo/beta_used_raw": 0.018787425011396408, "beta_dpo/gap_mean": 20.76034164428711, "beta_dpo/gap_std": 37.097103118896484, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6115183246073298, "grad_norm": 67.72441864013672, "learning_rate": 1.9820116705100775e-07, "logits/chosen": 1.160035252571106, "logits/rejected": 1.1472792625427246, "loss": 3.9976, "step": 292 }, { "beta_dpo/beta_used": 0.02482818439602852, "beta_dpo/beta_used_raw": -0.0007117787608876824, "beta_dpo/gap_mean": 20.157255172729492, "beta_dpo/gap_std": 39.040748596191406, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6136125654450262, "grad_norm": 244.3824462890625, "learning_rate": 1.9641153536023642e-07, "logits/chosen": 2.0036768913269043, "logits/rejected": 1.8342108726501465, "loss": 4.5759, "step": 293 }, { "beta_dpo/beta_used": 0.02537180297076702, "beta_dpo/beta_used_raw": 0.0016407333314418793, "beta_dpo/gap_mean": 21.209617614746094, "beta_dpo/gap_std": 38.50959777832031, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.6157068062827226, "grad_norm": 76.85967254638672, "learning_rate": 1.9462477745619106e-07, "logits/chosen": 1.4297269582748413, "logits/rejected": 1.5640549659729004, "loss": 4.6346, "step": 294 }, { "beta_dpo/beta_used": 0.03157725930213928, "beta_dpo/beta_used_raw": 0.02505682222545147, "beta_dpo/gap_mean": 21.574724197387695, "beta_dpo/gap_std": 39.374446868896484, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6178010471204188, "grad_norm": 86.37284851074219, "learning_rate": 1.928409891572757e-07, "logits/chosen": 1.1579641103744507, "logits/rejected": 1.1256705522537231, "loss": 4.4772, "step": 295 }, { "beta_dpo/beta_used": 0.03921440243721008, "beta_dpo/beta_used_raw": 0.030128249898552895, "beta_dpo/gap_mean": 26.082651138305664, "beta_dpo/gap_std": 39.295570373535156, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6198952879581152, "grad_norm": 129.71774291992188, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 1.5179616212844849, "logits/rejected": 1.6978120803833008, "loss": 3.8258, "step": 296 }, { "beta_dpo/beta_used": 0.029375022277235985, "beta_dpo/beta_used_raw": 0.011093353852629662, "beta_dpo/gap_mean": 27.48119354248047, "beta_dpo/gap_std": 39.495452880859375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6219895287958115, "grad_norm": 118.33712768554688, "learning_rate": 1.8928270384706582e-07, "logits/chosen": 1.495194435119629, "logits/rejected": 1.649183988571167, "loss": 4.2477, "step": 297 }, { "beta_dpo/beta_used": 0.03636423125863075, "beta_dpo/beta_used_raw": 0.028217561542987823, "beta_dpo/gap_mean": 26.37271499633789, "beta_dpo/gap_std": 39.67487335205078, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.6240837696335079, "grad_norm": 85.49053955078125, "learning_rate": 1.875083976558136e-07, "logits/chosen": 1.4574960470199585, "logits/rejected": 1.3186194896697998, "loss": 3.8972, "step": 298 }, { "beta_dpo/beta_used": 0.02849549427628517, "beta_dpo/beta_used_raw": -0.0016860419418662786, "beta_dpo/gap_mean": 24.45018196105957, "beta_dpo/gap_std": 39.10914993286133, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6261780104712041, "grad_norm": 41.191104888916016, "learning_rate": 1.8573744269954297e-07, "logits/chosen": 1.6376529932022095, "logits/rejected": 1.6397225856781006, "loss": 3.844, "step": 299 }, { "beta_dpo/beta_used": 0.02068179100751877, "beta_dpo/beta_used_raw": 0.005061999429017305, "beta_dpo/gap_mean": 22.947368621826172, "beta_dpo/gap_std": 38.15463638305664, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6282722513089005, "grad_norm": 54.7095947265625, "learning_rate": 1.839699339491937e-07, "logits/chosen": 1.2076692581176758, "logits/rejected": 1.2860641479492188, "loss": 4.3858, "step": 300 }, { "beta_dpo/beta_used": 0.03375673294067383, "beta_dpo/beta_used_raw": 0.011599482968449593, "beta_dpo/gap_mean": 21.817138671875, "beta_dpo/gap_std": 40.71202850341797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6303664921465969, "grad_norm": 67.1680908203125, "learning_rate": 1.8220596619089573e-07, "logits/chosen": 1.5903642177581787, "logits/rejected": 1.5883557796478271, "loss": 3.9801, "step": 301 }, { "beta_dpo/beta_used": 0.031289342790842056, "beta_dpo/beta_used_raw": 0.02020403742790222, "beta_dpo/gap_mean": 22.630334854125977, "beta_dpo/gap_std": 39.44662094116211, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6324607329842932, "grad_norm": 73.67294311523438, "learning_rate": 1.8044563402088682e-07, "logits/chosen": 1.4647196531295776, "logits/rejected": 1.6538636684417725, "loss": 3.8922, "step": 302 }, { "beta_dpo/beta_used": 0.026227440685033798, "beta_dpo/beta_used_raw": 0.01093749888241291, "beta_dpo/gap_mean": 21.381053924560547, "beta_dpo/gap_std": 40.288665771484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6345549738219896, "grad_norm": 75.99285888671875, "learning_rate": 1.7868903184043885e-07, "logits/chosen": 1.345954179763794, "logits/rejected": 1.4914484024047852, "loss": 4.3761, "step": 303 }, { "beta_dpo/beta_used": 0.024651650339365005, "beta_dpo/beta_used_raw": 0.010574829764664173, "beta_dpo/gap_mean": 21.974733352661133, "beta_dpo/gap_std": 38.83090591430664, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6366492146596858, "grad_norm": 230.4051513671875, "learning_rate": 1.7693625385079574e-07, "logits/chosen": 1.2385737895965576, "logits/rejected": 1.2572718858718872, "loss": 4.7737, "step": 304 }, { "beta_dpo/beta_used": 0.023414814844727516, "beta_dpo/beta_used_raw": 0.013659648597240448, "beta_dpo/gap_mean": 24.257299423217773, "beta_dpo/gap_std": 38.524078369140625, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6387434554973822, "grad_norm": 46.621604919433594, "learning_rate": 1.7518739404812155e-07, "logits/chosen": 1.235711932182312, "logits/rejected": 1.2289034128189087, "loss": 4.3571, "step": 305 }, { "beta_dpo/beta_used": 0.0353800505399704, "beta_dpo/beta_used_raw": -0.00411562342196703, "beta_dpo/gap_mean": 26.567459106445312, "beta_dpo/gap_std": 40.30250549316406, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6408376963350786, "grad_norm": 185.1968536376953, "learning_rate": 1.7344254621846017e-07, "logits/chosen": 1.48641836643219, "logits/rejected": 1.3792299032211304, "loss": 4.523, "step": 306 }, { "beta_dpo/beta_used": 0.057250961661338806, "beta_dpo/beta_used_raw": 0.05049164220690727, "beta_dpo/gap_mean": 26.73577117919922, "beta_dpo/gap_std": 40.15787124633789, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.6429319371727749, "grad_norm": 112.26713562011719, "learning_rate": 1.717018039327053e-07, "logits/chosen": 1.2322039604187012, "logits/rejected": 1.3177506923675537, "loss": 2.6335, "step": 307 }, { "beta_dpo/beta_used": 0.012129316106438637, "beta_dpo/beta_used_raw": -0.021789539605379105, "beta_dpo/gap_mean": 25.66850471496582, "beta_dpo/gap_std": 39.91798400878906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6450261780104712, "grad_norm": 49.086910247802734, "learning_rate": 1.699652605415828e-07, "logits/chosen": 1.3670289516448975, "logits/rejected": 1.3430283069610596, "loss": 4.7575, "step": 308 }, { "beta_dpo/beta_used": 0.04727376997470856, "beta_dpo/beta_used_raw": 0.04426693171262741, "beta_dpo/gap_mean": 24.053421020507812, "beta_dpo/gap_std": 41.2784309387207, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.6471204188481675, "grad_norm": 212.80130004882812, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 1.8778178691864014, "logits/rejected": 1.6358754634857178, "loss": 4.2193, "step": 309 }, { "beta_dpo/beta_used": 0.019439999014139175, "beta_dpo/beta_used_raw": -0.004033832810819149, "beta_dpo/gap_mean": 24.703720092773438, "beta_dpo/gap_std": 41.20947265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6492146596858639, "grad_norm": 48.24752426147461, "learning_rate": 1.6650514271527465e-07, "logits/chosen": 1.368004560470581, "logits/rejected": 1.6040199995040894, "loss": 4.5291, "step": 310 }, { "beta_dpo/beta_used": 0.015120752155780792, "beta_dpo/beta_used_raw": -0.0021106062922626734, "beta_dpo/gap_mean": 23.902956008911133, "beta_dpo/gap_std": 41.10802459716797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6513089005235602, "grad_norm": 83.40555572509766, "learning_rate": 1.647817538357072e-07, "logits/chosen": 1.4084839820861816, "logits/rejected": 1.5573794841766357, "loss": 5.0402, "step": 311 }, { "beta_dpo/beta_used": 0.03297141566872597, "beta_dpo/beta_used_raw": 0.015015541575849056, "beta_dpo/gap_mean": 25.408002853393555, "beta_dpo/gap_std": 40.86416244506836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6534031413612565, "grad_norm": 73.86492156982422, "learning_rate": 1.6306293495205755e-07, "logits/chosen": 1.538864016532898, "logits/rejected": 1.5750356912612915, "loss": 4.1733, "step": 312 }, { "beta_dpo/beta_used": 0.026556478813290596, "beta_dpo/beta_used_raw": 0.014360915869474411, "beta_dpo/gap_mean": 22.71212387084961, "beta_dpo/gap_std": 41.899532318115234, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6554973821989529, "grad_norm": 43.307254791259766, "learning_rate": 1.6134877823936607e-07, "logits/chosen": 1.4833365678787231, "logits/rejected": 1.5087875127792358, "loss": 4.3745, "step": 313 }, { "beta_dpo/beta_used": 0.04993228241801262, "beta_dpo/beta_used_raw": 0.04400447756052017, "beta_dpo/gap_mean": 23.01084327697754, "beta_dpo/gap_std": 41.7484245300293, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6575916230366492, "grad_norm": 185.30311584472656, "learning_rate": 1.5963937562265522e-07, "logits/chosen": 1.5994868278503418, "logits/rejected": 1.6039897203445435, "loss": 3.9626, "step": 314 }, { "beta_dpo/beta_used": 0.019904792308807373, "beta_dpo/beta_used_raw": 0.0038104329723864794, "beta_dpo/gap_mean": 24.50067710876465, "beta_dpo/gap_std": 41.975162506103516, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6596858638743456, "grad_norm": 60.188743591308594, "learning_rate": 1.5793481877199943e-07, "logits/chosen": 1.8757685422897339, "logits/rejected": 1.802669644355774, "loss": 4.3242, "step": 315 }, { "beta_dpo/beta_used": 0.011747484095394611, "beta_dpo/beta_used_raw": -0.011036318726837635, "beta_dpo/gap_mean": 25.946598052978516, "beta_dpo/gap_std": 41.94285583496094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6617801047120419, "grad_norm": 96.64191436767578, "learning_rate": 1.562351990976095e-07, "logits/chosen": 1.2265623807907104, "logits/rejected": 1.3494703769683838, "loss": 4.9269, "step": 316 }, { "beta_dpo/beta_used": 0.01594529114663601, "beta_dpo/beta_used_raw": -0.011434204876422882, "beta_dpo/gap_mean": 25.075801849365234, "beta_dpo/gap_std": 42.253684997558594, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.6638743455497382, "grad_norm": 66.5694580078125, "learning_rate": 1.5454060774493065e-07, "logits/chosen": 1.4082281589508057, "logits/rejected": 1.4196900129318237, "loss": 4.7001, "step": 317 }, { "beta_dpo/beta_used": 0.04018227756023407, "beta_dpo/beta_used_raw": 0.030732491984963417, "beta_dpo/gap_mean": 24.28862953186035, "beta_dpo/gap_std": 38.98953628540039, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6659685863874345, "grad_norm": 67.83879852294922, "learning_rate": 1.5285113558975427e-07, "logits/chosen": 1.5352228879928589, "logits/rejected": 1.7299730777740479, "loss": 3.609, "step": 318 }, { "beta_dpo/beta_used": 0.021217646077275276, "beta_dpo/beta_used_raw": 0.006635315250605345, "beta_dpo/gap_mean": 25.60199737548828, "beta_dpo/gap_std": 38.8849983215332, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6680628272251309, "grad_norm": 51.74640655517578, "learning_rate": 1.5116687323334464e-07, "logits/chosen": 1.2286893129348755, "logits/rejected": 1.462414026260376, "loss": 4.2736, "step": 319 }, { "beta_dpo/beta_used": 0.015490580350160599, "beta_dpo/beta_used_raw": -0.004652615636587143, "beta_dpo/gap_mean": 25.7495059967041, "beta_dpo/gap_std": 39.36385726928711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6701570680628273, "grad_norm": 66.98085021972656, "learning_rate": 1.4948791099758052e-07, "logits/chosen": 1.9294114112854004, "logits/rejected": 1.8916367292404175, "loss": 4.5611, "step": 320 }, { "beta_dpo/beta_used": 0.02924424409866333, "beta_dpo/beta_used_raw": 0.012194283306598663, "beta_dpo/gap_mean": 23.17910385131836, "beta_dpo/gap_std": 40.0921745300293, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6722513089005235, "grad_norm": 75.77815246582031, "learning_rate": 1.478143389201113e-07, "logits/chosen": 1.6986711025238037, "logits/rejected": 1.4788739681243896, "loss": 3.9257, "step": 321 }, { "beta_dpo/beta_used": 0.01892891526222229, "beta_dpo/beta_used_raw": 0.0015440168790519238, "beta_dpo/gap_mean": 23.837888717651367, "beta_dpo/gap_std": 39.51669692993164, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6743455497382199, "grad_norm": 61.61996841430664, "learning_rate": 1.461462467495284e-07, "logits/chosen": 1.2796248197555542, "logits/rejected": 1.2974272966384888, "loss": 4.6315, "step": 322 }, { "beta_dpo/beta_used": 0.02444988675415516, "beta_dpo/beta_used_raw": -0.004915682598948479, "beta_dpo/gap_mean": 22.961061477661133, "beta_dpo/gap_std": 40.85033416748047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6764397905759162, "grad_norm": 61.971153259277344, "learning_rate": 1.4448372394055246e-07, "logits/chosen": 1.2066650390625, "logits/rejected": 0.9574912190437317, "loss": 4.1271, "step": 323 }, { "beta_dpo/beta_used": 0.04017874598503113, "beta_dpo/beta_used_raw": 0.02891341596841812, "beta_dpo/gap_mean": 23.883920669555664, "beta_dpo/gap_std": 40.295066833496094, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6785340314136126, "grad_norm": 67.87089538574219, "learning_rate": 1.428268596492364e-07, "logits/chosen": 1.6108598709106445, "logits/rejected": 1.5994318723678589, "loss": 3.8856, "step": 324 }, { "beta_dpo/beta_used": 0.04682011157274246, "beta_dpo/beta_used_raw": 0.020984284579753876, "beta_dpo/gap_mean": 25.911354064941406, "beta_dpo/gap_std": 41.97956085205078, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.680628272251309, "grad_norm": 139.75146484375, "learning_rate": 1.4117574272818386e-07, "logits/chosen": 1.6725175380706787, "logits/rejected": 1.797964096069336, "loss": 4.4611, "step": 325 }, { "beta_dpo/beta_used": 0.01575140468776226, "beta_dpo/beta_used_raw": -0.0014644484035670757, "beta_dpo/gap_mean": 23.560775756835938, "beta_dpo/gap_std": 44.54059982299805, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.6827225130890052, "grad_norm": 56.50615310668945, "learning_rate": 1.3953046172178413e-07, "logits/chosen": 1.166620135307312, "logits/rejected": 1.4378832578659058, "loss": 4.8138, "step": 326 }, { "beta_dpo/beta_used": 0.060598503798246384, "beta_dpo/beta_used_raw": 0.054446715861558914, "beta_dpo/gap_mean": 25.692852020263672, "beta_dpo/gap_std": 43.64955520629883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6848167539267016, "grad_norm": 121.72166442871094, "learning_rate": 1.3789110486146468e-07, "logits/chosen": 1.5548646450042725, "logits/rejected": 1.4554078578948975, "loss": 3.1471, "step": 327 }, { "beta_dpo/beta_used": 0.015800345689058304, "beta_dpo/beta_used_raw": -0.010136552155017853, "beta_dpo/gap_mean": 27.02881622314453, "beta_dpo/gap_std": 41.867454528808594, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6869109947643979, "grad_norm": 41.46779251098633, "learning_rate": 1.362577600609588e-07, "logits/chosen": 1.3131914138793945, "logits/rejected": 1.3917593955993652, "loss": 4.4447, "step": 328 }, { "beta_dpo/beta_used": 0.013338714838027954, "beta_dpo/beta_used_raw": -0.004739915020763874, "beta_dpo/gap_mean": 25.284814834594727, "beta_dpo/gap_std": 41.969566345214844, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6890052356020943, "grad_norm": 60.99818420410156, "learning_rate": 1.3463051491159093e-07, "logits/chosen": 1.4903924465179443, "logits/rejected": 1.814817190170288, "loss": 5.1013, "step": 329 }, { "beta_dpo/beta_used": 0.028788069263100624, "beta_dpo/beta_used_raw": 0.005851927679032087, "beta_dpo/gap_mean": 22.452590942382812, "beta_dpo/gap_std": 44.61354064941406, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6910994764397905, "grad_norm": 179.97225952148438, "learning_rate": 1.3300945667758012e-07, "logits/chosen": 1.6997681856155396, "logits/rejected": 1.6331228017807007, "loss": 4.3589, "step": 330 }, { "beta_dpo/beta_used": 0.028657177463173866, "beta_dpo/beta_used_raw": 0.019459933042526245, "beta_dpo/gap_mean": 23.764484405517578, "beta_dpo/gap_std": 42.601539611816406, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6931937172774869, "grad_norm": 48.68358612060547, "learning_rate": 1.3139467229135998e-07, "logits/chosen": 1.441627860069275, "logits/rejected": 1.3355118036270142, "loss": 4.303, "step": 331 }, { "beta_dpo/beta_used": 0.04341350123286247, "beta_dpo/beta_used_raw": 0.038200560957193375, "beta_dpo/gap_mean": 26.584733963012695, "beta_dpo/gap_std": 41.82080078125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6952879581151833, "grad_norm": 73.14544677734375, "learning_rate": 1.2978624834891626e-07, "logits/chosen": 1.2019636631011963, "logits/rejected": 1.203635334968567, "loss": 3.8045, "step": 332 }, { "beta_dpo/beta_used": 0.011696412228047848, "beta_dpo/beta_used_raw": -0.014751153066754341, "beta_dpo/gap_mean": 23.98305892944336, "beta_dpo/gap_std": 42.328861236572266, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6973821989528796, "grad_norm": 47.275943756103516, "learning_rate": 1.281842711051438e-07, "logits/chosen": 1.2524588108062744, "logits/rejected": 1.1359145641326904, "loss": 4.9502, "step": 333 }, { "beta_dpo/beta_used": 0.034421779215335846, "beta_dpo/beta_used_raw": 0.018691357225179672, "beta_dpo/gap_mean": 22.934709548950195, "beta_dpo/gap_std": 41.71361541748047, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6994764397905759, "grad_norm": 63.18965530395508, "learning_rate": 1.2658882646922033e-07, "logits/chosen": 1.3189448118209839, "logits/rejected": 1.3639788627624512, "loss": 3.9628, "step": 334 }, { "beta_dpo/beta_used": 0.02628299593925476, "beta_dpo/beta_used_raw": -0.008556408807635307, "beta_dpo/gap_mean": 23.939117431640625, "beta_dpo/gap_std": 43.04575729370117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7015706806282722, "grad_norm": 158.49334716796875, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 1.460978627204895, "logits/rejected": 1.5252642631530762, "loss": 4.7371, "step": 335 }, { "beta_dpo/beta_used": 0.013360177166759968, "beta_dpo/beta_used_raw": -0.006929943338036537, "beta_dpo/gap_mean": 21.377792358398438, "beta_dpo/gap_std": 43.017784118652344, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.7036649214659686, "grad_norm": 49.986663818359375, "learning_rate": 1.2341787690142435e-07, "logits/chosen": 1.5372939109802246, "logits/rejected": 1.7963600158691406, "loss": 4.9661, "step": 336 }, { "beta_dpo/beta_used": 0.03531493619084358, "beta_dpo/beta_used_raw": 0.009031134657561779, "beta_dpo/gap_mean": 21.560890197753906, "beta_dpo/gap_std": 42.4267578125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7057591623036649, "grad_norm": 93.49922943115234, "learning_rate": 1.2184254201795363e-07, "logits/chosen": 1.0734624862670898, "logits/rejected": 0.9902403950691223, "loss": 4.5839, "step": 337 }, { "beta_dpo/beta_used": 0.026785733178257942, "beta_dpo/beta_used_raw": 0.008016789332032204, "beta_dpo/gap_mean": 24.554834365844727, "beta_dpo/gap_std": 42.207237243652344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7078534031413612, "grad_norm": 270.1446533203125, "learning_rate": 1.202740798300168e-07, "logits/chosen": 1.5387308597564697, "logits/rejected": 1.5395488739013672, "loss": 4.6984, "step": 338 }, { "beta_dpo/beta_used": 0.03279449790716171, "beta_dpo/beta_used_raw": 0.020053986459970474, "beta_dpo/gap_mean": 27.445066452026367, "beta_dpo/gap_std": 43.14484405517578, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7099476439790576, "grad_norm": 70.26140594482422, "learning_rate": 1.1871257444948096e-07, "logits/chosen": 1.5849591493606567, "logits/rejected": 1.5081734657287598, "loss": 4.0688, "step": 339 }, { "beta_dpo/beta_used": 0.013446008786559105, "beta_dpo/beta_used_raw": -0.02429656684398651, "beta_dpo/gap_mean": 26.41143226623535, "beta_dpo/gap_std": 44.58018493652344, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7120418848167539, "grad_norm": 44.72693634033203, "learning_rate": 1.1715810961514072e-07, "logits/chosen": 0.8878348469734192, "logits/rejected": 1.03843355178833, "loss": 4.9074, "step": 340 }, { "beta_dpo/beta_used": 0.02605244144797325, "beta_dpo/beta_used_raw": -0.017769023776054382, "beta_dpo/gap_mean": 21.7451114654541, "beta_dpo/gap_std": 44.111759185791016, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.7141361256544503, "grad_norm": 71.31874084472656, "learning_rate": 1.1561076868822755e-07, "logits/chosen": 1.4821139574050903, "logits/rejected": 1.688697338104248, "loss": 4.741, "step": 341 }, { "beta_dpo/beta_used": 0.039557162672281265, "beta_dpo/beta_used_raw": 0.024851929396390915, "beta_dpo/gap_mean": 22.442163467407227, "beta_dpo/gap_std": 42.288307189941406, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7162303664921466, "grad_norm": 90.50724029541016, "learning_rate": 1.1407063464793965e-07, "logits/chosen": 1.515696406364441, "logits/rejected": 1.6636167764663696, "loss": 3.8821, "step": 342 }, { "beta_dpo/beta_used": 0.028740962967276573, "beta_dpo/beta_used_raw": 0.023837603628635406, "beta_dpo/gap_mean": 22.93502426147461, "beta_dpo/gap_std": 41.14816665649414, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7183246073298429, "grad_norm": 84.98859405517578, "learning_rate": 1.125377900869913e-07, "logits/chosen": 1.6616275310516357, "logits/rejected": 1.49526846408844, "loss": 4.1559, "step": 343 }, { "beta_dpo/beta_used": 0.03510721027851105, "beta_dpo/beta_used_raw": 0.019166965037584305, "beta_dpo/gap_mean": 22.779037475585938, "beta_dpo/gap_std": 41.92900085449219, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7204188481675393, "grad_norm": 148.84140014648438, "learning_rate": 1.110123172071844e-07, "logits/chosen": 1.341618537902832, "logits/rejected": 1.4202890396118164, "loss": 4.5051, "step": 344 }, { "beta_dpo/beta_used": 0.02456255815923214, "beta_dpo/beta_used_raw": -0.002841557841747999, "beta_dpo/gap_mean": 23.927555084228516, "beta_dpo/gap_std": 41.32786560058594, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.7225130890052356, "grad_norm": 71.29635620117188, "learning_rate": 1.09494297815e-07, "logits/chosen": 1.6482702493667603, "logits/rejected": 1.768045425415039, "loss": 4.6483, "step": 345 }, { "beta_dpo/beta_used": 0.03388482332229614, "beta_dpo/beta_used_raw": 0.01795162260532379, "beta_dpo/gap_mean": 24.46042251586914, "beta_dpo/gap_std": 38.79722595214844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.724607329842932, "grad_norm": 70.42410278320312, "learning_rate": 1.0798381331721107e-07, "logits/chosen": 1.0491037368774414, "logits/rejected": 1.1440801620483398, "loss": 4.0215, "step": 346 }, { "beta_dpo/beta_used": 0.028078395873308182, "beta_dpo/beta_used_raw": 0.014503560960292816, "beta_dpo/gap_mean": 25.07908058166504, "beta_dpo/gap_std": 40.29609680175781, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7267015706806282, "grad_norm": 71.0637435913086, "learning_rate": 1.0648094471651722e-07, "logits/chosen": 1.4748269319534302, "logits/rejected": 1.4847553968429565, "loss": 4.2448, "step": 347 }, { "beta_dpo/beta_used": 0.014106756076216698, "beta_dpo/beta_used_raw": -0.01745045930147171, "beta_dpo/gap_mean": 20.426612854003906, "beta_dpo/gap_std": 37.750858306884766, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7287958115183246, "grad_norm": 41.9898681640625, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 1.4606678485870361, "logits/rejected": 1.539605736732483, "loss": 4.7306, "step": 348 }, { "beta_dpo/beta_used": 0.03836182504892349, "beta_dpo/beta_used_raw": 0.027038609609007835, "beta_dpo/gap_mean": 23.00733757019043, "beta_dpo/gap_std": 40.6578369140625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7308900523560209, "grad_norm": 116.7526626586914, "learning_rate": 1.0349837717080347e-07, "logits/chosen": 1.5413777828216553, "logits/rejected": 1.6035332679748535, "loss": 4.5157, "step": 349 }, { "beta_dpo/beta_used": 0.03894190117716789, "beta_dpo/beta_used_raw": 0.026062268763780594, "beta_dpo/gap_mean": 24.171770095825195, "beta_dpo/gap_std": 41.29063415527344, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7329842931937173, "grad_norm": 110.67535400390625, "learning_rate": 1.0201883817182949e-07, "logits/chosen": 1.7762742042541504, "logits/rejected": 1.5685731172561646, "loss": 3.9019, "step": 350 }, { "beta_dpo/beta_used": 0.005233833100646734, "beta_dpo/beta_used_raw": -0.016874097287654877, "beta_dpo/gap_mean": 21.94039535522461, "beta_dpo/gap_std": 42.503211975097656, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7350785340314137, "grad_norm": 17.592376708984375, "learning_rate": 1.0054723495346482e-07, "logits/chosen": 1.4498162269592285, "logits/rejected": 1.4771305322647095, "loss": 5.2076, "step": 351 }, { "beta_dpo/beta_used": 0.05153050646185875, "beta_dpo/beta_used_raw": 0.0483248271048069, "beta_dpo/gap_mean": 23.78329086303711, "beta_dpo/gap_std": 43.25350570678711, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7371727748691099, "grad_norm": 260.2582092285156, "learning_rate": 9.908364643332398e-08, "logits/chosen": 1.537024974822998, "logits/rejected": 1.781685471534729, "loss": 3.9455, "step": 352 }, { "beta_dpo/beta_used": 0.030592329800128937, "beta_dpo/beta_used_raw": 0.014342766255140305, "beta_dpo/gap_mean": 25.760425567626953, "beta_dpo/gap_std": 40.68629455566406, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7392670157068063, "grad_norm": 90.93749237060547, "learning_rate": 9.76281510992176e-08, "logits/chosen": 1.2568163871765137, "logits/rejected": 1.252407193183899, "loss": 4.1275, "step": 353 }, { "beta_dpo/beta_used": 0.012342535890638828, "beta_dpo/beta_used_raw": -0.01871517114341259, "beta_dpo/gap_mean": 23.74026870727539, "beta_dpo/gap_std": 42.1845703125, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.7413612565445026, "grad_norm": 94.49176025390625, "learning_rate": 9.618082700494318e-08, "logits/chosen": 1.3868615627288818, "logits/rejected": 1.4805989265441895, "loss": 6.0723, "step": 354 }, { "beta_dpo/beta_used": 0.04293268173933029, "beta_dpo/beta_used_raw": 0.03582005202770233, "beta_dpo/gap_mean": 23.41856575012207, "beta_dpo/gap_std": 43.963043212890625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.743455497382199, "grad_norm": 109.3790054321289, "learning_rate": 9.474175176609956e-08, "logits/chosen": 1.5852292776107788, "logits/rejected": 1.7418677806854248, "loss": 4.0902, "step": 355 }, { "beta_dpo/beta_used": 0.017568301409482956, "beta_dpo/beta_used_raw": -0.009104796685278416, "beta_dpo/gap_mean": 22.803916931152344, "beta_dpo/gap_std": 39.86484909057617, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7455497382198953, "grad_norm": 80.7624282836914, "learning_rate": 9.331100255592436e-08, "logits/chosen": 1.3812074661254883, "logits/rejected": 1.4987109899520874, "loss": 4.7965, "step": 356 }, { "beta_dpo/beta_used": 0.028205767273902893, "beta_dpo/beta_used_raw": 0.009551008231937885, "beta_dpo/gap_mean": 21.426677703857422, "beta_dpo/gap_std": 41.5255012512207, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7476439790575916, "grad_norm": 198.40061950683594, "learning_rate": 9.18886561011557e-08, "logits/chosen": 1.535756230354309, "logits/rejected": 1.5348542928695679, "loss": 4.8634, "step": 357 }, { "beta_dpo/beta_used": 0.03555550426244736, "beta_dpo/beta_used_raw": 0.02786700241267681, "beta_dpo/gap_mean": 24.894935607910156, "beta_dpo/gap_std": 42.7304801940918, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.749738219895288, "grad_norm": 100.15424346923828, "learning_rate": 9.047478867791731e-08, "logits/chosen": 1.3941529989242554, "logits/rejected": 1.3515270948410034, "loss": 4.5553, "step": 358 }, { "beta_dpo/beta_used": 0.022278830409049988, "beta_dpo/beta_used_raw": 0.012576328590512276, "beta_dpo/gap_mean": 25.73493194580078, "beta_dpo/gap_std": 42.311771392822266, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7518324607329843, "grad_norm": 63.86215591430664, "learning_rate": 8.906947610762825e-08, "logits/chosen": 1.4539521932601929, "logits/rejected": 1.5561376810073853, "loss": 4.4114, "step": 359 }, { "beta_dpo/beta_used": 0.011337094008922577, "beta_dpo/beta_used_raw": -0.001419117208570242, "beta_dpo/gap_mean": 25.067241668701172, "beta_dpo/gap_std": 41.38372802734375, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.7539267015706806, "grad_norm": 33.68746566772461, "learning_rate": 8.76727937529367e-08, "logits/chosen": 1.602333664894104, "logits/rejected": 1.5335873365402222, "loss": 4.7231, "step": 360 }, { "beta_dpo/beta_used": 0.05738076567649841, "beta_dpo/beta_used_raw": 0.053437668830156326, "beta_dpo/gap_mean": 26.05853271484375, "beta_dpo/gap_std": 42.002994537353516, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.7560209424083769, "grad_norm": 172.0302734375, "learning_rate": 8.628481651367875e-08, "logits/chosen": 1.2185293436050415, "logits/rejected": 1.4148153066635132, "loss": 3.4371, "step": 361 }, { "beta_dpo/beta_used": 0.04557962343096733, "beta_dpo/beta_used_raw": 0.027200574055314064, "beta_dpo/gap_mean": 26.162132263183594, "beta_dpo/gap_std": 42.437416076660156, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7581151832460733, "grad_norm": 90.24806213378906, "learning_rate": 8.490561882286135e-08, "logits/chosen": 1.3487976789474487, "logits/rejected": 1.3411986827850342, "loss": 3.4565, "step": 362 }, { "beta_dpo/beta_used": 0.03907949849963188, "beta_dpo/beta_used_raw": 0.0354890413582325, "beta_dpo/gap_mean": 25.548728942871094, "beta_dpo/gap_std": 42.264503479003906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7602094240837697, "grad_norm": 101.23867797851562, "learning_rate": 8.353527464267104e-08, "logits/chosen": 1.5559055805206299, "logits/rejected": 1.4353469610214233, "loss": 3.6541, "step": 363 }, { "beta_dpo/beta_used": 0.019362712278962135, "beta_dpo/beta_used_raw": -0.005188856739550829, "beta_dpo/gap_mean": 24.893081665039062, "beta_dpo/gap_std": 41.87436294555664, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.762303664921466, "grad_norm": 84.14205932617188, "learning_rate": 8.217385746050742e-08, "logits/chosen": 1.8355655670166016, "logits/rejected": 1.5974853038787842, "loss": 4.7009, "step": 364 }, { "beta_dpo/beta_used": 0.02898905798792839, "beta_dpo/beta_used_raw": 0.02243414893746376, "beta_dpo/gap_mean": 23.674781799316406, "beta_dpo/gap_std": 41.810665130615234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7643979057591623, "grad_norm": 77.57154083251953, "learning_rate": 8.082144028504231e-08, "logits/chosen": 1.512800693511963, "logits/rejected": 1.7196999788284302, "loss": 4.3814, "step": 365 }, { "beta_dpo/beta_used": 0.030239790678024292, "beta_dpo/beta_used_raw": 0.0004999339580535889, "beta_dpo/gap_mean": 25.40928840637207, "beta_dpo/gap_std": 41.03025817871094, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7664921465968586, "grad_norm": 41.87646484375, "learning_rate": 7.947809564230445e-08, "logits/chosen": 1.4762005805969238, "logits/rejected": 1.3744585514068604, "loss": 4.3359, "step": 366 }, { "beta_dpo/beta_used": 0.028317891061306, "beta_dpo/beta_used_raw": 0.008798494935035706, "beta_dpo/gap_mean": 25.357412338256836, "beta_dpo/gap_std": 39.42461013793945, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.768586387434555, "grad_norm": 70.21991729736328, "learning_rate": 7.814389557179016e-08, "logits/chosen": 1.8320472240447998, "logits/rejected": 1.5733611583709717, "loss": 3.8554, "step": 367 }, { "beta_dpo/beta_used": 0.040644265711307526, "beta_dpo/beta_used_raw": 0.029841335490345955, "beta_dpo/gap_mean": 27.69914436340332, "beta_dpo/gap_std": 39.52192687988281, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7706806282722513, "grad_norm": 51.4883918762207, "learning_rate": 7.681891162260015e-08, "logits/chosen": 1.7997376918792725, "logits/rejected": 1.644882321357727, "loss": 3.7779, "step": 368 }, { "beta_dpo/beta_used": 0.007025650702416897, "beta_dpo/beta_used_raw": -0.022717807441949844, "beta_dpo/gap_mean": 26.59383201599121, "beta_dpo/gap_std": 39.74239730834961, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7727748691099476, "grad_norm": 31.33010482788086, "learning_rate": 7.550321484960251e-08, "logits/chosen": 1.567758560180664, "logits/rejected": 1.5652072429656982, "loss": 5.0706, "step": 369 }, { "beta_dpo/beta_used": 0.03189126402139664, "beta_dpo/beta_used_raw": -0.0016455072909593582, "beta_dpo/gap_mean": 25.960403442382812, "beta_dpo/gap_std": 41.779354095458984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.774869109947644, "grad_norm": 61.498207092285156, "learning_rate": 7.419687580962222e-08, "logits/chosen": 1.4514704942703247, "logits/rejected": 1.6543275117874146, "loss": 4.0113, "step": 370 }, { "beta_dpo/beta_used": 0.02217245101928711, "beta_dpo/beta_used_raw": 0.0033044693991541862, "beta_dpo/gap_mean": 22.760690689086914, "beta_dpo/gap_std": 41.05923080444336, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7769633507853403, "grad_norm": 52.41913604736328, "learning_rate": 7.289996455765748e-08, "logits/chosen": 0.8454320430755615, "logits/rejected": 1.0241940021514893, "loss": 4.3701, "step": 371 }, { "beta_dpo/beta_used": 0.06150563433766365, "beta_dpo/beta_used_raw": 0.060376305133104324, "beta_dpo/gap_mean": 26.305227279663086, "beta_dpo/gap_std": 40.897579193115234, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7790575916230367, "grad_norm": 95.63087463378906, "learning_rate": 7.161255064312283e-08, "logits/chosen": 1.3337714672088623, "logits/rejected": 1.200531244277954, "loss": 3.4199, "step": 372 }, { "beta_dpo/beta_used": 0.017467252910137177, "beta_dpo/beta_used_raw": -0.0006841365247964859, "beta_dpo/gap_mean": 27.40023422241211, "beta_dpo/gap_std": 41.40983963012695, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7811518324607329, "grad_norm": 65.07406616210938, "learning_rate": 7.033470310611945e-08, "logits/chosen": 1.5559697151184082, "logits/rejected": 1.267425537109375, "loss": 4.8366, "step": 373 }, { "beta_dpo/beta_used": 0.020001672208309174, "beta_dpo/beta_used_raw": -0.006743720732629299, "beta_dpo/gap_mean": 25.95492172241211, "beta_dpo/gap_std": 42.976318359375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7832460732984293, "grad_norm": 43.096229553222656, "learning_rate": 6.906649047373245e-08, "logits/chosen": 1.5863916873931885, "logits/rejected": 1.7011443376541138, "loss": 4.5082, "step": 374 }, { "beta_dpo/beta_used": 0.012499826960265636, "beta_dpo/beta_used_raw": -0.00970209576189518, "beta_dpo/gap_mean": 23.713022232055664, "beta_dpo/gap_std": 42.88922119140625, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7853403141361257, "grad_norm": 35.47541427612305, "learning_rate": 6.780798075635675e-08, "logits/chosen": 1.4474728107452393, "logits/rejected": 1.3061145544052124, "loss": 4.878, "step": 375 }, { "beta_dpo/beta_used": 0.032169777899980545, "beta_dpo/beta_used_raw": 0.023137152194976807, "beta_dpo/gap_mean": 23.426164627075195, "beta_dpo/gap_std": 42.51594924926758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.787434554973822, "grad_norm": 95.04769897460938, "learning_rate": 6.655924144404906e-08, "logits/chosen": 1.573278546333313, "logits/rejected": 1.815221905708313, "loss": 4.1144, "step": 376 }, { "beta_dpo/beta_used": 0.030707208439707756, "beta_dpo/beta_used_raw": 0.005986468866467476, "beta_dpo/gap_mean": 23.08704376220703, "beta_dpo/gap_std": 41.96858596801758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7895287958115184, "grad_norm": 90.52848052978516, "learning_rate": 6.532033950290885e-08, "logits/chosen": 1.5606698989868164, "logits/rejected": 1.6266758441925049, "loss": 4.5857, "step": 377 }, { "beta_dpo/beta_used": 0.0293353870511055, "beta_dpo/beta_used_raw": 0.019241416826844215, "beta_dpo/gap_mean": 21.17989730834961, "beta_dpo/gap_std": 42.731689453125, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7916230366492146, "grad_norm": 168.0338897705078, "learning_rate": 6.409134137148736e-08, "logits/chosen": 1.567497968673706, "logits/rejected": 1.6306406259536743, "loss": 4.6972, "step": 378 }, { "beta_dpo/beta_used": 0.021104762330651283, "beta_dpo/beta_used_raw": -0.0026983979623764753, "beta_dpo/gap_mean": 22.86931610107422, "beta_dpo/gap_std": 42.299137115478516, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.793717277486911, "grad_norm": 53.448760986328125, "learning_rate": 6.28723129572247e-08, "logits/chosen": 1.6663786172866821, "logits/rejected": 1.593047022819519, "loss": 4.8597, "step": 379 }, { "beta_dpo/beta_used": 0.014543892815709114, "beta_dpo/beta_used_raw": 0.004879960790276527, "beta_dpo/gap_mean": 23.742534637451172, "beta_dpo/gap_std": 42.56512451171875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7958115183246073, "grad_norm": 51.2754020690918, "learning_rate": 6.166331963291519e-08, "logits/chosen": 1.9557546377182007, "logits/rejected": 1.7796638011932373, "loss": 4.7633, "step": 380 }, { "beta_dpo/beta_used": 0.0125275244936347, "beta_dpo/beta_used_raw": -0.0011020167730748653, "beta_dpo/gap_mean": 24.683391571044922, "beta_dpo/gap_std": 41.60409927368164, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.7979057591623037, "grad_norm": 35.46774673461914, "learning_rate": 6.046442623320145e-08, "logits/chosen": 1.191896677017212, "logits/rejected": 1.2276725769042969, "loss": 5.1082, "step": 381 }, { "beta_dpo/beta_used": 0.03925769403576851, "beta_dpo/beta_used_raw": 0.03246406838297844, "beta_dpo/gap_mean": 26.57273292541504, "beta_dpo/gap_std": 40.347042083740234, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8, "grad_norm": 91.7632064819336, "learning_rate": 5.9275697051098275e-08, "logits/chosen": 1.5332963466644287, "logits/rejected": 1.5386418104171753, "loss": 3.9613, "step": 382 }, { "beta_dpo/beta_used": 0.026574671268463135, "beta_dpo/beta_used_raw": 0.0041369106620550156, "beta_dpo/gap_mean": 27.20392608642578, "beta_dpo/gap_std": 41.187217712402344, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8020942408376963, "grad_norm": 90.72322082519531, "learning_rate": 5.809719583454414e-08, "logits/chosen": 1.213146448135376, "logits/rejected": 1.4346027374267578, "loss": 4.2591, "step": 383 }, { "beta_dpo/beta_used": 0.018568674102425575, "beta_dpo/beta_used_raw": -0.005661527160555124, "beta_dpo/gap_mean": 23.266735076904297, "beta_dpo/gap_std": 40.896419525146484, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8041884816753927, "grad_norm": 97.89303588867188, "learning_rate": 5.6928985782982524e-08, "logits/chosen": 1.4912177324295044, "logits/rejected": 1.8480693101882935, "loss": 4.8446, "step": 384 }, { "beta_dpo/beta_used": 0.025455057621002197, "beta_dpo/beta_used_raw": 0.020301831886172295, "beta_dpo/gap_mean": 22.282352447509766, "beta_dpo/gap_std": 40.13404846191406, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.806282722513089, "grad_norm": 87.30133056640625, "learning_rate": 5.57711295439732e-08, "logits/chosen": 1.6445767879486084, "logits/rejected": 1.6937466859817505, "loss": 4.6559, "step": 385 }, { "beta_dpo/beta_used": 0.07003487646579742, "beta_dpo/beta_used_raw": 0.05945579335093498, "beta_dpo/gap_mean": 25.810016632080078, "beta_dpo/gap_std": 40.25865936279297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8083769633507853, "grad_norm": 74.50102233886719, "learning_rate": 5.4623689209832484e-08, "logits/chosen": 1.644815444946289, "logits/rejected": 1.745370864868164, "loss": 3.0306, "step": 386 }, { "beta_dpo/beta_used": 0.02559298276901245, "beta_dpo/beta_used_raw": -0.001606471836566925, "beta_dpo/gap_mean": 25.212953567504883, "beta_dpo/gap_std": 42.34771728515625, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8104712041884817, "grad_norm": 97.23846435546875, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 1.5523253679275513, "logits/rejected": 1.617262363433838, "loss": 4.3258, "step": 387 }, { "beta_dpo/beta_used": 0.019050609320402145, "beta_dpo/beta_used_raw": -0.013357133604586124, "beta_dpo/gap_mean": 24.462581634521484, "beta_dpo/gap_std": 42.33854675292969, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.812565445026178, "grad_norm": 115.40874481201172, "learning_rate": 5.2360301829254745e-08, "logits/chosen": 1.898555040359497, "logits/rejected": 1.8352364301681519, "loss": 4.8619, "step": 388 }, { "beta_dpo/beta_used": 0.031168397516012192, "beta_dpo/beta_used_raw": 0.01580439880490303, "beta_dpo/gap_mean": 24.110021591186523, "beta_dpo/gap_std": 41.419647216796875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8146596858638744, "grad_norm": 114.02845001220703, "learning_rate": 5.1244476161413806e-08, "logits/chosen": 1.7501044273376465, "logits/rejected": 1.5219378471374512, "loss": 4.4305, "step": 389 }, { "beta_dpo/beta_used": 0.037911996245384216, "beta_dpo/beta_used_raw": 0.03245529904961586, "beta_dpo/gap_mean": 24.70856475830078, "beta_dpo/gap_std": 42.322147369384766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8167539267015707, "grad_norm": 107.86334228515625, "learning_rate": 5.013930914912476e-08, "logits/chosen": 1.4109928607940674, "logits/rejected": 1.5585747957229614, "loss": 3.9697, "step": 390 }, { "beta_dpo/beta_used": 0.01184625644236803, "beta_dpo/beta_used_raw": -0.0196970384567976, "beta_dpo/gap_mean": 25.497241973876953, "beta_dpo/gap_std": 39.925994873046875, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.818848167539267, "grad_norm": 36.966331481933594, "learning_rate": 4.904486005914027e-08, "logits/chosen": 1.4992268085479736, "logits/rejected": 1.4016600847244263, "loss": 4.8753, "step": 391 }, { "beta_dpo/beta_used": 0.031304676085710526, "beta_dpo/beta_used_raw": 0.019566738978028297, "beta_dpo/gap_mean": 29.37858772277832, "beta_dpo/gap_std": 39.760597229003906, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8209424083769633, "grad_norm": 57.252769470214844, "learning_rate": 4.796118758344353e-08, "logits/chosen": 1.1666127443313599, "logits/rejected": 1.1494946479797363, "loss": 3.3712, "step": 392 }, { "beta_dpo/beta_used": 0.02446107193827629, "beta_dpo/beta_used_raw": 0.00717612449079752, "beta_dpo/gap_mean": 27.458255767822266, "beta_dpo/gap_std": 40.529483795166016, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8230366492146597, "grad_norm": 41.9975700378418, "learning_rate": 4.688834983610082e-08, "logits/chosen": 1.3543047904968262, "logits/rejected": 1.1334538459777832, "loss": 4.502, "step": 393 }, { "beta_dpo/beta_used": 0.013751739636063576, "beta_dpo/beta_used_raw": -0.013827711343765259, "beta_dpo/gap_mean": 25.792306900024414, "beta_dpo/gap_std": 41.532981872558594, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8251308900523561, "grad_norm": 38.37825012207031, "learning_rate": 4.582640435014459e-08, "logits/chosen": 1.755271077156067, "logits/rejected": 1.836128830909729, "loss": 4.8139, "step": 394 }, { "beta_dpo/beta_used": 0.03642860800027847, "beta_dpo/beta_used_raw": 0.02762317843735218, "beta_dpo/gap_mean": 22.787147521972656, "beta_dpo/gap_std": 39.04203414916992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8272251308900523, "grad_norm": 76.76990509033203, "learning_rate": 4.477540807448832e-08, "logits/chosen": 1.3757838010787964, "logits/rejected": 1.4005060195922852, "loss": 3.6736, "step": 395 }, { "beta_dpo/beta_used": 0.016622822731733322, "beta_dpo/beta_used_raw": -0.0027820090763270855, "beta_dpo/gap_mean": 23.37274932861328, "beta_dpo/gap_std": 39.84015655517578, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.8293193717277487, "grad_norm": 105.81222534179688, "learning_rate": 4.373541737087263e-08, "logits/chosen": 1.650363802909851, "logits/rejected": 1.6201927661895752, "loss": 5.2625, "step": 396 }, { "beta_dpo/beta_used": 0.022990621626377106, "beta_dpo/beta_used_raw": -0.0033985301852226257, "beta_dpo/gap_mean": 23.020658493041992, "beta_dpo/gap_std": 39.6679573059082, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.831413612565445, "grad_norm": 91.26580047607422, "learning_rate": 4.270648801084295e-08, "logits/chosen": 1.4977787733078003, "logits/rejected": 1.5780669450759888, "loss": 4.5482, "step": 397 }, { "beta_dpo/beta_used": 0.02246342971920967, "beta_dpo/beta_used_raw": 0.007876865565776825, "beta_dpo/gap_mean": 21.515539169311523, "beta_dpo/gap_std": 42.26047134399414, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8335078534031414, "grad_norm": 80.77655029296875, "learning_rate": 4.168867517275806e-08, "logits/chosen": 1.3882070779800415, "logits/rejected": 1.648177146911621, "loss": 4.6146, "step": 398 }, { "beta_dpo/beta_used": 0.030784644186496735, "beta_dpo/beta_used_raw": 0.016542304307222366, "beta_dpo/gap_mean": 22.006698608398438, "beta_dpo/gap_std": 42.646385192871094, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8356020942408376, "grad_norm": 157.0540313720703, "learning_rate": 4.0682033438831584e-08, "logits/chosen": 1.6338375806808472, "logits/rejected": 1.731345772743225, "loss": 4.4317, "step": 399 }, { "beta_dpo/beta_used": 0.04295587167143822, "beta_dpo/beta_used_raw": 0.029314618557691574, "beta_dpo/gap_mean": 21.83963394165039, "beta_dpo/gap_std": 39.70830154418945, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.837696335078534, "grad_norm": 134.598388671875, "learning_rate": 3.968661679220467e-08, "logits/chosen": 1.497736930847168, "logits/rejected": 1.427824854850769, "loss": 4.2926, "step": 400 }, { "epoch": 0.837696335078534, "eval_beta_dpo/beta_used": 0.03352755680680275, "eval_beta_dpo/beta_used_raw": 0.014615737833082676, "eval_beta_dpo/gap_mean": 23.013574600219727, "eval_beta_dpo/gap_std": 39.912696838378906, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.5097905397415161, "eval_logits/rejected": 1.546280860900879, "eval_loss": 0.5896762609481812, "eval_runtime": 92.7086, "eval_samples_per_second": 21.573, "eval_steps_per_second": 1.348, "step": 400 }, { "beta_dpo/beta_used": 0.031112950295209885, "beta_dpo/beta_used_raw": 0.018789593130350113, "beta_dpo/gap_mean": 25.120380401611328, "beta_dpo/gap_std": 39.081172943115234, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8397905759162304, "grad_norm": 94.05326843261719, "learning_rate": 3.8702478614051345e-08, "logits/chosen": 1.4719927310943604, "logits/rejected": 1.6373367309570312, "loss": 4.188, "step": 401 }, { "beta_dpo/beta_used": 0.02975967340171337, "beta_dpo/beta_used_raw": 0.020481513813138008, "beta_dpo/gap_mean": 25.850921630859375, "beta_dpo/gap_std": 40.83582305908203, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8418848167539267, "grad_norm": 68.26434326171875, "learning_rate": 3.772967168071517e-08, "logits/chosen": 1.4517847299575806, "logits/rejected": 1.3798197507858276, "loss": 4.0377, "step": 402 }, { "beta_dpo/beta_used": 0.033130984753370285, "beta_dpo/beta_used_raw": 0.026949459686875343, "beta_dpo/gap_mean": 27.959623336791992, "beta_dpo/gap_std": 38.593902587890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8439790575916231, "grad_norm": 56.32769012451172, "learning_rate": 3.676824816087978e-08, "logits/chosen": 1.6041405200958252, "logits/rejected": 1.634192705154419, "loss": 3.6404, "step": 403 }, { "beta_dpo/beta_used": 0.014831377193331718, "beta_dpo/beta_used_raw": -0.013218341395258904, "beta_dpo/gap_mean": 29.18805694580078, "beta_dpo/gap_std": 39.73085021972656, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8460732984293193, "grad_norm": 27.067461013793945, "learning_rate": 3.581825961277074e-08, "logits/chosen": 1.493395209312439, "logits/rejected": 1.3758317232131958, "loss": 4.6703, "step": 404 }, { "beta_dpo/beta_used": 0.03309793025255203, "beta_dpo/beta_used_raw": 0.011897753924131393, "beta_dpo/gap_mean": 26.401506423950195, "beta_dpo/gap_std": 40.610694885253906, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8481675392670157, "grad_norm": 67.01002502441406, "learning_rate": 3.487975698139084e-08, "logits/chosen": 1.5461680889129639, "logits/rejected": 1.6689039468765259, "loss": 3.8802, "step": 405 }, { "beta_dpo/beta_used": 0.007973221130669117, "beta_dpo/beta_used_raw": -0.02517438679933548, "beta_dpo/gap_mean": 23.499588012695312, "beta_dpo/gap_std": 41.003013610839844, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8502617801047121, "grad_norm": 30.062997817993164, "learning_rate": 3.3952790595787986e-08, "logits/chosen": 1.3487330675125122, "logits/rejected": 1.2552706003189087, "loss": 5.0999, "step": 406 }, { "beta_dpo/beta_used": 0.018556706607341766, "beta_dpo/beta_used_raw": 0.006646966561675072, "beta_dpo/gap_mean": 23.741344451904297, "beta_dpo/gap_std": 42.31064987182617, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.8523560209424084, "grad_norm": 77.14202880859375, "learning_rate": 3.303741016635614e-08, "logits/chosen": 1.38568913936615, "logits/rejected": 1.1631001234054565, "loss": 4.6002, "step": 407 }, { "beta_dpo/beta_used": 0.04947693645954132, "beta_dpo/beta_used_raw": 0.024193253368139267, "beta_dpo/gap_mean": 23.99530029296875, "beta_dpo/gap_std": 40.86692810058594, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8544502617801047, "grad_norm": 144.13487243652344, "learning_rate": 3.2133664782169944e-08, "logits/chosen": 1.0143358707427979, "logits/rejected": 1.08698308467865, "loss": 4.4916, "step": 408 }, { "beta_dpo/beta_used": 0.01759941130876541, "beta_dpo/beta_used_raw": -0.006128270179033279, "beta_dpo/gap_mean": 25.6751708984375, "beta_dpo/gap_std": 40.675594329833984, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.856544502617801, "grad_norm": 66.57832336425781, "learning_rate": 3.12416029083514e-08, "logits/chosen": 1.6948835849761963, "logits/rejected": 1.8402390480041504, "loss": 4.5883, "step": 409 }, { "beta_dpo/beta_used": 0.032623328268527985, "beta_dpo/beta_used_raw": 0.020593255758285522, "beta_dpo/gap_mean": 23.831777572631836, "beta_dpo/gap_std": 41.50251770019531, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8586387434554974, "grad_norm": 108.39352416992188, "learning_rate": 3.036127238347164e-08, "logits/chosen": 1.7509747743606567, "logits/rejected": 1.7223472595214844, "loss": 4.1702, "step": 410 }, { "beta_dpo/beta_used": 0.058568619191646576, "beta_dpo/beta_used_raw": 0.03209678828716278, "beta_dpo/gap_mean": 26.16048812866211, "beta_dpo/gap_std": 41.54467010498047, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8607329842931937, "grad_norm": 156.947265625, "learning_rate": 2.9492720416985e-08, "logits/chosen": 1.5110323429107666, "logits/rejected": 1.5965254306793213, "loss": 3.4559, "step": 411 }, { "beta_dpo/beta_used": 0.023946017026901245, "beta_dpo/beta_used_raw": 0.0037475526332855225, "beta_dpo/gap_mean": 25.7176456451416, "beta_dpo/gap_std": 42.220760345458984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.86282722513089, "grad_norm": 45.27512741088867, "learning_rate": 2.863599358669755e-08, "logits/chosen": 1.275376796722412, "logits/rejected": 1.481441855430603, "loss": 4.4762, "step": 412 }, { "beta_dpo/beta_used": 0.034958455711603165, "beta_dpo/beta_used_raw": 0.017024677246809006, "beta_dpo/gap_mean": 23.186616897583008, "beta_dpo/gap_std": 41.46014404296875, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8649214659685864, "grad_norm": 124.6803970336914, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 1.6735713481903076, "logits/rejected": 1.6593836545944214, "loss": 4.0813, "step": 413 }, { "beta_dpo/beta_used": 0.026967719197273254, "beta_dpo/beta_used_raw": -0.0016478030011057854, "beta_dpo/gap_mean": 23.66002655029297, "beta_dpo/gap_std": 41.970882415771484, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8670157068062827, "grad_norm": 153.2272491455078, "learning_rate": 2.6958198472749717e-08, "logits/chosen": 1.6639155149459839, "logits/rejected": 1.536154866218567, "loss": 4.332, "step": 414 }, { "beta_dpo/beta_used": 0.04223136603832245, "beta_dpo/beta_used_raw": 0.038000062108039856, "beta_dpo/gap_mean": 25.557510375976562, "beta_dpo/gap_std": 42.886444091796875, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8691099476439791, "grad_norm": 165.83090209960938, "learning_rate": 2.613722016414943e-08, "logits/chosen": 1.1066584587097168, "logits/rejected": 1.1601117849349976, "loss": 4.1273, "step": 415 }, { "beta_dpo/beta_used": 0.028374191373586655, "beta_dpo/beta_used_raw": 0.01894223876297474, "beta_dpo/gap_mean": 28.670167922973633, "beta_dpo/gap_std": 42.47052001953125, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8712041884816754, "grad_norm": 66.93905639648438, "learning_rate": 2.5328246937043525e-08, "logits/chosen": 1.5560580492019653, "logits/rejected": 1.6145976781845093, "loss": 3.9753, "step": 416 }, { "beta_dpo/beta_used": 0.043879032135009766, "beta_dpo/beta_used_raw": 0.02182396501302719, "beta_dpo/gap_mean": 26.690717697143555, "beta_dpo/gap_std": 41.90580368041992, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8732984293193717, "grad_norm": 92.42415618896484, "learning_rate": 2.4531322174210973e-08, "logits/chosen": 1.2475701570510864, "logits/rejected": 1.3210117816925049, "loss": 4.1788, "step": 417 }, { "beta_dpo/beta_used": 0.03364454209804535, "beta_dpo/beta_used_raw": 0.005448690615594387, "beta_dpo/gap_mean": 25.629501342773438, "beta_dpo/gap_std": 40.84889602661133, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.875392670157068, "grad_norm": 60.8049430847168, "learning_rate": 2.3746488612308295e-08, "logits/chosen": 1.3086042404174805, "logits/rejected": 1.1799873113632202, "loss": 3.8843, "step": 418 }, { "beta_dpo/beta_used": 0.040316130965948105, "beta_dpo/beta_used_raw": 0.024059785529971123, "beta_dpo/gap_mean": 29.127347946166992, "beta_dpo/gap_std": 42.379608154296875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8774869109947644, "grad_norm": 65.74553680419922, "learning_rate": 2.297378833957761e-08, "logits/chosen": 1.9729444980621338, "logits/rejected": 1.894222617149353, "loss": 3.9346, "step": 419 }, { "beta_dpo/beta_used": 0.030392050743103027, "beta_dpo/beta_used_raw": 0.015165509656071663, "beta_dpo/gap_mean": 29.2987060546875, "beta_dpo/gap_std": 43.514549255371094, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8795811518324608, "grad_norm": 112.77594757080078, "learning_rate": 2.2213262793589482e-08, "logits/chosen": 1.2061651945114136, "logits/rejected": 1.2414170503616333, "loss": 4.1674, "step": 420 }, { "beta_dpo/beta_used": 0.037078239023685455, "beta_dpo/beta_used_raw": 0.006579352542757988, "beta_dpo/gap_mean": 30.2874698638916, "beta_dpo/gap_std": 41.12751007080078, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.881675392670157, "grad_norm": 50.55178451538086, "learning_rate": 2.1464952759020856e-08, "logits/chosen": 1.381372332572937, "logits/rejected": 1.1805065870285034, "loss": 3.5599, "step": 421 }, { "beta_dpo/beta_used": 0.027763448655605316, "beta_dpo/beta_used_raw": 0.0037402785383164883, "beta_dpo/gap_mean": 26.626432418823242, "beta_dpo/gap_std": 42.52971649169922, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8837696335078534, "grad_norm": 80.29391479492188, "learning_rate": 2.07288983654679e-08, "logits/chosen": 1.6077336072921753, "logits/rejected": 1.651180624961853, "loss": 4.4944, "step": 422 }, { "beta_dpo/beta_used": 0.03539786487817764, "beta_dpo/beta_used_raw": 0.004768058191984892, "beta_dpo/gap_mean": 26.751209259033203, "beta_dpo/gap_std": 42.32147979736328, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.8858638743455497, "grad_norm": 90.14205169677734, "learning_rate": 2.0005139085293942e-08, "logits/chosen": 1.4197824001312256, "logits/rejected": 1.5385533571243286, "loss": 4.5795, "step": 423 }, { "beta_dpo/beta_used": 0.01704780012369156, "beta_dpo/beta_used_raw": 0.012394540943205357, "beta_dpo/gap_mean": 27.506437301635742, "beta_dpo/gap_std": 42.84564208984375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8879581151832461, "grad_norm": 52.7910041809082, "learning_rate": 1.9293713731512673e-08, "logits/chosen": 1.3633639812469482, "logits/rejected": 1.1960315704345703, "loss": 4.4306, "step": 424 }, { "beta_dpo/beta_used": 0.0063092270866036415, "beta_dpo/beta_used_raw": -0.041274845600128174, "beta_dpo/gap_mean": 27.02210807800293, "beta_dpo/gap_std": 40.46715545654297, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.8900523560209425, "grad_norm": 22.821779251098633, "learning_rate": 1.8594660455706763e-08, "logits/chosen": 1.476675033569336, "logits/rejected": 1.6865489482879639, "loss": 4.8895, "step": 425 }, { "beta_dpo/beta_used": 0.033457279205322266, "beta_dpo/beta_used_raw": 0.02916746772825718, "beta_dpo/gap_mean": 24.161306381225586, "beta_dpo/gap_std": 39.77753448486328, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.8921465968586387, "grad_norm": 88.06718444824219, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 1.2509461641311646, "logits/rejected": 1.4100229740142822, "loss": 3.9195, "step": 426 }, { "beta_dpo/beta_used": 0.04828907176852226, "beta_dpo/beta_used_raw": 0.03954368457198143, "beta_dpo/gap_mean": 27.65555191040039, "beta_dpo/gap_std": 40.21341323852539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8942408376963351, "grad_norm": 90.2916030883789, "learning_rate": 1.7233819424956247e-08, "logits/chosen": 1.3937939405441284, "logits/rejected": 1.3810914754867554, "loss": 3.5748, "step": 427 }, { "beta_dpo/beta_used": 0.04655870795249939, "beta_dpo/beta_used_raw": 0.024588048458099365, "beta_dpo/gap_mean": 32.9439582824707, "beta_dpo/gap_std": 39.263301849365234, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8963350785340314, "grad_norm": 68.10398864746094, "learning_rate": 1.6572104647786245e-08, "logits/chosen": 1.752288818359375, "logits/rejected": 1.9130034446716309, "loss": 3.5059, "step": 428 }, { "beta_dpo/beta_used": 0.02215776965022087, "beta_dpo/beta_used_raw": -0.016678031533956528, "beta_dpo/gap_mean": 31.625703811645508, "beta_dpo/gap_std": 43.56167984008789, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8984293193717278, "grad_norm": 60.726661682128906, "learning_rate": 1.5922907900227017e-08, "logits/chosen": 1.458854079246521, "logits/rejected": 1.4256439208984375, "loss": 4.6525, "step": 429 }, { "beta_dpo/beta_used": 0.02091900259256363, "beta_dpo/beta_used_raw": 0.0070409020408988, "beta_dpo/gap_mean": 27.687143325805664, "beta_dpo/gap_std": 44.989070892333984, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.900523560209424, "grad_norm": 140.8614959716797, "learning_rate": 1.5286263996730026e-08, "logits/chosen": 1.4701473712921143, "logits/rejected": 1.5857133865356445, "loss": 4.5919, "step": 430 }, { "beta_dpo/beta_used": 0.014125513844192028, "beta_dpo/beta_used_raw": -0.018212314695119858, "beta_dpo/gap_mean": 24.097793579101562, "beta_dpo/gap_std": 43.06412124633789, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9026178010471204, "grad_norm": 114.83171081542969, "learning_rate": 1.4662207078575684e-08, "logits/chosen": 1.7383248805999756, "logits/rejected": 1.805346965789795, "loss": 4.965, "step": 431 }, { "beta_dpo/beta_used": 0.032197486609220505, "beta_dpo/beta_used_raw": 0.023590974509716034, "beta_dpo/gap_mean": 26.495365142822266, "beta_dpo/gap_std": 43.16999435424805, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9047120418848168, "grad_norm": 93.71367645263672, "learning_rate": 1.40507706120426e-08, "logits/chosen": 1.4706007242202759, "logits/rejected": 1.6791198253631592, "loss": 4.1943, "step": 432 }, { "beta_dpo/beta_used": 0.029206298291683197, "beta_dpo/beta_used_raw": 0.02880963124334812, "beta_dpo/gap_mean": 24.425756454467773, "beta_dpo/gap_std": 42.32783889770508, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9068062827225131, "grad_norm": 79.65400695800781, "learning_rate": 1.345198738661285e-08, "logits/chosen": 1.5126326084136963, "logits/rejected": 1.4506518840789795, "loss": 4.3461, "step": 433 }, { "beta_dpo/beta_used": 0.04482489451766014, "beta_dpo/beta_used_raw": 0.019631531089544296, "beta_dpo/gap_mean": 23.61885643005371, "beta_dpo/gap_std": 41.121665954589844, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9089005235602095, "grad_norm": 76.20455169677734, "learning_rate": 1.2865889513213628e-08, "logits/chosen": 1.9426430463790894, "logits/rejected": 1.9414358139038086, "loss": 3.626, "step": 434 }, { "beta_dpo/beta_used": 0.024887006729841232, "beta_dpo/beta_used_raw": 0.01590941660106182, "beta_dpo/gap_mean": 23.983257293701172, "beta_dpo/gap_std": 40.91677474975586, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9109947643979057, "grad_norm": 117.16897583007812, "learning_rate": 1.2292508422495157e-08, "logits/chosen": 1.6585721969604492, "logits/rejected": 1.773654580116272, "loss": 4.7233, "step": 435 }, { "beta_dpo/beta_used": 0.022081829607486725, "beta_dpo/beta_used_raw": -0.0030337003991007805, "beta_dpo/gap_mean": 21.94788932800293, "beta_dpo/gap_std": 40.543338775634766, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9130890052356021, "grad_norm": 41.44011688232422, "learning_rate": 1.1731874863145142e-08, "logits/chosen": 1.3716554641723633, "logits/rejected": 1.4048748016357422, "loss": 4.5878, "step": 436 }, { "beta_dpo/beta_used": 0.03096182271838188, "beta_dpo/beta_used_raw": 0.029562827199697495, "beta_dpo/gap_mean": 23.157291412353516, "beta_dpo/gap_std": 40.46465301513672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9151832460732985, "grad_norm": 81.40292358398438, "learning_rate": 1.118401890024001e-08, "logits/chosen": 1.6667184829711914, "logits/rejected": 1.8092567920684814, "loss": 4.1753, "step": 437 }, { "beta_dpo/beta_used": 0.00928124412894249, "beta_dpo/beta_used_raw": -0.024261336773633957, "beta_dpo/gap_mean": 20.033138275146484, "beta_dpo/gap_std": 41.23052215576172, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9172774869109948, "grad_norm": 50.23611068725586, "learning_rate": 1.06489699136324e-08, "logits/chosen": 1.3478763103485107, "logits/rejected": 1.4908018112182617, "loss": 5.1895, "step": 438 }, { "beta_dpo/beta_used": 0.04591372609138489, "beta_dpo/beta_used_raw": 0.04151216149330139, "beta_dpo/gap_mean": 19.75481414794922, "beta_dpo/gap_std": 41.36615753173828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9193717277486911, "grad_norm": 145.0548095703125, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 1.5163558721542358, "logits/rejected": 1.5085352659225464, "loss": 3.9243, "step": 439 }, { "beta_dpo/beta_used": 0.014184126630425453, "beta_dpo/beta_used_raw": -0.008172026835381985, "beta_dpo/gap_mean": 20.215518951416016, "beta_dpo/gap_std": 39.6240119934082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9214659685863874, "grad_norm": 50.933837890625, "learning_rate": 9.617406953185136e-09, "logits/chosen": 1.4577587842941284, "logits/rejected": 1.234389305114746, "loss": 4.9376, "step": 440 }, { "beta_dpo/beta_used": 0.048469383269548416, "beta_dpo/beta_used_raw": 0.0424063466489315, "beta_dpo/gap_mean": 22.741992950439453, "beta_dpo/gap_std": 39.93981170654297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9235602094240838, "grad_norm": 101.18359375, "learning_rate": 9.12094829893642e-09, "logits/chosen": 1.7504223585128784, "logits/rejected": 1.9641519784927368, "loss": 4.1214, "step": 441 }, { "beta_dpo/beta_used": 0.03708556294441223, "beta_dpo/beta_used_raw": 0.02750963345170021, "beta_dpo/gap_mean": 24.97802734375, "beta_dpo/gap_std": 41.040199279785156, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9256544502617801, "grad_norm": 100.34196472167969, "learning_rate": 8.637407257200496e-09, "logits/chosen": 1.3330552577972412, "logits/rejected": 1.4373996257781982, "loss": 4.1536, "step": 442 }, { "beta_dpo/beta_used": 0.04446953535079956, "beta_dpo/beta_used_raw": 0.022015634924173355, "beta_dpo/gap_mean": 22.627042770385742, "beta_dpo/gap_std": 41.79437255859375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9277486910994764, "grad_norm": 65.15979766845703, "learning_rate": 8.166809758815895e-09, "logits/chosen": 1.2715387344360352, "logits/rejected": 1.2342997789382935, "loss": 3.7321, "step": 443 }, { "beta_dpo/beta_used": 0.014596132561564445, "beta_dpo/beta_used_raw": -0.007604743354022503, "beta_dpo/gap_mean": 24.320253372192383, "beta_dpo/gap_std": 41.13831329345703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9298429319371728, "grad_norm": 47.09414291381836, "learning_rate": 7.709181040498253e-09, "logits/chosen": 1.0621271133422852, "logits/rejected": 1.241407871246338, "loss": 4.861, "step": 444 }, { "beta_dpo/beta_used": 0.02850104495882988, "beta_dpo/beta_used_raw": 0.016521329060196877, "beta_dpo/gap_mean": 22.053783416748047, "beta_dpo/gap_std": 42.03921890258789, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9319371727748691, "grad_norm": 111.25325775146484, "learning_rate": 7.2645456434869965e-09, "logits/chosen": 1.5844391584396362, "logits/rejected": 1.637407898902893, "loss": 4.4202, "step": 445 }, { "beta_dpo/beta_used": 0.029594026505947113, "beta_dpo/beta_used_raw": 0.0185114536434412, "beta_dpo/gap_mean": 24.767749786376953, "beta_dpo/gap_std": 41.35893249511719, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9340314136125655, "grad_norm": 41.6215705871582, "learning_rate": 6.832927412229017e-09, "logits/chosen": 1.4550718069076538, "logits/rejected": 1.433241367340088, "loss": 4.1939, "step": 446 }, { "beta_dpo/beta_used": 0.03615984693169594, "beta_dpo/beta_used_raw": 0.03083086758852005, "beta_dpo/gap_mean": 28.0212345123291, "beta_dpo/gap_std": 39.88979721069336, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9361256544502617, "grad_norm": 43.32276153564453, "learning_rate": 6.414349493100129e-09, "logits/chosen": 1.5409138202667236, "logits/rejected": 1.6101213693618774, "loss": 3.7742, "step": 447 }, { "beta_dpo/beta_used": 0.031161731109023094, "beta_dpo/beta_used_raw": 0.012630118057131767, "beta_dpo/gap_mean": 27.05018424987793, "beta_dpo/gap_std": 40.15449905395508, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9382198952879581, "grad_norm": 235.60301208496094, "learning_rate": 6.0088343331638756e-09, "logits/chosen": 1.854709506034851, "logits/rejected": 1.8700783252716064, "loss": 4.4011, "step": 448 }, { "beta_dpo/beta_used": 0.035951972007751465, "beta_dpo/beta_used_raw": 0.021076416596770287, "beta_dpo/gap_mean": 26.136516571044922, "beta_dpo/gap_std": 39.963043212890625, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9403141361256544, "grad_norm": 100.77395629882812, "learning_rate": 5.616403678967624e-09, "logits/chosen": 2.0368571281433105, "logits/rejected": 1.7351016998291016, "loss": 3.8561, "step": 449 }, { "beta_dpo/beta_used": 0.01658363826572895, "beta_dpo/beta_used_raw": -0.019273536279797554, "beta_dpo/gap_mean": 25.731136322021484, "beta_dpo/gap_std": 40.702030181884766, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9424083769633508, "grad_norm": 41.71562957763672, "learning_rate": 5.2370785753763356e-09, "logits/chosen": 1.7945507764816284, "logits/rejected": 1.5377925634384155, "loss": 4.7532, "step": 450 }, { "beta_dpo/beta_used": 0.030548732727766037, "beta_dpo/beta_used_raw": 0.022728927433490753, "beta_dpo/gap_mean": 24.457050323486328, "beta_dpo/gap_std": 39.438201904296875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9445026178010472, "grad_norm": 84.55509948730469, "learning_rate": 4.8708793644441086e-09, "logits/chosen": 1.5343233346939087, "logits/rejected": 1.6422300338745117, "loss": 4.0291, "step": 451 }, { "beta_dpo/beta_used": 0.022664647549390793, "beta_dpo/beta_used_raw": -0.009222008287906647, "beta_dpo/gap_mean": 25.828996658325195, "beta_dpo/gap_std": 41.49300003051758, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9465968586387434, "grad_norm": 116.09254455566406, "learning_rate": 4.517825684323323e-09, "logits/chosen": 1.4695273637771606, "logits/rejected": 1.6382958889007568, "loss": 4.6374, "step": 452 }, { "beta_dpo/beta_used": 0.026332221925258636, "beta_dpo/beta_used_raw": 0.0258626826107502, "beta_dpo/gap_mean": 24.971637725830078, "beta_dpo/gap_std": 39.16703414916992, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9486910994764398, "grad_norm": 85.11585998535156, "learning_rate": 4.1779364682113794e-09, "logits/chosen": 1.7189387083053589, "logits/rejected": 1.8478630781173706, "loss": 4.0201, "step": 453 }, { "beta_dpo/beta_used": 0.02039419114589691, "beta_dpo/beta_used_raw": 0.003133818507194519, "beta_dpo/gap_mean": 25.356918334960938, "beta_dpo/gap_std": 39.97523498535156, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9507853403141361, "grad_norm": 47.996421813964844, "learning_rate": 3.851229943335393e-09, "logits/chosen": 2.0254852771759033, "logits/rejected": 1.9557225704193115, "loss": 4.2785, "step": 454 }, { "beta_dpo/beta_used": 0.014043524861335754, "beta_dpo/beta_used_raw": -0.01924164779484272, "beta_dpo/gap_mean": 24.39451789855957, "beta_dpo/gap_std": 40.95219039916992, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.9528795811518325, "grad_norm": 79.08866882324219, "learning_rate": 3.5377236299748147e-09, "logits/chosen": 1.5097756385803223, "logits/rejected": 1.603163242340088, "loss": 4.8423, "step": 455 }, { "beta_dpo/beta_used": 0.05385340750217438, "beta_dpo/beta_used_raw": 0.029043981805443764, "beta_dpo/gap_mean": 25.243539810180664, "beta_dpo/gap_std": 42.33509063720703, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9549738219895288, "grad_norm": 108.52057647705078, "learning_rate": 3.2374343405217884e-09, "logits/chosen": 1.6896770000457764, "logits/rejected": 1.829254150390625, "loss": 4.0501, "step": 456 }, { "beta_dpo/beta_used": 0.04148964211344719, "beta_dpo/beta_used_raw": 0.018075397238135338, "beta_dpo/gap_mean": 27.367046356201172, "beta_dpo/gap_std": 43.94456100463867, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.9570680628272251, "grad_norm": 287.26763916015625, "learning_rate": 2.9503781785795713e-09, "logits/chosen": 1.5245857238769531, "logits/rejected": 1.4000697135925293, "loss": 4.0654, "step": 457 }, { "beta_dpo/beta_used": 0.011880859732627869, "beta_dpo/beta_used_raw": -0.01639743149280548, "beta_dpo/gap_mean": 25.620864868164062, "beta_dpo/gap_std": 41.843963623046875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9591623036649215, "grad_norm": 45.80873107910156, "learning_rate": 2.6765705380989432e-09, "logits/chosen": 1.518320918083191, "logits/rejected": 1.3533384799957275, "loss": 4.969, "step": 458 }, { "beta_dpo/beta_used": 0.052308086305856705, "beta_dpo/beta_used_raw": 0.024883100762963295, "beta_dpo/gap_mean": 23.730758666992188, "beta_dpo/gap_std": 41.868125915527344, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.9612565445026178, "grad_norm": 121.35041809082031, "learning_rate": 2.416026102552732e-09, "logits/chosen": 1.4219530820846558, "logits/rejected": 1.2508901357650757, "loss": 3.466, "step": 459 }, { "beta_dpo/beta_used": 0.017205236479640007, "beta_dpo/beta_used_raw": -0.0033456708770245314, "beta_dpo/gap_mean": 22.880821228027344, "beta_dpo/gap_std": 45.12669372558594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9633507853403142, "grad_norm": 73.71929931640625, "learning_rate": 2.168758844148272e-09, "logits/chosen": 1.3608553409576416, "logits/rejected": 1.3055371046066284, "loss": 5.0311, "step": 460 }, { "beta_dpo/beta_used": 0.04041147232055664, "beta_dpo/beta_used_raw": 0.029882332310080528, "beta_dpo/gap_mean": 22.926301956176758, "beta_dpo/gap_std": 44.20081329345703, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.9654450261780104, "grad_norm": 86.9037094116211, "learning_rate": 1.9347820230782295e-09, "logits/chosen": 1.735243797302246, "logits/rejected": 1.66280996799469, "loss": 3.8386, "step": 461 }, { "beta_dpo/beta_used": 0.03556675463914871, "beta_dpo/beta_used_raw": 0.027323313057422638, "beta_dpo/gap_mean": 25.144573211669922, "beta_dpo/gap_std": 43.731327056884766, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9675392670157068, "grad_norm": 159.07362365722656, "learning_rate": 1.7141081868094209e-09, "logits/chosen": 1.5209287405014038, "logits/rejected": 1.4356799125671387, "loss": 4.4081, "step": 462 }, { "beta_dpo/beta_used": 0.03522716090083122, "beta_dpo/beta_used_raw": 0.009717161767184734, "beta_dpo/gap_mean": 25.50450325012207, "beta_dpo/gap_std": 42.545188903808594, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9696335078534032, "grad_norm": 87.98490142822266, "learning_rate": 1.5067491694100153e-09, "logits/chosen": 1.5676113367080688, "logits/rejected": 1.6250090599060059, "loss": 3.8654, "step": 463 }, { "beta_dpo/beta_used": 0.021527249366044998, "beta_dpo/beta_used_raw": 0.011957229115068913, "beta_dpo/gap_mean": 24.842899322509766, "beta_dpo/gap_std": 42.1388053894043, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9717277486910995, "grad_norm": 79.93045043945312, "learning_rate": 1.3127160909147672e-09, "logits/chosen": 1.8131260871887207, "logits/rejected": 1.744214653968811, "loss": 4.3541, "step": 464 }, { "beta_dpo/beta_used": 0.03722041845321655, "beta_dpo/beta_used_raw": 0.016623277217149734, "beta_dpo/gap_mean": 26.415016174316406, "beta_dpo/gap_std": 41.290672302246094, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9738219895287958, "grad_norm": 76.404541015625, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 1.4614487886428833, "logits/rejected": 1.4553896188735962, "loss": 4.0587, "step": 465 }, { "beta_dpo/beta_used": 0.0460047721862793, "beta_dpo/beta_used_raw": 0.03773031011223793, "beta_dpo/gap_mean": 28.092792510986328, "beta_dpo/gap_std": 40.66791534423828, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9759162303664921, "grad_norm": 89.3587417602539, "learning_rate": 9.64668657069706e-10, "logits/chosen": 1.3052603006362915, "logits/rejected": 1.347874641418457, "loss": 3.221, "step": 466 }, { "beta_dpo/beta_used": 0.02018456533551216, "beta_dpo/beta_used_raw": -0.0011910395696759224, "beta_dpo/gap_mean": 25.53974151611328, "beta_dpo/gap_std": 40.64295196533203, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9780104712041885, "grad_norm": 181.21160888671875, "learning_rate": 8.106729664475176e-10, "logits/chosen": 0.9222959876060486, "logits/rejected": 1.1561161279678345, "loss": 5.0134, "step": 467 }, { "beta_dpo/beta_used": 0.015501348301768303, "beta_dpo/beta_used_raw": -0.027839092537760735, "beta_dpo/gap_mean": 24.239940643310547, "beta_dpo/gap_std": 40.417659759521484, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9801047120418848, "grad_norm": 32.802974700927734, "learning_rate": 6.700405431837585e-10, "logits/chosen": 1.68427312374115, "logits/rejected": 1.4638608694076538, "loss": 4.8736, "step": 468 }, { "beta_dpo/beta_used": 0.043932512402534485, "beta_dpo/beta_used_raw": 0.02907262183725834, "beta_dpo/gap_mean": 23.42894744873047, "beta_dpo/gap_std": 40.17053985595703, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9821989528795811, "grad_norm": 74.9969482421875, "learning_rate": 5.427789289685347e-10, "logits/chosen": 1.6762428283691406, "logits/rejected": 1.6395068168640137, "loss": 3.908, "step": 469 }, { "beta_dpo/beta_used": 0.05348680168390274, "beta_dpo/beta_used_raw": 0.047200098633766174, "beta_dpo/gap_mean": 26.360820770263672, "beta_dpo/gap_std": 41.91456985473633, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9842931937172775, "grad_norm": 92.29603576660156, "learning_rate": 4.288949484559934e-10, "logits/chosen": 0.9740282297134399, "logits/rejected": 0.9412952065467834, "loss": 3.2812, "step": 470 }, { "beta_dpo/beta_used": 0.011362526565790176, "beta_dpo/beta_used_raw": -0.002826599171385169, "beta_dpo/gap_mean": 26.84084701538086, "beta_dpo/gap_std": 42.06930160522461, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9863874345549738, "grad_norm": 39.85667419433594, "learning_rate": 3.2839470889836627e-10, "logits/chosen": 1.6476500034332275, "logits/rejected": 1.6063101291656494, "loss": 4.6968, "step": 471 }, { "beta_dpo/beta_used": 0.04099735617637634, "beta_dpo/beta_used_raw": 0.03528433293104172, "beta_dpo/gap_mean": 27.509807586669922, "beta_dpo/gap_std": 42.573822021484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9884816753926702, "grad_norm": 135.5849609375, "learning_rate": 2.412835998185092e-10, "logits/chosen": 1.3469210863113403, "logits/rejected": 1.4127790927886963, "loss": 3.8637, "step": 472 }, { "beta_dpo/beta_used": 0.020598269999027252, "beta_dpo/beta_used_raw": 0.011284598149359226, "beta_dpo/gap_mean": 26.61202621459961, "beta_dpo/gap_std": 42.61575698852539, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9905759162303664, "grad_norm": 39.22035598754883, "learning_rate": 1.6756629272085544e-10, "logits/chosen": 1.4856796264648438, "logits/rejected": 1.2598925828933716, "loss": 4.3459, "step": 473 }, { "beta_dpo/beta_used": 0.015516398474574089, "beta_dpo/beta_used_raw": -0.019677024334669113, "beta_dpo/gap_mean": 27.40287971496582, "beta_dpo/gap_std": 42.025856018066406, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9926701570680628, "grad_norm": 34.569698333740234, "learning_rate": 1.072467408408384e-10, "logits/chosen": 1.5088553428649902, "logits/rejected": 1.615687370300293, "loss": 4.6458, "step": 474 }, { "beta_dpo/beta_used": 0.01646936498582363, "beta_dpo/beta_used_raw": -0.006308557000011206, "beta_dpo/gap_mean": 22.261816024780273, "beta_dpo/gap_std": 39.92071533203125, "beta_dpo/mask_keep_frac": 0.5, "epoch": 0.9947643979057592, "grad_norm": 33.08564758300781, "learning_rate": 6.032817893297793e-11, "logits/chosen": 1.1749279499053955, "logits/rejected": 1.2055437564849854, "loss": 4.2627, "step": 475 }, { "beta_dpo/beta_used": 0.027492396533489227, "beta_dpo/beta_used_raw": 0.008521707728505135, "beta_dpo/gap_mean": 23.523109436035156, "beta_dpo/gap_std": 40.176387786865234, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9968586387434555, "grad_norm": 60.566287994384766, "learning_rate": 2.6813123097352287e-11, "logits/chosen": 1.3323711156845093, "logits/rejected": 1.4667065143585205, "loss": 4.2655, "step": 476 }, { "beta_dpo/beta_used": 0.032702527940273285, "beta_dpo/beta_used_raw": 0.020156463608145714, "beta_dpo/gap_mean": 24.190080642700195, "beta_dpo/gap_std": 42.31235885620117, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9989528795811519, "grad_norm": 115.37435150146484, "learning_rate": 6.7033706447061635e-12, "logits/chosen": 1.080468773841858, "logits/rejected": 1.1553194522857666, "loss": 4.2411, "step": 477 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 4.692083022879355, "train_runtime": 7712.5154, "train_samples_per_second": 7.927, "train_steps_per_second": 0.062 } ], "logging_steps": 1, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }