{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta_used": 0.01011180505156517, "beta_dpo/beta_used_raw": 0.01011180505156517, "beta_dpo/gap_mean": -0.015508938580751419, "beta_dpo/gap_std": 0.2148897498846054, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.0020942408376963353, "grad_norm": 34.31053161621094, "learning_rate": 0.0, "logits/chosen": -0.5995081663131714, "logits/rejected": -0.6144353747367859, "loss": 5.5447, "step": 1 }, { "beta_dpo/beta_used": 0.009844036772847176, "beta_dpo/beta_used_raw": 0.009844036772847176, "beta_dpo/gap_mean": -0.0009143210481852293, "beta_dpo/gap_std": 0.4510902464389801, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.004188481675392671, "grad_norm": 29.54327392578125, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -0.6431564688682556, "logits/rejected": -0.5975700616836548, "loss": 5.5466, "step": 2 }, { "beta_dpo/beta_used": 0.010173876769840717, "beta_dpo/beta_used_raw": 0.010173876769840717, "beta_dpo/gap_mean": -0.016529276967048645, "beta_dpo/gap_std": 0.5596910119056702, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0062827225130890054, "grad_norm": 29.85909652709961, "learning_rate": 2.083333333333333e-08, "logits/chosen": -0.6880007982254028, "logits/rejected": -0.7442882061004639, "loss": 5.5438, "step": 3 }, { "beta_dpo/beta_used": 0.010584751144051552, "beta_dpo/beta_used_raw": 0.010584751144051552, "beta_dpo/gap_mean": -0.009412091225385666, "beta_dpo/gap_std": 0.690794050693512, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.008376963350785341, "grad_norm": 38.64099884033203, "learning_rate": 3.125e-08, "logits/chosen": -0.6261060833930969, "logits/rejected": -0.5069095492362976, "loss": 5.5411, "step": 4 }, { "beta_dpo/beta_used": 0.009799078106880188, "beta_dpo/beta_used_raw": 0.009799078106880188, "beta_dpo/gap_mean": 0.02601781114935875, "beta_dpo/gap_std": 0.7904683947563171, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.010471204188481676, "grad_norm": 36.012081146240234, "learning_rate": 4.166666666666666e-08, "logits/chosen": -0.5312447547912598, "logits/rejected": -0.5814427137374878, "loss": 5.5449, "step": 5 }, { "beta_dpo/beta_used": 0.009586527943611145, "beta_dpo/beta_used_raw": 0.009586527943611145, "beta_dpo/gap_mean": 0.041127197444438934, "beta_dpo/gap_std": 0.8036903738975525, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.012565445026178011, "grad_norm": 30.233118057250977, "learning_rate": 5.208333333333333e-08, "logits/chosen": -0.6583905220031738, "logits/rejected": -0.656255304813385, "loss": 5.5456, "step": 6 }, { "beta_dpo/beta_used": 0.010109594091773033, "beta_dpo/beta_used_raw": 0.010109594091773033, "beta_dpo/gap_mean": 0.05177360400557518, "beta_dpo/gap_std": 0.7368500232696533, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.014659685863874346, "grad_norm": 33.09341812133789, "learning_rate": 6.25e-08, "logits/chosen": -0.5148481726646423, "logits/rejected": -0.5897587537765503, "loss": 5.5416, "step": 7 }, { "beta_dpo/beta_used": 0.010191082023084164, "beta_dpo/beta_used_raw": 0.010191082023084164, "beta_dpo/gap_mean": 0.01677882857620716, "beta_dpo/gap_std": 0.7229223847389221, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.016753926701570682, "grad_norm": 35.61125564575195, "learning_rate": 7.291666666666667e-08, "logits/chosen": -0.7006567716598511, "logits/rejected": -0.7195206880569458, "loss": 5.5429, "step": 8 }, { "beta_dpo/beta_used": 0.009976114146411419, "beta_dpo/beta_used_raw": 0.009976114146411419, "beta_dpo/gap_mean": 0.020590361207723618, "beta_dpo/gap_std": 0.7182962894439697, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.018848167539267015, "grad_norm": 28.307985305786133, "learning_rate": 8.333333333333333e-08, "logits/chosen": -0.6901550889015198, "logits/rejected": -0.6974665522575378, "loss": 5.5439, "step": 9 }, { "beta_dpo/beta_used": 0.009834789671003819, "beta_dpo/beta_used_raw": 0.009834789671003819, "beta_dpo/gap_mean": 0.01076302770525217, "beta_dpo/gap_std": 0.699016809463501, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.020942408376963352, "grad_norm": 28.891916275024414, "learning_rate": 9.375e-08, "logits/chosen": -0.6282883882522583, "logits/rejected": -0.6301394701004028, "loss": 5.5458, "step": 10 }, { "beta_dpo/beta_used": 0.009896289557218552, "beta_dpo/beta_used_raw": 0.009896289557218552, "beta_dpo/gap_mean": -0.03149949014186859, "beta_dpo/gap_std": 0.6834414005279541, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.023036649214659685, "grad_norm": 33.830101013183594, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -0.5225973129272461, "logits/rejected": -0.6075971126556396, "loss": 5.5463, "step": 11 }, { "beta_dpo/beta_used": 0.010411511175334454, "beta_dpo/beta_used_raw": 0.010411511175334454, "beta_dpo/gap_mean": 0.003659537062048912, "beta_dpo/gap_std": 0.6871599555015564, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.025130890052356022, "grad_norm": 35.04637145996094, "learning_rate": 1.1458333333333332e-07, "logits/chosen": -0.6008322834968567, "logits/rejected": -0.5699715614318848, "loss": 5.5394, "step": 12 }, { "beta_dpo/beta_used": 0.009875054471194744, "beta_dpo/beta_used_raw": 0.009875054471194744, "beta_dpo/gap_mean": 0.05279437080025673, "beta_dpo/gap_std": 0.6677561402320862, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.027225130890052355, "grad_norm": 31.19098472595215, "learning_rate": 1.25e-07, "logits/chosen": -0.7021859288215637, "logits/rejected": -0.6853169202804565, "loss": 5.5435, "step": 13 }, { "beta_dpo/beta_used": 0.009974612854421139, "beta_dpo/beta_used_raw": 0.009974612854421139, "beta_dpo/gap_mean": 0.024167632684111595, "beta_dpo/gap_std": 0.6448996663093567, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.02931937172774869, "grad_norm": 31.935443878173828, "learning_rate": 1.3541666666666666e-07, "logits/chosen": -0.5705533027648926, "logits/rejected": -0.6388446688652039, "loss": 5.5451, "step": 14 }, { "beta_dpo/beta_used": 0.010165886022150517, "beta_dpo/beta_used_raw": 0.010165886022150517, "beta_dpo/gap_mean": 0.050552304834127426, "beta_dpo/gap_std": 0.682822585105896, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.031413612565445025, "grad_norm": 35.0179443359375, "learning_rate": 1.4583333333333335e-07, "logits/chosen": -0.6065237522125244, "logits/rejected": -0.6314604878425598, "loss": 5.5405, "step": 15 }, { "beta_dpo/beta_used": 0.009956092573702335, "beta_dpo/beta_used_raw": 0.009956092573702335, "beta_dpo/gap_mean": 0.07386220246553421, "beta_dpo/gap_std": 0.705920934677124, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.033507853403141365, "grad_norm": 33.774627685546875, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -0.6334318518638611, "logits/rejected": -0.6558720469474792, "loss": 5.5422, "step": 16 }, { "beta_dpo/beta_used": 0.009694953449070454, "beta_dpo/beta_used_raw": 0.009694953449070454, "beta_dpo/gap_mean": 0.004169606603682041, "beta_dpo/gap_std": 0.7264626622200012, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.0356020942408377, "grad_norm": 33.06454086303711, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -0.5417214632034302, "logits/rejected": -0.5611686110496521, "loss": 5.5471, "step": 17 }, { "beta_dpo/beta_used": 0.010714413598179817, "beta_dpo/beta_used_raw": 0.010714413598179817, "beta_dpo/gap_mean": 0.02533562108874321, "beta_dpo/gap_std": 0.7237865924835205, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03769633507853403, "grad_norm": 37.169307708740234, "learning_rate": 1.7708333333333334e-07, "logits/chosen": -0.4865175485610962, "logits/rejected": -0.5460414886474609, "loss": 5.5388, "step": 18 }, { "beta_dpo/beta_used": 0.01004834845662117, "beta_dpo/beta_used_raw": 0.01004834845662117, "beta_dpo/gap_mean": 0.029139002785086632, "beta_dpo/gap_std": 0.7092792987823486, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.039790575916230364, "grad_norm": 33.33395004272461, "learning_rate": 1.875e-07, "logits/chosen": -0.639908492565155, "logits/rejected": -0.6775057315826416, "loss": 5.5437, "step": 19 }, { "beta_dpo/beta_used": 0.009934858419001102, "beta_dpo/beta_used_raw": 0.009934858419001102, "beta_dpo/gap_mean": 0.03032633848488331, "beta_dpo/gap_std": 0.6968315839767456, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.041884816753926704, "grad_norm": 32.22944259643555, "learning_rate": 1.9791666666666664e-07, "logits/chosen": -0.658079206943512, "logits/rejected": -0.6970005631446838, "loss": 5.5448, "step": 20 }, { "beta_dpo/beta_used": 0.010048963129520416, "beta_dpo/beta_used_raw": 0.010048963129520416, "beta_dpo/gap_mean": 0.06978250294923782, "beta_dpo/gap_std": 0.7305155992507935, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04397905759162304, "grad_norm": 31.048315048217773, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -0.6539341807365417, "logits/rejected": -0.6931516528129578, "loss": 5.5406, "step": 21 }, { "beta_dpo/beta_used": 0.009562183171510696, "beta_dpo/beta_used_raw": 0.009562183171510696, "beta_dpo/gap_mean": 0.05501282587647438, "beta_dpo/gap_std": 0.7383480072021484, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.04607329842931937, "grad_norm": 27.25322723388672, "learning_rate": 2.1875e-07, "logits/chosen": -0.6295111775398254, "logits/rejected": -0.6111897230148315, "loss": 5.546, "step": 22 }, { "beta_dpo/beta_used": 0.009907824918627739, "beta_dpo/beta_used_raw": 0.009907824918627739, "beta_dpo/gap_mean": 0.08610469102859497, "beta_dpo/gap_std": 0.7474377751350403, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.048167539267015703, "grad_norm": 32.43076705932617, "learning_rate": 2.2916666666666663e-07, "logits/chosen": -0.6042340397834778, "logits/rejected": -0.6491126418113708, "loss": 5.5425, "step": 23 }, { "beta_dpo/beta_used": 0.010367114096879959, "beta_dpo/beta_used_raw": 0.010367114096879959, "beta_dpo/gap_mean": 0.154057115316391, "beta_dpo/gap_std": 0.7526560425758362, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.050261780104712044, "grad_norm": 33.83905029296875, "learning_rate": 2.3958333333333335e-07, "logits/chosen": -0.5649707317352295, "logits/rejected": -0.42430925369262695, "loss": 5.5343, "step": 24 }, { "beta_dpo/beta_used": 0.010158861055970192, "beta_dpo/beta_used_raw": 0.010158861055970192, "beta_dpo/gap_mean": 0.19064763188362122, "beta_dpo/gap_std": 0.7487001419067383, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.05235602094240838, "grad_norm": 31.9135684967041, "learning_rate": 2.5e-07, "logits/chosen": -0.4519118368625641, "logits/rejected": -0.46168017387390137, "loss": 5.5363, "step": 25 }, { "beta_dpo/beta_used": 0.00981106236577034, "beta_dpo/beta_used_raw": 0.00981106236577034, "beta_dpo/gap_mean": 0.15975670516490936, "beta_dpo/gap_std": 0.8194867968559265, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05445026178010471, "grad_norm": 31.96481704711914, "learning_rate": 2.604166666666667e-07, "logits/chosen": -0.7263092398643494, "logits/rejected": -0.733163058757782, "loss": 5.5392, "step": 26 }, { "beta_dpo/beta_used": 0.010345407761633396, "beta_dpo/beta_used_raw": 0.010345407761633396, "beta_dpo/gap_mean": 0.17174594104290009, "beta_dpo/gap_std": 0.8231180310249329, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.05654450261780105, "grad_norm": 32.44462966918945, "learning_rate": 2.708333333333333e-07, "logits/chosen": -0.6372715830802917, "logits/rejected": -0.6687661409378052, "loss": 5.5353, "step": 27 }, { "beta_dpo/beta_used": 0.009617293253540993, "beta_dpo/beta_used_raw": 0.009617293253540993, "beta_dpo/gap_mean": 0.17203421890735626, "beta_dpo/gap_std": 0.8581656217575073, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.05863874345549738, "grad_norm": 27.413312911987305, "learning_rate": 2.8125e-07, "logits/chosen": -0.6947358250617981, "logits/rejected": -0.6780796647071838, "loss": 5.5409, "step": 28 }, { "beta_dpo/beta_used": 0.009752588346600533, "beta_dpo/beta_used_raw": 0.009752588346600533, "beta_dpo/gap_mean": 0.170832097530365, "beta_dpo/gap_std": 0.8217583298683167, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.060732984293193716, "grad_norm": 34.61642837524414, "learning_rate": 2.916666666666667e-07, "logits/chosen": -0.6086971163749695, "logits/rejected": -0.5876795649528503, "loss": 5.5375, "step": 29 }, { "beta_dpo/beta_used": 0.009866783395409584, "beta_dpo/beta_used_raw": 0.009866783395409584, "beta_dpo/gap_mean": 0.19807885587215424, "beta_dpo/gap_std": 0.8146649599075317, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.06282722513089005, "grad_norm": 31.527999877929688, "learning_rate": 3.020833333333333e-07, "logits/chosen": -0.5960394144058228, "logits/rejected": -0.5833207964897156, "loss": 5.539, "step": 30 }, { "beta_dpo/beta_used": 0.0096372589468956, "beta_dpo/beta_used_raw": 0.0096372589468956, "beta_dpo/gap_mean": 0.22884601354599, "beta_dpo/gap_std": 0.8796005249023438, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06492146596858639, "grad_norm": 26.304962158203125, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -0.5981181859970093, "logits/rejected": -0.6432889103889465, "loss": 5.5367, "step": 31 }, { "beta_dpo/beta_used": 0.010047816671431065, "beta_dpo/beta_used_raw": 0.010047816671431065, "beta_dpo/gap_mean": 0.2255675345659256, "beta_dpo/gap_std": 0.9126529097557068, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.06701570680628273, "grad_norm": 30.972850799560547, "learning_rate": 3.2291666666666666e-07, "logits/chosen": -0.6324287056922913, "logits/rejected": -0.6502685546875, "loss": 5.5354, "step": 32 }, { "beta_dpo/beta_used": 0.010098990052938461, "beta_dpo/beta_used_raw": 0.010098990052938461, "beta_dpo/gap_mean": 0.32231834530830383, "beta_dpo/gap_std": 0.9891802072525024, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.06910994764397906, "grad_norm": 31.14387321472168, "learning_rate": 3.333333333333333e-07, "logits/chosen": -0.5357920527458191, "logits/rejected": -0.6322364211082458, "loss": 5.5305, "step": 33 }, { "beta_dpo/beta_used": 0.009454782120883465, "beta_dpo/beta_used_raw": 0.009454782120883465, "beta_dpo/gap_mean": 0.44986480474472046, "beta_dpo/gap_std": 1.0094612836837769, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.0712041884816754, "grad_norm": 32.375919342041016, "learning_rate": 3.4375e-07, "logits/chosen": -0.7248749136924744, "logits/rejected": -0.7035080194473267, "loss": 5.5304, "step": 34 }, { "beta_dpo/beta_used": 0.009926512837409973, "beta_dpo/beta_used_raw": 0.009926512837409973, "beta_dpo/gap_mean": 0.4365549683570862, "beta_dpo/gap_std": 1.0834380388259888, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.07329842931937172, "grad_norm": 30.43588638305664, "learning_rate": 3.541666666666667e-07, "logits/chosen": -0.6929864287376404, "logits/rejected": -0.6378797888755798, "loss": 5.5286, "step": 35 }, { "beta_dpo/beta_used": 0.009561766870319843, "beta_dpo/beta_used_raw": 0.009561766870319843, "beta_dpo/gap_mean": 0.49735885858535767, "beta_dpo/gap_std": 1.1678481101989746, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.07539267015706806, "grad_norm": 31.427370071411133, "learning_rate": 3.645833333333333e-07, "logits/chosen": -0.5668917298316956, "logits/rejected": -0.6229207515716553, "loss": 5.528, "step": 36 }, { "beta_dpo/beta_used": 0.010987182147800922, "beta_dpo/beta_used_raw": 0.010987182147800922, "beta_dpo/gap_mean": 0.507057249546051, "beta_dpo/gap_std": 1.272064447402954, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0774869109947644, "grad_norm": 41.305686950683594, "learning_rate": 3.75e-07, "logits/chosen": -0.5756943225860596, "logits/rejected": -0.6139695048332214, "loss": 5.5141, "step": 37 }, { "beta_dpo/beta_used": 0.010229920968413353, "beta_dpo/beta_used_raw": 0.010229920968413353, "beta_dpo/gap_mean": 0.4955774247646332, "beta_dpo/gap_std": 1.377665400505066, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.07958115183246073, "grad_norm": 36.85758972167969, "learning_rate": 3.8541666666666665e-07, "logits/chosen": -0.5929851531982422, "logits/rejected": -0.5943086743354797, "loss": 5.5237, "step": 38 }, { "beta_dpo/beta_used": 0.010049818083643913, "beta_dpo/beta_used_raw": 0.010049818083643913, "beta_dpo/gap_mean": 0.7315759062767029, "beta_dpo/gap_std": 1.3812720775604248, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.08167539267015707, "grad_norm": 33.5423469543457, "learning_rate": 3.958333333333333e-07, "logits/chosen": -0.5810495018959045, "logits/rejected": -0.5888175964355469, "loss": 5.511, "step": 39 }, { "beta_dpo/beta_used": 0.008092176169157028, "beta_dpo/beta_used_raw": 0.008092176169157028, "beta_dpo/gap_mean": 0.7477964162826538, "beta_dpo/gap_std": 1.5241725444793701, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08376963350785341, "grad_norm": 24.289613723754883, "learning_rate": 4.0625e-07, "logits/chosen": -0.6579009890556335, "logits/rejected": -0.7191402316093445, "loss": 5.5302, "step": 40 }, { "beta_dpo/beta_used": 0.009270838461816311, "beta_dpo/beta_used_raw": 0.009270838461816311, "beta_dpo/gap_mean": 0.7307737469673157, "beta_dpo/gap_std": 1.632360577583313, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.08586387434554973, "grad_norm": 30.218177795410156, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -0.5917030572891235, "logits/rejected": -0.668786346912384, "loss": 5.5194, "step": 41 }, { "beta_dpo/beta_used": 0.00970252975821495, "beta_dpo/beta_used_raw": 0.00970252975821495, "beta_dpo/gap_mean": 0.8179957270622253, "beta_dpo/gap_std": 1.7464549541473389, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08795811518324607, "grad_norm": 28.182844161987305, "learning_rate": 4.270833333333333e-07, "logits/chosen": -0.6131463050842285, "logits/rejected": -0.6607965230941772, "loss": 5.5155, "step": 42 }, { "beta_dpo/beta_used": 0.011301547288894653, "beta_dpo/beta_used_raw": 0.011301547288894653, "beta_dpo/gap_mean": 0.8352429270744324, "beta_dpo/gap_std": 1.9265403747558594, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.09005235602094241, "grad_norm": 39.69644546508789, "learning_rate": 4.375e-07, "logits/chosen": -0.5696575045585632, "logits/rejected": -0.5967999696731567, "loss": 5.4945, "step": 43 }, { "beta_dpo/beta_used": 0.011869620531797409, "beta_dpo/beta_used_raw": 0.011869620531797409, "beta_dpo/gap_mean": 0.9845832586288452, "beta_dpo/gap_std": 2.1420016288757324, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.09214659685863874, "grad_norm": 44.15154266357422, "learning_rate": 4.479166666666667e-07, "logits/chosen": -0.5550575852394104, "logits/rejected": -0.6399248838424683, "loss": 5.478, "step": 44 }, { "beta_dpo/beta_used": 0.009358462877571583, "beta_dpo/beta_used_raw": 0.009358462877571583, "beta_dpo/gap_mean": 1.1377849578857422, "beta_dpo/gap_std": 2.3049428462982178, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.09424083769633508, "grad_norm": 34.14745330810547, "learning_rate": 4.5833333333333327e-07, "logits/chosen": -0.685950756072998, "logits/rejected": -0.7422507405281067, "loss": 5.4987, "step": 45 }, { "beta_dpo/beta_used": 0.009525664150714874, "beta_dpo/beta_used_raw": 0.009525664150714874, "beta_dpo/gap_mean": 1.1683762073516846, "beta_dpo/gap_std": 2.3120195865631104, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.09633507853403141, "grad_norm": 31.92166519165039, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -0.6844733357429504, "logits/rejected": -0.6822009682655334, "loss": 5.5016, "step": 46 }, { "beta_dpo/beta_used": 0.008399980142712593, "beta_dpo/beta_used_raw": 0.008399980142712593, "beta_dpo/gap_mean": 1.1559507846832275, "beta_dpo/gap_std": 2.4187076091766357, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.09842931937172775, "grad_norm": 26.383420944213867, "learning_rate": 4.791666666666667e-07, "logits/chosen": -0.6458744406700134, "logits/rejected": -0.6522045135498047, "loss": 5.5085, "step": 47 }, { "beta_dpo/beta_used": 0.007347858510911465, "beta_dpo/beta_used_raw": 0.007347858510911465, "beta_dpo/gap_mean": 1.0993822813034058, "beta_dpo/gap_std": 2.6655614376068115, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10052356020942409, "grad_norm": 22.220972061157227, "learning_rate": 4.895833333333333e-07, "logits/chosen": -0.5958544611930847, "logits/rejected": -0.6661175489425659, "loss": 5.5207, "step": 48 }, { "beta_dpo/beta_used": 0.008892661891877651, "beta_dpo/beta_used_raw": 0.008892661891877651, "beta_dpo/gap_mean": 1.1662849187850952, "beta_dpo/gap_std": 2.745657205581665, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.10261780104712041, "grad_norm": 33.02886962890625, "learning_rate": 5e-07, "logits/chosen": -0.644591212272644, "logits/rejected": -0.6800640225410461, "loss": 5.5039, "step": 49 }, { "beta_dpo/beta_used": 0.008311200886964798, "beta_dpo/beta_used_raw": 0.008311200886964798, "beta_dpo/gap_mean": 1.091849684715271, "beta_dpo/gap_std": 2.904430866241455, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.10471204188481675, "grad_norm": 29.243675231933594, "learning_rate": 4.999932966293553e-07, "logits/chosen": -0.6915316581726074, "logits/rejected": -0.6876245737075806, "loss": 5.5135, "step": 50 }, { "beta_dpo/beta_used": 0.012040691450238228, "beta_dpo/beta_used_raw": 0.012040691450238228, "beta_dpo/gap_mean": 1.3487975597381592, "beta_dpo/gap_std": 3.2586777210235596, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1068062827225131, "grad_norm": 53.16353988647461, "learning_rate": 4.999731868769026e-07, "logits/chosen": -0.6590722799301147, "logits/rejected": -0.6033743619918823, "loss": 5.4366, "step": 51 }, { "beta_dpo/beta_used": 0.011686221696436405, "beta_dpo/beta_used_raw": 0.011686221696436405, "beta_dpo/gap_mean": 1.7514865398406982, "beta_dpo/gap_std": 3.606762647628784, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10890052356020942, "grad_norm": 41.719051361083984, "learning_rate": 4.99939671821067e-07, "logits/chosen": -0.6899721622467041, "logits/rejected": -0.6855327486991882, "loss": 5.4423, "step": 52 }, { "beta_dpo/beta_used": 0.014263564720749855, "beta_dpo/beta_used_raw": 0.014263564720749855, "beta_dpo/gap_mean": 1.7108714580535889, "beta_dpo/gap_std": 3.8523051738739014, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11099476439790576, "grad_norm": 56.673248291015625, "learning_rate": 4.998927532591591e-07, "logits/chosen": -0.7215074300765991, "logits/rejected": -0.6849179863929749, "loss": 5.3767, "step": 53 }, { "beta_dpo/beta_used": 0.008228869177401066, "beta_dpo/beta_used_raw": 0.006880041211843491, "beta_dpo/gap_mean": 1.9823561906814575, "beta_dpo/gap_std": 4.244045734405518, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1130890052356021, "grad_norm": 27.707256317138672, "learning_rate": 4.998324337072792e-07, "logits/chosen": -0.6958556175231934, "logits/rejected": -0.7273838520050049, "loss": 5.4762, "step": 54 }, { "beta_dpo/beta_used": 0.009436525404453278, "beta_dpo/beta_used_raw": 0.009436525404453278, "beta_dpo/gap_mean": 1.670468807220459, "beta_dpo/gap_std": 4.168619155883789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11518324607329843, "grad_norm": 32.20229721069336, "learning_rate": 4.997587164001815e-07, "logits/chosen": -0.61600261926651, "logits/rejected": -0.6316042542457581, "loss": 5.4736, "step": 55 }, { "beta_dpo/beta_used": 0.012474480085074902, "beta_dpo/beta_used_raw": 0.012474480085074902, "beta_dpo/gap_mean": 1.908013939857483, "beta_dpo/gap_std": 4.524105072021484, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.11727748691099477, "grad_norm": 46.529991149902344, "learning_rate": 4.996716052911017e-07, "logits/chosen": -0.5789837837219238, "logits/rejected": -0.6456868052482605, "loss": 5.3918, "step": 56 }, { "beta_dpo/beta_used": 0.010171854868531227, "beta_dpo/beta_used_raw": 0.010171854868531227, "beta_dpo/gap_mean": 2.9082393646240234, "beta_dpo/gap_std": 4.872549057006836, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.1193717277486911, "grad_norm": 36.80557632446289, "learning_rate": 4.99571105051544e-07, "logits/chosen": -0.7390983700752258, "logits/rejected": -0.7615019679069519, "loss": 5.4256, "step": 57 }, { "beta_dpo/beta_used": 0.008224893361330032, "beta_dpo/beta_used_raw": 0.007648976054042578, "beta_dpo/gap_mean": 2.4738152027130127, "beta_dpo/gap_std": 4.7731475830078125, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.12146596858638743, "grad_norm": 29.444232940673828, "learning_rate": 4.994572210710314e-07, "logits/chosen": -0.5107704401016235, "logits/rejected": -0.5453117489814758, "loss": 5.4671, "step": 58 }, { "beta_dpo/beta_used": 0.004772379528731108, "beta_dpo/beta_used_raw": 0.0037195575423538685, "beta_dpo/gap_mean": 2.086270570755005, "beta_dpo/gap_std": 5.413858413696289, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.12356020942408377, "grad_norm": 18.721179962158203, "learning_rate": 4.993299594568162e-07, "logits/chosen": -0.4708081781864166, "logits/rejected": -0.5131938457489014, "loss": 5.513, "step": 59 }, { "beta_dpo/beta_used": 0.013422971591353416, "beta_dpo/beta_used_raw": 0.013422971591353416, "beta_dpo/gap_mean": 1.9685330390930176, "beta_dpo/gap_std": 5.735987663269043, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.1256544502617801, "grad_norm": 41.163246154785156, "learning_rate": 4.991893270335525e-07, "logits/chosen": -0.7342594861984253, "logits/rejected": -0.7558184266090393, "loss": 5.3913, "step": 60 }, { "beta_dpo/beta_used": 0.01401711255311966, "beta_dpo/beta_used_raw": 0.013643806800246239, "beta_dpo/gap_mean": 2.660452127456665, "beta_dpo/gap_std": 6.109948635101318, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12774869109947645, "grad_norm": 64.77427673339844, "learning_rate": 4.990353313429303e-07, "logits/chosen": -0.7148650288581848, "logits/rejected": -0.6945707201957703, "loss": 5.3027, "step": 61 }, { "beta_dpo/beta_used": 0.00857143197208643, "beta_dpo/beta_used_raw": 0.007453832309693098, "beta_dpo/gap_mean": 2.698399543762207, "beta_dpo/gap_std": 6.293516159057617, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.12984293193717278, "grad_norm": 32.562408447265625, "learning_rate": 4.988679806432711e-07, "logits/chosen": -0.605577826499939, "logits/rejected": -0.636237621307373, "loss": 5.4644, "step": 62 }, { "beta_dpo/beta_used": 0.010178687050938606, "beta_dpo/beta_used_raw": 0.009732894599437714, "beta_dpo/gap_mean": 2.676795721054077, "beta_dpo/gap_std": 6.444081783294678, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1319371727748691, "grad_norm": 40.96943664550781, "learning_rate": 4.986872839090852e-07, "logits/chosen": -0.7076640129089355, "logits/rejected": -0.6968494653701782, "loss": 5.3937, "step": 63 }, { "beta_dpo/beta_used": 0.013196549378335476, "beta_dpo/beta_used_raw": 0.013196549378335476, "beta_dpo/gap_mean": 3.017939567565918, "beta_dpo/gap_std": 6.541075229644775, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.13403141361256546, "grad_norm": 50.38563919067383, "learning_rate": 4.9849325083059e-07, "logits/chosen": -0.6310144662857056, "logits/rejected": -0.623473048210144, "loss": 5.3114, "step": 64 }, { "beta_dpo/beta_used": 0.00806832779198885, "beta_dpo/beta_used_raw": 0.007314560003578663, "beta_dpo/gap_mean": 3.0990614891052246, "beta_dpo/gap_std": 6.7054572105407715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13612565445026178, "grad_norm": 30.428876876831055, "learning_rate": 4.982858918131906e-07, "logits/chosen": -0.703696608543396, "logits/rejected": -0.7108103632926941, "loss": 5.4262, "step": 65 }, { "beta_dpo/beta_used": 0.00982650276273489, "beta_dpo/beta_used_raw": 0.00982650276273489, "beta_dpo/gap_mean": 3.185175657272339, "beta_dpo/gap_std": 7.470331192016602, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.1382198952879581, "grad_norm": 40.387332916259766, "learning_rate": 4.980652179769217e-07, "logits/chosen": -0.7345768809318542, "logits/rejected": -0.7284728288650513, "loss": 5.3961, "step": 66 }, { "beta_dpo/beta_used": 0.010977521538734436, "beta_dpo/beta_used_raw": 0.010384490713477135, "beta_dpo/gap_mean": 3.2669320106506348, "beta_dpo/gap_std": 7.810610294342041, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14031413612565444, "grad_norm": 43.06589889526367, "learning_rate": 4.978312411558517e-07, "logits/chosen": -0.7585128545761108, "logits/rejected": -0.7754156589508057, "loss": 5.3403, "step": 67 }, { "beta_dpo/beta_used": 0.006959153804928064, "beta_dpo/beta_used_raw": 0.0068751610815525055, "beta_dpo/gap_mean": 3.3277812004089355, "beta_dpo/gap_std": 8.508169174194336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1424083769633508, "grad_norm": 28.538686752319336, "learning_rate": 4.975839738974473e-07, "logits/chosen": -0.7104411125183105, "logits/rejected": -0.7601235508918762, "loss": 5.4432, "step": 68 }, { "beta_dpo/beta_used": 0.014520850963890553, "beta_dpo/beta_used_raw": 0.013226198963820934, "beta_dpo/gap_mean": 4.106817245483398, "beta_dpo/gap_std": 8.52523422241211, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.14450261780104712, "grad_norm": 54.57806396484375, "learning_rate": 4.97323429461901e-07, "logits/chosen": -0.7267593741416931, "logits/rejected": -0.7121102809906006, "loss": 5.1806, "step": 69 }, { "beta_dpo/beta_used": 0.012321692891418934, "beta_dpo/beta_used_raw": 0.010563489980995655, "beta_dpo/gap_mean": 4.343552112579346, "beta_dpo/gap_std": 9.016190528869629, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.14659685863874344, "grad_norm": 43.94473648071289, "learning_rate": 4.970496218214204e-07, "logits/chosen": -0.7021334171295166, "logits/rejected": -0.7124741673469543, "loss": 5.2922, "step": 70 }, { "beta_dpo/beta_used": 0.012451138347387314, "beta_dpo/beta_used_raw": 0.011505233123898506, "beta_dpo/gap_mean": 4.387954235076904, "beta_dpo/gap_std": 9.844895362854004, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.1486910994764398, "grad_norm": 44.80491638183594, "learning_rate": 4.967625656594781e-07, "logits/chosen": -0.61981600522995, "logits/rejected": -0.5610257387161255, "loss": 5.2723, "step": 71 }, { "beta_dpo/beta_used": 0.0142544936388731, "beta_dpo/beta_used_raw": 0.010479929856956005, "beta_dpo/gap_mean": 4.6102423667907715, "beta_dpo/gap_std": 9.631770133972168, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.15078534031413612, "grad_norm": 50.041893005371094, "learning_rate": 4.964622763700252e-07, "logits/chosen": -0.6500818729400635, "logits/rejected": -0.6521684527397156, "loss": 5.2495, "step": 72 }, { "beta_dpo/beta_used": 0.00896795466542244, "beta_dpo/beta_used_raw": 0.006745354738086462, "beta_dpo/gap_mean": 4.212050437927246, "beta_dpo/gap_std": 10.10843276977539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15287958115183245, "grad_norm": 41.38795471191406, "learning_rate": 4.961487700566646e-07, "logits/chosen": -0.6939983367919922, "logits/rejected": -0.750190019607544, "loss": 5.3721, "step": 73 }, { "beta_dpo/beta_used": 0.008795595727860928, "beta_dpo/beta_used_raw": 0.0059730554930865765, "beta_dpo/gap_mean": 3.9412529468536377, "beta_dpo/gap_std": 10.357192039489746, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.1549738219895288, "grad_norm": 50.13154220581055, "learning_rate": 4.958220635317885e-07, "logits/chosen": -0.7438157200813293, "logits/rejected": -0.7368298768997192, "loss": 5.3539, "step": 74 }, { "beta_dpo/beta_used": 0.018827691674232483, "beta_dpo/beta_used_raw": 0.018391240388154984, "beta_dpo/gap_mean": 4.437331199645996, "beta_dpo/gap_std": 10.493773460388184, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.15706806282722513, "grad_norm": 66.55812072753906, "learning_rate": 4.954821743156767e-07, "logits/chosen": -0.6884775757789612, "logits/rejected": -0.6879805326461792, "loss": 5.077, "step": 75 }, { "beta_dpo/beta_used": 0.008767299354076385, "beta_dpo/beta_used_raw": 0.004196059890091419, "beta_dpo/gap_mean": 5.429379463195801, "beta_dpo/gap_std": 10.738119125366211, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.15916230366492146, "grad_norm": 39.24678421020508, "learning_rate": 4.951291206355559e-07, "logits/chosen": -0.6956934332847595, "logits/rejected": -0.7201342582702637, "loss": 5.33, "step": 76 }, { "beta_dpo/beta_used": 0.005481656640768051, "beta_dpo/beta_used_raw": 0.001297416165471077, "beta_dpo/gap_mean": 5.060276031494141, "beta_dpo/gap_std": 11.49527359008789, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.1612565445026178, "grad_norm": 26.497804641723633, "learning_rate": 4.947629214246236e-07, "logits/chosen": -0.5094698071479797, "logits/rejected": -0.5404853820800781, "loss": 5.2853, "step": 77 }, { "beta_dpo/beta_used": 0.020722726359963417, "beta_dpo/beta_used_raw": 0.020722726359963417, "beta_dpo/gap_mean": 5.881702899932861, "beta_dpo/gap_std": 12.785398483276367, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.16335078534031414, "grad_norm": 74.64260864257812, "learning_rate": 4.943835963210323e-07, "logits/chosen": -0.7535753846168518, "logits/rejected": -0.6771411895751953, "loss": 4.8878, "step": 78 }, { "beta_dpo/beta_used": 0.00859091617166996, "beta_dpo/beta_used_raw": 0.003717180108651519, "beta_dpo/gap_mean": 6.6240081787109375, "beta_dpo/gap_std": 12.910642623901367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16544502617801046, "grad_norm": 35.39107131958008, "learning_rate": 4.939911656668361e-07, "logits/chosen": -0.6833164691925049, "logits/rejected": -0.6924921274185181, "loss": 5.2913, "step": 79 }, { "beta_dpo/beta_used": 0.01494982186704874, "beta_dpo/beta_used_raw": 0.011456114239990711, "beta_dpo/gap_mean": 5.857873916625977, "beta_dpo/gap_std": 13.087008476257324, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.16753926701570682, "grad_norm": 63.859745025634766, "learning_rate": 4.935856505068998e-07, "logits/chosen": -0.6719616055488586, "logits/rejected": -0.6523293852806091, "loss": 5.0917, "step": 80 }, { "beta_dpo/beta_used": 0.009839367121458054, "beta_dpo/beta_used_raw": 0.005198465194553137, "beta_dpo/gap_mean": 6.319545269012451, "beta_dpo/gap_std": 13.469895362854004, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.16963350785340314, "grad_norm": 50.84832000732422, "learning_rate": 4.93167072587771e-07, "logits/chosen": -0.6479890942573547, "logits/rejected": -0.654083788394928, "loss": 5.2317, "step": 81 }, { "beta_dpo/beta_used": 0.007517299614846706, "beta_dpo/beta_used_raw": 0.0021146952640265226, "beta_dpo/gap_mean": 6.252190113067627, "beta_dpo/gap_std": 13.920367240905762, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.17172774869109947, "grad_norm": 38.04156494140625, "learning_rate": 4.92735454356513e-07, "logits/chosen": -0.6817227005958557, "logits/rejected": -0.6929246187210083, "loss": 5.3369, "step": 82 }, { "beta_dpo/beta_used": 0.02531317248940468, "beta_dpo/beta_used_raw": 0.021217333152890205, "beta_dpo/gap_mean": 6.274941921234131, "beta_dpo/gap_std": 14.928312301635742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17382198952879582, "grad_norm": 80.58802795410156, "learning_rate": 4.922908189595017e-07, "logits/chosen": -0.6450899243354797, "logits/rejected": -0.6248490810394287, "loss": 4.6335, "step": 83 }, { "beta_dpo/beta_used": 0.007469699718058109, "beta_dpo/beta_used_raw": 0.005744083784520626, "beta_dpo/gap_mean": 6.542113780975342, "beta_dpo/gap_std": 15.590079307556152, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17591623036649215, "grad_norm": 34.23841094970703, "learning_rate": 4.918331902411841e-07, "logits/chosen": -0.7670571208000183, "logits/rejected": -0.7832205891609192, "loss": 5.3401, "step": 84 }, { "beta_dpo/beta_used": 0.01102585531771183, "beta_dpo/beta_used_raw": 0.0029200459830462933, "beta_dpo/gap_mean": 5.665676593780518, "beta_dpo/gap_std": 15.28662395477295, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.17801047120418848, "grad_norm": 48.48334884643555, "learning_rate": 4.913625927427995e-07, "logits/chosen": -0.6236759424209595, "logits/rejected": -0.6183326244354248, "loss": 5.2517, "step": 85 }, { "beta_dpo/beta_used": 0.01666923239827156, "beta_dpo/beta_used_raw": 0.01666923239827156, "beta_dpo/gap_mean": 6.055604934692383, "beta_dpo/gap_std": 15.560418128967285, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.18010471204188483, "grad_norm": 64.15731811523438, "learning_rate": 4.908790517010636e-07, "logits/chosen": -0.6312252879142761, "logits/rejected": -0.598781943321228, "loss": 5.1042, "step": 86 }, { "beta_dpo/beta_used": 0.013364073820412159, "beta_dpo/beta_used_raw": 0.0035636532120406628, "beta_dpo/gap_mean": 7.059961318969727, "beta_dpo/gap_std": 15.8635835647583, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.18219895287958116, "grad_norm": 60.10577392578125, "learning_rate": 4.903825930468148e-07, "logits/chosen": -0.7649690508842468, "logits/rejected": -0.6910430192947388, "loss": 5.0837, "step": 87 }, { "beta_dpo/beta_used": 0.006094816140830517, "beta_dpo/beta_used_raw": -0.001817956566810608, "beta_dpo/gap_mean": 7.482639312744141, "beta_dpo/gap_std": 16.63443374633789, "beta_dpo/mask_keep_frac": 0.53125, "epoch": 0.18429319371727748, "grad_norm": 39.89524841308594, "learning_rate": 4.898732434036243e-07, "logits/chosen": -0.5936161875724792, "logits/rejected": -0.697693943977356, "loss": 5.3929, "step": 88 }, { "beta_dpo/beta_used": 0.014302433468401432, "beta_dpo/beta_used_raw": 0.008434826508164406, "beta_dpo/gap_mean": 6.980473518371582, "beta_dpo/gap_std": 17.23158073425293, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.18638743455497384, "grad_norm": 62.69277572631836, "learning_rate": 4.893510300863676e-07, "logits/chosen": -0.7539916038513184, "logits/rejected": -0.8090816736221313, "loss": 5.1073, "step": 89 }, { "beta_dpo/beta_used": 0.012626252137124538, "beta_dpo/beta_used_raw": 0.007227581460028887, "beta_dpo/gap_mean": 7.1677327156066895, "beta_dpo/gap_std": 16.382646560668945, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.18848167539267016, "grad_norm": 53.65109634399414, "learning_rate": 4.8881598109976e-07, "logits/chosen": -0.7130351662635803, "logits/rejected": -0.7106346487998962, "loss": 5.1351, "step": 90 }, { "beta_dpo/beta_used": 0.009408114477992058, "beta_dpo/beta_used_raw": -0.0041107251308858395, "beta_dpo/gap_mean": 7.250586986541748, "beta_dpo/gap_std": 16.855989456176758, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.1905759162303665, "grad_norm": 51.472225189208984, "learning_rate": 4.882681251368548e-07, "logits/chosen": -0.6540703177452087, "logits/rejected": -0.7079422473907471, "loss": 5.1118, "step": 91 }, { "beta_dpo/beta_used": 0.030049897730350494, "beta_dpo/beta_used_raw": 0.026972174644470215, "beta_dpo/gap_mean": 7.000770568847656, "beta_dpo/gap_std": 17.0972843170166, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.19267015706806281, "grad_norm": 105.66353607177734, "learning_rate": 4.877074915775048e-07, "logits/chosen": -0.7033326625823975, "logits/rejected": -0.6728801727294922, "loss": 4.4387, "step": 92 }, { "beta_dpo/beta_used": 0.008473473601043224, "beta_dpo/beta_used_raw": 0.002239819150418043, "beta_dpo/gap_mean": 7.2776947021484375, "beta_dpo/gap_std": 17.40105628967285, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.19476439790575917, "grad_norm": 41.199073791503906, "learning_rate": 4.871341104867864e-07, "logits/chosen": -0.6418673396110535, "logits/rejected": -0.7276042699813843, "loss": 5.2607, "step": 93 }, { "beta_dpo/beta_used": 0.011333908885717392, "beta_dpo/beta_used_raw": 0.0038538086228072643, "beta_dpo/gap_mean": 7.516191482543945, "beta_dpo/gap_std": 18.00417709350586, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.1968586387434555, "grad_norm": 51.31498718261719, "learning_rate": 4.865480126133871e-07, "logits/chosen": -0.5814552307128906, "logits/rejected": -0.6306831240653992, "loss": 5.1584, "step": 94 }, { "beta_dpo/beta_used": 0.012983493506908417, "beta_dpo/beta_used_raw": 0.007386527489870787, "beta_dpo/gap_mean": 7.620054244995117, "beta_dpo/gap_std": 18.478008270263672, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19895287958115182, "grad_norm": 51.42605209350586, "learning_rate": 4.859492293879573e-07, "logits/chosen": -0.7178781032562256, "logits/rejected": -0.7296870946884155, "loss": 5.1229, "step": 95 }, { "beta_dpo/beta_used": 0.014206080697476864, "beta_dpo/beta_used_raw": 0.008307880721986294, "beta_dpo/gap_mean": 8.099912643432617, "beta_dpo/gap_std": 19.668779373168945, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.20104712041884817, "grad_norm": 57.065765380859375, "learning_rate": 4.853377929214243e-07, "logits/chosen": -0.587355375289917, "logits/rejected": -0.597959578037262, "loss": 5.0654, "step": 96 }, { "beta_dpo/beta_used": 0.010406726971268654, "beta_dpo/beta_used_raw": 0.006649984512478113, "beta_dpo/gap_mean": 8.605752944946289, "beta_dpo/gap_std": 19.875154495239258, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2031413612565445, "grad_norm": 50.55111312866211, "learning_rate": 4.847137360032699e-07, "logits/chosen": -0.6221433877944946, "logits/rejected": -0.5777587890625, "loss": 5.1555, "step": 97 }, { "beta_dpo/beta_used": 0.012800071388483047, "beta_dpo/beta_used_raw": 0.0028023526538163424, "beta_dpo/gap_mean": 9.07392692565918, "beta_dpo/gap_std": 19.343914031982422, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.20523560209424083, "grad_norm": 69.06121826171875, "learning_rate": 4.84077092099773e-07, "logits/chosen": -0.7695798277854919, "logits/rejected": -0.7975507974624634, "loss": 5.1301, "step": 98 }, { "beta_dpo/beta_used": 0.01906406879425049, "beta_dpo/beta_used_raw": 0.010687445290386677, "beta_dpo/gap_mean": 7.9853668212890625, "beta_dpo/gap_std": 21.019094467163086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20732984293193718, "grad_norm": 66.07819366455078, "learning_rate": 4.834278953522137e-07, "logits/chosen": -0.7368970513343811, "logits/rejected": -0.7557910680770874, "loss": 4.8975, "step": 99 }, { "beta_dpo/beta_used": 0.009735495783388615, "beta_dpo/beta_used_raw": 0.00393830519169569, "beta_dpo/gap_mean": 9.077505111694336, "beta_dpo/gap_std": 21.074779510498047, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2094240837696335, "grad_norm": 48.8430061340332, "learning_rate": 4.827661805750437e-07, "logits/chosen": -0.7334079742431641, "logits/rejected": -0.7196102738380432, "loss": 5.1873, "step": 100 }, { "beta_dpo/beta_used": 0.023290041834115982, "beta_dpo/beta_used_raw": 0.017167603597044945, "beta_dpo/gap_mean": 9.945512771606445, "beta_dpo/gap_std": 22.141578674316406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21151832460732983, "grad_norm": 79.32262420654297, "learning_rate": 4.820919832540181e-07, "logits/chosen": -0.4960673749446869, "logits/rejected": -0.5593528747558594, "loss": 4.595, "step": 101 }, { "beta_dpo/beta_used": 0.01968398503959179, "beta_dpo/beta_used_raw": 0.010066845454275608, "beta_dpo/gap_mean": 10.21080493927002, "beta_dpo/gap_std": 21.471494674682617, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2136125654450262, "grad_norm": 65.2578125, "learning_rate": 4.814053395442932e-07, "logits/chosen": -0.699000358581543, "logits/rejected": -0.720572829246521, "loss": 4.72, "step": 102 }, { "beta_dpo/beta_used": 0.011599740013480186, "beta_dpo/beta_used_raw": 0.0011850475566461682, "beta_dpo/gap_mean": 10.333209991455078, "beta_dpo/gap_std": 21.639957427978516, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2157068062827225, "grad_norm": 43.60222244262695, "learning_rate": 4.807062862684873e-07, "logits/chosen": -0.7409847974777222, "logits/rejected": -0.7405369877815247, "loss": 5.0793, "step": 103 }, { "beta_dpo/beta_used": 0.022328007966279984, "beta_dpo/beta_used_raw": 0.013688994571566582, "beta_dpo/gap_mean": 8.519068717956543, "beta_dpo/gap_std": 22.0716495513916, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.21780104712041884, "grad_norm": 91.37364196777344, "learning_rate": 4.799948609147061e-07, "logits/chosen": -0.7835843563079834, "logits/rejected": -0.8219706416130066, "loss": 4.6679, "step": 104 }, { "beta_dpo/beta_used": 0.028743159025907516, "beta_dpo/beta_used_raw": 0.028356103226542473, "beta_dpo/gap_mean": 10.944629669189453, "beta_dpo/gap_std": 22.042673110961914, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.2198952879581152, "grad_norm": 100.99183654785156, "learning_rate": 4.792711016345321e-07, "logits/chosen": -0.7158729434013367, "logits/rejected": -0.739811897277832, "loss": 4.3696, "step": 105 }, { "beta_dpo/beta_used": 0.02642572484910488, "beta_dpo/beta_used_raw": 0.010559840127825737, "beta_dpo/gap_mean": 9.867205619812012, "beta_dpo/gap_std": 22.872636795043945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22198952879581152, "grad_norm": 96.6792221069336, "learning_rate": 4.785350472409791e-07, "logits/chosen": -0.6776769161224365, "logits/rejected": -0.7080086469650269, "loss": 4.6016, "step": 106 }, { "beta_dpo/beta_used": 0.01445105578750372, "beta_dpo/beta_used_raw": 0.012959499843418598, "beta_dpo/gap_mean": 10.998950958251953, "beta_dpo/gap_std": 23.701820373535156, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.22408376963350785, "grad_norm": 66.5599594116211, "learning_rate": 4.777867372064105e-07, "logits/chosen": -0.7649465203285217, "logits/rejected": -0.8023307919502258, "loss": 4.9656, "step": 107 }, { "beta_dpo/beta_used": 0.032948337495326996, "beta_dpo/beta_used_raw": 0.02698555961251259, "beta_dpo/gap_mean": 12.660971641540527, "beta_dpo/gap_std": 24.206636428833008, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2261780104712042, "grad_norm": 102.94635772705078, "learning_rate": 4.770262116604223e-07, "logits/chosen": -0.7107124924659729, "logits/rejected": -0.7374171614646912, "loss": 4.3364, "step": 108 }, { "beta_dpo/beta_used": 0.007749465759843588, "beta_dpo/beta_used_raw": -0.0016765656182542443, "beta_dpo/gap_mean": 13.632909774780273, "beta_dpo/gap_std": 24.86305809020996, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22827225130890053, "grad_norm": 43.27128601074219, "learning_rate": 4.7625351138769166e-07, "logits/chosen": -0.7678626775741577, "logits/rejected": -0.760747492313385, "loss": 5.1962, "step": 109 }, { "beta_dpo/beta_used": 0.013254636898636818, "beta_dpo/beta_used_raw": 0.003388074692338705, "beta_dpo/gap_mean": 13.47364330291748, "beta_dpo/gap_std": 25.939802169799805, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.23036649214659685, "grad_norm": 52.916778564453125, "learning_rate": 4.75468677825789e-07, "logits/chosen": -0.7187046408653259, "logits/rejected": -0.6971960663795471, "loss": 4.9661, "step": 110 }, { "beta_dpo/beta_used": 0.018351394683122635, "beta_dpo/beta_used_raw": 0.010718288831412792, "beta_dpo/gap_mean": 13.720507621765137, "beta_dpo/gap_std": 26.687028884887695, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2324607329842932, "grad_norm": 82.68915557861328, "learning_rate": 4.7467175306295647e-07, "logits/chosen": -0.824735701084137, "logits/rejected": -0.7799985408782959, "loss": 4.8194, "step": 111 }, { "beta_dpo/beta_used": 0.011828150600194931, "beta_dpo/beta_used_raw": -0.005314134992659092, "beta_dpo/gap_mean": 12.305923461914062, "beta_dpo/gap_std": 26.428997039794922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.23455497382198953, "grad_norm": 56.26085662841797, "learning_rate": 4.7386277983585053e-07, "logits/chosen": -0.6889740228652954, "logits/rejected": -0.7342170476913452, "loss": 5.0215, "step": 112 }, { "beta_dpo/beta_used": 0.021786488592624664, "beta_dpo/beta_used_raw": 0.011742182075977325, "beta_dpo/gap_mean": 14.276546478271484, "beta_dpo/gap_std": 29.68646812438965, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.23664921465968586, "grad_norm": 80.65067291259766, "learning_rate": 4.7304180152725024e-07, "logits/chosen": -0.6449406743049622, "logits/rejected": -0.6256552338600159, "loss": 4.4312, "step": 113 }, { "beta_dpo/beta_used": 0.020888667553663254, "beta_dpo/beta_used_raw": 0.007870053872466087, "beta_dpo/gap_mean": 12.523893356323242, "beta_dpo/gap_std": 28.998544692993164, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2387434554973822, "grad_norm": 59.69179153442383, "learning_rate": 4.7220886216373085e-07, "logits/chosen": -0.7064129710197449, "logits/rejected": -0.7065778970718384, "loss": 4.5294, "step": 114 }, { "beta_dpo/beta_used": 0.013495873659849167, "beta_dpo/beta_used_raw": -0.008398683741688728, "beta_dpo/gap_mean": 10.547378540039062, "beta_dpo/gap_std": 27.94576644897461, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.24083769633507854, "grad_norm": 56.25117492675781, "learning_rate": 4.7136400641330245e-07, "logits/chosen": -0.7171422839164734, "logits/rejected": -0.6828722357749939, "loss": 4.9334, "step": 115 }, { "beta_dpo/beta_used": 0.016198089346289635, "beta_dpo/beta_used_raw": 0.011391330510377884, "beta_dpo/gap_mean": 10.625633239746094, "beta_dpo/gap_std": 27.245738983154297, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.24293193717277486, "grad_norm": 72.90325927734375, "learning_rate": 4.70507279583015e-07, "logits/chosen": -0.7499311566352844, "logits/rejected": -0.739253580570221, "loss": 4.9531, "step": 116 }, { "beta_dpo/beta_used": 0.036482565104961395, "beta_dpo/beta_used_raw": 0.02595018595457077, "beta_dpo/gap_mean": 10.865323066711426, "beta_dpo/gap_std": 26.646053314208984, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2450261780104712, "grad_norm": 128.06033325195312, "learning_rate": 4.6963872761652834e-07, "logits/chosen": -0.7866169214248657, "logits/rejected": -0.8020620346069336, "loss": 4.1584, "step": 117 }, { "beta_dpo/beta_used": 0.02256722003221512, "beta_dpo/beta_used_raw": 0.004887686111032963, "beta_dpo/gap_mean": 12.92835807800293, "beta_dpo/gap_std": 27.222332000732422, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.24712041884816754, "grad_norm": 121.85209655761719, "learning_rate": 4.687583970916486e-07, "logits/chosen": -0.6286183595657349, "logits/rejected": -0.6127574443817139, "loss": 4.7058, "step": 118 }, { "beta_dpo/beta_used": 0.012178106233477592, "beta_dpo/beta_used_raw": 0.0018881040159612894, "beta_dpo/gap_mean": 12.664083480834961, "beta_dpo/gap_std": 29.877716064453125, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.24921465968586387, "grad_norm": 52.12855529785156, "learning_rate": 4.6786633521783005e-07, "logits/chosen": -0.8367944359779358, "logits/rejected": -0.8432599306106567, "loss": 4.9455, "step": 119 }, { "beta_dpo/beta_used": 0.011312302201986313, "beta_dpo/beta_used_raw": -0.009262747131288052, "beta_dpo/gap_mean": 12.50714111328125, "beta_dpo/gap_std": 29.64698028564453, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2513089005235602, "grad_norm": 62.29435729980469, "learning_rate": 4.669625898336438e-07, "logits/chosen": -0.7120507955551147, "logits/rejected": -0.7823662161827087, "loss": 5.029, "step": 120 }, { "beta_dpo/beta_used": 0.0014778866898268461, "beta_dpo/beta_used_raw": -0.02501249685883522, "beta_dpo/gap_mean": 11.199564933776855, "beta_dpo/gap_std": 29.29185676574707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2534031413612565, "grad_norm": 7.333785057067871, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -0.7075969576835632, "logits/rejected": -0.7335148453712463, "loss": 5.5081, "step": 121 }, { "beta_dpo/beta_used": 0.014175733551383018, "beta_dpo/beta_used_raw": -0.001818017102777958, "beta_dpo/gap_mean": 12.57092571258545, "beta_dpo/gap_std": 31.017070770263672, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.2554973821989529, "grad_norm": 65.50248718261719, "learning_rate": 4.651202430186092e-07, "logits/chosen": -0.8409684300422668, "logits/rejected": -0.8054923415184021, "loss": 4.9411, "step": 122 }, { "beta_dpo/beta_used": 0.03022715263068676, "beta_dpo/beta_used_raw": 0.01728089153766632, "beta_dpo/gap_mean": 14.076234817504883, "beta_dpo/gap_std": 31.252927780151367, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25759162303664923, "grad_norm": 140.3761444091797, "learning_rate": 4.6418174038722924e-07, "logits/chosen": -0.6981220245361328, "logits/rejected": -0.7018057107925415, "loss": 4.4325, "step": 123 }, { "beta_dpo/beta_used": 0.016084099188447, "beta_dpo/beta_used_raw": 0.007035914342850447, "beta_dpo/gap_mean": 15.691198348999023, "beta_dpo/gap_std": 30.451919555664062, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.25968586387434556, "grad_norm": 93.5594482421875, "learning_rate": 4.6323175183912023e-07, "logits/chosen": -0.8233157992362976, "logits/rejected": -0.7800065279006958, "loss": 4.7914, "step": 124 }, { "beta_dpo/beta_used": 0.020123766735196114, "beta_dpo/beta_used_raw": -0.0011871629394590855, "beta_dpo/gap_mean": 13.572896957397461, "beta_dpo/gap_std": 31.540260314941406, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.2617801047120419, "grad_norm": 81.28208923339844, "learning_rate": 4.6227032831928483e-07, "logits/chosen": -0.7599564790725708, "logits/rejected": -0.6782684326171875, "loss": 4.704, "step": 125 }, { "beta_dpo/beta_used": 0.02084464207291603, "beta_dpo/beta_used_raw": 0.010123949497938156, "beta_dpo/gap_mean": 14.827800750732422, "beta_dpo/gap_std": 32.751522064208984, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.2638743455497382, "grad_norm": 91.95066833496094, "learning_rate": 4.612975213859487e-07, "logits/chosen": -0.7613145112991333, "logits/rejected": -0.7944775819778442, "loss": 4.6694, "step": 126 }, { "beta_dpo/beta_used": 0.023009877651929855, "beta_dpo/beta_used_raw": 0.014726024121046066, "beta_dpo/gap_mean": 14.955554962158203, "beta_dpo/gap_std": 33.054447174072266, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.26596858638743454, "grad_norm": 91.58712005615234, "learning_rate": 4.603133832077953e-07, "logits/chosen": -0.8286364674568176, "logits/rejected": -0.8062022924423218, "loss": 4.5869, "step": 127 }, { "beta_dpo/beta_used": 0.0319821797311306, "beta_dpo/beta_used_raw": 0.02191462367773056, "beta_dpo/gap_mean": 17.882171630859375, "beta_dpo/gap_std": 33.18529510498047, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2680628272251309, "grad_norm": 106.6471939086914, "learning_rate": 4.5931796656116837e-07, "logits/chosen": -0.699189305305481, "logits/rejected": -0.6564383506774902, "loss": 4.2103, "step": 128 }, { "beta_dpo/beta_used": 0.019659318029880524, "beta_dpo/beta_used_raw": 0.015033195726573467, "beta_dpo/gap_mean": 17.7318058013916, "beta_dpo/gap_std": 33.44122314453125, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.27015706806282724, "grad_norm": 72.52957153320312, "learning_rate": 4.5831132482724193e-07, "logits/chosen": -0.7713093161582947, "logits/rejected": -0.7497988939285278, "loss": 4.4109, "step": 129 }, { "beta_dpo/beta_used": 0.00805729627609253, "beta_dpo/beta_used_raw": -0.008298722095787525, "beta_dpo/gap_mean": 18.417720794677734, "beta_dpo/gap_std": 34.202728271484375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.27225130890052357, "grad_norm": 62.98481369018555, "learning_rate": 4.5729351198915705e-07, "logits/chosen": -0.7118815779685974, "logits/rejected": -0.7767693996429443, "loss": 5.0954, "step": 130 }, { "beta_dpo/beta_used": 0.0160170029848814, "beta_dpo/beta_used_raw": 0.00947889219969511, "beta_dpo/gap_mean": 16.390932083129883, "beta_dpo/gap_std": 35.38821029663086, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.2743455497382199, "grad_norm": 78.16362762451172, "learning_rate": 4.5626458262912735e-07, "logits/chosen": -0.7344637513160706, "logits/rejected": -0.7118038535118103, "loss": 4.8656, "step": 131 }, { "beta_dpo/beta_used": 0.03713168576359749, "beta_dpo/beta_used_raw": 0.014095718041062355, "beta_dpo/gap_mean": 16.76073455810547, "beta_dpo/gap_std": 35.335784912109375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2764397905759162, "grad_norm": 105.79364013671875, "learning_rate": 4.5522459192551166e-07, "logits/chosen": -0.8273689150810242, "logits/rejected": -0.79078209400177, "loss": 4.106, "step": 132 }, { "beta_dpo/beta_used": 0.021496238186955452, "beta_dpo/beta_used_raw": 0.006832793354988098, "beta_dpo/gap_mean": 18.750276565551758, "beta_dpo/gap_std": 36.73375701904297, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.27853403141361255, "grad_norm": 94.68896484375, "learning_rate": 4.541735956498554e-07, "logits/chosen": -0.8012921214103699, "logits/rejected": -0.8170878291130066, "loss": 4.4552, "step": 133 }, { "beta_dpo/beta_used": 0.012021646834909916, "beta_dpo/beta_used_raw": -0.007044796831905842, "beta_dpo/gap_mean": 16.580371856689453, "beta_dpo/gap_std": 34.95547866821289, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.2806282722513089, "grad_norm": 57.19186019897461, "learning_rate": 4.5311165016389914e-07, "logits/chosen": -0.8249697685241699, "logits/rejected": -0.807636022567749, "loss": 4.8618, "step": 134 }, { "beta_dpo/beta_used": 0.02594444341957569, "beta_dpo/beta_used_raw": 0.009492763318121433, "beta_dpo/gap_mean": 16.52640151977539, "beta_dpo/gap_std": 31.791019439697266, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.28272251308900526, "grad_norm": 157.5948028564453, "learning_rate": 4.520388124165564e-07, "logits/chosen": -0.7143105268478394, "logits/rejected": -0.7277257442474365, "loss": 4.7733, "step": 135 }, { "beta_dpo/beta_used": 0.018465936183929443, "beta_dpo/beta_used_raw": 0.010593372397124767, "beta_dpo/gap_mean": 17.351146697998047, "beta_dpo/gap_std": 33.06019592285156, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.2848167539267016, "grad_norm": 131.78701782226562, "learning_rate": 4.5095513994085974e-07, "logits/chosen": -0.7398912906646729, "logits/rejected": -0.7863351702690125, "loss": 4.8188, "step": 136 }, { "beta_dpo/beta_used": 0.011953875422477722, "beta_dpo/beta_used_raw": 0.0052395714446902275, "beta_dpo/gap_mean": 16.20960235595703, "beta_dpo/gap_std": 35.670745849609375, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.2869109947643979, "grad_norm": 74.34517669677734, "learning_rate": 4.498606908508753e-07, "logits/chosen": -0.80860835313797, "logits/rejected": -0.7614427804946899, "loss": 5.0427, "step": 137 }, { "beta_dpo/beta_used": 0.02045310102403164, "beta_dpo/beta_used_raw": -0.0056061288341879845, "beta_dpo/gap_mean": 17.586992263793945, "beta_dpo/gap_std": 36.90517807006836, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.28900523560209423, "grad_norm": 143.13523864746094, "learning_rate": 4.487555238385862e-07, "logits/chosen": -0.7992879152297974, "logits/rejected": -0.8304911851882935, "loss": 5.2096, "step": 138 }, { "beta_dpo/beta_used": 0.016061272472143173, "beta_dpo/beta_used_raw": -0.0029601496644318104, "beta_dpo/gap_mean": 15.417540550231934, "beta_dpo/gap_std": 36.3847541809082, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.29109947643979056, "grad_norm": 95.46815490722656, "learning_rate": 4.476396981707453e-07, "logits/chosen": -0.7314491271972656, "logits/rejected": -0.7732853293418884, "loss": 5.0469, "step": 139 }, { "beta_dpo/beta_used": 0.044663287699222565, "beta_dpo/beta_used_raw": 0.04263610392808914, "beta_dpo/gap_mean": 15.303201675415039, "beta_dpo/gap_std": 34.73930358886719, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.2931937172774869, "grad_norm": 174.06341552734375, "learning_rate": 4.4651327368569684e-07, "logits/chosen": -0.8693004846572876, "logits/rejected": -0.8686134815216064, "loss": 3.9564, "step": 140 }, { "beta_dpo/beta_used": 0.021187350153923035, "beta_dpo/beta_used_raw": 0.019022824242711067, "beta_dpo/gap_mean": 19.225461959838867, "beta_dpo/gap_std": 34.109764099121094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29528795811518327, "grad_norm": 110.43495178222656, "learning_rate": 4.453763107901675e-07, "logits/chosen": -0.7424483299255371, "logits/rejected": -0.7873528599739075, "loss": 4.6015, "step": 141 }, { "beta_dpo/beta_used": 0.0332464836537838, "beta_dpo/beta_used_raw": 0.008531760424375534, "beta_dpo/gap_mean": 18.10867691040039, "beta_dpo/gap_std": 36.432342529296875, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.2973821989528796, "grad_norm": 122.61527252197266, "learning_rate": 4.4422887045602674e-07, "logits/chosen": -0.7638643383979797, "logits/rejected": -0.7775416970252991, "loss": 4.2986, "step": 142 }, { "beta_dpo/beta_used": 0.016291283071041107, "beta_dpo/beta_used_raw": 0.0006667158449999988, "beta_dpo/gap_mean": 18.518922805786133, "beta_dpo/gap_std": 35.83793258666992, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.2994764397905759, "grad_norm": 79.98748016357422, "learning_rate": 4.4307101421701755e-07, "logits/chosen": -0.8503552675247192, "logits/rejected": -0.8338074088096619, "loss": 4.739, "step": 143 }, { "beta_dpo/beta_used": 0.012389753945171833, "beta_dpo/beta_used_raw": 0.00042197853326797485, "beta_dpo/gap_mean": 18.385601043701172, "beta_dpo/gap_std": 36.555580139160156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30157068062827225, "grad_norm": 74.62446594238281, "learning_rate": 4.419028041654559e-07, "logits/chosen": -0.8731358051300049, "logits/rejected": -0.867561936378479, "loss": 4.9057, "step": 144 }, { "beta_dpo/beta_used": 0.016797425225377083, "beta_dpo/beta_used_raw": -0.007564428262412548, "beta_dpo/gap_mean": 18.421340942382812, "beta_dpo/gap_std": 35.51329040527344, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3036649214659686, "grad_norm": 102.74947357177734, "learning_rate": 4.4072430294890166e-07, "logits/chosen": -0.7647844552993774, "logits/rejected": -0.766077995300293, "loss": 4.7931, "step": 145 }, { "beta_dpo/beta_used": 0.007330628577619791, "beta_dpo/beta_used_raw": -0.01089246105402708, "beta_dpo/gap_mean": 19.39159393310547, "beta_dpo/gap_std": 33.0991325378418, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.3057591623036649, "grad_norm": 57.71752166748047, "learning_rate": 4.395355737667985e-07, "logits/chosen": -0.807758092880249, "logits/rejected": -0.821743905544281, "loss": 5.1453, "step": 146 }, { "beta_dpo/beta_used": 0.017891917377710342, "beta_dpo/beta_used_raw": 0.0008026466239243746, "beta_dpo/gap_mean": 16.48558235168457, "beta_dpo/gap_std": 33.77042007446289, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3078534031413613, "grad_norm": 94.41093444824219, "learning_rate": 4.3833668036708483e-07, "logits/chosen": -0.8066427111625671, "logits/rejected": -0.8248432278633118, "loss": 4.7533, "step": 147 }, { "beta_dpo/beta_used": 0.009967929683625698, "beta_dpo/beta_used_raw": -0.007602631114423275, "beta_dpo/gap_mean": 16.034523010253906, "beta_dpo/gap_std": 36.380615234375, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.3099476439790576, "grad_norm": 74.8528823852539, "learning_rate": 4.3712768704277524e-07, "logits/chosen": -0.8988285660743713, "logits/rejected": -0.9119629859924316, "loss": 5.3024, "step": 148 }, { "beta_dpo/beta_used": 0.029269058257341385, "beta_dpo/beta_used_raw": 0.024799324572086334, "beta_dpo/gap_mean": 18.69751739501953, "beta_dpo/gap_std": 34.20708465576172, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.31204188481675393, "grad_norm": 95.18089294433594, "learning_rate": 4.3590865862851263e-07, "logits/chosen": -0.9157636761665344, "logits/rejected": -0.8866834044456482, "loss": 3.9915, "step": 149 }, { "beta_dpo/beta_used": 0.028355229645967484, "beta_dpo/beta_used_raw": 0.014602387323975563, "beta_dpo/gap_mean": 18.82350730895996, "beta_dpo/gap_std": 33.63038635253906, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.31413612565445026, "grad_norm": 127.4208984375, "learning_rate": 4.346796604970912e-07, "logits/chosen": -0.8425026535987854, "logits/rejected": -0.7345662117004395, "loss": 4.1692, "step": 150 }, { "beta_dpo/beta_used": 0.0352584645152092, "beta_dpo/beta_used_raw": 0.030752388760447502, "beta_dpo/gap_mean": 19.252273559570312, "beta_dpo/gap_std": 36.00699996948242, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.3162303664921466, "grad_norm": 101.46421813964844, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -0.764532208442688, "logits/rejected": -0.7699897885322571, "loss": 3.9384, "step": 151 }, { "beta_dpo/beta_used": 0.030788574367761612, "beta_dpo/beta_used_raw": 0.01764693856239319, "beta_dpo/gap_mean": 21.685163497924805, "beta_dpo/gap_std": 36.85689163208008, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3183246073298429, "grad_norm": 102.9702377319336, "learning_rate": 4.3219201924364323e-07, "logits/chosen": -0.9068971872329712, "logits/rejected": -0.9211371541023254, "loss": 3.8419, "step": 152 }, { "beta_dpo/beta_used": 0.024467987939715385, "beta_dpo/beta_used_raw": 0.015887044370174408, "beta_dpo/gap_mean": 24.365219116210938, "beta_dpo/gap_std": 36.4759521484375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3204188481675393, "grad_norm": 111.00398254394531, "learning_rate": 4.309335095262675e-07, "logits/chosen": -0.7987594604492188, "logits/rejected": -0.7632243037223816, "loss": 4.1512, "step": 153 }, { "beta_dpo/beta_used": 0.04007789492607117, "beta_dpo/beta_used_raw": 0.01483201328665018, "beta_dpo/gap_mean": 25.266956329345703, "beta_dpo/gap_std": 39.56476593017578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3225130890052356, "grad_norm": 123.44865417480469, "learning_rate": 4.2966529689388064e-07, "logits/chosen": -0.8501051068305969, "logits/rejected": -0.8371157646179199, "loss": 3.4469, "step": 154 }, { "beta_dpo/beta_used": 0.022579234093427658, "beta_dpo/beta_used_raw": -0.02031770907342434, "beta_dpo/gap_mean": 21.613218307495117, "beta_dpo/gap_std": 39.026023864746094, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.32460732984293195, "grad_norm": 129.148193359375, "learning_rate": 4.2838744935687716e-07, "logits/chosen": -0.761044442653656, "logits/rejected": -0.7877327799797058, "loss": 4.4611, "step": 155 }, { "beta_dpo/beta_used": 0.029358845204114914, "beta_dpo/beta_used_raw": 0.013587499037384987, "beta_dpo/gap_mean": 23.304094314575195, "beta_dpo/gap_std": 41.368614196777344, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.3267015706806283, "grad_norm": 138.49502563476562, "learning_rate": 4.271000354423425e-07, "logits/chosen": -0.7635002732276917, "logits/rejected": -0.8206408023834229, "loss": 4.1597, "step": 156 }, { "beta_dpo/beta_used": 0.01539008691906929, "beta_dpo/beta_used_raw": -0.02126063033938408, "beta_dpo/gap_mean": 24.20404624938965, "beta_dpo/gap_std": 41.25341033935547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3287958115183246, "grad_norm": 83.74942016601562, "learning_rate": 4.258031241903777e-07, "logits/chosen": -0.8348160982131958, "logits/rejected": -0.7768077850341797, "loss": 4.6953, "step": 157 }, { "beta_dpo/beta_used": 0.03978518396615982, "beta_dpo/beta_used_raw": 0.028111770749092102, "beta_dpo/gap_mean": 22.0745849609375, "beta_dpo/gap_std": 39.07844924926758, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.3308900523560209, "grad_norm": 135.35491943359375, "learning_rate": 4.2449678515039743e-07, "logits/chosen": -0.8687289357185364, "logits/rejected": -0.8547466993331909, "loss": 3.9821, "step": 158 }, { "beta_dpo/beta_used": 0.02545471116900444, "beta_dpo/beta_used_raw": -0.00017212284728884697, "beta_dpo/gap_mean": 19.57489776611328, "beta_dpo/gap_std": 41.78768539428711, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.33298429319371725, "grad_norm": 144.7659912109375, "learning_rate": 4.2318108837739986e-07, "logits/chosen": -0.8753824234008789, "logits/rejected": -0.8525476455688477, "loss": 4.8274, "step": 159 }, { "beta_dpo/beta_used": 0.039335690438747406, "beta_dpo/beta_used_raw": 0.02949613332748413, "beta_dpo/gap_mean": 21.556251525878906, "beta_dpo/gap_std": 38.69097137451172, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.33507853403141363, "grad_norm": 240.3103790283203, "learning_rate": 4.218561044282098e-07, "logits/chosen": -0.86173415184021, "logits/rejected": -0.8341448903083801, "loss": 3.7782, "step": 160 }, { "beta_dpo/beta_used": 0.030621008947491646, "beta_dpo/beta_used_raw": -0.0023182015866041183, "beta_dpo/gap_mean": 22.37126922607422, "beta_dpo/gap_std": 39.51905059814453, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.33717277486910996, "grad_norm": 150.58641052246094, "learning_rate": 4.2052190435769554e-07, "logits/chosen": -0.8699642419815063, "logits/rejected": -0.86982661485672, "loss": 4.2166, "step": 161 }, { "beta_dpo/beta_used": 0.021013660356402397, "beta_dpo/beta_used_raw": 0.005534999072551727, "beta_dpo/gap_mean": 22.425281524658203, "beta_dpo/gap_std": 40.90775680541992, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.3392670157068063, "grad_norm": 135.9263916015625, "learning_rate": 4.1917855971495763e-07, "logits/chosen": -0.8752709031105042, "logits/rejected": -0.8557614684104919, "loss": 4.5621, "step": 162 }, { "beta_dpo/beta_used": 0.02784748375415802, "beta_dpo/beta_used_raw": 0.005173914600163698, "beta_dpo/gap_mean": 20.817134857177734, "beta_dpo/gap_std": 40.16265106201172, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3413612565445026, "grad_norm": 132.69302368164062, "learning_rate": 4.1782614253949255e-07, "logits/chosen": -0.771392822265625, "logits/rejected": -0.794430673122406, "loss": 4.3633, "step": 163 }, { "beta_dpo/beta_used": 0.021030288189649582, "beta_dpo/beta_used_raw": 0.004847892560064793, "beta_dpo/gap_mean": 20.410049438476562, "beta_dpo/gap_std": 41.04210662841797, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.34345549738219894, "grad_norm": 151.0008087158203, "learning_rate": 4.164647253573289e-07, "logits/chosen": -0.9349634647369385, "logits/rejected": -0.8864374160766602, "loss": 4.5775, "step": 164 }, { "beta_dpo/beta_used": 0.009714031592011452, "beta_dpo/beta_used_raw": -0.03288843855261803, "beta_dpo/gap_mean": 20.747264862060547, "beta_dpo/gap_std": 39.629669189453125, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.34554973821989526, "grad_norm": 57.11280822753906, "learning_rate": 4.1509438117713863e-07, "logits/chosen": -0.8814147114753723, "logits/rejected": -0.8542748093605042, "loss": 5.1159, "step": 165 }, { "beta_dpo/beta_used": 0.010636869817972183, "beta_dpo/beta_used_raw": -0.009791170246899128, "beta_dpo/gap_mean": 20.025184631347656, "beta_dpo/gap_std": 39.09601974487305, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.34764397905759165, "grad_norm": 241.4670867919922, "learning_rate": 4.137151834863213e-07, "logits/chosen": -0.8034209609031677, "logits/rejected": -0.7690469026565552, "loss": 5.1549, "step": 166 }, { "beta_dpo/beta_used": 0.03181453049182892, "beta_dpo/beta_used_raw": 0.028030332177877426, "beta_dpo/gap_mean": 20.967866897583008, "beta_dpo/gap_std": 40.07197952270508, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.34973821989528797, "grad_norm": 176.5508270263672, "learning_rate": 4.123272062470633e-07, "logits/chosen": -0.8304077982902527, "logits/rejected": -0.7818213105201721, "loss": 4.3848, "step": 167 }, { "beta_dpo/beta_used": 0.03028823807835579, "beta_dpo/beta_used_raw": 0.020576341077685356, "beta_dpo/gap_mean": 22.560794830322266, "beta_dpo/gap_std": 43.39508819580078, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.3518324607329843, "grad_norm": 110.01106262207031, "learning_rate": 4.1093052389237174e-07, "logits/chosen": -0.7103608846664429, "logits/rejected": -0.7231693267822266, "loss": 3.9635, "step": 168 }, { "beta_dpo/beta_used": 0.04548133164644241, "beta_dpo/beta_used_raw": 0.04410823807120323, "beta_dpo/gap_mean": 24.36121940612793, "beta_dpo/gap_std": 41.35852813720703, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.3539267015706806, "grad_norm": 325.0240478515625, "learning_rate": 4.0952521132208267e-07, "logits/chosen": -0.8891708254814148, "logits/rejected": -0.8906590938568115, "loss": 3.4009, "step": 169 }, { "beta_dpo/beta_used": 0.013463572598993778, "beta_dpo/beta_used_raw": -0.014656160026788712, "beta_dpo/gap_mean": 27.53852081298828, "beta_dpo/gap_std": 41.62273406982422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35602094240837695, "grad_norm": 111.54733276367188, "learning_rate": 4.081113438988443e-07, "logits/chosen": -0.8540668487548828, "logits/rejected": -0.8349031805992126, "loss": 4.8316, "step": 170 }, { "beta_dpo/beta_used": 0.022017715498805046, "beta_dpo/beta_used_raw": 0.0025704074651002884, "beta_dpo/gap_mean": 26.90930938720703, "beta_dpo/gap_std": 38.87221908569336, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.3581151832460733, "grad_norm": 160.54473876953125, "learning_rate": 4.0668899744407567e-07, "logits/chosen": -0.8043861985206604, "logits/rejected": -0.8006876707077026, "loss": 4.4335, "step": 171 }, { "beta_dpo/beta_used": 0.008449875749647617, "beta_dpo/beta_used_raw": -0.03602520003914833, "beta_dpo/gap_mean": 22.49981117248535, "beta_dpo/gap_std": 37.147884368896484, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.36020942408376966, "grad_norm": 74.52308654785156, "learning_rate": 4.0525824823390043e-07, "logits/chosen": -0.8162400722503662, "logits/rejected": -0.8232384324073792, "loss": 5.1239, "step": 172 }, { "beta_dpo/beta_used": 0.016885017976164818, "beta_dpo/beta_used_raw": -0.014056820422410965, "beta_dpo/gap_mean": 18.819292068481445, "beta_dpo/gap_std": 36.193111419677734, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.362303664921466, "grad_norm": 80.40719604492188, "learning_rate": 4.0381917299505686e-07, "logits/chosen": -0.7334867119789124, "logits/rejected": -0.7083029747009277, "loss": 4.7244, "step": 173 }, { "beta_dpo/beta_used": 0.03210830315947533, "beta_dpo/beta_used_raw": 0.02511240914463997, "beta_dpo/gap_mean": 20.32571792602539, "beta_dpo/gap_std": 35.956050872802734, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3643979057591623, "grad_norm": 83.13336181640625, "learning_rate": 4.0237184890078243e-07, "logits/chosen": -0.8683218359947205, "logits/rejected": -0.8630374073982239, "loss": 4.126, "step": 174 }, { "beta_dpo/beta_used": 0.03025994263589382, "beta_dpo/beta_used_raw": 0.012775203213095665, "beta_dpo/gap_mean": 20.767414093017578, "beta_dpo/gap_std": 35.31028747558594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36649214659685864, "grad_norm": 133.00521850585938, "learning_rate": 4.00916353566676e-07, "logits/chosen": -0.7180600762367249, "logits/rejected": -0.7292754650115967, "loss": 4.5442, "step": 175 }, { "beta_dpo/beta_used": 0.020637210458517075, "beta_dpo/beta_used_raw": 0.006442366633564234, "beta_dpo/gap_mean": 19.601943969726562, "beta_dpo/gap_std": 39.14218521118164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36858638743455496, "grad_norm": 93.0556640625, "learning_rate": 3.994527650465352e-07, "logits/chosen": -0.7302559018135071, "logits/rejected": -0.7689952850341797, "loss": 4.4981, "step": 176 }, { "beta_dpo/beta_used": 0.014554323628544807, "beta_dpo/beta_used_raw": -0.010481350123882294, "beta_dpo/gap_mean": 17.167186737060547, "beta_dpo/gap_std": 39.22663497924805, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.3706806282722513, "grad_norm": 78.07917785644531, "learning_rate": 3.979811618281705e-07, "logits/chosen": -0.7889816761016846, "logits/rejected": -0.7952367067337036, "loss": 4.7903, "step": 177 }, { "beta_dpo/beta_used": 0.021441150456666946, "beta_dpo/beta_used_raw": 0.002083552535623312, "beta_dpo/gap_mean": 20.210954666137695, "beta_dpo/gap_std": 39.11219787597656, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.37277486910994767, "grad_norm": 100.62411499023438, "learning_rate": 3.9650162282919654e-07, "logits/chosen": -0.6511439681053162, "logits/rejected": -0.6596049666404724, "loss": 4.3602, "step": 178 }, { "beta_dpo/beta_used": 0.0247175469994545, "beta_dpo/beta_used_raw": -6.247404962778091e-05, "beta_dpo/gap_mean": 19.763917922973633, "beta_dpo/gap_std": 37.68657302856445, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.374869109947644, "grad_norm": 76.8095932006836, "learning_rate": 3.9501422739279953e-07, "logits/chosen": -0.7534154057502747, "logits/rejected": -0.7417958974838257, "loss": 4.166, "step": 179 }, { "beta_dpo/beta_used": 0.03843570500612259, "beta_dpo/beta_used_raw": 0.017177987843751907, "beta_dpo/gap_mean": 19.746444702148438, "beta_dpo/gap_std": 37.75269317626953, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.3769633507853403, "grad_norm": 138.32301330566406, "learning_rate": 3.935190552834828e-07, "logits/chosen": -0.6509720087051392, "logits/rejected": -0.7430813312530518, "loss": 3.856, "step": 180 }, { "beta_dpo/beta_used": 0.026693008840084076, "beta_dpo/beta_used_raw": 0.021555408835411072, "beta_dpo/gap_mean": 20.54876136779785, "beta_dpo/gap_std": 38.2152214050293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37905759162303665, "grad_norm": 180.86692810058594, "learning_rate": 3.920161866827889e-07, "logits/chosen": -0.813086211681366, "logits/rejected": -0.8329648971557617, "loss": 4.3399, "step": 181 }, { "beta_dpo/beta_used": 0.029124662280082703, "beta_dpo/beta_used_raw": 0.01702137291431427, "beta_dpo/gap_mean": 21.305740356445312, "beta_dpo/gap_std": 37.635379791259766, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.381151832460733, "grad_norm": 117.95497131347656, "learning_rate": 3.90505702185e-07, "logits/chosen": -0.6139867305755615, "logits/rejected": -0.722787082195282, "loss": 3.9507, "step": 182 }, { "beta_dpo/beta_used": 0.023187464103102684, "beta_dpo/beta_used_raw": -0.002321781124919653, "beta_dpo/gap_mean": 23.785552978515625, "beta_dpo/gap_std": 39.93912887573242, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3832460732984293, "grad_norm": 109.51704406738281, "learning_rate": 3.889876827928156e-07, "logits/chosen": -0.7375423312187195, "logits/rejected": -0.7235562205314636, "loss": 4.2095, "step": 183 }, { "beta_dpo/beta_used": 0.03651594743132591, "beta_dpo/beta_used_raw": 0.03403354063630104, "beta_dpo/gap_mean": 26.601848602294922, "beta_dpo/gap_std": 41.58767318725586, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.38534031413612563, "grad_norm": 98.02659606933594, "learning_rate": 3.874622099130087e-07, "logits/chosen": -0.7697808742523193, "logits/rejected": -0.7725228071212769, "loss": 3.7018, "step": 184 }, { "beta_dpo/beta_used": 0.02182621695101261, "beta_dpo/beta_used_raw": -0.006090118549764156, "beta_dpo/gap_mean": 26.3581485748291, "beta_dpo/gap_std": 42.42856216430664, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.387434554973822, "grad_norm": 218.3275146484375, "learning_rate": 3.859293653520604e-07, "logits/chosen": -0.8233194351196289, "logits/rejected": -0.8047745227813721, "loss": 4.246, "step": 185 }, { "beta_dpo/beta_used": 0.03138742968440056, "beta_dpo/beta_used_raw": 0.009212229400873184, "beta_dpo/gap_mean": 24.505887985229492, "beta_dpo/gap_std": 41.48905563354492, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.38952879581151834, "grad_norm": 124.85476684570312, "learning_rate": 3.8438923131177237e-07, "logits/chosen": -0.7481645345687866, "logits/rejected": -0.7928252220153809, "loss": 3.7283, "step": 186 }, { "beta_dpo/beta_used": 0.010927281342446804, "beta_dpo/beta_used_raw": -0.0037998317275196314, "beta_dpo/gap_mean": 22.086095809936523, "beta_dpo/gap_std": 42.30852127075195, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.39162303664921466, "grad_norm": 67.12848663330078, "learning_rate": 3.828418903848593e-07, "logits/chosen": -0.6687250137329102, "logits/rejected": -0.666191816329956, "loss": 5.0962, "step": 187 }, { "beta_dpo/beta_used": 0.02534855529665947, "beta_dpo/beta_used_raw": 0.011756940744817257, "beta_dpo/gap_mean": 21.788326263427734, "beta_dpo/gap_std": 42.513572692871094, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.393717277486911, "grad_norm": 134.685791015625, "learning_rate": 3.812874255505191e-07, "logits/chosen": -0.8032656908035278, "logits/rejected": -0.775035560131073, "loss": 4.2692, "step": 188 }, { "beta_dpo/beta_used": 0.030971940606832504, "beta_dpo/beta_used_raw": 0.02102605067193508, "beta_dpo/gap_mean": 23.528629302978516, "beta_dpo/gap_std": 41.77531433105469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3958115183246073, "grad_norm": 136.64242553710938, "learning_rate": 3.797259201699833e-07, "logits/chosen": -0.809384286403656, "logits/rejected": -0.8046677112579346, "loss": 4.0389, "step": 189 }, { "beta_dpo/beta_used": 0.01858203113079071, "beta_dpo/beta_used_raw": -0.0006800373084843159, "beta_dpo/gap_mean": 24.665685653686523, "beta_dpo/gap_std": 41.014503479003906, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.39790575916230364, "grad_norm": 119.0221176147461, "learning_rate": 3.781574579820464e-07, "logits/chosen": -0.7358199954032898, "logits/rejected": -0.7636604905128479, "loss": 4.5646, "step": 190 }, { "beta_dpo/beta_used": 0.031428806483745575, "beta_dpo/beta_used_raw": -0.002010398544371128, "beta_dpo/gap_mean": 24.216768264770508, "beta_dpo/gap_std": 43.79417419433594, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.4, "grad_norm": 139.61459350585938, "learning_rate": 3.765821230985757e-07, "logits/chosen": -0.8139005899429321, "logits/rejected": -0.7801560163497925, "loss": 4.2518, "step": 191 }, { "beta_dpo/beta_used": 0.03002096898853779, "beta_dpo/beta_used_raw": 0.009661837480962276, "beta_dpo/gap_mean": 24.45059585571289, "beta_dpo/gap_std": 42.039737701416016, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.40209424083769635, "grad_norm": 168.8079071044922, "learning_rate": 3.75e-07, "logits/chosen": -0.812671422958374, "logits/rejected": -0.8485623002052307, "loss": 3.8821, "step": 192 }, { "beta_dpo/beta_used": 0.02032877318561077, "beta_dpo/beta_used_raw": 0.0041326722130179405, "beta_dpo/gap_mean": 23.045848846435547, "beta_dpo/gap_std": 43.16719436645508, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4041884816753927, "grad_norm": 104.38407897949219, "learning_rate": 3.734111735307796e-07, "logits/chosen": -0.8072720766067505, "logits/rejected": -0.839698851108551, "loss": 4.4311, "step": 193 }, { "beta_dpo/beta_used": 0.020913559943437576, "beta_dpo/beta_used_raw": -0.004458375740796328, "beta_dpo/gap_mean": 23.628847122192383, "beta_dpo/gap_std": 41.59272766113281, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.406282722513089, "grad_norm": 118.84005737304688, "learning_rate": 3.7181572889485623e-07, "logits/chosen": -0.8274001479148865, "logits/rejected": -0.8259969353675842, "loss": 4.3544, "step": 194 }, { "beta_dpo/beta_used": 0.0032001424115151167, "beta_dpo/beta_used_raw": -0.02794015407562256, "beta_dpo/gap_mean": 20.366397857666016, "beta_dpo/gap_std": 40.22095489501953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4083769633507853, "grad_norm": 26.801753997802734, "learning_rate": 3.7021375165108377e-07, "logits/chosen": -0.8312541246414185, "logits/rejected": -0.8000338077545166, "loss": 5.3373, "step": 195 }, { "beta_dpo/beta_used": 0.031204037368297577, "beta_dpo/beta_used_raw": 0.022474460303783417, "beta_dpo/gap_mean": 21.7479190826416, "beta_dpo/gap_std": 39.999412536621094, "beta_dpo/mask_keep_frac": 0.96875, "epoch": 0.41047120418848165, "grad_norm": 175.2480926513672, "learning_rate": 3.6860532770864005e-07, "logits/chosen": -0.8379102945327759, "logits/rejected": -0.8230741620063782, "loss": 4.1619, "step": 196 }, { "beta_dpo/beta_used": 0.04341711848974228, "beta_dpo/beta_used_raw": 0.04169736057519913, "beta_dpo/gap_mean": 25.090091705322266, "beta_dpo/gap_std": 41.287593841552734, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41256544502617803, "grad_norm": 260.1309814453125, "learning_rate": 3.6699054332241985e-07, "logits/chosen": -0.6981998682022095, "logits/rejected": -0.7817898392677307, "loss": 3.6891, "step": 197 }, { "beta_dpo/beta_used": 0.018048102036118507, "beta_dpo/beta_used_raw": 0.005947708152234554, "beta_dpo/gap_mean": 27.571151733398438, "beta_dpo/gap_std": 44.79579544067383, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.41465968586387436, "grad_norm": 111.63640594482422, "learning_rate": 3.653694850884091e-07, "logits/chosen": -0.7784479856491089, "logits/rejected": -0.7769980430603027, "loss": 4.6279, "step": 198 }, { "beta_dpo/beta_used": 0.026122871786355972, "beta_dpo/beta_used_raw": 0.008887620642781258, "beta_dpo/gap_mean": 26.642627716064453, "beta_dpo/gap_std": 45.17276382446289, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4167539267015707, "grad_norm": 211.3232879638672, "learning_rate": 3.6374223993904124e-07, "logits/chosen": -0.7652086019515991, "logits/rejected": -0.7274236679077148, "loss": 4.526, "step": 199 }, { "beta_dpo/beta_used": 0.004629853181540966, "beta_dpo/beta_used_raw": -0.015042738988995552, "beta_dpo/gap_mean": 24.910205841064453, "beta_dpo/gap_std": 47.183074951171875, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.418848167539267, "grad_norm": 33.36002731323242, "learning_rate": 3.621088951385353e-07, "logits/chosen": -0.7769320607185364, "logits/rejected": -0.8450891971588135, "loss": 5.1899, "step": 200 }, { "epoch": 0.418848167539267, "eval_beta_dpo/beta_used": 0.03460463136434555, "eval_beta_dpo/beta_used_raw": 0.013989130035042763, "eval_beta_dpo/gap_mean": 23.174381256103516, "eval_beta_dpo/gap_std": 48.25934600830078, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.815741777420044, "eval_logits/rejected": -0.8024517893791199, "eval_loss": 0.6122435331344604, "eval_runtime": 82.2329, "eval_samples_per_second": 24.321, "eval_steps_per_second": 1.52, "step": 200 }, { "beta_dpo/beta_used": 0.03787456825375557, "beta_dpo/beta_used_raw": 0.026411913335323334, "beta_dpo/gap_mean": 23.28668785095215, "beta_dpo/gap_std": 45.737125396728516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42094240837696334, "grad_norm": 203.3619384765625, "learning_rate": 3.604695382782159e-07, "logits/chosen": -0.7913077473640442, "logits/rejected": -0.8229740262031555, "loss": 4.2735, "step": 201 }, { "beta_dpo/beta_used": 0.050101663917303085, "beta_dpo/beta_used_raw": 0.027181357145309448, "beta_dpo/gap_mean": 26.254316329956055, "beta_dpo/gap_std": 47.33518600463867, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.42303664921465967, "grad_norm": 260.6098327636719, "learning_rate": 3.588242572718162e-07, "logits/chosen": -0.8053906559944153, "logits/rejected": -0.8041623830795288, "loss": 3.7601, "step": 202 }, { "beta_dpo/beta_used": 0.009512822143733501, "beta_dpo/beta_used_raw": -0.01119938027113676, "beta_dpo/gap_mean": 23.491336822509766, "beta_dpo/gap_std": 43.72566223144531, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.42513089005235605, "grad_norm": 84.75527954101562, "learning_rate": 3.571731403507635e-07, "logits/chosen": -0.8103188872337341, "logits/rejected": -0.8483298420906067, "loss": 4.8249, "step": 203 }, { "beta_dpo/beta_used": 0.02792198956012726, "beta_dpo/beta_used_raw": 0.01622004434466362, "beta_dpo/gap_mean": 25.187780380249023, "beta_dpo/gap_std": 43.19692611694336, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4272251308900524, "grad_norm": 155.26531982421875, "learning_rate": 3.5551627605944746e-07, "logits/chosen": -0.8938873410224915, "logits/rejected": -0.8654384016990662, "loss": 4.0644, "step": 204 }, { "beta_dpo/beta_used": 0.03272661939263344, "beta_dpo/beta_used_raw": 0.005555758252739906, "beta_dpo/gap_mean": 26.425273895263672, "beta_dpo/gap_std": 45.58020782470703, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4293193717277487, "grad_norm": 67.79540252685547, "learning_rate": 3.5385375325047163e-07, "logits/chosen": -0.7698061466217041, "logits/rejected": -0.76741623878479, "loss": 4.3406, "step": 205 }, { "beta_dpo/beta_used": 0.032748252153396606, "beta_dpo/beta_used_raw": 0.013970796950161457, "beta_dpo/gap_mean": 28.709857940673828, "beta_dpo/gap_std": 44.605228424072266, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.431413612565445, "grad_norm": 372.34228515625, "learning_rate": 3.5218566107988867e-07, "logits/chosen": -0.7109194993972778, "logits/rejected": -0.8103634119033813, "loss": 4.6551, "step": 206 }, { "beta_dpo/beta_used": 0.015838006511330605, "beta_dpo/beta_used_raw": -0.017275551334023476, "beta_dpo/gap_mean": 25.304269790649414, "beta_dpo/gap_std": 46.00745391845703, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.43350785340314135, "grad_norm": 136.5730438232422, "learning_rate": 3.505120890024195e-07, "logits/chosen": -0.7835868000984192, "logits/rejected": -0.8143876194953918, "loss": 4.6841, "step": 207 }, { "beta_dpo/beta_used": 0.01868237368762493, "beta_dpo/beta_used_raw": -0.0011156108230352402, "beta_dpo/gap_mean": 24.15138816833496, "beta_dpo/gap_std": 47.38937759399414, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4356020942408377, "grad_norm": 93.81692504882812, "learning_rate": 3.4883312676665534e-07, "logits/chosen": -0.8852607011795044, "logits/rejected": -0.8384636640548706, "loss": 4.3394, "step": 208 }, { "beta_dpo/beta_used": 0.025227809324860573, "beta_dpo/beta_used_raw": -0.008135579526424408, "beta_dpo/gap_mean": 22.95732879638672, "beta_dpo/gap_std": 47.612056732177734, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.437696335078534, "grad_norm": 296.78143310546875, "learning_rate": 3.4714886441024573e-07, "logits/chosen": -0.6929375529289246, "logits/rejected": -0.6913096904754639, "loss": 4.5351, "step": 209 }, { "beta_dpo/beta_used": 0.03174670785665512, "beta_dpo/beta_used_raw": 0.003210625145584345, "beta_dpo/gap_mean": 23.549930572509766, "beta_dpo/gap_std": 46.66145706176758, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4397905759162304, "grad_norm": 154.39170837402344, "learning_rate": 3.454593922550693e-07, "logits/chosen": -0.7912762761116028, "logits/rejected": -0.7809194326400757, "loss": 4.5574, "step": 210 }, { "beta_dpo/beta_used": 0.02569686621427536, "beta_dpo/beta_used_raw": 0.008063238114118576, "beta_dpo/gap_mean": 27.75176429748535, "beta_dpo/gap_std": 44.786964416503906, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4418848167539267, "grad_norm": 125.4310531616211, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -0.9319531917572021, "logits/rejected": -0.9190531969070435, "loss": 4.106, "step": 211 }, { "beta_dpo/beta_used": 0.013355633243918419, "beta_dpo/beta_used_raw": -0.006055002100765705, "beta_dpo/gap_mean": 27.055316925048828, "beta_dpo/gap_std": 43.12101364135742, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.44397905759162304, "grad_norm": 71.85043334960938, "learning_rate": 3.4206518122800055e-07, "logits/chosen": -0.8760491609573364, "logits/rejected": -0.8264781832695007, "loss": 4.6725, "step": 212 }, { "beta_dpo/beta_used": 0.018737439066171646, "beta_dpo/beta_used_raw": -0.015005623921751976, "beta_dpo/gap_mean": 23.8645076751709, "beta_dpo/gap_std": 44.43546676635742, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.44607329842931936, "grad_norm": 226.47691345214844, "learning_rate": 3.403606243773448e-07, "logits/chosen": -0.9040374755859375, "logits/rejected": -0.873714804649353, "loss": 4.761, "step": 213 }, { "beta_dpo/beta_used": 0.03264402225613594, "beta_dpo/beta_used_raw": 0.007331144995987415, "beta_dpo/gap_mean": 23.217544555664062, "beta_dpo/gap_std": 46.46554946899414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4481675392670157, "grad_norm": 235.9413604736328, "learning_rate": 3.3865122176063385e-07, "logits/chosen": -0.8038402795791626, "logits/rejected": -0.8402938842773438, "loss": 4.3773, "step": 214 }, { "beta_dpo/beta_used": 0.012464843690395355, "beta_dpo/beta_used_raw": -0.03232930973172188, "beta_dpo/gap_mean": 22.477909088134766, "beta_dpo/gap_std": 47.451107025146484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.450261780104712, "grad_norm": 118.8662338256836, "learning_rate": 3.3693706504794243e-07, "logits/chosen": -0.872378945350647, "logits/rejected": -0.8904660940170288, "loss": 4.9657, "step": 215 }, { "beta_dpo/beta_used": 0.048138365149497986, "beta_dpo/beta_used_raw": 0.032591041177511215, "beta_dpo/gap_mean": 24.77643585205078, "beta_dpo/gap_std": 48.98875427246094, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.4523560209424084, "grad_norm": 260.2225036621094, "learning_rate": 3.3521824616429284e-07, "logits/chosen": -0.8762063980102539, "logits/rejected": -0.8824567794799805, "loss": 3.8946, "step": 216 }, { "beta_dpo/beta_used": 0.02065902203321457, "beta_dpo/beta_used_raw": 0.0016261846758425236, "beta_dpo/gap_mean": 27.15247917175293, "beta_dpo/gap_std": 48.955963134765625, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4544502617801047, "grad_norm": 117.39456939697266, "learning_rate": 3.334948572847253e-07, "logits/chosen": -0.7578608989715576, "logits/rejected": -0.7313589453697205, "loss": 4.2169, "step": 217 }, { "beta_dpo/beta_used": 0.03485836833715439, "beta_dpo/beta_used_raw": 0.020042069256305695, "beta_dpo/gap_mean": 29.53237533569336, "beta_dpo/gap_std": 46.928466796875, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.45654450261780105, "grad_norm": 340.87933349609375, "learning_rate": 3.317669908293554e-07, "logits/chosen": -0.8080700039863586, "logits/rejected": -0.8047543168067932, "loss": 4.0359, "step": 218 }, { "beta_dpo/beta_used": 0.028133587911725044, "beta_dpo/beta_used_raw": 0.0015811556950211525, "beta_dpo/gap_mean": 30.489063262939453, "beta_dpo/gap_std": 46.79350280761719, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4586387434554974, "grad_norm": 111.17486572265625, "learning_rate": 3.300347394584172e-07, "logits/chosen": -0.8630120158195496, "logits/rejected": -0.8839913606643677, "loss": 4.1282, "step": 219 }, { "beta_dpo/beta_used": 0.021622518077492714, "beta_dpo/beta_used_raw": -0.011343970894813538, "beta_dpo/gap_mean": 30.70256805419922, "beta_dpo/gap_std": 47.032894134521484, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.4607329842931937, "grad_norm": 238.27330017089844, "learning_rate": 3.2829819606729477e-07, "logits/chosen": -0.8160958290100098, "logits/rejected": -0.7701820135116577, "loss": 4.703, "step": 220 }, { "beta_dpo/beta_used": 0.0057728588581085205, "beta_dpo/beta_used_raw": -0.048508308827877045, "beta_dpo/gap_mean": 26.71761703491211, "beta_dpo/gap_std": 45.98579788208008, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.46282722513089003, "grad_norm": 70.58045959472656, "learning_rate": 3.265574537815398e-07, "logits/chosen": -0.8309197425842285, "logits/rejected": -0.8332974314689636, "loss": 5.2443, "step": 221 }, { "beta_dpo/beta_used": 0.023440374061465263, "beta_dpo/beta_used_raw": -0.0038303863257169724, "beta_dpo/gap_mean": 26.571941375732422, "beta_dpo/gap_std": 45.80172348022461, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4649214659685864, "grad_norm": 175.59906005859375, "learning_rate": 3.248126059518784e-07, "logits/chosen": -0.9114519953727722, "logits/rejected": -0.8528196215629578, "loss": 4.5703, "step": 222 }, { "beta_dpo/beta_used": 0.02268083207309246, "beta_dpo/beta_used_raw": 0.017425578087568283, "beta_dpo/gap_mean": 26.815799713134766, "beta_dpo/gap_std": 44.76752471923828, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.46701570680628274, "grad_norm": 131.18267822265625, "learning_rate": 3.230637461492043e-07, "logits/chosen": -0.7977765798568726, "logits/rejected": -0.7418711185455322, "loss": 4.2857, "step": 223 }, { "beta_dpo/beta_used": 0.027264375239610672, "beta_dpo/beta_used_raw": 0.011616711504757404, "beta_dpo/gap_mean": 27.529714584350586, "beta_dpo/gap_std": 45.91986846923828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46910994764397906, "grad_norm": 208.3940887451172, "learning_rate": 3.213109681595612e-07, "logits/chosen": -0.7965356707572937, "logits/rejected": -0.791540801525116, "loss": 4.1658, "step": 224 }, { "beta_dpo/beta_used": 0.013540107756853104, "beta_dpo/beta_used_raw": -0.02534569799900055, "beta_dpo/gap_mean": 28.939363479614258, "beta_dpo/gap_std": 45.13759231567383, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4712041884816754, "grad_norm": 136.84378051757812, "learning_rate": 3.1955436597911315e-07, "logits/chosen": -0.7949999570846558, "logits/rejected": -0.7891294360160828, "loss": 4.8717, "step": 225 }, { "beta_dpo/beta_used": 0.011019091121852398, "beta_dpo/beta_used_raw": -0.0036931331269443035, "beta_dpo/gap_mean": 26.09113311767578, "beta_dpo/gap_std": 47.119407653808594, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.4732984293193717, "grad_norm": 103.97045135498047, "learning_rate": 3.1779403380910425e-07, "logits/chosen": -0.8348425626754761, "logits/rejected": -0.8312546014785767, "loss": 4.7836, "step": 226 }, { "beta_dpo/beta_used": 0.02266230434179306, "beta_dpo/beta_used_raw": 0.014338882640004158, "beta_dpo/gap_mean": 26.389862060546875, "beta_dpo/gap_std": 47.60458755493164, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.47539267015706804, "grad_norm": 282.4552307128906, "learning_rate": 3.160300660508064e-07, "logits/chosen": -0.8365087509155273, "logits/rejected": -0.8325910568237305, "loss": 4.8797, "step": 227 }, { "beta_dpo/beta_used": 0.02304881624877453, "beta_dpo/beta_used_raw": -0.007625843398272991, "beta_dpo/gap_mean": 27.687213897705078, "beta_dpo/gap_std": 46.798221588134766, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4774869109947644, "grad_norm": 99.42752075195312, "learning_rate": 3.1426255730045695e-07, "logits/chosen": -0.8232005834579468, "logits/rejected": -0.785977840423584, "loss": 4.3965, "step": 228 }, { "beta_dpo/beta_used": 0.024218367412686348, "beta_dpo/beta_used_raw": -0.012653389945626259, "beta_dpo/gap_mean": 31.94796371459961, "beta_dpo/gap_std": 46.080589294433594, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.47958115183246075, "grad_norm": 176.44334411621094, "learning_rate": 3.1249160234418644e-07, "logits/chosen": -0.843792736530304, "logits/rejected": -0.8399423956871033, "loss": 4.5637, "step": 229 }, { "beta_dpo/beta_used": 0.004419737029820681, "beta_dpo/beta_used_raw": -0.03868510574102402, "beta_dpo/gap_mean": 32.72969436645508, "beta_dpo/gap_std": 48.032718658447266, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.4816753926701571, "grad_norm": 40.30256652832031, "learning_rate": 3.1071729615293424e-07, "logits/chosen": -0.8502980470657349, "logits/rejected": -0.8471386432647705, "loss": 5.141, "step": 230 }, { "beta_dpo/beta_used": 0.002270770724862814, "beta_dpo/beta_used_raw": -0.041417621076107025, "beta_dpo/gap_mean": 29.029102325439453, "beta_dpo/gap_std": 47.07488250732422, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4837696335078534, "grad_norm": 20.798189163208008, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -0.7537152767181396, "logits/rejected": -0.7852950096130371, "loss": 5.3369, "step": 231 }, { "beta_dpo/beta_used": 0.021667521446943283, "beta_dpo/beta_used_raw": -0.012663575820624828, "beta_dpo/gap_mean": 26.52678108215332, "beta_dpo/gap_std": 47.0605354309082, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.48586387434554973, "grad_norm": 168.40077209472656, "learning_rate": 3.071590108427243e-07, "logits/chosen": -0.7681893706321716, "logits/rejected": -0.7273673415184021, "loss": 4.6679, "step": 232 }, { "beta_dpo/beta_used": 0.040682002902030945, "beta_dpo/beta_used_raw": 0.029946379363536835, "beta_dpo/gap_mean": 27.526458740234375, "beta_dpo/gap_std": 47.81543731689453, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.48795811518324606, "grad_norm": 153.6154022216797, "learning_rate": 3.05375222543809e-07, "logits/chosen": -0.8089311122894287, "logits/rejected": -0.8404504060745239, "loss": 3.544, "step": 233 }, { "beta_dpo/beta_used": 0.037120141088962555, "beta_dpo/beta_used_raw": 0.02446739934384823, "beta_dpo/gap_mean": 29.188695907592773, "beta_dpo/gap_std": 50.91583251953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4900523560209424, "grad_norm": 203.7050018310547, "learning_rate": 3.035884646397637e-07, "logits/chosen": -0.8175359964370728, "logits/rejected": -0.778833270072937, "loss": 4.2035, "step": 234 }, { "beta_dpo/beta_used": 0.03462304174900055, "beta_dpo/beta_used_raw": 0.023997776210308075, "beta_dpo/gap_mean": 28.996198654174805, "beta_dpo/gap_std": 53.151405334472656, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.49214659685863876, "grad_norm": 170.55416870117188, "learning_rate": 3.017988329489923e-07, "logits/chosen": -0.8345946073532104, "logits/rejected": -0.8394272923469543, "loss": 4.0689, "step": 235 }, { "beta_dpo/beta_used": 0.03600964695215225, "beta_dpo/beta_used_raw": 0.0005581271834671497, "beta_dpo/gap_mean": 29.45612144470215, "beta_dpo/gap_std": 52.83362579345703, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.4942408376963351, "grad_norm": 189.6999053955078, "learning_rate": 3.000064234440111e-07, "logits/chosen": -0.8346319794654846, "logits/rejected": -0.8440847396850586, "loss": 3.9006, "step": 236 }, { "beta_dpo/beta_used": 0.019271746277809143, "beta_dpo/beta_used_raw": -0.029383037239313126, "beta_dpo/gap_mean": 30.120567321777344, "beta_dpo/gap_std": 51.399436950683594, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.4963350785340314, "grad_norm": 125.6007308959961, "learning_rate": 2.9821133224630223e-07, "logits/chosen": -0.7935413122177124, "logits/rejected": -0.8029470443725586, "loss": 4.544, "step": 237 }, { "beta_dpo/beta_used": 0.01742161437869072, "beta_dpo/beta_used_raw": -0.03088521584868431, "beta_dpo/gap_mean": 31.576923370361328, "beta_dpo/gap_std": 51.387908935546875, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.49842931937172774, "grad_norm": 101.49148559570312, "learning_rate": 2.964136556211588e-07, "logits/chosen": -0.8203566074371338, "logits/rejected": -0.8182651996612549, "loss": 4.2478, "step": 238 }, { "beta_dpo/beta_used": 0.03185847029089928, "beta_dpo/beta_used_raw": -0.010265880264341831, "beta_dpo/gap_mean": 28.438522338867188, "beta_dpo/gap_std": 53.83900833129883, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5005235602094241, "grad_norm": 334.53521728515625, "learning_rate": 2.946134899725226e-07, "logits/chosen": -0.7999371886253357, "logits/rejected": -0.8673533201217651, "loss": 4.7214, "step": 239 }, { "beta_dpo/beta_used": 0.013771746307611465, "beta_dpo/beta_used_raw": 0.0037962235510349274, "beta_dpo/gap_mean": 29.074222564697266, "beta_dpo/gap_std": 51.6200065612793, "beta_dpo/mask_keep_frac": 0.96875, "epoch": 0.5026178010471204, "grad_norm": 125.98123168945312, "learning_rate": 2.9281093183781403e-07, "logits/chosen": -0.8858702182769775, "logits/rejected": -0.9153672456741333, "loss": 4.722, "step": 240 }, { "beta_dpo/beta_used": 0.008614077232778072, "beta_dpo/beta_used_raw": -0.03193598613142967, "beta_dpo/gap_mean": 27.712648391723633, "beta_dpo/gap_std": 50.65081787109375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5047120418848168, "grad_norm": 73.02886199951172, "learning_rate": 2.910060778827554e-07, "logits/chosen": -0.7879363298416138, "logits/rejected": -0.7629251480102539, "loss": 4.974, "step": 241 }, { "beta_dpo/beta_used": 0.017704099416732788, "beta_dpo/beta_used_raw": -0.026023104786872864, "beta_dpo/gap_mean": 26.438953399658203, "beta_dpo/gap_std": 49.28800582885742, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.506806282722513, "grad_norm": 155.94786071777344, "learning_rate": 2.891990248961871e-07, "logits/chosen": -0.8872713446617126, "logits/rejected": -0.8689901828765869, "loss": 4.6485, "step": 242 }, { "beta_dpo/beta_used": 0.03200588375329971, "beta_dpo/beta_used_raw": 0.007553852163255215, "beta_dpo/gap_mean": 29.208711624145508, "beta_dpo/gap_std": 48.12644577026367, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5089005235602094, "grad_norm": 199.69061279296875, "learning_rate": 2.873898697848762e-07, "logits/chosen": -0.8087879419326782, "logits/rejected": -0.7941450476646423, "loss": 4.3873, "step": 243 }, { "beta_dpo/beta_used": 0.03249687701463699, "beta_dpo/beta_used_raw": 0.00029761437326669693, "beta_dpo/gap_mean": 33.442317962646484, "beta_dpo/gap_std": 50.90048599243164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5109947643979058, "grad_norm": 206.1800079345703, "learning_rate": 2.8557870956832133e-07, "logits/chosen": -0.7746649384498596, "logits/rejected": -0.687791645526886, "loss": 3.88, "step": 244 }, { "beta_dpo/beta_used": 0.04741879552602768, "beta_dpo/beta_used_raw": 0.029347646981477737, "beta_dpo/gap_mean": 32.799251556396484, "beta_dpo/gap_std": 47.67058563232422, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5130890052356021, "grad_norm": 173.1085968017578, "learning_rate": 2.837656413735479e-07, "logits/chosen": -0.8986497521400452, "logits/rejected": -0.8961766958236694, "loss": 3.3984, "step": 245 }, { "beta_dpo/beta_used": 0.0235223900526762, "beta_dpo/beta_used_raw": -0.011849863454699516, "beta_dpo/gap_mean": 30.94761848449707, "beta_dpo/gap_std": 49.176815032958984, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5151832460732985, "grad_norm": 281.09698486328125, "learning_rate": 2.8195076242990116e-07, "logits/chosen": -0.8312711119651794, "logits/rejected": -0.8494311571121216, "loss": 4.6427, "step": 246 }, { "beta_dpo/beta_used": 0.02513197809457779, "beta_dpo/beta_used_raw": -0.007078057155013084, "beta_dpo/gap_mean": 28.14275550842285, "beta_dpo/gap_std": 48.82672882080078, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5172774869109947, "grad_norm": 136.38978576660156, "learning_rate": 2.801341700638307e-07, "logits/chosen": -0.8171231746673584, "logits/rejected": -0.8114153146743774, "loss": 4.2088, "step": 247 }, { "beta_dpo/beta_used": 0.01641557179391384, "beta_dpo/beta_used_raw": -0.02007879875600338, "beta_dpo/gap_mean": 26.242324829101562, "beta_dpo/gap_std": 45.897560119628906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5193717277486911, "grad_norm": 110.12213134765625, "learning_rate": 2.7831596169367227e-07, "logits/chosen": -0.7554613351821899, "logits/rejected": -0.8297998905181885, "loss": 4.4495, "step": 248 }, { "beta_dpo/beta_used": 0.013018419966101646, "beta_dpo/beta_used_raw": -0.01035161130130291, "beta_dpo/gap_mean": 21.812284469604492, "beta_dpo/gap_std": 46.56766128540039, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5214659685863874, "grad_norm": 90.18573760986328, "learning_rate": 2.7649623482442274e-07, "logits/chosen": -0.818859338760376, "logits/rejected": -0.8167266845703125, "loss": 5.007, "step": 249 }, { "beta_dpo/beta_used": 0.05589645728468895, "beta_dpo/beta_used_raw": 0.04678558558225632, "beta_dpo/gap_mean": 24.672931671142578, "beta_dpo/gap_std": 48.47020721435547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5235602094240838, "grad_norm": 336.7721862792969, "learning_rate": 2.7467508704251135e-07, "logits/chosen": -0.862523078918457, "logits/rejected": -0.8510252237319946, "loss": 3.9654, "step": 250 }, { "beta_dpo/beta_used": 0.018005074933171272, "beta_dpo/beta_used_raw": -0.011267204768955708, "beta_dpo/gap_mean": 24.661828994750977, "beta_dpo/gap_std": 47.70268249511719, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5256544502617801, "grad_norm": 113.27556610107422, "learning_rate": 2.7285261601056697e-07, "logits/chosen": -0.9115744829177856, "logits/rejected": -0.8325821757316589, "loss": 5.02, "step": 251 }, { "beta_dpo/beta_used": 0.034432608634233475, "beta_dpo/beta_used_raw": 0.016598613932728767, "beta_dpo/gap_mean": 27.05451011657715, "beta_dpo/gap_std": 50.06959915161133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5277486910994764, "grad_norm": 188.25326538085938, "learning_rate": 2.7102891946217994e-07, "logits/chosen": -0.9205706119537354, "logits/rejected": -0.8480794429779053, "loss": 4.3982, "step": 252 }, { "beta_dpo/beta_used": 0.030707869678735733, "beta_dpo/beta_used_raw": 0.004673094488680363, "beta_dpo/gap_mean": 26.41693878173828, "beta_dpo/gap_std": 50.89750289916992, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5298429319371728, "grad_norm": 140.49769592285156, "learning_rate": 2.692040951966617e-07, "logits/chosen": -0.8601374626159668, "logits/rejected": -0.8499505519866943, "loss": 4.3829, "step": 253 }, { "beta_dpo/beta_used": 0.030033409595489502, "beta_dpo/beta_used_raw": 0.012539991177618504, "beta_dpo/gap_mean": 24.60215950012207, "beta_dpo/gap_std": 47.5504035949707, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5319371727748691, "grad_norm": 98.29093933105469, "learning_rate": 2.6737824107379947e-07, "logits/chosen": -0.9111440777778625, "logits/rejected": -0.8825950026512146, "loss": 4.2623, "step": 254 }, { "beta_dpo/beta_used": 0.03537018597126007, "beta_dpo/beta_used_raw": 0.024893784895539284, "beta_dpo/gap_mean": 27.02058219909668, "beta_dpo/gap_std": 46.62839126586914, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5340314136125655, "grad_norm": 235.2870330810547, "learning_rate": 2.655514550086086e-07, "logits/chosen": -0.7854205369949341, "logits/rejected": -0.7229121327400208, "loss": 3.8108, "step": 255 }, { "beta_dpo/beta_used": 0.042297471314668655, "beta_dpo/beta_used_raw": 0.008141601458191872, "beta_dpo/gap_mean": 28.063838958740234, "beta_dpo/gap_std": 49.99174499511719, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5361256544502618, "grad_norm": 155.22914123535156, "learning_rate": 2.6372383496608186e-07, "logits/chosen": -0.8625849485397339, "logits/rejected": -0.8409400582313538, "loss": 3.9188, "step": 256 }, { "beta_dpo/beta_used": 0.020409418269991875, "beta_dpo/beta_used_raw": 8.291192352771759e-05, "beta_dpo/gap_mean": 29.060293197631836, "beta_dpo/gap_std": 51.213829040527344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5382198952879581, "grad_norm": 107.94662475585938, "learning_rate": 2.618954789559356e-07, "logits/chosen": -0.7469907999038696, "logits/rejected": -0.769670844078064, "loss": 4.4188, "step": 257 }, { "beta_dpo/beta_used": 0.02360442467033863, "beta_dpo/beta_used_raw": -0.016117922961711884, "beta_dpo/gap_mean": 29.77499771118164, "beta_dpo/gap_std": 46.84881591796875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5403141361256545, "grad_norm": 134.36001586914062, "learning_rate": 2.600664850273538e-07, "logits/chosen": -0.803202748298645, "logits/rejected": -0.7860767841339111, "loss": 4.2857, "step": 258 }, { "beta_dpo/beta_used": 0.0034133887384086847, "beta_dpo/beta_used_raw": -0.029627330601215363, "beta_dpo/gap_mean": 26.850561141967773, "beta_dpo/gap_std": 44.52630615234375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5424083769633508, "grad_norm": 30.7167911529541, "learning_rate": 2.582369512637302e-07, "logits/chosen": -0.6924210786819458, "logits/rejected": -0.7782201766967773, "loss": 5.2647, "step": 259 }, { "beta_dpo/beta_used": 0.008151357993483543, "beta_dpo/beta_used_raw": -0.03724336996674538, "beta_dpo/gap_mean": 21.897171020507812, "beta_dpo/gap_std": 44.250633239746094, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5445026178010471, "grad_norm": 110.90091705322266, "learning_rate": 2.5640697577740815e-07, "logits/chosen": -0.7184336185455322, "logits/rejected": -0.7615399956703186, "loss": 5.2479, "step": 260 }, { "beta_dpo/beta_used": 0.03933139145374298, "beta_dpo/beta_used_raw": 0.02173340693116188, "beta_dpo/gap_mean": 20.506837844848633, "beta_dpo/gap_std": 46.83831024169922, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5465968586387434, "grad_norm": 224.45547485351562, "learning_rate": 2.5457665670441937e-07, "logits/chosen": -0.6661160588264465, "logits/rejected": -0.6675682067871094, "loss": 4.4162, "step": 261 }, { "beta_dpo/beta_used": 0.014896124601364136, "beta_dpo/beta_used_raw": -0.00048278551548719406, "beta_dpo/gap_mean": 23.35280990600586, "beta_dpo/gap_std": 45.730369567871094, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5486910994764398, "grad_norm": 101.4534683227539, "learning_rate": 2.527460921992209e-07, "logits/chosen": -0.7730051875114441, "logits/rejected": -0.7815576791763306, "loss": 4.7897, "step": 262 }, { "beta_dpo/beta_used": 0.02885139361023903, "beta_dpo/beta_used_raw": -0.01011989638209343, "beta_dpo/gap_mean": 26.439210891723633, "beta_dpo/gap_std": 45.27045440673828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5507853403141362, "grad_norm": 95.77165985107422, "learning_rate": 2.509153804294318e-07, "logits/chosen": -0.7346749305725098, "logits/rejected": -0.7492486238479614, "loss": 4.3113, "step": 263 }, { "beta_dpo/beta_used": 0.04723266139626503, "beta_dpo/beta_used_raw": 0.032619744539260864, "beta_dpo/gap_mean": 26.848690032958984, "beta_dpo/gap_std": 45.16484451293945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5528795811518324, "grad_norm": 249.46133422851562, "learning_rate": 2.4908461957056825e-07, "logits/chosen": -0.8002597093582153, "logits/rejected": -0.7968762516975403, "loss": 4.0313, "step": 264 }, { "beta_dpo/beta_used": 0.044382501393556595, "beta_dpo/beta_used_raw": 0.019323019310832024, "beta_dpo/gap_mean": 31.021467208862305, "beta_dpo/gap_std": 46.95008087158203, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5549738219895288, "grad_norm": 179.83192443847656, "learning_rate": 2.4725390780077905e-07, "logits/chosen": -0.8639757633209229, "logits/rejected": -0.8595830202102661, "loss": 3.8278, "step": 265 }, { "beta_dpo/beta_used": 0.027583010494709015, "beta_dpo/beta_used_raw": 0.015042563900351524, "beta_dpo/gap_mean": 31.712360382080078, "beta_dpo/gap_std": 45.211669921875, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5570680628272251, "grad_norm": 100.64574432373047, "learning_rate": 2.454233432955807e-07, "logits/chosen": -0.8895573019981384, "logits/rejected": -0.8909889459609985, "loss": 3.7383, "step": 266 }, { "beta_dpo/beta_used": 0.01041501946747303, "beta_dpo/beta_used_raw": -0.028038477525115013, "beta_dpo/gap_mean": 30.713520050048828, "beta_dpo/gap_std": 45.09004211425781, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.5591623036649215, "grad_norm": 82.85396575927734, "learning_rate": 2.435930242225919e-07, "logits/chosen": -0.7605207562446594, "logits/rejected": -0.7826250195503235, "loss": 4.8741, "step": 267 }, { "beta_dpo/beta_used": 0.03263188153505325, "beta_dpo/beta_used_raw": 0.0181466955691576, "beta_dpo/gap_mean": 27.47226905822754, "beta_dpo/gap_std": 46.088748931884766, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5612565445026177, "grad_norm": 147.48915100097656, "learning_rate": 2.4176304873626984e-07, "logits/chosen": -0.7427608370780945, "logits/rejected": -0.6938825249671936, "loss": 3.6626, "step": 268 }, { "beta_dpo/beta_used": 0.013387175276875496, "beta_dpo/beta_used_raw": -0.010312670841813087, "beta_dpo/gap_mean": 26.25534439086914, "beta_dpo/gap_std": 48.16028594970703, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5633507853403141, "grad_norm": 136.2506103515625, "learning_rate": 2.399335149726463e-07, "logits/chosen": -0.8059217929840088, "logits/rejected": -0.7971139550209045, "loss": 5.0023, "step": 269 }, { "beta_dpo/beta_used": 0.021209895610809326, "beta_dpo/beta_used_raw": 0.007768834941089153, "beta_dpo/gap_mean": 26.338743209838867, "beta_dpo/gap_std": 52.1260986328125, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5654450261780105, "grad_norm": 129.21153259277344, "learning_rate": 2.381045210440644e-07, "logits/chosen": -0.8386709690093994, "logits/rejected": -0.8653663396835327, "loss": 4.4507, "step": 270 }, { "beta_dpo/beta_used": 0.0069845193065702915, "beta_dpo/beta_used_raw": -0.020907670259475708, "beta_dpo/gap_mean": 25.30891227722168, "beta_dpo/gap_std": 49.086795806884766, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.5675392670157068, "grad_norm": 40.63138961791992, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -0.7387904524803162, "logits/rejected": -0.7116048336029053, "loss": 5.1414, "step": 271 }, { "beta_dpo/beta_used": 0.026611195877194405, "beta_dpo/beta_used_raw": 0.016656765714287758, "beta_dpo/gap_mean": 28.21249771118164, "beta_dpo/gap_std": 49.86316680908203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5696335078534032, "grad_norm": 156.32347106933594, "learning_rate": 2.344485449913914e-07, "logits/chosen": -0.8664307594299316, "logits/rejected": -0.8278294205665588, "loss": 4.5218, "step": 272 }, { "beta_dpo/beta_used": 0.02900443784892559, "beta_dpo/beta_used_raw": -0.009947888553142548, "beta_dpo/gap_mean": 30.19207000732422, "beta_dpo/gap_std": 51.4546012878418, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5717277486910994, "grad_norm": 303.7254638671875, "learning_rate": 2.3262175892620062e-07, "logits/chosen": -0.8640813231468201, "logits/rejected": -0.8573806881904602, "loss": 4.7414, "step": 273 }, { "beta_dpo/beta_used": 0.05624593421816826, "beta_dpo/beta_used_raw": 0.053361114114522934, "beta_dpo/gap_mean": 32.530738830566406, "beta_dpo/gap_std": 51.59685516357422, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5738219895287958, "grad_norm": 273.17437744140625, "learning_rate": 2.3079590480333827e-07, "logits/chosen": -0.7935792207717896, "logits/rejected": -0.8075500726699829, "loss": 2.6873, "step": 274 }, { "beta_dpo/beta_used": 0.04389655217528343, "beta_dpo/beta_used_raw": 0.03967411816120148, "beta_dpo/gap_mean": 35.15380859375, "beta_dpo/gap_std": 50.761661529541016, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5759162303664922, "grad_norm": 142.54107666015625, "learning_rate": 2.2897108053782e-07, "logits/chosen": -0.836929202079773, "logits/rejected": -0.8122567534446716, "loss": 3.1636, "step": 275 }, { "beta_dpo/beta_used": 0.008040083572268486, "beta_dpo/beta_used_raw": -0.026715535670518875, "beta_dpo/gap_mean": 36.45258712768555, "beta_dpo/gap_std": 48.222740173339844, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5780104712041885, "grad_norm": 49.63078689575195, "learning_rate": 2.2714738398943308e-07, "logits/chosen": -0.9168733358383179, "logits/rejected": -0.8658912181854248, "loss": 4.7947, "step": 276 }, { "beta_dpo/beta_used": 0.017741093412041664, "beta_dpo/beta_used_raw": -0.005734635051339865, "beta_dpo/gap_mean": 30.747156143188477, "beta_dpo/gap_std": 49.511741638183594, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5801047120418849, "grad_norm": 129.92147827148438, "learning_rate": 2.2532491295748865e-07, "logits/chosen": -0.7629660367965698, "logits/rejected": -0.7584231495857239, "loss": 4.432, "step": 277 }, { "beta_dpo/beta_used": 0.03449155017733574, "beta_dpo/beta_used_raw": -0.003797696903347969, "beta_dpo/gap_mean": 27.227996826171875, "beta_dpo/gap_std": 50.867427825927734, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5821989528795811, "grad_norm": 177.40350341796875, "learning_rate": 2.2350376517557726e-07, "logits/chosen": -0.8415578603744507, "logits/rejected": -0.8428290486335754, "loss": 4.2994, "step": 278 }, { "beta_dpo/beta_used": 0.06249617412686348, "beta_dpo/beta_used_raw": 0.05501677840948105, "beta_dpo/gap_mean": 29.809844970703125, "beta_dpo/gap_std": 52.175148010253906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5842931937172775, "grad_norm": 182.45668029785156, "learning_rate": 2.2168403830632769e-07, "logits/chosen": -0.7722773551940918, "logits/rejected": -0.7824859619140625, "loss": 2.8122, "step": 279 }, { "beta_dpo/beta_used": 0.007684089243412018, "beta_dpo/beta_used_raw": -0.02332976460456848, "beta_dpo/gap_mean": 30.218582153320312, "beta_dpo/gap_std": 50.6556510925293, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.5863874345549738, "grad_norm": 57.70958709716797, "learning_rate": 2.1986582993616925e-07, "logits/chosen": -0.7730618715286255, "logits/rejected": -0.809870719909668, "loss": 5.0134, "step": 280 }, { "beta_dpo/beta_used": 0.00933461356908083, "beta_dpo/beta_used_raw": -0.045306965708732605, "beta_dpo/gap_mean": 30.127286911010742, "beta_dpo/gap_std": 51.82423782348633, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.5884816753926702, "grad_norm": 86.70292663574219, "learning_rate": 2.1804923757009882e-07, "logits/chosen": -0.7278214693069458, "logits/rejected": -0.7206936478614807, "loss": 5.0187, "step": 281 }, { "beta_dpo/beta_used": 0.028488921001553535, "beta_dpo/beta_used_raw": -0.000575296813622117, "beta_dpo/gap_mean": 29.9686336517334, "beta_dpo/gap_std": 53.73543167114258, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.5905759162303665, "grad_norm": 189.3360137939453, "learning_rate": 2.1623435862645205e-07, "logits/chosen": -0.9012278914451599, "logits/rejected": -0.833315372467041, "loss": 4.2414, "step": 282 }, { "beta_dpo/beta_used": 0.016119863837957382, "beta_dpo/beta_used_raw": -0.0076263779774308205, "beta_dpo/gap_mean": 28.940425872802734, "beta_dpo/gap_std": 52.418643951416016, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.5926701570680628, "grad_norm": 132.74644470214844, "learning_rate": 2.1442129043167873e-07, "logits/chosen": -0.8086240887641907, "logits/rejected": -0.7728883624076843, "loss": 4.6917, "step": 283 }, { "beta_dpo/beta_used": 0.02810695767402649, "beta_dpo/beta_used_raw": -0.01180135365575552, "beta_dpo/gap_mean": 30.99124526977539, "beta_dpo/gap_std": 53.4347038269043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5947643979057592, "grad_norm": 113.83843231201172, "learning_rate": 2.1261013021512378e-07, "logits/chosen": -0.7596749067306519, "logits/rejected": -0.7445765733718872, "loss": 4.6012, "step": 284 }, { "beta_dpo/beta_used": 0.02452005073428154, "beta_dpo/beta_used_raw": 0.0031681647524237633, "beta_dpo/gap_mean": 25.594776153564453, "beta_dpo/gap_std": 52.7824821472168, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.5968586387434555, "grad_norm": 155.6459503173828, "learning_rate": 2.1080097510381294e-07, "logits/chosen": -0.8476120233535767, "logits/rejected": -0.8108228445053101, "loss": 4.5369, "step": 285 }, { "beta_dpo/beta_used": 0.015153134241700172, "beta_dpo/beta_used_raw": -0.011404473334550858, "beta_dpo/gap_mean": 26.985084533691406, "beta_dpo/gap_std": 54.268184661865234, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.5989528795811518, "grad_norm": 122.56304931640625, "learning_rate": 2.089939221172446e-07, "logits/chosen": -0.812626838684082, "logits/rejected": -0.7711913585662842, "loss": 4.5918, "step": 286 }, { "beta_dpo/beta_used": 0.04627405107021332, "beta_dpo/beta_used_raw": 0.04036061465740204, "beta_dpo/gap_mean": 28.02764320373535, "beta_dpo/gap_std": 54.610694885253906, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6010471204188481, "grad_norm": 211.06204223632812, "learning_rate": 2.0718906816218595e-07, "logits/chosen": -0.8649301528930664, "logits/rejected": -0.8563531041145325, "loss": 3.8764, "step": 287 }, { "beta_dpo/beta_used": 0.0363273024559021, "beta_dpo/beta_used_raw": 0.0149660874158144, "beta_dpo/gap_mean": 25.59058380126953, "beta_dpo/gap_std": 52.901607513427734, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6031413612565445, "grad_norm": 245.04263305664062, "learning_rate": 2.053865100274774e-07, "logits/chosen": -0.8099507093429565, "logits/rejected": -0.7958436608314514, "loss": 4.1567, "step": 288 }, { "beta_dpo/beta_used": 0.02292640507221222, "beta_dpo/beta_used_raw": -0.006416676566004753, "beta_dpo/gap_mean": 23.788057327270508, "beta_dpo/gap_std": 52.41061782836914, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6052356020942409, "grad_norm": 123.16299438476562, "learning_rate": 2.035863443788411e-07, "logits/chosen": -0.8208717703819275, "logits/rejected": -0.8096261620521545, "loss": 4.6924, "step": 289 }, { "beta_dpo/beta_used": 0.011839738115668297, "beta_dpo/beta_used_raw": -0.04058264195919037, "beta_dpo/gap_mean": 24.799976348876953, "beta_dpo/gap_std": 51.84151077270508, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6073298429319371, "grad_norm": 104.04353332519531, "learning_rate": 2.0178866775369774e-07, "logits/chosen": -0.7509340047836304, "logits/rejected": -0.7044723629951477, "loss": 4.8929, "step": 290 }, { "beta_dpo/beta_used": 0.03051171451807022, "beta_dpo/beta_used_raw": 0.014080343768000603, "beta_dpo/gap_mean": 26.96507453918457, "beta_dpo/gap_std": 52.527767181396484, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6094240837696335, "grad_norm": 175.74583435058594, "learning_rate": 1.9999357655598891e-07, "logits/chosen": -0.7986388802528381, "logits/rejected": -0.8011342287063599, "loss": 4.2739, "step": 291 }, { "beta_dpo/beta_used": 0.03412974625825882, "beta_dpo/beta_used_raw": 0.020118406042456627, "beta_dpo/gap_mean": 27.24551010131836, "beta_dpo/gap_std": 51.756317138671875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6115183246073298, "grad_norm": 189.43963623046875, "learning_rate": 1.9820116705100775e-07, "logits/chosen": -0.8060983419418335, "logits/rejected": -0.7809661030769348, "loss": 3.5835, "step": 292 }, { "beta_dpo/beta_used": 0.04781736806035042, "beta_dpo/beta_used_raw": 0.021636206656694412, "beta_dpo/gap_mean": 28.056617736816406, "beta_dpo/gap_std": 53.96324920654297, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6136125654450262, "grad_norm": 324.6336669921875, "learning_rate": 1.9641153536023642e-07, "logits/chosen": -0.9069850444793701, "logits/rejected": -0.7866148948669434, "loss": 4.2395, "step": 293 }, { "beta_dpo/beta_used": 0.02367311529815197, "beta_dpo/beta_used_raw": -0.020730314776301384, "beta_dpo/gap_mean": 27.37270736694336, "beta_dpo/gap_std": 53.96466064453125, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6157068062827226, "grad_norm": 273.607421875, "learning_rate": 1.9462477745619106e-07, "logits/chosen": -0.9232648611068726, "logits/rejected": -0.8572964668273926, "loss": 5.1925, "step": 294 }, { "beta_dpo/beta_used": 0.05513071268796921, "beta_dpo/beta_used_raw": 0.0484839528799057, "beta_dpo/gap_mean": 27.121583938598633, "beta_dpo/gap_std": 53.563331604003906, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.6178010471204188, "grad_norm": 294.5126647949219, "learning_rate": 1.928409891572757e-07, "logits/chosen": -0.7520920038223267, "logits/rejected": -0.7938590049743652, "loss": 4.2212, "step": 295 }, { "beta_dpo/beta_used": 0.0544293075799942, "beta_dpo/beta_used_raw": 0.042427390813827515, "beta_dpo/gap_mean": 32.12763214111328, "beta_dpo/gap_std": 54.309146881103516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6198952879581152, "grad_norm": 404.3480529785156, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -0.8700776696205139, "logits/rejected": -0.8367108702659607, "loss": 4.1609, "step": 296 }, { "beta_dpo/beta_used": 0.011236435733735561, "beta_dpo/beta_used_raw": -0.024106943979859352, "beta_dpo/gap_mean": 31.98879051208496, "beta_dpo/gap_std": 54.55412292480469, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6219895287958115, "grad_norm": 132.0416717529297, "learning_rate": 1.8928270384706582e-07, "logits/chosen": -0.8638625741004944, "logits/rejected": -0.870927095413208, "loss": 5.1839, "step": 297 }, { "beta_dpo/beta_used": 0.04521133750677109, "beta_dpo/beta_used_raw": 0.024881090968847275, "beta_dpo/gap_mean": 29.161760330200195, "beta_dpo/gap_std": 54.410213470458984, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.6240837696335079, "grad_norm": 303.2014465332031, "learning_rate": 1.875083976558136e-07, "logits/chosen": -0.9359617829322815, "logits/rejected": -0.894604504108429, "loss": 5.0044, "step": 298 }, { "beta_dpo/beta_used": 0.03626459464430809, "beta_dpo/beta_used_raw": -0.00014625024050474167, "beta_dpo/gap_mean": 28.30124282836914, "beta_dpo/gap_std": 53.62518310546875, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6261780104712041, "grad_norm": 139.76968383789062, "learning_rate": 1.8573744269954297e-07, "logits/chosen": -0.7561138868331909, "logits/rejected": -0.7259418368339539, "loss": 4.1468, "step": 299 }, { "beta_dpo/beta_used": 0.0271303653717041, "beta_dpo/beta_used_raw": -0.004675944335758686, "beta_dpo/gap_mean": 28.45832633972168, "beta_dpo/gap_std": 51.58424377441406, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6282722513089005, "grad_norm": 269.99761962890625, "learning_rate": 1.839699339491937e-07, "logits/chosen": -0.7935373783111572, "logits/rejected": -0.8128796815872192, "loss": 4.8479, "step": 300 }, { "beta_dpo/beta_used": 0.02316497452557087, "beta_dpo/beta_used_raw": -0.008034785278141499, "beta_dpo/gap_mean": 27.73406982421875, "beta_dpo/gap_std": 52.02341079711914, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6303664921465969, "grad_norm": 137.60336303710938, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -0.8512569665908813, "logits/rejected": -0.8470555543899536, "loss": 4.1674, "step": 301 }, { "beta_dpo/beta_used": 0.033527493476867676, "beta_dpo/beta_used_raw": 0.005997128784656525, "beta_dpo/gap_mean": 29.30136489868164, "beta_dpo/gap_std": 49.16413497924805, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6324607329842932, "grad_norm": 190.3458709716797, "learning_rate": 1.8044563402088682e-07, "logits/chosen": -0.7430394291877747, "logits/rejected": -0.726094126701355, "loss": 4.1724, "step": 302 }, { "beta_dpo/beta_used": 0.05859103798866272, "beta_dpo/beta_used_raw": 0.053058795630931854, "beta_dpo/gap_mean": 28.67943572998047, "beta_dpo/gap_std": 50.48307418823242, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6345549738219896, "grad_norm": 523.866943359375, "learning_rate": 1.7868903184043885e-07, "logits/chosen": -0.8114441633224487, "logits/rejected": -0.7551754117012024, "loss": 3.9045, "step": 303 }, { "beta_dpo/beta_used": 0.014694188721477985, "beta_dpo/beta_used_raw": 0.00047776661813259125, "beta_dpo/gap_mean": 30.76772689819336, "beta_dpo/gap_std": 52.48418426513672, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6366492146596858, "grad_norm": 182.02586364746094, "learning_rate": 1.7693625385079574e-07, "logits/chosen": -0.7341251373291016, "logits/rejected": -0.7772490978240967, "loss": 5.0507, "step": 304 }, { "beta_dpo/beta_used": 0.023470664396882057, "beta_dpo/beta_used_raw": 0.006333658471703529, "beta_dpo/gap_mean": 35.33549118041992, "beta_dpo/gap_std": 51.53257751464844, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6387434554973822, "grad_norm": 80.4178695678711, "learning_rate": 1.7518739404812155e-07, "logits/chosen": -0.8776407837867737, "logits/rejected": -0.8734537363052368, "loss": 4.187, "step": 305 }, { "beta_dpo/beta_used": 0.019906463101506233, "beta_dpo/beta_used_raw": -0.0246460922062397, "beta_dpo/gap_mean": 35.897613525390625, "beta_dpo/gap_std": 51.13701629638672, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6408376963350786, "grad_norm": 180.6241912841797, "learning_rate": 1.7344254621846017e-07, "logits/chosen": -0.7335799932479858, "logits/rejected": -0.7366300225257874, "loss": 4.9526, "step": 306 }, { "beta_dpo/beta_used": 0.03133513033390045, "beta_dpo/beta_used_raw": 0.015121620148420334, "beta_dpo/gap_mean": 31.835227966308594, "beta_dpo/gap_std": 49.21765899658203, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.6429319371727749, "grad_norm": 140.04568481445312, "learning_rate": 1.717018039327053e-07, "logits/chosen": -0.7875911593437195, "logits/rejected": -0.8351340889930725, "loss": 3.6112, "step": 307 }, { "beta_dpo/beta_used": 0.01386056188493967, "beta_dpo/beta_used_raw": -0.003249811939895153, "beta_dpo/gap_mean": 30.477500915527344, "beta_dpo/gap_std": 48.171607971191406, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6450261780104712, "grad_norm": 77.68309020996094, "learning_rate": 1.699652605415828e-07, "logits/chosen": -0.7763692140579224, "logits/rejected": -0.7668969631195068, "loss": 4.5155, "step": 308 }, { "beta_dpo/beta_used": 0.0570245087146759, "beta_dpo/beta_used_raw": 0.0544467568397522, "beta_dpo/gap_mean": 28.889652252197266, "beta_dpo/gap_std": 51.812313079833984, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6471204188481675, "grad_norm": 348.3558654785156, "learning_rate": 1.6823300917064458e-07, "logits/chosen": -0.9028000831604004, "logits/rejected": -0.9401339888572693, "loss": 3.8498, "step": 309 }, { "beta_dpo/beta_used": 0.02267904207110405, "beta_dpo/beta_used_raw": -0.012671604752540588, "beta_dpo/gap_mean": 30.174596786499023, "beta_dpo/gap_std": 52.781192779541016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6492146596858639, "grad_norm": 177.11566162109375, "learning_rate": 1.6650514271527465e-07, "logits/chosen": -0.7747019529342651, "logits/rejected": -0.7555006146430969, "loss": 4.7697, "step": 310 }, { "beta_dpo/beta_used": 0.0282583124935627, "beta_dpo/beta_used_raw": -0.015359479002654552, "beta_dpo/gap_mean": 32.193870544433594, "beta_dpo/gap_std": 50.84648513793945, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6513089005235602, "grad_norm": 194.98880004882812, "learning_rate": 1.647817538357072e-07, "logits/chosen": -0.7590238451957703, "logits/rejected": -0.752559244632721, "loss": 4.6913, "step": 311 }, { "beta_dpo/beta_used": 0.02696000412106514, "beta_dpo/beta_used_raw": -0.0136557100340724, "beta_dpo/gap_mean": 31.40416145324707, "beta_dpo/gap_std": 54.36616516113281, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6534031413612565, "grad_norm": 502.6810607910156, "learning_rate": 1.6306293495205755e-07, "logits/chosen": -0.8581979274749756, "logits/rejected": -0.8272500038146973, "loss": 4.5039, "step": 312 }, { "beta_dpo/beta_used": 0.02222803235054016, "beta_dpo/beta_used_raw": -0.005380367860198021, "beta_dpo/gap_mean": 28.833663940429688, "beta_dpo/gap_std": 54.704952239990234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6554973821989529, "grad_norm": 152.86302185058594, "learning_rate": 1.6134877823936607e-07, "logits/chosen": -0.8324103355407715, "logits/rejected": -0.8865740299224854, "loss": 5.0221, "step": 313 }, { "beta_dpo/beta_used": 0.04997220262885094, "beta_dpo/beta_used_raw": 0.045812323689460754, "beta_dpo/gap_mean": 29.58029556274414, "beta_dpo/gap_std": 53.47450637817383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6575916230366492, "grad_norm": 239.0943145751953, "learning_rate": 1.5963937562265522e-07, "logits/chosen": -0.806653618812561, "logits/rejected": -0.7868531346321106, "loss": 4.296, "step": 314 }, { "beta_dpo/beta_used": 0.03027864173054695, "beta_dpo/beta_used_raw": 0.01615685038268566, "beta_dpo/gap_mean": 32.16781997680664, "beta_dpo/gap_std": 53.18808364868164, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6596858638743456, "grad_norm": 158.3759307861328, "learning_rate": 1.5793481877199943e-07, "logits/chosen": -0.842742919921875, "logits/rejected": -0.8674212694168091, "loss": 3.9229, "step": 315 }, { "beta_dpo/beta_used": 0.015697987750172615, "beta_dpo/beta_used_raw": -0.022454766556620598, "beta_dpo/gap_mean": 33.68966293334961, "beta_dpo/gap_std": 55.241519927978516, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6617801047120419, "grad_norm": 124.0105209350586, "learning_rate": 1.562351990976095e-07, "logits/chosen": -0.7664201259613037, "logits/rejected": -0.805154025554657, "loss": 4.9355, "step": 316 }, { "beta_dpo/beta_used": 0.027155417948961258, "beta_dpo/beta_used_raw": 0.006031029857695103, "beta_dpo/gap_mean": 32.50454330444336, "beta_dpo/gap_std": 53.5350341796875, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.6638743455497382, "grad_norm": 162.24105834960938, "learning_rate": 1.5454060774493065e-07, "logits/chosen": -0.8139037489891052, "logits/rejected": -0.77301025390625, "loss": 4.884, "step": 317 }, { "beta_dpo/beta_used": 0.030678538605570793, "beta_dpo/beta_used_raw": 0.004475907888263464, "beta_dpo/gap_mean": 31.712360382080078, "beta_dpo/gap_std": 49.18507766723633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6659685863874345, "grad_norm": 121.8013916015625, "learning_rate": 1.5285113558975427e-07, "logits/chosen": -0.7728986740112305, "logits/rejected": -0.7226128578186035, "loss": 4.1183, "step": 318 }, { "beta_dpo/beta_used": 0.029083475470542908, "beta_dpo/beta_used_raw": 0.004641437903046608, "beta_dpo/gap_mean": 34.69441223144531, "beta_dpo/gap_std": 49.81436538696289, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6680628272251309, "grad_norm": 92.8158187866211, "learning_rate": 1.5116687323334464e-07, "logits/chosen": -0.8575960993766785, "logits/rejected": -0.8856627345085144, "loss": 4.0625, "step": 319 }, { "beta_dpo/beta_used": 0.02023179829120636, "beta_dpo/beta_used_raw": -0.003970830701291561, "beta_dpo/gap_mean": 33.205867767333984, "beta_dpo/gap_std": 51.83220291137695, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6701570680628273, "grad_norm": 137.70501708984375, "learning_rate": 1.4948791099758052e-07, "logits/chosen": -0.8294675350189209, "logits/rejected": -0.8444851040840149, "loss": 4.3916, "step": 320 }, { "beta_dpo/beta_used": 0.01353040337562561, "beta_dpo/beta_used_raw": -0.03319290652871132, "beta_dpo/gap_mean": 28.161727905273438, "beta_dpo/gap_std": 52.91798400878906, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6722513089005235, "grad_norm": 137.2801513671875, "learning_rate": 1.478143389201113e-07, "logits/chosen": -0.8113803267478943, "logits/rejected": -0.7403082847595215, "loss": 4.6654, "step": 321 }, { "beta_dpo/beta_used": 0.031200017780065536, "beta_dpo/beta_used_raw": -0.0010120943188667297, "beta_dpo/gap_mean": 26.58349609375, "beta_dpo/gap_std": 53.48532485961914, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6743455497382199, "grad_norm": 241.01341247558594, "learning_rate": 1.461462467495284e-07, "logits/chosen": -0.7803442478179932, "logits/rejected": -0.7769550085067749, "loss": 5.2722, "step": 322 }, { "beta_dpo/beta_used": 0.010985669679939747, "beta_dpo/beta_used_raw": -0.015756428241729736, "beta_dpo/gap_mean": 26.728092193603516, "beta_dpo/gap_std": 53.64677047729492, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6764397905759162, "grad_norm": 100.47509002685547, "learning_rate": 1.4448372394055246e-07, "logits/chosen": -0.9064484238624573, "logits/rejected": -0.8854697346687317, "loss": 4.6664, "step": 323 }, { "beta_dpo/beta_used": 0.05118248984217644, "beta_dpo/beta_used_raw": 0.042741917073726654, "beta_dpo/gap_mean": 29.620723724365234, "beta_dpo/gap_std": 51.27871322631836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6785340314136126, "grad_norm": 227.78439331054688, "learning_rate": 1.428268596492364e-07, "logits/chosen": -0.8729650974273682, "logits/rejected": -0.8735213875770569, "loss": 3.7416, "step": 324 }, { "beta_dpo/beta_used": 0.023902013897895813, "beta_dpo/beta_used_raw": -0.004822437651455402, "beta_dpo/gap_mean": 32.345211029052734, "beta_dpo/gap_std": 51.50432586669922, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.680628272251309, "grad_norm": 370.4063415527344, "learning_rate": 1.4117574272818386e-07, "logits/chosen": -0.8013263940811157, "logits/rejected": -0.7928324341773987, "loss": 5.074, "step": 325 }, { "beta_dpo/beta_used": 0.008493431843817234, "beta_dpo/beta_used_raw": -0.04024779424071312, "beta_dpo/gap_mean": 30.87372589111328, "beta_dpo/gap_std": 53.50398254394531, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.6827225130890052, "grad_norm": 121.52214050292969, "learning_rate": 1.3953046172178413e-07, "logits/chosen": -0.8273008465766907, "logits/rejected": -0.8141711950302124, "loss": 5.1887, "step": 326 }, { "beta_dpo/beta_used": 0.03533978387713432, "beta_dpo/beta_used_raw": 0.012006538920104504, "beta_dpo/gap_mean": 31.25798988342285, "beta_dpo/gap_std": 53.022621154785156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6848167539267016, "grad_norm": 248.8169403076172, "learning_rate": 1.3789110486146468e-07, "logits/chosen": -0.8114765882492065, "logits/rejected": -0.7771793603897095, "loss": 3.8881, "step": 327 }, { "beta_dpo/beta_used": 0.017740879207849503, "beta_dpo/beta_used_raw": -0.01438824087381363, "beta_dpo/gap_mean": 33.111385345458984, "beta_dpo/gap_std": 50.42515563964844, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6869109947643979, "grad_norm": 92.58521270751953, "learning_rate": 1.362577600609588e-07, "logits/chosen": -0.8299423456192017, "logits/rejected": -0.8702976703643799, "loss": 4.0943, "step": 328 }, { "beta_dpo/beta_used": 0.01667260378599167, "beta_dpo/beta_used_raw": -0.009871412068605423, "beta_dpo/gap_mean": 30.09588623046875, "beta_dpo/gap_std": 52.19231033325195, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.6890052356020943, "grad_norm": 138.4306182861328, "learning_rate": 1.3463051491159093e-07, "logits/chosen": -0.7766485810279846, "logits/rejected": -0.8675934076309204, "loss": 4.911, "step": 329 }, { "beta_dpo/beta_used": 0.046647775918245316, "beta_dpo/beta_used_raw": 0.04041110351681709, "beta_dpo/gap_mean": 29.8335018157959, "beta_dpo/gap_std": 55.980369567871094, "beta_dpo/mask_keep_frac": 0.5625, "epoch": 0.6910994764397905, "grad_norm": 1010.2858276367188, "learning_rate": 1.3300945667758012e-07, "logits/chosen": -0.8615760207176208, "logits/rejected": -0.8630913496017456, "loss": 4.2895, "step": 330 }, { "beta_dpo/beta_used": 0.02816726081073284, "beta_dpo/beta_used_raw": -0.0015003189910203218, "beta_dpo/gap_mean": 31.772533416748047, "beta_dpo/gap_std": 55.0521354675293, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6931937172774869, "grad_norm": 259.1372375488281, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -0.8795358538627625, "logits/rejected": -0.8674964904785156, "loss": 4.7036, "step": 331 }, { "beta_dpo/beta_used": 0.039203815162181854, "beta_dpo/beta_used_raw": 0.01063997857272625, "beta_dpo/gap_mean": 33.736488342285156, "beta_dpo/gap_std": 56.953426361083984, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.6952879581151833, "grad_norm": 263.4537658691406, "learning_rate": 1.2978624834891626e-07, "logits/chosen": -0.9462342262268066, "logits/rejected": -0.9176090955734253, "loss": 4.1015, "step": 332 }, { "beta_dpo/beta_used": 0.002037803176790476, "beta_dpo/beta_used_raw": -0.032407838851213455, "beta_dpo/gap_mean": 30.212459564208984, "beta_dpo/gap_std": 55.63782501220703, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.6973821989528796, "grad_norm": 22.458953857421875, "learning_rate": 1.281842711051438e-07, "logits/chosen": -0.8374227285385132, "logits/rejected": -0.780229389667511, "loss": 5.3569, "step": 333 }, { "beta_dpo/beta_used": 0.04165830835700035, "beta_dpo/beta_used_raw": 0.03052227571606636, "beta_dpo/gap_mean": 29.47317123413086, "beta_dpo/gap_std": 53.91261672973633, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6994764397905759, "grad_norm": 258.20318603515625, "learning_rate": 1.2658882646922033e-07, "logits/chosen": -0.8327507376670837, "logits/rejected": -0.790196418762207, "loss": 4.3341, "step": 334 }, { "beta_dpo/beta_used": 0.023221183568239212, "beta_dpo/beta_used_raw": -0.035209063440561295, "beta_dpo/gap_mean": 32.27169418334961, "beta_dpo/gap_std": 54.47612762451172, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7015706806282722, "grad_norm": 174.3998565673828, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -0.818577229976654, "logits/rejected": -0.8766403198242188, "loss": 4.3985, "step": 335 }, { "beta_dpo/beta_used": 0.011233292520046234, "beta_dpo/beta_used_raw": -0.022189803421497345, "beta_dpo/gap_mean": 29.108884811401367, "beta_dpo/gap_std": 56.85524368286133, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.7036649214659686, "grad_norm": 100.67388153076172, "learning_rate": 1.2341787690142435e-07, "logits/chosen": -0.7078570127487183, "logits/rejected": -0.739229142665863, "loss": 4.9627, "step": 336 }, { "beta_dpo/beta_used": 0.039661239832639694, "beta_dpo/beta_used_raw": 0.014130711555480957, "beta_dpo/gap_mean": 30.064481735229492, "beta_dpo/gap_std": 55.913970947265625, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7057591623036649, "grad_norm": 278.1079406738281, "learning_rate": 1.2184254201795363e-07, "logits/chosen": -0.8292222023010254, "logits/rejected": -0.7518793940544128, "loss": 4.3712, "step": 337 }, { "beta_dpo/beta_used": 0.029370369389653206, "beta_dpo/beta_used_raw": 0.012822807766497135, "beta_dpo/gap_mean": 33.81048583984375, "beta_dpo/gap_std": 54.04378890991211, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7078534031413612, "grad_norm": 194.63438415527344, "learning_rate": 1.202740798300168e-07, "logits/chosen": -0.8475313782691956, "logits/rejected": -0.8578289151191711, "loss": 4.1704, "step": 338 }, { "beta_dpo/beta_used": 0.06615243852138519, "beta_dpo/beta_used_raw": 0.06248940899968147, "beta_dpo/gap_mean": 34.177696228027344, "beta_dpo/gap_std": 56.06435012817383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7099476439790576, "grad_norm": 368.3795471191406, "learning_rate": 1.1871257444948096e-07, "logits/chosen": -0.9380159974098206, "logits/rejected": -0.9480760097503662, "loss": 4.0919, "step": 339 }, { "beta_dpo/beta_used": 0.013038999401032925, "beta_dpo/beta_used_raw": -0.0053863683715462685, "beta_dpo/gap_mean": 33.19333267211914, "beta_dpo/gap_std": 59.489295959472656, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7120418848167539, "grad_norm": 138.34683227539062, "learning_rate": 1.1715810961514072e-07, "logits/chosen": -0.7801686525344849, "logits/rejected": -0.7577068209648132, "loss": 4.9209, "step": 340 }, { "beta_dpo/beta_used": 0.035000525414943695, "beta_dpo/beta_used_raw": 0.011500047519803047, "beta_dpo/gap_mean": 28.83623504638672, "beta_dpo/gap_std": 58.50289535522461, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.7141361256544503, "grad_norm": 218.66903686523438, "learning_rate": 1.1561076868822755e-07, "logits/chosen": -0.9182481169700623, "logits/rejected": -0.8721767067909241, "loss": 4.6165, "step": 341 }, { "beta_dpo/beta_used": 0.024583449587225914, "beta_dpo/beta_used_raw": 0.018431413918733597, "beta_dpo/gap_mean": 28.589534759521484, "beta_dpo/gap_std": 57.362159729003906, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7162303664921466, "grad_norm": 233.12059020996094, "learning_rate": 1.1407063464793965e-07, "logits/chosen": -0.7965834736824036, "logits/rejected": -0.8243657946586609, "loss": 4.7252, "step": 342 }, { "beta_dpo/beta_used": 0.025925535708665848, "beta_dpo/beta_used_raw": -0.011598478071391582, "beta_dpo/gap_mean": 28.507904052734375, "beta_dpo/gap_std": 55.28282928466797, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7183246073298429, "grad_norm": 255.1661376953125, "learning_rate": 1.125377900869913e-07, "logits/chosen": -0.900759756565094, "logits/rejected": -0.8987997174263, "loss": 5.1938, "step": 343 }, { "beta_dpo/beta_used": 0.05067792162299156, "beta_dpo/beta_used_raw": 0.023350853472948074, "beta_dpo/gap_mean": 28.617340087890625, "beta_dpo/gap_std": 56.286258697509766, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.7204188481675393, "grad_norm": 359.28851318359375, "learning_rate": 1.110123172071844e-07, "logits/chosen": -0.7748513221740723, "logits/rejected": -0.7623203992843628, "loss": 5.1968, "step": 344 }, { "beta_dpo/beta_used": 0.033438149839639664, "beta_dpo/beta_used_raw": -0.00029300153255462646, "beta_dpo/gap_mean": 30.098384857177734, "beta_dpo/gap_std": 53.45401382446289, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.7225130890052356, "grad_norm": 310.5905456542969, "learning_rate": 1.09494297815e-07, "logits/chosen": -0.8768536448478699, "logits/rejected": -0.8476714491844177, "loss": 4.7303, "step": 345 }, { "beta_dpo/beta_used": 0.05225639045238495, "beta_dpo/beta_used_raw": 0.04538067430257797, "beta_dpo/gap_mean": 30.668237686157227, "beta_dpo/gap_std": 52.24396896362305, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.724607329842932, "grad_norm": 311.78192138671875, "learning_rate": 1.0798381331721107e-07, "logits/chosen": -0.9083431959152222, "logits/rejected": -0.8552351593971252, "loss": 3.8896, "step": 346 }, { "beta_dpo/beta_used": 0.024577973410487175, "beta_dpo/beta_used_raw": 0.0004575531929731369, "beta_dpo/gap_mean": 31.580842971801758, "beta_dpo/gap_std": 51.64503479003906, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7267015706806282, "grad_norm": 154.31671142578125, "learning_rate": 1.0648094471651722e-07, "logits/chosen": -0.7399212121963501, "logits/rejected": -0.8290560841560364, "loss": 4.3639, "step": 347 }, { "beta_dpo/beta_used": 0.014230488799512386, "beta_dpo/beta_used_raw": -0.048038601875305176, "beta_dpo/gap_mean": 27.234729766845703, "beta_dpo/gap_std": 49.23517990112305, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7287958115183246, "grad_norm": 184.28305053710938, "learning_rate": 1.0498577260720048e-07, "logits/chosen": -0.9388685822486877, "logits/rejected": -0.9415339231491089, "loss": 5.0665, "step": 348 }, { "beta_dpo/beta_used": 0.050268374383449554, "beta_dpo/beta_used_raw": 0.031838420778512955, "beta_dpo/gap_mean": 30.112083435058594, "beta_dpo/gap_std": 55.729190826416016, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7308900523560209, "grad_norm": 378.33599853515625, "learning_rate": 1.0349837717080347e-07, "logits/chosen": -0.9334988594055176, "logits/rejected": -0.8848183751106262, "loss": 3.9407, "step": 349 }, { "beta_dpo/beta_used": 0.0406358428299427, "beta_dpo/beta_used_raw": 0.006889470852911472, "beta_dpo/gap_mean": 31.848020553588867, "beta_dpo/gap_std": 54.54989242553711, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7329842931937173, "grad_norm": 509.0325012207031, "learning_rate": 1.0201883817182949e-07, "logits/chosen": -0.8780160546302795, "logits/rejected": -0.8359534740447998, "loss": 4.1489, "step": 350 }, { "beta_dpo/beta_used": 0.012384520843625069, "beta_dpo/beta_used_raw": -0.029308203607797623, "beta_dpo/gap_mean": 28.5808162689209, "beta_dpo/gap_std": 55.44742965698242, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7350785340314137, "grad_norm": 124.26021575927734, "learning_rate": 1.0054723495346482e-07, "logits/chosen": -0.9024979472160339, "logits/rejected": -0.9018498063087463, "loss": 4.9646, "step": 351 }, { "beta_dpo/beta_used": 0.051346320658922195, "beta_dpo/beta_used_raw": 0.04155290499329567, "beta_dpo/gap_mean": 31.388181686401367, "beta_dpo/gap_std": 56.486900329589844, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7371727748691099, "grad_norm": 481.62066650390625, "learning_rate": 9.908364643332398e-08, "logits/chosen": -0.8058483600616455, "logits/rejected": -0.7557932734489441, "loss": 4.6374, "step": 352 }, { "beta_dpo/beta_used": 0.03087581694126129, "beta_dpo/beta_used_raw": 0.0010065771639347076, "beta_dpo/gap_mean": 33.28788375854492, "beta_dpo/gap_std": 54.57392883300781, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7392670157068063, "grad_norm": 174.88623046875, "learning_rate": 9.76281510992176e-08, "logits/chosen": -0.7731785774230957, "logits/rejected": -0.8036521673202515, "loss": 4.1953, "step": 353 }, { "beta_dpo/beta_used": 0.013481578789651394, "beta_dpo/beta_used_raw": -0.023063668981194496, "beta_dpo/gap_mean": 29.690311431884766, "beta_dpo/gap_std": 55.14631271362305, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7413612565445026, "grad_norm": 227.94309997558594, "learning_rate": 9.618082700494318e-08, "logits/chosen": -0.741845428943634, "logits/rejected": -0.778709352016449, "loss": 5.882, "step": 354 }, { "beta_dpo/beta_used": 0.06290622055530548, "beta_dpo/beta_used_raw": 0.06290622055530548, "beta_dpo/gap_mean": 31.194143295288086, "beta_dpo/gap_std": 57.11370849609375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.743455497382199, "grad_norm": 247.5913543701172, "learning_rate": 9.474175176609956e-08, "logits/chosen": -0.9444049596786499, "logits/rejected": -0.9045993089675903, "loss": 3.1331, "step": 355 }, { "beta_dpo/beta_used": 0.03636765852570534, "beta_dpo/beta_used_raw": 0.013633275404572487, "beta_dpo/gap_mean": 28.3127498626709, "beta_dpo/gap_std": 50.623878479003906, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.7455497382198953, "grad_norm": 214.78062438964844, "learning_rate": 9.331100255592436e-08, "logits/chosen": -0.8152442574501038, "logits/rejected": -0.8466963171958923, "loss": 4.2956, "step": 356 }, { "beta_dpo/beta_used": 0.027711525559425354, "beta_dpo/beta_used_raw": 0.011670958250761032, "beta_dpo/gap_mean": 28.688819885253906, "beta_dpo/gap_std": 51.74197006225586, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7476439790575916, "grad_norm": 158.7490234375, "learning_rate": 9.18886561011557e-08, "logits/chosen": -0.7832672595977783, "logits/rejected": -0.74955153465271, "loss": 3.9111, "step": 357 }, { "beta_dpo/beta_used": 0.024180788546800613, "beta_dpo/beta_used_raw": 0.008531359024345875, "beta_dpo/gap_mean": 33.06235122680664, "beta_dpo/gap_std": 52.99840545654297, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.749738219895288, "grad_norm": 165.2462615966797, "learning_rate": 9.047478867791731e-08, "logits/chosen": -0.8677491545677185, "logits/rejected": -0.838107168674469, "loss": 4.3925, "step": 358 }, { "beta_dpo/beta_used": 0.02725430205464363, "beta_dpo/beta_used_raw": 0.005270563997328281, "beta_dpo/gap_mean": 33.42242431640625, "beta_dpo/gap_std": 51.58427810668945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7518324607329843, "grad_norm": 216.0394287109375, "learning_rate": 8.906947610762825e-08, "logits/chosen": -0.8172123432159424, "logits/rejected": -0.849665105342865, "loss": 4.5131, "step": 359 }, { "beta_dpo/beta_used": 0.013111414387822151, "beta_dpo/beta_used_raw": 0.0025145215913653374, "beta_dpo/gap_mean": 31.21525764465332, "beta_dpo/gap_std": 54.58356857299805, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.7539267015706806, "grad_norm": 114.65906524658203, "learning_rate": 8.76727937529367e-08, "logits/chosen": -0.9042258262634277, "logits/rejected": -0.9122740626335144, "loss": 4.4779, "step": 360 }, { "beta_dpo/beta_used": 0.03473525866866112, "beta_dpo/beta_used_raw": 0.028849830850958824, "beta_dpo/gap_mean": 31.66191291809082, "beta_dpo/gap_std": 55.895851135253906, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7560209424083769, "grad_norm": 128.73867797851562, "learning_rate": 8.628481651367875e-08, "logits/chosen": -0.8746165633201599, "logits/rejected": -0.8471811413764954, "loss": 3.525, "step": 361 }, { "beta_dpo/beta_used": 0.03337887302041054, "beta_dpo/beta_used_raw": 0.015036560595035553, "beta_dpo/gap_mean": 33.18673324584961, "beta_dpo/gap_std": 54.25856018066406, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.7581151832460733, "grad_norm": 265.6235046386719, "learning_rate": 8.490561882286135e-08, "logits/chosen": -0.8912657499313354, "logits/rejected": -0.8793244957923889, "loss": 3.8266, "step": 362 }, { "beta_dpo/beta_used": 0.0334957093000412, "beta_dpo/beta_used_raw": 0.019749773666262627, "beta_dpo/gap_mean": 32.70677947998047, "beta_dpo/gap_std": 54.238922119140625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7602094240837697, "grad_norm": 328.0040588378906, "learning_rate": 8.353527464267104e-08, "logits/chosen": -0.8557516932487488, "logits/rejected": -0.8278414011001587, "loss": 4.4351, "step": 363 }, { "beta_dpo/beta_used": 0.019932106137275696, "beta_dpo/beta_used_raw": -0.02457229606807232, "beta_dpo/gap_mean": 31.01894760131836, "beta_dpo/gap_std": 54.44854736328125, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.762303664921466, "grad_norm": 89.25292205810547, "learning_rate": 8.217385746050742e-08, "logits/chosen": -0.8707149624824524, "logits/rejected": -0.8504204750061035, "loss": 4.7876, "step": 364 }, { "beta_dpo/beta_used": 0.052917227149009705, "beta_dpo/beta_used_raw": 0.04524911195039749, "beta_dpo/gap_mean": 28.029312133789062, "beta_dpo/gap_std": 55.016151428222656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7643979057591623, "grad_norm": 375.6981506347656, "learning_rate": 8.082144028504231e-08, "logits/chosen": -0.8357688188552856, "logits/rejected": -0.8424769639968872, "loss": 4.549, "step": 365 }, { "beta_dpo/beta_used": 0.023991985246539116, "beta_dpo/beta_used_raw": -0.00716618075966835, "beta_dpo/gap_mean": 30.980024337768555, "beta_dpo/gap_std": 55.70692443847656, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7664921465968586, "grad_norm": 168.83290100097656, "learning_rate": 7.947809564230445e-08, "logits/chosen": -0.8632270693778992, "logits/rejected": -0.8815495371818542, "loss": 4.2886, "step": 366 }, { "beta_dpo/beta_used": 0.024156922474503517, "beta_dpo/beta_used_raw": -0.014136096462607384, "beta_dpo/gap_mean": 32.812950134277344, "beta_dpo/gap_std": 54.38077163696289, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.768586387434555, "grad_norm": 272.86541748046875, "learning_rate": 7.814389557179016e-08, "logits/chosen": -0.8426069021224976, "logits/rejected": -0.7946543097496033, "loss": 4.6307, "step": 367 }, { "beta_dpo/beta_used": 0.05431270971894264, "beta_dpo/beta_used_raw": 0.0433184877038002, "beta_dpo/gap_mean": 35.47528839111328, "beta_dpo/gap_std": 52.5758171081543, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7706806282722513, "grad_norm": 146.9598388671875, "learning_rate": 7.681891162260015e-08, "logits/chosen": -0.9334856271743774, "logits/rejected": -0.9025843739509583, "loss": 2.8818, "step": 368 }, { "beta_dpo/beta_used": 0.024843934923410416, "beta_dpo/beta_used_raw": -0.02314029261469841, "beta_dpo/gap_mean": 37.284950256347656, "beta_dpo/gap_std": 48.017791748046875, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.7727748691099476, "grad_norm": 99.7154541015625, "learning_rate": 7.550321484960251e-08, "logits/chosen": -0.850791335105896, "logits/rejected": -0.816204845905304, "loss": 4.526, "step": 369 }, { "beta_dpo/beta_used": 0.005622061900794506, "beta_dpo/beta_used_raw": -0.02220618724822998, "beta_dpo/gap_mean": 36.12443161010742, "beta_dpo/gap_std": 49.77077102661133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.774869109947644, "grad_norm": 41.49360275268555, "learning_rate": 7.419687580962222e-08, "logits/chosen": -0.8648772239685059, "logits/rejected": -0.9024683237075806, "loss": 4.9406, "step": 370 }, { "beta_dpo/beta_used": 0.006420304998755455, "beta_dpo/beta_used_raw": -0.028947679325938225, "beta_dpo/gap_mean": 30.36486053466797, "beta_dpo/gap_std": 51.136146545410156, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.7769633507853403, "grad_norm": 59.23979568481445, "learning_rate": 7.289996455765748e-08, "logits/chosen": -0.741977870464325, "logits/rejected": -0.7357773184776306, "loss": 4.9714, "step": 371 }, { "beta_dpo/beta_used": 0.06715603172779083, "beta_dpo/beta_used_raw": 0.047685518860816956, "beta_dpo/gap_mean": 32.393035888671875, "beta_dpo/gap_std": 50.679080963134766, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7790575916230367, "grad_norm": 455.41925048828125, "learning_rate": 7.161255064312283e-08, "logits/chosen": -0.8044797778129578, "logits/rejected": -0.7840807437896729, "loss": 4.6727, "step": 372 }, { "beta_dpo/beta_used": 0.018992407247424126, "beta_dpo/beta_used_raw": -0.0017184526659548283, "beta_dpo/gap_mean": 33.258968353271484, "beta_dpo/gap_std": 49.465057373046875, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7811518324607329, "grad_norm": 222.24200439453125, "learning_rate": 7.033470310611945e-08, "logits/chosen": -0.8912656903266907, "logits/rejected": -0.8498582243919373, "loss": 5.1636, "step": 373 }, { "beta_dpo/beta_used": 0.005610483232885599, "beta_dpo/beta_used_raw": -0.04910598695278168, "beta_dpo/gap_mean": 31.699514389038086, "beta_dpo/gap_std": 52.40116500854492, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7832460732984293, "grad_norm": 51.2022590637207, "learning_rate": 6.906649047373245e-08, "logits/chosen": -0.820667028427124, "logits/rejected": -0.8256031274795532, "loss": 5.1379, "step": 374 }, { "beta_dpo/beta_used": 0.024863161146640778, "beta_dpo/beta_used_raw": -0.014078973792493343, "beta_dpo/gap_mean": 28.686037063598633, "beta_dpo/gap_std": 51.921531677246094, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7853403141361257, "grad_norm": 157.56956481933594, "learning_rate": 6.780798075635675e-08, "logits/chosen": -0.861274242401123, "logits/rejected": -0.8295719623565674, "loss": 4.3679, "step": 375 }, { "beta_dpo/beta_used": 0.021679656580090523, "beta_dpo/beta_used_raw": -0.001047454308718443, "beta_dpo/gap_mean": 28.755699157714844, "beta_dpo/gap_std": 52.53461837768555, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.787434554973822, "grad_norm": 120.26818084716797, "learning_rate": 6.655924144404906e-08, "logits/chosen": -0.7974970936775208, "logits/rejected": -0.754688024520874, "loss": 4.3403, "step": 376 }, { "beta_dpo/beta_used": 0.021915648132562637, "beta_dpo/beta_used_raw": -0.01294963899999857, "beta_dpo/gap_mean": 26.834131240844727, "beta_dpo/gap_std": 52.551292419433594, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7895287958115184, "grad_norm": 222.95492553710938, "learning_rate": 6.532033950290885e-08, "logits/chosen": -0.9012744426727295, "logits/rejected": -0.8887965679168701, "loss": 4.7781, "step": 377 }, { "beta_dpo/beta_used": 0.011818885803222656, "beta_dpo/beta_used_raw": -0.029823636636137962, "beta_dpo/gap_mean": 26.240825653076172, "beta_dpo/gap_std": 51.9726448059082, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.7916230366492146, "grad_norm": 142.94700622558594, "learning_rate": 6.409134137148736e-08, "logits/chosen": -0.8205504417419434, "logits/rejected": -0.826806366443634, "loss": 5.265, "step": 378 }, { "beta_dpo/beta_used": 0.038683511316776276, "beta_dpo/beta_used_raw": 0.015086468309164047, "beta_dpo/gap_mean": 28.403867721557617, "beta_dpo/gap_std": 53.254478454589844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.793717277486911, "grad_norm": 217.25274658203125, "learning_rate": 6.28723129572247e-08, "logits/chosen": -0.8288396596908569, "logits/rejected": -0.8588307499885559, "loss": 4.2979, "step": 379 }, { "beta_dpo/beta_used": 0.017481593415141106, "beta_dpo/beta_used_raw": -0.003103232476860285, "beta_dpo/gap_mean": 29.25552749633789, "beta_dpo/gap_std": 53.82293701171875, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7958115183246073, "grad_norm": 157.94027709960938, "learning_rate": 6.166331963291519e-08, "logits/chosen": -0.8616006970405579, "logits/rejected": -0.8570124506950378, "loss": 5.0083, "step": 380 }, { "beta_dpo/beta_used": 0.021431434899568558, "beta_dpo/beta_used_raw": -0.011747539043426514, "beta_dpo/gap_mean": 29.78434181213379, "beta_dpo/gap_std": 51.756473541259766, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.7979057591623037, "grad_norm": 144.8572235107422, "learning_rate": 6.046442623320145e-08, "logits/chosen": -0.8431472182273865, "logits/rejected": -0.7634297013282776, "loss": 4.6818, "step": 381 }, { "beta_dpo/beta_used": 0.03567413240671158, "beta_dpo/beta_used_raw": 0.008900219574570656, "beta_dpo/gap_mean": 31.605493545532227, "beta_dpo/gap_std": 50.421817779541016, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8, "grad_norm": 190.073974609375, "learning_rate": 5.9275697051098275e-08, "logits/chosen": -0.8556850552558899, "logits/rejected": -0.8041601777076721, "loss": 4.0047, "step": 382 }, { "beta_dpo/beta_used": 0.020303381606936455, "beta_dpo/beta_used_raw": 0.004482526797801256, "beta_dpo/gap_mean": 33.393123626708984, "beta_dpo/gap_std": 50.67055130004883, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.8020942408376963, "grad_norm": 126.13748931884766, "learning_rate": 5.809719583454414e-08, "logits/chosen": -0.788833737373352, "logits/rejected": -0.7815289497375488, "loss": 4.2619, "step": 383 }, { "beta_dpo/beta_used": 0.01302328985184431, "beta_dpo/beta_used_raw": -0.01569559797644615, "beta_dpo/gap_mean": 30.79790687561035, "beta_dpo/gap_std": 50.971168518066406, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8041884816753927, "grad_norm": 199.42294311523438, "learning_rate": 5.6928985782982524e-08, "logits/chosen": -0.8755144476890564, "logits/rejected": -0.8719990253448486, "loss": 5.012, "step": 384 }, { "beta_dpo/beta_used": 0.017824744805693626, "beta_dpo/beta_used_raw": -0.004108890891075134, "beta_dpo/gap_mean": 30.42023277282715, "beta_dpo/gap_std": 50.25197219848633, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.806282722513089, "grad_norm": 223.4486083984375, "learning_rate": 5.57711295439732e-08, "logits/chosen": -0.8377327919006348, "logits/rejected": -0.8308869004249573, "loss": 4.8747, "step": 385 }, { "beta_dpo/beta_used": 0.046246424317359924, "beta_dpo/beta_used_raw": 0.02471497654914856, "beta_dpo/gap_mean": 34.329776763916016, "beta_dpo/gap_std": 49.33695983886719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8083769633507853, "grad_norm": 221.65078735351562, "learning_rate": 5.4623689209832484e-08, "logits/chosen": -0.7701154947280884, "logits/rejected": -0.8202899694442749, "loss": 3.8539, "step": 386 }, { "beta_dpo/beta_used": 0.04278576001524925, "beta_dpo/beta_used_raw": 0.015627289190888405, "beta_dpo/gap_mean": 31.348127365112305, "beta_dpo/gap_std": 50.26094055175781, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8104712041884817, "grad_norm": 83.0886459350586, "learning_rate": 5.3486726314303175e-08, "logits/chosen": -0.8732501864433289, "logits/rejected": -0.8548240661621094, "loss": 3.9074, "step": 387 }, { "beta_dpo/beta_used": 0.009247594512999058, "beta_dpo/beta_used_raw": -0.018486540764570236, "beta_dpo/gap_mean": 29.602096557617188, "beta_dpo/gap_std": 50.2357177734375, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.812565445026178, "grad_norm": 58.26310348510742, "learning_rate": 5.2360301829254745e-08, "logits/chosen": -0.9190385937690735, "logits/rejected": -0.884000301361084, "loss": 4.9807, "step": 388 }, { "beta_dpo/beta_used": 0.03028152696788311, "beta_dpo/beta_used_raw": -0.0006860191933810711, "beta_dpo/gap_mean": 27.959213256835938, "beta_dpo/gap_std": 51.936866760253906, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8146596858638744, "grad_norm": 152.3852081298828, "learning_rate": 5.1244476161413806e-08, "logits/chosen": -0.8672448396682739, "logits/rejected": -0.8208280205726624, "loss": 4.512, "step": 389 }, { "beta_dpo/beta_used": 0.02013925462961197, "beta_dpo/beta_used_raw": 0.012077848426997662, "beta_dpo/gap_mean": 29.23447608947754, "beta_dpo/gap_std": 51.4747314453125, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.8167539267015707, "grad_norm": 197.6220245361328, "learning_rate": 5.013930914912476e-08, "logits/chosen": -0.837507963180542, "logits/rejected": -0.8486427664756775, "loss": 4.7944, "step": 390 }, { "beta_dpo/beta_used": 0.012308573350310326, "beta_dpo/beta_used_raw": -0.06008676812052727, "beta_dpo/gap_mean": 30.96744155883789, "beta_dpo/gap_std": 51.099151611328125, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.818848167539267, "grad_norm": 130.78782653808594, "learning_rate": 4.904486005914027e-08, "logits/chosen": -0.8092834949493408, "logits/rejected": -0.7616171836853027, "loss": 5.218, "step": 391 }, { "beta_dpo/beta_used": 0.03401728719472885, "beta_dpo/beta_used_raw": 0.01575944572687149, "beta_dpo/gap_mean": 35.89379119873047, "beta_dpo/gap_std": 50.69645690917969, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.8209424083769633, "grad_norm": 141.2838134765625, "learning_rate": 4.796118758344353e-08, "logits/chosen": -0.8125319480895996, "logits/rejected": -0.7968068718910217, "loss": 3.9905, "step": 392 }, { "beta_dpo/beta_used": 0.029492482542991638, "beta_dpo/beta_used_raw": 0.006723019294440746, "beta_dpo/gap_mean": 31.739521026611328, "beta_dpo/gap_std": 51.30779266357422, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8230366492146597, "grad_norm": 219.48927307128906, "learning_rate": 4.688834983610082e-08, "logits/chosen": -0.7747592926025391, "logits/rejected": -0.7800062894821167, "loss": 4.3227, "step": 393 }, { "beta_dpo/beta_used": 0.006166150793433189, "beta_dpo/beta_used_raw": -0.024336861446499825, "beta_dpo/gap_mean": 31.60442543029785, "beta_dpo/gap_std": 52.29357147216797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8251308900523561, "grad_norm": 46.31927490234375, "learning_rate": 4.582640435014459e-08, "logits/chosen": -0.8091763257980347, "logits/rejected": -0.8224099278450012, "loss": 5.0968, "step": 394 }, { "beta_dpo/beta_used": 0.036968886852264404, "beta_dpo/beta_used_raw": 0.023283787071704865, "beta_dpo/gap_mean": 30.08101463317871, "beta_dpo/gap_std": 49.931846618652344, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8272251308900523, "grad_norm": 206.76434326171875, "learning_rate": 4.477540807448832e-08, "logits/chosen": -0.8666899800300598, "logits/rejected": -0.9089019894599915, "loss": 3.6018, "step": 395 }, { "beta_dpo/beta_used": 0.02417484112083912, "beta_dpo/beta_used_raw": -0.01262733619660139, "beta_dpo/gap_mean": 32.86610412597656, "beta_dpo/gap_std": 49.70528793334961, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.8293193717277487, "grad_norm": 233.59030151367188, "learning_rate": 4.373541737087263e-08, "logits/chosen": -0.822211503982544, "logits/rejected": -0.8186702728271484, "loss": 4.8537, "step": 396 }, { "beta_dpo/beta_used": 0.02938215062022209, "beta_dpo/beta_used_raw": -0.01723414473235607, "beta_dpo/gap_mean": 31.259389877319336, "beta_dpo/gap_std": 48.74763870239258, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.831413612565445, "grad_norm": 204.6764373779297, "learning_rate": 4.270648801084295e-08, "logits/chosen": -0.9242237210273743, "logits/rejected": -0.914775013923645, "loss": 4.705, "step": 397 }, { "beta_dpo/beta_used": 0.028000906109809875, "beta_dpo/beta_used_raw": 0.01324938703328371, "beta_dpo/gap_mean": 28.033884048461914, "beta_dpo/gap_std": 53.956783294677734, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8335078534031414, "grad_norm": 272.271240234375, "learning_rate": 4.168867517275806e-08, "logits/chosen": -0.7791767120361328, "logits/rejected": -0.832636296749115, "loss": 4.4102, "step": 398 }, { "beta_dpo/beta_used": 0.040391743183135986, "beta_dpo/beta_used_raw": 0.029338005930185318, "beta_dpo/gap_mean": 26.80057716369629, "beta_dpo/gap_std": 53.54316711425781, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8356020942408376, "grad_norm": 146.1692352294922, "learning_rate": 4.0682033438831584e-08, "logits/chosen": -0.8662706613540649, "logits/rejected": -0.8145262002944946, "loss": 3.8751, "step": 399 }, { "beta_dpo/beta_used": 0.0640939474105835, "beta_dpo/beta_used_raw": 0.046954307705163956, "beta_dpo/gap_mean": 27.499759674072266, "beta_dpo/gap_std": 49.925628662109375, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.837696335078534, "grad_norm": 327.8544921875, "learning_rate": 3.968661679220467e-08, "logits/chosen": -0.9200219511985779, "logits/rejected": -0.9016293883323669, "loss": 3.5281, "step": 400 }, { "epoch": 0.837696335078534, "eval_beta_dpo/beta_used": 0.043008919805288315, "eval_beta_dpo/beta_used_raw": 0.020729079842567444, "eval_beta_dpo/gap_mean": 28.022653579711914, "eval_beta_dpo/gap_std": 50.3673095703125, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.8453658223152161, "eval_logits/rejected": -0.8282322883605957, "eval_loss": 0.6356604099273682, "eval_runtime": 81.5313, "eval_samples_per_second": 24.53, "eval_steps_per_second": 1.533, "step": 400 }, { "beta_dpo/beta_used": 0.025461485609412193, "beta_dpo/beta_used_raw": -0.004204742610454559, "beta_dpo/gap_mean": 29.487524032592773, "beta_dpo/gap_std": 50.156776428222656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8397905759162304, "grad_norm": 56.73976135253906, "learning_rate": 3.8702478614051345e-08, "logits/chosen": -0.7528951168060303, "logits/rejected": -0.719306230545044, "loss": 4.5596, "step": 401 }, { "beta_dpo/beta_used": 0.02623908221721649, "beta_dpo/beta_used_raw": 0.017582345753908157, "beta_dpo/gap_mean": 31.024076461791992, "beta_dpo/gap_std": 52.295101165771484, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8418848167539267, "grad_norm": 356.29595947265625, "learning_rate": 3.772967168071517e-08, "logits/chosen": -0.8574113845825195, "logits/rejected": -0.8025684356689453, "loss": 4.5168, "step": 402 }, { "beta_dpo/beta_used": 0.042898863554000854, "beta_dpo/beta_used_raw": 0.03765055909752846, "beta_dpo/gap_mean": 34.200382232666016, "beta_dpo/gap_std": 48.579872131347656, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.8439790575916231, "grad_norm": 138.32400512695312, "learning_rate": 3.676824816087978e-08, "logits/chosen": -0.7763471603393555, "logits/rejected": -0.7996782064437866, "loss": 3.3116, "step": 403 }, { "beta_dpo/beta_used": 0.0233171284198761, "beta_dpo/beta_used_raw": 0.0011256425641477108, "beta_dpo/gap_mean": 35.22697448730469, "beta_dpo/gap_std": 51.0013427734375, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8460732984293193, "grad_norm": 113.2969741821289, "learning_rate": 3.581825961277074e-08, "logits/chosen": -0.8510360717773438, "logits/rejected": -0.8215500116348267, "loss": 4.0111, "step": 404 }, { "beta_dpo/beta_used": 0.012892654165625572, "beta_dpo/beta_used_raw": 0.0009398059919476509, "beta_dpo/gap_mean": 34.50669860839844, "beta_dpo/gap_std": 52.5545654296875, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8481675392670157, "grad_norm": 106.28509521484375, "learning_rate": 3.487975698139084e-08, "logits/chosen": -0.6867244839668274, "logits/rejected": -0.677395761013031, "loss": 4.3154, "step": 405 }, { "beta_dpo/beta_used": 0.023643236607313156, "beta_dpo/beta_used_raw": -0.009170491248369217, "beta_dpo/gap_mean": 29.635848999023438, "beta_dpo/gap_std": 49.92266082763672, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8502617801047121, "grad_norm": 134.22201538085938, "learning_rate": 3.3952790595787986e-08, "logits/chosen": -0.8843967318534851, "logits/rejected": -0.8679218888282776, "loss": 4.8186, "step": 406 }, { "beta_dpo/beta_used": 0.028158362954854965, "beta_dpo/beta_used_raw": 0.01929015852510929, "beta_dpo/gap_mean": 29.419769287109375, "beta_dpo/gap_std": 50.9369010925293, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.8523560209424084, "grad_norm": 164.86927795410156, "learning_rate": 3.303741016635614e-08, "logits/chosen": -0.8338272571563721, "logits/rejected": -0.8456038236618042, "loss": 3.7483, "step": 407 }, { "beta_dpo/beta_used": 0.04387975111603737, "beta_dpo/beta_used_raw": 0.024929020553827286, "beta_dpo/gap_mean": 29.930322647094727, "beta_dpo/gap_std": 50.4144287109375, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8544502617801047, "grad_norm": 474.077880859375, "learning_rate": 3.2133664782169944e-08, "logits/chosen": -0.8612761497497559, "logits/rejected": -0.7689127326011658, "loss": 4.2207, "step": 408 }, { "beta_dpo/beta_used": 0.020363079383969307, "beta_dpo/beta_used_raw": 0.00438337679952383, "beta_dpo/gap_mean": 32.19656753540039, "beta_dpo/gap_std": 51.08381652832031, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.856544502617801, "grad_norm": 94.34365844726562, "learning_rate": 3.12416029083514e-08, "logits/chosen": -0.7993679642677307, "logits/rejected": -0.8109673261642456, "loss": 4.2298, "step": 409 }, { "beta_dpo/beta_used": 0.012268463149666786, "beta_dpo/beta_used_raw": -0.01718856208026409, "beta_dpo/gap_mean": 28.865314483642578, "beta_dpo/gap_std": 51.235557556152344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8586387434554974, "grad_norm": 82.62427520751953, "learning_rate": 3.036127238347164e-08, "logits/chosen": -0.8782304525375366, "logits/rejected": -0.8800264596939087, "loss": 4.8676, "step": 410 }, { "beta_dpo/beta_used": 0.03287056088447571, "beta_dpo/beta_used_raw": 0.008828896097838879, "beta_dpo/gap_mean": 31.09811019897461, "beta_dpo/gap_std": 50.671939849853516, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8607329842931937, "grad_norm": 111.97496032714844, "learning_rate": 2.9492720416985e-08, "logits/chosen": -0.7707123756408691, "logits/rejected": -0.7606396675109863, "loss": 3.8463, "step": 411 }, { "beta_dpo/beta_used": 0.02247859537601471, "beta_dpo/beta_used_raw": -0.003850158303976059, "beta_dpo/gap_mean": 30.756423950195312, "beta_dpo/gap_std": 50.8740119934082, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.86282722513089, "grad_norm": 121.05892181396484, "learning_rate": 2.863599358669755e-08, "logits/chosen": -0.779039740562439, "logits/rejected": -0.7793789505958557, "loss": 4.4319, "step": 412 }, { "beta_dpo/beta_used": 0.03222234919667244, "beta_dpo/beta_used_raw": 0.015333538874983788, "beta_dpo/gap_mean": 28.6728458404541, "beta_dpo/gap_std": 49.384368896484375, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8649214659685864, "grad_norm": 162.366455078125, "learning_rate": 2.7791137836269158e-08, "logits/chosen": -0.9385237097740173, "logits/rejected": -0.9121523499488831, "loss": 4.167, "step": 413 }, { "beta_dpo/beta_used": 0.017970332875847816, "beta_dpo/beta_used_raw": -0.019473586231470108, "beta_dpo/gap_mean": 29.9796085357666, "beta_dpo/gap_std": 50.113468170166016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8670157068062827, "grad_norm": 80.46460723876953, "learning_rate": 2.6958198472749717e-08, "logits/chosen": -0.9034287929534912, "logits/rejected": -0.855298638343811, "loss": 4.5007, "step": 414 }, { "beta_dpo/beta_used": 0.07080215215682983, "beta_dpo/beta_used_raw": 0.06397496908903122, "beta_dpo/gap_mean": 31.179443359375, "beta_dpo/gap_std": 48.66398239135742, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8691099476439791, "grad_norm": 204.76092529296875, "learning_rate": 2.613722016414943e-08, "logits/chosen": -0.8139724731445312, "logits/rejected": -0.7881863117218018, "loss": 2.9196, "step": 415 }, { "beta_dpo/beta_used": 0.03896103799343109, "beta_dpo/beta_used_raw": 0.02134716510772705, "beta_dpo/gap_mean": 34.836082458496094, "beta_dpo/gap_std": 50.03068923950195, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8712041884816754, "grad_norm": 203.65744018554688, "learning_rate": 2.5328246937043525e-08, "logits/chosen": -0.9353795647621155, "logits/rejected": -0.8975551128387451, "loss": 3.9669, "step": 416 }, { "beta_dpo/beta_used": 0.03526991605758667, "beta_dpo/beta_used_raw": -0.01772877387702465, "beta_dpo/gap_mean": 32.672035217285156, "beta_dpo/gap_std": 49.94234085083008, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.8732984293193717, "grad_norm": 189.84410095214844, "learning_rate": 2.4531322174210973e-08, "logits/chosen": -0.756232738494873, "logits/rejected": -0.8090646266937256, "loss": 4.3281, "step": 417 }, { "beta_dpo/beta_used": 0.045910660177469254, "beta_dpo/beta_used_raw": 0.014944255352020264, "beta_dpo/gap_mean": 30.950489044189453, "beta_dpo/gap_std": 51.23707580566406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.875392670157068, "grad_norm": 164.94105529785156, "learning_rate": 2.3746488612308295e-08, "logits/chosen": -0.8820661306381226, "logits/rejected": -0.8479762077331543, "loss": 3.3815, "step": 418 }, { "beta_dpo/beta_used": 0.05247935280203819, "beta_dpo/beta_used_raw": 0.038780488073825836, "beta_dpo/gap_mean": 31.49786376953125, "beta_dpo/gap_std": 52.62058639526367, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8774869109947644, "grad_norm": 147.83372497558594, "learning_rate": 2.297378833957761e-08, "logits/chosen": -0.7841131091117859, "logits/rejected": -0.7802114486694336, "loss": 3.6582, "step": 419 }, { "beta_dpo/beta_used": 0.057858943939208984, "beta_dpo/beta_used_raw": 0.04030502960085869, "beta_dpo/gap_mean": 34.97361755371094, "beta_dpo/gap_std": 55.68037033081055, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8795811518324608, "grad_norm": 268.9122619628906, "learning_rate": 2.2213262793589482e-08, "logits/chosen": -0.7773014903068542, "logits/rejected": -0.7394383549690247, "loss": 3.8373, "step": 420 }, { "beta_dpo/beta_used": 0.05588060989975929, "beta_dpo/beta_used_raw": 0.027968432754278183, "beta_dpo/gap_mean": 35.70938491821289, "beta_dpo/gap_std": 53.80148696899414, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.881675392670157, "grad_norm": 263.6426086425781, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -0.9263103008270264, "logits/rejected": -0.9025065898895264, "loss": 3.7191, "step": 421 }, { "beta_dpo/beta_used": 0.013131741434335709, "beta_dpo/beta_used_raw": -0.032616935670375824, "beta_dpo/gap_mean": 32.63302230834961, "beta_dpo/gap_std": 54.2334098815918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8837696335078534, "grad_norm": 111.65141296386719, "learning_rate": 2.07288983654679e-08, "logits/chosen": -0.7539777755737305, "logits/rejected": -0.7705018520355225, "loss": 4.7393, "step": 422 }, { "beta_dpo/beta_used": 0.04880265146493912, "beta_dpo/beta_used_raw": 0.021930556744337082, "beta_dpo/gap_mean": 33.534523010253906, "beta_dpo/gap_std": 52.5704460144043, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8858638743455497, "grad_norm": 376.0655822753906, "learning_rate": 2.0005139085293942e-08, "logits/chosen": -0.8840563893318176, "logits/rejected": -0.8793922662734985, "loss": 4.27, "step": 423 }, { "beta_dpo/beta_used": 0.02224777452647686, "beta_dpo/beta_used_raw": 0.0034151384606957436, "beta_dpo/gap_mean": 34.246089935302734, "beta_dpo/gap_std": 52.21100616455078, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8879581151832461, "grad_norm": 141.1042022705078, "learning_rate": 1.9293713731512673e-08, "logits/chosen": -0.8222829103469849, "logits/rejected": -0.8296815156936646, "loss": 3.9918, "step": 424 }, { "beta_dpo/beta_used": 0.03644920140504837, "beta_dpo/beta_used_raw": -0.014164052903652191, "beta_dpo/gap_mean": 32.60451889038086, "beta_dpo/gap_std": 50.56034851074219, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.8900523560209425, "grad_norm": 150.81195068359375, "learning_rate": 1.8594660455706763e-08, "logits/chosen": -0.8337830901145935, "logits/rejected": -0.8451286554336548, "loss": 3.8765, "step": 425 }, { "beta_dpo/beta_used": 0.03593583405017853, "beta_dpo/beta_used_raw": 0.02168644592165947, "beta_dpo/gap_mean": 29.087791442871094, "beta_dpo/gap_std": 49.60078048706055, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8921465968586387, "grad_norm": 134.13816833496094, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -0.7210733294487, "logits/rejected": -0.7480963468551636, "loss": 4.2344, "step": 426 }, { "beta_dpo/beta_used": 0.0667373538017273, "beta_dpo/beta_used_raw": 0.033293262124061584, "beta_dpo/gap_mean": 31.312068939208984, "beta_dpo/gap_std": 51.69874572753906, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.8942408376963351, "grad_norm": 339.73394775390625, "learning_rate": 1.7233819424956247e-08, "logits/chosen": -0.8241250514984131, "logits/rejected": -0.7590780854225159, "loss": 4.1269, "step": 427 }, { "beta_dpo/beta_used": 0.03810206055641174, "beta_dpo/beta_used_raw": 0.005916805937886238, "beta_dpo/gap_mean": 38.2218017578125, "beta_dpo/gap_std": 51.52684020996094, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.8963350785340314, "grad_norm": 648.3778076171875, "learning_rate": 1.6572104647786245e-08, "logits/chosen": -0.7526270747184753, "logits/rejected": -0.8342408537864685, "loss": 4.9188, "step": 428 }, { "beta_dpo/beta_used": 0.02025276981294155, "beta_dpo/beta_used_raw": -0.01976284198462963, "beta_dpo/gap_mean": 36.52273178100586, "beta_dpo/gap_std": 54.76076126098633, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.8984293193717278, "grad_norm": 89.35426330566406, "learning_rate": 1.5922907900227017e-08, "logits/chosen": -0.7855672240257263, "logits/rejected": -0.769487202167511, "loss": 4.5233, "step": 429 }, { "beta_dpo/beta_used": 0.010954808443784714, "beta_dpo/beta_used_raw": -0.015571440570056438, "beta_dpo/gap_mean": 34.89046859741211, "beta_dpo/gap_std": 51.79176712036133, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.900523560209424, "grad_norm": 66.42906188964844, "learning_rate": 1.5286263996730026e-08, "logits/chosen": -0.9020602703094482, "logits/rejected": -0.799609899520874, "loss": 4.4628, "step": 430 }, { "beta_dpo/beta_used": 0.004862995818257332, "beta_dpo/beta_used_raw": -0.036979954689741135, "beta_dpo/gap_mean": 29.470109939575195, "beta_dpo/gap_std": 50.87688446044922, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9026178010471204, "grad_norm": 47.393733978271484, "learning_rate": 1.4662207078575684e-08, "logits/chosen": -0.8049024939537048, "logits/rejected": -0.772520899772644, "loss": 5.1653, "step": 431 }, { "beta_dpo/beta_used": 0.029558269307017326, "beta_dpo/beta_used_raw": 0.020929085090756416, "beta_dpo/gap_mean": 32.15821838378906, "beta_dpo/gap_std": 52.068603515625, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.9047120418848168, "grad_norm": 300.16351318359375, "learning_rate": 1.40507706120426e-08, "logits/chosen": -0.8398734331130981, "logits/rejected": -0.8560636639595032, "loss": 4.2815, "step": 432 }, { "beta_dpo/beta_used": 0.024735111743211746, "beta_dpo/beta_used_raw": 0.006575713399797678, "beta_dpo/gap_mean": 31.19025230407715, "beta_dpo/gap_std": 52.5582389831543, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9068062827225131, "grad_norm": 110.4648666381836, "learning_rate": 1.345198738661285e-08, "logits/chosen": -0.840786337852478, "logits/rejected": -0.8298450708389282, "loss": 4.0054, "step": 433 }, { "beta_dpo/beta_used": 0.017018688842654228, "beta_dpo/beta_used_raw": -0.01911812275648117, "beta_dpo/gap_mean": 28.489105224609375, "beta_dpo/gap_std": 50.24304962158203, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9089005235602095, "grad_norm": 142.97439575195312, "learning_rate": 1.2865889513213628e-08, "logits/chosen": -0.8282724618911743, "logits/rejected": -0.8246201276779175, "loss": 4.5609, "step": 434 }, { "beta_dpo/beta_used": 0.02615453489124775, "beta_dpo/beta_used_raw": 0.0022685863077640533, "beta_dpo/gap_mean": 30.370590209960938, "beta_dpo/gap_std": 50.549224853515625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9109947643979057, "grad_norm": 173.58056640625, "learning_rate": 1.2292508422495157e-08, "logits/chosen": -0.8596353530883789, "logits/rejected": -0.8763912916183472, "loss": 4.6802, "step": 435 }, { "beta_dpo/beta_used": 0.021672368049621582, "beta_dpo/beta_used_raw": -0.016893737018108368, "beta_dpo/gap_mean": 29.583505630493164, "beta_dpo/gap_std": 53.356544494628906, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9130890052356021, "grad_norm": 171.13877868652344, "learning_rate": 1.1731874863145142e-08, "logits/chosen": -0.7855619192123413, "logits/rejected": -0.8202630877494812, "loss": 4.4808, "step": 436 }, { "beta_dpo/beta_used": 0.024419579654932022, "beta_dpo/beta_used_raw": 0.019063415005803108, "beta_dpo/gap_mean": 30.05594253540039, "beta_dpo/gap_std": 54.0589485168457, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9151832460732985, "grad_norm": 139.63742065429688, "learning_rate": 1.118401890024001e-08, "logits/chosen": -0.8779160976409912, "logits/rejected": -0.850941002368927, "loss": 4.1159, "step": 437 }, { "beta_dpo/beta_used": 0.012894796207547188, "beta_dpo/beta_used_raw": -0.03390258550643921, "beta_dpo/gap_mean": 26.959020614624023, "beta_dpo/gap_std": 53.31471252441406, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9172774869109948, "grad_norm": 74.52314758300781, "learning_rate": 1.06489699136324e-08, "logits/chosen": -0.8029293417930603, "logits/rejected": -0.807404100894928, "loss": 4.9337, "step": 438 }, { "beta_dpo/beta_used": 0.041374292224645615, "beta_dpo/beta_used_raw": 0.02538049779832363, "beta_dpo/gap_mean": 26.866544723510742, "beta_dpo/gap_std": 51.9473876953125, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9193717277486911, "grad_norm": 281.1230163574219, "learning_rate": 1.0126756596375685e-08, "logits/chosen": -0.8005992770195007, "logits/rejected": -0.8386653065681458, "loss": 4.2462, "step": 439 }, { "beta_dpo/beta_used": 0.020926889032125473, "beta_dpo/beta_used_raw": -0.01957480050623417, "beta_dpo/gap_mean": 25.91887092590332, "beta_dpo/gap_std": 47.49887466430664, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9214659685863874, "grad_norm": 170.86585998535156, "learning_rate": 9.617406953185136e-09, "logits/chosen": -0.7887669801712036, "logits/rejected": -0.786566972732544, "loss": 4.7906, "step": 440 }, { "beta_dpo/beta_used": 0.040106188505887985, "beta_dpo/beta_used_raw": 0.017551787197589874, "beta_dpo/gap_mean": 27.56520652770996, "beta_dpo/gap_std": 48.65106964111328, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9235602094240838, "grad_norm": 185.98721313476562, "learning_rate": 9.12094829893642e-09, "logits/chosen": -0.8935746550559998, "logits/rejected": -0.8328600525856018, "loss": 4.5269, "step": 441 }, { "beta_dpo/beta_used": 0.028513526543974876, "beta_dpo/beta_used_raw": -0.0015279550570994616, "beta_dpo/gap_mean": 30.204608917236328, "beta_dpo/gap_std": 50.07164764404297, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9256544502617801, "grad_norm": 216.73915100097656, "learning_rate": 8.637407257200496e-09, "logits/chosen": -0.8646829724311829, "logits/rejected": -0.8786430954933167, "loss": 4.3975, "step": 442 }, { "beta_dpo/beta_used": 0.04191158711910248, "beta_dpo/beta_used_raw": 0.0261215940117836, "beta_dpo/gap_mean": 28.124242782592773, "beta_dpo/gap_std": 48.77510070800781, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.9277486910994764, "grad_norm": 246.431640625, "learning_rate": 8.166809758815895e-09, "logits/chosen": -0.7345380783081055, "logits/rejected": -0.8072965145111084, "loss": 4.057, "step": 443 }, { "beta_dpo/beta_used": 0.03024943172931671, "beta_dpo/beta_used_raw": 0.010058403015136719, "beta_dpo/gap_mean": 31.88334846496582, "beta_dpo/gap_std": 50.78257369995117, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9298429319371728, "grad_norm": 217.31375122070312, "learning_rate": 7.709181040498253e-09, "logits/chosen": -0.7552200555801392, "logits/rejected": -0.730567991733551, "loss": 4.2506, "step": 444 }, { "beta_dpo/beta_used": 0.04256928712129593, "beta_dpo/beta_used_raw": -0.013574687764048576, "beta_dpo/gap_mean": 29.146665573120117, "beta_dpo/gap_std": 53.06696701049805, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9319371727748691, "grad_norm": 349.0355529785156, "learning_rate": 7.2645456434869965e-09, "logits/chosen": -0.8601400256156921, "logits/rejected": -0.8750321865081787, "loss": 4.3467, "step": 445 }, { "beta_dpo/beta_used": 0.02674350142478943, "beta_dpo/beta_used_raw": 0.010028916411101818, "beta_dpo/gap_mean": 32.160865783691406, "beta_dpo/gap_std": 53.44306564331055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9340314136125655, "grad_norm": 164.8477325439453, "learning_rate": 6.832927412229017e-09, "logits/chosen": -0.7708781361579895, "logits/rejected": -0.7476394772529602, "loss": 4.01, "step": 446 }, { "beta_dpo/beta_used": 0.027856381610035896, "beta_dpo/beta_used_raw": 0.017823830246925354, "beta_dpo/gap_mean": 33.03885269165039, "beta_dpo/gap_std": 49.568260192871094, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9361256544502617, "grad_norm": 131.04490661621094, "learning_rate": 6.414349493100129e-09, "logits/chosen": -0.8864074349403381, "logits/rejected": -0.8868736624717712, "loss": 3.8027, "step": 447 }, { "beta_dpo/beta_used": 0.034370094537734985, "beta_dpo/beta_used_raw": 0.008034870028495789, "beta_dpo/gap_mean": 32.461265563964844, "beta_dpo/gap_std": 48.22648239135742, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.9382198952879581, "grad_norm": 350.2030334472656, "learning_rate": 6.0088343331638756e-09, "logits/chosen": -0.8367605209350586, "logits/rejected": -0.8392966985702515, "loss": 3.9396, "step": 448 }, { "beta_dpo/beta_used": 0.04194016754627228, "beta_dpo/beta_used_raw": 0.03314446657896042, "beta_dpo/gap_mean": 32.78199005126953, "beta_dpo/gap_std": 49.67825698852539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9403141361256544, "grad_norm": 367.3363037109375, "learning_rate": 5.616403678967624e-09, "logits/chosen": -0.920991063117981, "logits/rejected": -0.8886154294013977, "loss": 3.4965, "step": 449 }, { "beta_dpo/beta_used": 0.005077804904431105, "beta_dpo/beta_used_raw": -0.03199779987335205, "beta_dpo/gap_mean": 33.04655456542969, "beta_dpo/gap_std": 46.908870697021484, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9424083769633508, "grad_norm": 56.5856819152832, "learning_rate": 5.2370785753763356e-09, "logits/chosen": -0.8422713875770569, "logits/rejected": -0.8291035890579224, "loss": 5.1412, "step": 450 }, { "beta_dpo/beta_used": 0.023415734991431236, "beta_dpo/beta_used_raw": -0.007750632241368294, "beta_dpo/gap_mean": 31.3007869720459, "beta_dpo/gap_std": 46.751678466796875, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9445026178010472, "grad_norm": 115.54217529296875, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -0.7868208885192871, "logits/rejected": -0.7739187479019165, "loss": 4.6009, "step": 451 }, { "beta_dpo/beta_used": 0.02031254954636097, "beta_dpo/beta_used_raw": -0.02357018180191517, "beta_dpo/gap_mean": 32.508583068847656, "beta_dpo/gap_std": 50.76416778564453, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.9465968586387434, "grad_norm": 151.5921173095703, "learning_rate": 4.517825684323323e-09, "logits/chosen": -0.7874301075935364, "logits/rejected": -0.7545861005783081, "loss": 4.589, "step": 452 }, { "beta_dpo/beta_used": 0.015478750690817833, "beta_dpo/beta_used_raw": -0.0005428898148238659, "beta_dpo/gap_mean": 32.38516616821289, "beta_dpo/gap_std": 49.00554275512695, "beta_dpo/mask_keep_frac": 0.6875, "epoch": 0.9486910994764398, "grad_norm": 169.36245727539062, "learning_rate": 4.1779364682113794e-09, "logits/chosen": -0.8509343266487122, "logits/rejected": -0.8427782654762268, "loss": 4.7353, "step": 453 }, { "beta_dpo/beta_used": 0.012610476464033127, "beta_dpo/beta_used_raw": -0.010584852658212185, "beta_dpo/gap_mean": 32.17422103881836, "beta_dpo/gap_std": 49.280479431152344, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9507853403141361, "grad_norm": 73.80915832519531, "learning_rate": 3.851229943335393e-09, "logits/chosen": -0.9344862699508667, "logits/rejected": -0.9276149272918701, "loss": 4.5409, "step": 454 }, { "beta_dpo/beta_used": 0.013800965622067451, "beta_dpo/beta_used_raw": -0.027739258483052254, "beta_dpo/gap_mean": 28.679340362548828, "beta_dpo/gap_std": 50.449771881103516, "beta_dpo/mask_keep_frac": 0.59375, "epoch": 0.9528795811518325, "grad_norm": 98.63684844970703, "learning_rate": 3.5377236299748147e-09, "logits/chosen": -0.8242367506027222, "logits/rejected": -0.8344764113426208, "loss": 4.7569, "step": 455 }, { "beta_dpo/beta_used": 0.06382787972688675, "beta_dpo/beta_used_raw": 0.040650881826877594, "beta_dpo/gap_mean": 29.863061904907227, "beta_dpo/gap_std": 55.417232513427734, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9549738219895288, "grad_norm": 355.5675964355469, "learning_rate": 3.2374343405217884e-09, "logits/chosen": -0.884809136390686, "logits/rejected": -0.8778659701347351, "loss": 3.9301, "step": 456 }, { "beta_dpo/beta_used": 0.06500288099050522, "beta_dpo/beta_used_raw": 0.04227167367935181, "beta_dpo/gap_mean": 32.736595153808594, "beta_dpo/gap_std": 59.960296630859375, "beta_dpo/mask_keep_frac": 0.625, "epoch": 0.9570680628272251, "grad_norm": 157.1930389404297, "learning_rate": 2.9503781785795713e-09, "logits/chosen": -0.8487591743469238, "logits/rejected": -0.8349891901016235, "loss": 2.6759, "step": 457 }, { "beta_dpo/beta_used": 0.02374722994863987, "beta_dpo/beta_used_raw": -0.04022517800331116, "beta_dpo/gap_mean": 31.09552764892578, "beta_dpo/gap_std": 56.911495208740234, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9591623036649215, "grad_norm": 411.55517578125, "learning_rate": 2.6765705380989432e-09, "logits/chosen": -0.8151242136955261, "logits/rejected": -0.8581142425537109, "loss": 4.7969, "step": 458 }, { "beta_dpo/beta_used": 0.05280781164765358, "beta_dpo/beta_used_raw": 0.024646718055009842, "beta_dpo/gap_mean": 29.90413475036621, "beta_dpo/gap_std": 53.489784240722656, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.9612565445026178, "grad_norm": 256.2548522949219, "learning_rate": 2.416026102552732e-09, "logits/chosen": -0.8302851319313049, "logits/rejected": -0.8471137285232544, "loss": 3.843, "step": 459 }, { "beta_dpo/beta_used": 0.022719116881489754, "beta_dpo/beta_used_raw": 0.002210780745372176, "beta_dpo/gap_mean": 29.041353225708008, "beta_dpo/gap_std": 52.842437744140625, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9633507853403142, "grad_norm": 148.7731475830078, "learning_rate": 2.168758844148272e-09, "logits/chosen": -0.8966348171234131, "logits/rejected": -0.8892766833305359, "loss": 4.8806, "step": 460 }, { "beta_dpo/beta_used": 0.03528280928730965, "beta_dpo/beta_used_raw": 0.01805609092116356, "beta_dpo/gap_mean": 29.886600494384766, "beta_dpo/gap_std": 51.72296905517578, "beta_dpo/mask_keep_frac": 0.65625, "epoch": 0.9654450261780104, "grad_norm": 198.60765075683594, "learning_rate": 1.9347820230782295e-09, "logits/chosen": -0.791793942451477, "logits/rejected": -0.8195943236351013, "loss": 4.1942, "step": 461 }, { "beta_dpo/beta_used": 0.052680741995573044, "beta_dpo/beta_used_raw": 0.035064440220594406, "beta_dpo/gap_mean": 32.32624816894531, "beta_dpo/gap_std": 54.05101013183594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9675392670157068, "grad_norm": 282.3392639160156, "learning_rate": 1.7141081868094209e-09, "logits/chosen": -0.8903741240501404, "logits/rejected": -0.8310127258300781, "loss": 3.7547, "step": 462 }, { "beta_dpo/beta_used": 0.016039669513702393, "beta_dpo/beta_used_raw": -0.020198073238134384, "beta_dpo/gap_mean": 32.30640411376953, "beta_dpo/gap_std": 52.92686080932617, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9696335078534032, "grad_norm": 90.29280090332031, "learning_rate": 1.5067491694100153e-09, "logits/chosen": -0.8517540693283081, "logits/rejected": -0.853223443031311, "loss": 4.4556, "step": 463 }, { "beta_dpo/beta_used": 0.02930094487965107, "beta_dpo/beta_used_raw": 0.0013244133442640305, "beta_dpo/gap_mean": 30.446823120117188, "beta_dpo/gap_std": 51.893402099609375, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9717277486910995, "grad_norm": 141.81759643554688, "learning_rate": 1.3127160909147672e-09, "logits/chosen": -0.8333346843719482, "logits/rejected": -0.8381949663162231, "loss": 4.5807, "step": 464 }, { "beta_dpo/beta_used": 0.045830510556697845, "beta_dpo/beta_used_raw": 0.018346037715673447, "beta_dpo/gap_mean": 31.705657958984375, "beta_dpo/gap_std": 50.60383987426758, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9738219895287958, "grad_norm": 118.63272094726562, "learning_rate": 1.1320193567288527e-09, "logits/chosen": -0.7933779358863831, "logits/rejected": -0.7936585545539856, "loss": 3.4035, "step": 465 }, { "beta_dpo/beta_used": 0.04178696125745773, "beta_dpo/beta_used_raw": 0.0331585593521595, "beta_dpo/gap_mean": 36.18540573120117, "beta_dpo/gap_std": 50.454200744628906, "beta_dpo/mask_keep_frac": 0.90625, "epoch": 0.9759162303664921, "grad_norm": 175.4940643310547, "learning_rate": 9.64668657069706e-10, "logits/chosen": -0.8239483833312988, "logits/rejected": -0.7942164540290833, "loss": 3.9939, "step": 466 }, { "beta_dpo/beta_used": 0.030592869967222214, "beta_dpo/beta_used_raw": -0.0003968037199229002, "beta_dpo/gap_mean": 32.80325698852539, "beta_dpo/gap_std": 50.57613754272461, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9780104712041885, "grad_norm": 137.76971435546875, "learning_rate": 8.106729664475176e-10, "logits/chosen": -0.6968907117843628, "logits/rejected": -0.6687761545181274, "loss": 4.3001, "step": 467 }, { "beta_dpo/beta_used": 0.01786745898425579, "beta_dpo/beta_used_raw": -0.03523392230272293, "beta_dpo/gap_mean": 28.95020294189453, "beta_dpo/gap_std": 52.5392951965332, "beta_dpo/mask_keep_frac": 0.96875, "epoch": 0.9801047120418848, "grad_norm": 148.3948211669922, "learning_rate": 6.700405431837585e-10, "logits/chosen": -0.849189043045044, "logits/rejected": -0.8099946975708008, "loss": 4.8179, "step": 468 }, { "beta_dpo/beta_used": 0.05720680207014084, "beta_dpo/beta_used_raw": 0.04207749292254448, "beta_dpo/gap_mean": 31.435684204101562, "beta_dpo/gap_std": 53.248329162597656, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9821989528795811, "grad_norm": 237.18161010742188, "learning_rate": 5.427789289685347e-10, "logits/chosen": -0.8365699052810669, "logits/rejected": -0.7970238327980042, "loss": 4.0884, "step": 469 }, { "beta_dpo/beta_used": 0.04907160997390747, "beta_dpo/beta_used_raw": 0.026105834171175957, "beta_dpo/gap_mean": 33.24174118041992, "beta_dpo/gap_std": 52.36241912841797, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9842931937172775, "grad_norm": 157.68939208984375, "learning_rate": 4.288949484559934e-10, "logits/chosen": -0.7854397296905518, "logits/rejected": -0.8018498420715332, "loss": 3.7433, "step": 470 }, { "beta_dpo/beta_used": 0.0241762176156044, "beta_dpo/beta_used_raw": -0.003967747092247009, "beta_dpo/gap_mean": 34.16176223754883, "beta_dpo/gap_std": 51.57313919067383, "beta_dpo/mask_keep_frac": 0.71875, "epoch": 0.9863874345549738, "grad_norm": 123.31388854980469, "learning_rate": 3.2839470889836627e-10, "logits/chosen": -0.9119861125946045, "logits/rejected": -0.8991633057594299, "loss": 3.9463, "step": 471 }, { "beta_dpo/beta_used": 0.023680521175265312, "beta_dpo/beta_used_raw": 0.00017686188220977783, "beta_dpo/gap_mean": 34.94512176513672, "beta_dpo/gap_std": 53.29269027709961, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.9884816753926702, "grad_norm": 265.7321472167969, "learning_rate": 2.412835998185092e-10, "logits/chosen": -0.8801113367080688, "logits/rejected": -0.8942077159881592, "loss": 4.6235, "step": 472 }, { "beta_dpo/beta_used": 0.030485741794109344, "beta_dpo/beta_used_raw": 0.020329464226961136, "beta_dpo/gap_mean": 35.68143844604492, "beta_dpo/gap_std": 51.89659118652344, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9905759162303664, "grad_norm": 144.84486389160156, "learning_rate": 1.6756629272085544e-10, "logits/chosen": -0.8776077628135681, "logits/rejected": -0.8777634501457214, "loss": 3.8487, "step": 473 }, { "beta_dpo/beta_used": 0.039661701768636703, "beta_dpo/beta_used_raw": -0.012342464178800583, "beta_dpo/gap_mean": 36.153831481933594, "beta_dpo/gap_std": 50.874114990234375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9926701570680628, "grad_norm": 272.6778259277344, "learning_rate": 1.072467408408384e-10, "logits/chosen": -0.8588881492614746, "logits/rejected": -0.8895531892776489, "loss": 4.2205, "step": 474 }, { "beta_dpo/beta_used": 0.011708030477166176, "beta_dpo/beta_used_raw": -0.036866847425699234, "beta_dpo/gap_mean": 30.167787551879883, "beta_dpo/gap_std": 47.42060089111328, "beta_dpo/mask_keep_frac": 0.375, "epoch": 0.9947643979057592, "grad_norm": 72.4432601928711, "learning_rate": 6.032817893297793e-11, "logits/chosen": -0.7738948464393616, "logits/rejected": -0.8091400265693665, "loss": 4.5426, "step": 475 }, { "beta_dpo/beta_used": 0.023084495216608047, "beta_dpo/beta_used_raw": -0.026419004425406456, "beta_dpo/gap_mean": 30.15393829345703, "beta_dpo/gap_std": 47.20201110839844, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9968586387434555, "grad_norm": 92.88987731933594, "learning_rate": 2.6813123097352287e-11, "logits/chosen": -0.8247819542884827, "logits/rejected": -0.8255200982093811, "loss": 4.5438, "step": 476 }, { "beta_dpo/beta_used": 0.032955169677734375, "beta_dpo/beta_used_raw": -0.01132938638329506, "beta_dpo/gap_mean": 30.793474197387695, "beta_dpo/gap_std": 53.303714752197266, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9989528795811519, "grad_norm": 454.408203125, "learning_rate": 6.7033706447061635e-12, "logits/chosen": -0.7610109448432922, "logits/rejected": -0.7843220233917236, "loss": 4.6407, "step": 477 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 4.632088508745909, "train_runtime": 6811.5994, "train_samples_per_second": 8.975, "train_steps_per_second": 0.07 } ], "logging_steps": 1, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }