{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta_used": 0.10121209919452667, "beta_dpo/beta_used_raw": 0.10121209919452667, "beta_dpo/gap_mean": -0.0009442940354347229, "beta_dpo/gap_std": 0.03691839799284935, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.0029411764705882353, "grad_norm": 23.302410125732422, "learning_rate": 0.0, "logits/chosen": -0.4739703834056854, "logits/rejected": -0.44689586758613586, "loss": 0.6919, "step": 1 }, { "beta_dpo/beta_used": 0.10032124072313309, "beta_dpo/beta_used_raw": 0.10032124072313309, "beta_dpo/gap_mean": -0.0016960372449830174, "beta_dpo/gap_std": 0.1151522547006607, "beta_dpo/mask_keep_frac": 0.765625, "epoch": 0.014705882352941176, "grad_norm": 24.834075927734375, "learning_rate": 5.88235294117647e-08, "logits/chosen": -0.49943581223487854, "logits/rejected": -0.4934660494327545, "loss": 0.693, "step": 5 }, { "beta_dpo/beta_used": 0.101251520216465, "beta_dpo/beta_used_raw": 0.101251520216465, "beta_dpo/gap_mean": 0.0030363830737769604, "beta_dpo/gap_std": 0.2163175642490387, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.029411764705882353, "grad_norm": 21.942047119140625, "learning_rate": 1.3235294117647057e-07, "logits/chosen": -0.5174359083175659, "logits/rejected": -0.5005401968955994, "loss": 0.692, "step": 10 }, { "beta_dpo/beta_used": 0.10108586400747299, "beta_dpo/beta_used_raw": 0.10108586400747299, "beta_dpo/gap_mean": 0.024518460035324097, "beta_dpo/gap_std": 0.2784799039363861, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.04411764705882353, "grad_norm": 28.207460403442383, "learning_rate": 2.0588235294117645e-07, "logits/chosen": -0.5348216891288757, "logits/rejected": -0.5156930088996887, "loss": 0.6911, "step": 15 }, { "beta_dpo/beta_used": 0.10244777053594589, "beta_dpo/beta_used_raw": 0.10244777053594589, "beta_dpo/gap_mean": 0.0749056339263916, "beta_dpo/gap_std": 0.33879655599594116, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.058823529411764705, "grad_norm": 19.902040481567383, "learning_rate": 2.7941176470588235e-07, "logits/chosen": -0.5660465955734253, "logits/rejected": -0.5419166088104248, "loss": 0.6874, "step": 20 }, { "beta_dpo/beta_used": 0.10622622072696686, "beta_dpo/beta_used_raw": 0.10622622072696686, "beta_dpo/gap_mean": 0.20916345715522766, "beta_dpo/gap_std": 0.456662118434906, "beta_dpo/mask_keep_frac": 0.887499988079071, "epoch": 0.07352941176470588, "grad_norm": 22.522640228271484, "learning_rate": 3.529411764705882e-07, "logits/chosen": -0.5110368132591248, "logits/rejected": -0.5050845146179199, "loss": 0.6769, "step": 25 }, { "beta_dpo/beta_used": 0.10997174680233002, "beta_dpo/beta_used_raw": 0.10997174680233002, "beta_dpo/gap_mean": 0.5209842920303345, "beta_dpo/gap_std": 0.7702666521072388, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.08823529411764706, "grad_norm": 19.37394142150879, "learning_rate": 4.264705882352941e-07, "logits/chosen": -0.5535926222801208, "logits/rejected": -0.5316442251205444, "loss": 0.6574, "step": 30 }, { "beta_dpo/beta_used": 0.11611036211252213, "beta_dpo/beta_used_raw": 0.11611036211252213, "beta_dpo/gap_mean": 0.9489548802375793, "beta_dpo/gap_std": 1.3326656818389893, "beta_dpo/mask_keep_frac": 0.6625000238418579, "epoch": 0.10294117647058823, "grad_norm": 23.465280532836914, "learning_rate": 5e-07, "logits/chosen": -0.5605362057685852, "logits/rejected": -0.5497816801071167, "loss": 0.6265, "step": 35 }, { "beta_dpo/beta_used": 0.1326821744441986, "beta_dpo/beta_used_raw": 0.1326821744441986, "beta_dpo/gap_mean": 1.789758324623108, "beta_dpo/gap_std": 2.447655200958252, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.11764705882352941, "grad_norm": 20.867738723754883, "learning_rate": 4.996706849759452e-07, "logits/chosen": -0.6393685340881348, "logits/rejected": -0.6073721051216125, "loss": 0.5663, "step": 40 }, { "beta_dpo/beta_used": 0.12091531604528427, "beta_dpo/beta_used_raw": 0.12091531604528427, "beta_dpo/gap_mean": 2.8082375526428223, "beta_dpo/gap_std": 4.084892272949219, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.1323529411764706, "grad_norm": 19.109943389892578, "learning_rate": 4.986836074908615e-07, "logits/chosen": -0.6684064865112305, "logits/rejected": -0.6361075639724731, "loss": 0.5411, "step": 45 }, { "beta_dpo/beta_used": 0.11514081805944443, "beta_dpo/beta_used_raw": 0.11514081805944443, "beta_dpo/gap_mean": 3.5657267570495605, "beta_dpo/gap_std": 5.662721633911133, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.14705882352941177, "grad_norm": 22.54947853088379, "learning_rate": 4.970413680203148e-07, "logits/chosen": -0.6647295951843262, "logits/rejected": -0.6276803612709045, "loss": 0.536, "step": 50 }, { "beta_dpo/beta_used": 0.13137957453727722, "beta_dpo/beta_used_raw": 0.13137957453727722, "beta_dpo/gap_mean": 4.35926628112793, "beta_dpo/gap_std": 7.092940330505371, "beta_dpo/mask_keep_frac": 0.7250000238418579, "epoch": 0.16176470588235295, "grad_norm": 38.691123962402344, "learning_rate": 4.947482930773511e-07, "logits/chosen": -0.7010586261749268, "logits/rejected": -0.675391435623169, "loss": 0.493, "step": 55 }, { "beta_dpo/beta_used": 0.094205841422081, "beta_dpo/beta_used_raw": 0.094205841422081, "beta_dpo/gap_mean": 5.047989845275879, "beta_dpo/gap_std": 8.23731803894043, "beta_dpo/mask_keep_frac": 0.7250000238418579, "epoch": 0.17647058823529413, "grad_norm": 31.127901077270508, "learning_rate": 4.918104238142103e-07, "logits/chosen": -0.724422812461853, "logits/rejected": -0.6809322237968445, "loss": 0.5315, "step": 60 }, { "beta_dpo/beta_used": 0.11677428334951401, "beta_dpo/beta_used_raw": 0.11677428334951401, "beta_dpo/gap_mean": 5.827352523803711, "beta_dpo/gap_std": 8.861337661743164, "beta_dpo/mask_keep_frac": 0.8500000238418579, "epoch": 0.19117647058823528, "grad_norm": 20.432043075561523, "learning_rate": 4.882355001067891e-07, "logits/chosen": -0.6648474931716919, "logits/rejected": -0.637535572052002, "loss": 0.4741, "step": 65 }, { "beta_dpo/beta_used": 0.09036926180124283, "beta_dpo/beta_used_raw": 0.09036926180124283, "beta_dpo/gap_mean": 6.462141990661621, "beta_dpo/gap_std": 9.157753944396973, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.20588235294117646, "grad_norm": 24.550621032714844, "learning_rate": 4.840329401637809e-07, "logits/chosen": -0.6986874341964722, "logits/rejected": -0.6637295484542847, "loss": 0.5026, "step": 70 }, { "beta_dpo/beta_used": 0.07552285492420197, "beta_dpo/beta_used_raw": 0.0741354450583458, "beta_dpo/gap_mean": 6.905457496643066, "beta_dpo/gap_std": 9.706171035766602, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.22058823529411764, "grad_norm": 21.727449417114258, "learning_rate": 4.792138157142157e-07, "logits/chosen": -0.7469242215156555, "logits/rejected": -0.7223338484764099, "loss": 0.5172, "step": 75 }, { "beta_dpo/beta_used": 0.09862758219242096, "beta_dpo/beta_used_raw": 0.09059171378612518, "beta_dpo/gap_mean": 7.501389980316162, "beta_dpo/gap_std": 10.180580139160156, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.23529411764705882, "grad_norm": 0.3042762279510498, "learning_rate": 4.737908228387656e-07, "logits/chosen": -0.7543509602546692, "logits/rejected": -0.7017374038696289, "loss": 0.4756, "step": 80 }, { "beta_dpo/beta_used": 0.06848205626010895, "beta_dpo/beta_used_raw": 0.05231575295329094, "beta_dpo/gap_mean": 7.9440507888793945, "beta_dpo/gap_std": 10.780364990234375, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.25, "grad_norm": 0.3057352602481842, "learning_rate": 4.6777824852166437e-07, "logits/chosen": -0.7203555107116699, "logits/rejected": -0.6912198066711426, "loss": 0.5345, "step": 85 }, { "beta_dpo/beta_used": 0.08253253251314163, "beta_dpo/beta_used_raw": 0.08253253251314163, "beta_dpo/gap_mean": 8.4508638381958, "beta_dpo/gap_std": 11.448507308959961, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.2647058823529412, "grad_norm": 21.29926300048828, "learning_rate": 4.611919330113591e-07, "logits/chosen": -0.6781951189041138, "logits/rejected": -0.6568866968154907, "loss": 0.4944, "step": 90 }, { "beta_dpo/beta_used": 0.05684714391827583, "beta_dpo/beta_used_raw": 0.05684714391827583, "beta_dpo/gap_mean": 8.875980377197266, "beta_dpo/gap_std": 12.020231246948242, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.27941176470588236, "grad_norm": 15.502776145935059, "learning_rate": 4.5404922808905543e-07, "logits/chosen": -0.7086650729179382, "logits/rejected": -0.6651682257652283, "loss": 0.5335, "step": 95 }, { "beta_dpo/beta_used": 0.1179933100938797, "beta_dpo/beta_used_raw": 0.1179933100938797, "beta_dpo/gap_mean": 9.62360954284668, "beta_dpo/gap_std": 12.684171676635742, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.29411764705882354, "grad_norm": 34.129478454589844, "learning_rate": 4.4636895135509966e-07, "logits/chosen": -0.6843082904815674, "logits/rejected": -0.660758912563324, "loss": 0.4098, "step": 100 }, { "epoch": 0.29411764705882354, "eval_beta_dpo/beta_used": 0.04330332204699516, "eval_beta_dpo/beta_used_raw": 0.01594320312142372, "eval_beta_dpo/gap_mean": 7.997772216796875, "eval_beta_dpo/gap_std": 13.260690689086914, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.6977978944778442, "eval_logits/rejected": -0.6668843626976013, "eval_loss": 0.6251118183135986, "eval_runtime": 20.4115, "eval_samples_per_second": 114.592, "eval_steps_per_second": 0.931, "step": 100 }, { "beta_dpo/beta_used": 0.15054886043071747, "beta_dpo/beta_used_raw": 0.15054886043071747, "beta_dpo/gap_mean": 7.8849334716796875, "beta_dpo/gap_std": 13.30543041229248, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.3088235294117647, "grad_norm": 15.665854454040527, "learning_rate": 4.381713366536311e-07, "logits/chosen": -0.7553393244743347, "logits/rejected": -0.710943341255188, "loss": 0.4061, "step": 105 }, { "beta_dpo/beta_used": 0.09847154468297958, "beta_dpo/beta_used_raw": 0.09847154468297958, "beta_dpo/gap_mean": 9.127924919128418, "beta_dpo/gap_std": 13.331835746765137, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.3235294117647059, "grad_norm": 7.982070446014404, "learning_rate": 4.2947798076611047e-07, "logits/chosen": -0.7284727692604065, "logits/rejected": -0.696673572063446, "loss": 0.4595, "step": 110 }, { "beta_dpo/beta_used": 0.12598751485347748, "beta_dpo/beta_used_raw": 0.12598751485347748, "beta_dpo/gap_mean": 9.942410469055176, "beta_dpo/gap_std": 13.166864395141602, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.3382352941176471, "grad_norm": 36.213523864746094, "learning_rate": 4.203117865141635e-07, "logits/chosen": -0.7145182490348816, "logits/rejected": -0.6985291242599487, "loss": 0.3778, "step": 115 }, { "beta_dpo/beta_used": 0.05509430170059204, "beta_dpo/beta_used_raw": 0.048566654324531555, "beta_dpo/gap_mean": 10.542096138000488, "beta_dpo/gap_std": 13.39216136932373, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.35294117647058826, "grad_norm": 14.08332347869873, "learning_rate": 4.106969024216348e-07, "logits/chosen": -0.7127692103385925, "logits/rejected": -0.6740670204162598, "loss": 0.5271, "step": 120 }, { "beta_dpo/beta_used": 0.05550508573651314, "beta_dpo/beta_used_raw": 0.05064947530627251, "beta_dpo/gap_mean": 11.009790420532227, "beta_dpo/gap_std": 13.461648941040039, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.36764705882352944, "grad_norm": 1.2771987915039062, "learning_rate": 4.006586590948141e-07, "logits/chosen": -0.7180671691894531, "logits/rejected": -0.6869726777076721, "loss": 0.513, "step": 125 }, { "beta_dpo/beta_used": 0.05528440326452255, "beta_dpo/beta_used_raw": 0.05528440326452255, "beta_dpo/gap_mean": 11.50378704071045, "beta_dpo/gap_std": 14.039319038391113, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.38235294117647056, "grad_norm": 5.609388828277588, "learning_rate": 3.9022350248844246e-07, "logits/chosen": -0.7099085450172424, "logits/rejected": -0.6715607643127441, "loss": 0.5068, "step": 130 }, { "beta_dpo/beta_used": 0.08324670791625977, "beta_dpo/beta_used_raw": 0.08324670791625977, "beta_dpo/gap_mean": 12.224153518676758, "beta_dpo/gap_std": 15.014795303344727, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.39705882352941174, "grad_norm": 22.845937728881836, "learning_rate": 3.794189242333106e-07, "logits/chosen": -0.7034512758255005, "logits/rejected": -0.6600346565246582, "loss": 0.4231, "step": 135 }, { "beta_dpo/beta_used": 0.10875506699085236, "beta_dpo/beta_used_raw": 0.10875506699085236, "beta_dpo/gap_mean": 13.073277473449707, "beta_dpo/gap_std": 15.834657669067383, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4117647058823529, "grad_norm": 31.461519241333008, "learning_rate": 3.6827338920900253e-07, "logits/chosen": -0.6739553213119507, "logits/rejected": -0.6366498470306396, "loss": 0.3902, "step": 140 }, { "beta_dpo/beta_used": 0.10356837511062622, "beta_dpo/beta_used_raw": 0.10356837511062622, "beta_dpo/gap_mean": 13.881492614746094, "beta_dpo/gap_std": 16.42840003967285, "beta_dpo/mask_keep_frac": 0.7250000238418579, "epoch": 0.4264705882352941, "grad_norm": 29.94761085510254, "learning_rate": 3.568162605525952e-07, "logits/chosen": -0.7220578193664551, "logits/rejected": -0.6809359788894653, "loss": 0.3819, "step": 145 }, { "beta_dpo/beta_used": 0.019714761525392532, "beta_dpo/beta_used_raw": -0.023198971524834633, "beta_dpo/gap_mean": 14.227258682250977, "beta_dpo/gap_std": 17.448183059692383, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.4411764705882353, "grad_norm": 5.527612209320068, "learning_rate": 3.4507772230088147e-07, "logits/chosen": -0.6326473355293274, "logits/rejected": -0.6032054424285889, "loss": 0.607, "step": 150 }, { "beta_dpo/beta_used": 0.07979521155357361, "beta_dpo/beta_used_raw": 0.06505511701107025, "beta_dpo/gap_mean": 14.670598983764648, "beta_dpo/gap_std": 18.554828643798828, "beta_dpo/mask_keep_frac": 0.699999988079071, "epoch": 0.45588235294117646, "grad_norm": 23.10860824584961, "learning_rate": 3.3308869986991487e-07, "logits/chosen": -0.7037164568901062, "logits/rejected": -0.6613154411315918, "loss": 0.4478, "step": 155 }, { "beta_dpo/beta_used": 0.06584476679563522, "beta_dpo/beta_used_raw": 0.040607184171676636, "beta_dpo/gap_mean": 15.389450073242188, "beta_dpo/gap_std": 19.081418991088867, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.47058823529411764, "grad_norm": 31.426233291625977, "learning_rate": 3.208807785813777e-07, "logits/chosen": -0.6574662923812866, "logits/rejected": -0.630233883857727, "loss": 0.4758, "step": 160 }, { "beta_dpo/beta_used": 0.08810704201459885, "beta_dpo/beta_used_raw": 0.07671914994716644, "beta_dpo/gap_mean": 16.22821617126465, "beta_dpo/gap_std": 19.637792587280273, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.4852941176470588, "grad_norm": 73.11759948730469, "learning_rate": 3.084861204504122e-07, "logits/chosen": -0.6618590354919434, "logits/rejected": -0.6243924498558044, "loss": 0.4768, "step": 165 }, { "beta_dpo/beta_used": 0.02722100354731083, "beta_dpo/beta_used_raw": 0.02408101223409176, "beta_dpo/gap_mean": 17.07744598388672, "beta_dpo/gap_std": 20.277606964111328, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.5, "grad_norm": 0.5254238247871399, "learning_rate": 2.959373794541426e-07, "logits/chosen": -0.6966148614883423, "logits/rejected": -0.666491687297821, "loss": 0.5686, "step": 170 }, { "beta_dpo/beta_used": 0.0664793998003006, "beta_dpo/beta_used_raw": 0.03722615912556648, "beta_dpo/gap_mean": 17.654155731201172, "beta_dpo/gap_std": 21.08226776123047, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5147058823529411, "grad_norm": 0.5429490804672241, "learning_rate": 2.8326761550411346e-07, "logits/chosen": -0.6848665475845337, "logits/rejected": -0.6613831520080566, "loss": 0.4753, "step": 175 }, { "beta_dpo/beta_used": 0.024588093161582947, "beta_dpo/beta_used_raw": -0.008070843294262886, "beta_dpo/gap_mean": 17.7331485748291, "beta_dpo/gap_std": 22.08762550354004, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.5294117647058824, "grad_norm": 15.737401962280273, "learning_rate": 2.7051020734928443e-07, "logits/chosen": -0.6688377261161804, "logits/rejected": -0.6460214853286743, "loss": 0.5579, "step": 180 }, { "beta_dpo/beta_used": 0.08162590861320496, "beta_dpo/beta_used_raw": 0.05483890324831009, "beta_dpo/gap_mean": 18.408456802368164, "beta_dpo/gap_std": 22.61962890625, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.5441176470588235, "grad_norm": 0.506800651550293, "learning_rate": 2.5769876463904263e-07, "logits/chosen": -0.7215656042098999, "logits/rejected": -0.6699239611625671, "loss": 0.4855, "step": 185 }, { "beta_dpo/beta_used": 0.05661209672689438, "beta_dpo/beta_used_raw": 0.027242619544267654, "beta_dpo/gap_mean": 18.814666748046875, "beta_dpo/gap_std": 22.990680694580078, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.5588235294117647, "grad_norm": 26.899166107177734, "learning_rate": 2.4486703937790243e-07, "logits/chosen": -0.7019311785697937, "logits/rejected": -0.6498968005180359, "loss": 0.4949, "step": 190 }, { "beta_dpo/beta_used": 0.02816765382885933, "beta_dpo/beta_used_raw": 0.001962479902431369, "beta_dpo/gap_mean": 19.533567428588867, "beta_dpo/gap_std": 23.629451751708984, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.5735294117647058, "grad_norm": 11.008431434631348, "learning_rate": 2.320488370051681e-07, "logits/chosen": -0.7254117727279663, "logits/rejected": -0.6765154004096985, "loss": 0.5286, "step": 195 }, { "beta_dpo/beta_used": 0.0643467828631401, "beta_dpo/beta_used_raw": -0.0015767127042636275, "beta_dpo/gap_mean": 20.06936264038086, "beta_dpo/gap_std": 24.53436851501465, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.5882352941176471, "grad_norm": 78.41595458984375, "learning_rate": 2.192779273338215e-07, "logits/chosen": -0.6875912547111511, "logits/rejected": -0.6458339095115662, "loss": 0.5527, "step": 200 }, { "epoch": 0.5882352941176471, "eval_beta_dpo/beta_used": 0.03733323514461517, "eval_beta_dpo/beta_used_raw": -0.0800742357969284, "eval_beta_dpo/gap_mean": 17.105911254882812, "eval_beta_dpo/gap_std": 25.945871353149414, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.6982784271240234, "eval_logits/rejected": -0.6586927771568298, "eval_loss": 0.6420564651489258, "eval_runtime": 20.3208, "eval_samples_per_second": 115.104, "eval_steps_per_second": 0.935, "step": 200 }, { "beta_dpo/beta_used": 0.18078216910362244, "beta_dpo/beta_used_raw": 0.18078216910362244, "beta_dpo/gap_mean": 16.670331954956055, "beta_dpo/gap_std": 27.034832000732422, "beta_dpo/mask_keep_frac": 0.887499988079071, "epoch": 0.6029411764705882, "grad_norm": 76.40715789794922, "learning_rate": 2.065879555832674e-07, "logits/chosen": -0.6358317136764526, "logits/rejected": -0.5890509486198425, "loss": 0.4449, "step": 205 }, { "beta_dpo/beta_used": 0.187847301363945, "beta_dpo/beta_used_raw": 0.187847301363945, "beta_dpo/gap_mean": 18.74222183227539, "beta_dpo/gap_std": 27.3233642578125, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.6176470588235294, "grad_norm": 39.985557556152344, "learning_rate": 1.9401235374032425e-07, "logits/chosen": -0.6837745308876038, "logits/rejected": -0.6255474090576172, "loss": 0.4006, "step": 210 }, { "beta_dpo/beta_used": 0.0615837462246418, "beta_dpo/beta_used_raw": 0.053963758051395416, "beta_dpo/gap_mean": 20.168214797973633, "beta_dpo/gap_std": 27.281606674194336, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.6323529411764706, "grad_norm": 56.95214080810547, "learning_rate": 1.8158425248197928e-07, "logits/chosen": -0.5969057083129883, "logits/rejected": -0.5545859336853027, "loss": 0.5414, "step": 215 }, { "beta_dpo/beta_used": 0.031995899975299835, "beta_dpo/beta_used_raw": 0.008795802481472492, "beta_dpo/gap_mean": 20.327245712280273, "beta_dpo/gap_std": 26.49213218688965, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.6470588235294118, "grad_norm": 0.543950080871582, "learning_rate": 1.6933639389195134e-07, "logits/chosen": -0.6841639280319214, "logits/rejected": -0.6511374711990356, "loss": 0.5464, "step": 220 }, { "beta_dpo/beta_used": 0.04347361996769905, "beta_dpo/beta_used_raw": 0.016606144607067108, "beta_dpo/gap_mean": 20.5634765625, "beta_dpo/gap_std": 25.834671020507812, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.6617647058823529, "grad_norm": 29.658409118652344, "learning_rate": 1.573010452010098e-07, "logits/chosen": -0.6632441282272339, "logits/rejected": -0.6577039957046509, "loss": 0.4873, "step": 225 }, { "beta_dpo/beta_used": 0.06577815115451813, "beta_dpo/beta_used_raw": 0.05692853406071663, "beta_dpo/gap_mean": 21.10856819152832, "beta_dpo/gap_std": 25.58962059020996, "beta_dpo/mask_keep_frac": 0.737500011920929, "epoch": 0.6764705882352942, "grad_norm": 0.6241604685783386, "learning_rate": 1.4550991377830423e-07, "logits/chosen": -0.7060235738754272, "logits/rejected": -0.669354259967804, "loss": 0.4797, "step": 230 }, { "beta_dpo/beta_used": 0.06475953757762909, "beta_dpo/beta_used_raw": 0.023799167945981026, "beta_dpo/gap_mean": 21.435104370117188, "beta_dpo/gap_std": 25.414148330688477, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.6911764705882353, "grad_norm": 14.550406455993652, "learning_rate": 1.339940635976592e-07, "logits/chosen": -0.6889506578445435, "logits/rejected": -0.6716668009757996, "loss": 0.4357, "step": 235 }, { "beta_dpo/beta_used": 0.022588472813367844, "beta_dpo/beta_used_raw": 0.0025838587898761034, "beta_dpo/gap_mean": 21.869482040405273, "beta_dpo/gap_std": 25.504459381103516, "beta_dpo/mask_keep_frac": 0.737500011920929, "epoch": 0.7058823529411765, "grad_norm": 11.02522087097168, "learning_rate": 1.227838333989088e-07, "logits/chosen": -0.6110752820968628, "logits/rejected": -0.5741311311721802, "loss": 0.5265, "step": 240 }, { "beta_dpo/beta_used": 0.03154964745044708, "beta_dpo/beta_used_raw": -0.007365362253040075, "beta_dpo/gap_mean": 22.568851470947266, "beta_dpo/gap_std": 25.90200424194336, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.7205882352941176, "grad_norm": 0.5983785390853882, "learning_rate": 1.1190875675987355e-07, "logits/chosen": -0.6300492286682129, "logits/rejected": -0.6109535098075867, "loss": 0.5448, "step": 245 }, { "beta_dpo/beta_used": 0.010828005149960518, "beta_dpo/beta_used_raw": -0.058729518204927444, "beta_dpo/gap_mean": 22.215688705444336, "beta_dpo/gap_std": 27.019912719726562, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.7352941176470589, "grad_norm": 17.40310287475586, "learning_rate": 1.0139748428955333e-07, "logits/chosen": -0.6890392303466797, "logits/rejected": -0.629682183265686, "loss": 0.629, "step": 250 }, { "beta_dpo/beta_used": 0.09269052743911743, "beta_dpo/beta_used_raw": 0.08087030053138733, "beta_dpo/gap_mean": 22.695995330810547, "beta_dpo/gap_std": 27.621633529663086, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.75, "grad_norm": 53.0207405090332, "learning_rate": 9.127770814751932e-08, "logits/chosen": -0.6670210361480713, "logits/rejected": -0.6118627786636353, "loss": 0.483, "step": 255 }, { "beta_dpo/beta_used": 0.06755250692367554, "beta_dpo/beta_used_raw": 0.03205912187695503, "beta_dpo/gap_mean": 23.055011749267578, "beta_dpo/gap_std": 28.25390625, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.7647058823529411, "grad_norm": 0.6237814426422119, "learning_rate": 8.15760890883607e-08, "logits/chosen": -0.6667768359184265, "logits/rejected": -0.6239995956420898, "loss": 0.4968, "step": 260 }, { "beta_dpo/beta_used": 0.11417696624994278, "beta_dpo/beta_used_raw": 0.1062905341386795, "beta_dpo/gap_mean": 22.97963523864746, "beta_dpo/gap_std": 28.165149688720703, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.7794117647058824, "grad_norm": 93.24835205078125, "learning_rate": 7.231818622338822e-08, "logits/chosen": -0.6425198316574097, "logits/rejected": -0.6141684651374817, "loss": 0.4401, "step": 265 }, { "beta_dpo/beta_used": 0.07874588668346405, "beta_dpo/beta_used_raw": -0.0038310796953737736, "beta_dpo/gap_mean": 23.209665298461914, "beta_dpo/gap_std": 28.643651962280273, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7941176470588235, "grad_norm": 12.020166397094727, "learning_rate": 6.352838968463919e-08, "logits/chosen": -0.6789681911468506, "logits/rejected": -0.6184022426605225, "loss": 0.4832, "step": 270 }, { "beta_dpo/beta_used": 0.05330665037035942, "beta_dpo/beta_used_raw": -0.023246586322784424, "beta_dpo/gap_mean": 24.013660430908203, "beta_dpo/gap_std": 29.33469009399414, "beta_dpo/mask_keep_frac": 0.7250000238418579, "epoch": 0.8088235294117647, "grad_norm": 0.6346384882926941, "learning_rate": 5.5229856368582376e-08, "logits/chosen": -0.6784375905990601, "logits/rejected": -0.6448493599891663, "loss": 0.5253, "step": 275 }, { "beta_dpo/beta_used": 0.05292302370071411, "beta_dpo/beta_used_raw": -0.010663707740604877, "beta_dpo/gap_mean": 24.447540283203125, "beta_dpo/gap_std": 29.648815155029297, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.8235294117647058, "grad_norm": 0.6082450151443481, "learning_rate": 4.7444448928806615e-08, "logits/chosen": -0.6179937720298767, "logits/rejected": -0.5764154195785522, "loss": 0.53, "step": 280 }, { "beta_dpo/beta_used": 0.007934780791401863, "beta_dpo/beta_used_raw": -0.07739663124084473, "beta_dpo/gap_mean": 24.31735610961914, "beta_dpo/gap_std": 29.43593406677246, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.8382352941176471, "grad_norm": 0.6881201863288879, "learning_rate": 4.019267817841834e-08, "logits/chosen": -0.6771946549415588, "logits/rejected": -0.6086295247077942, "loss": 0.6357, "step": 285 }, { "beta_dpo/beta_used": 0.06074627488851547, "beta_dpo/beta_used_raw": 0.009315362200140953, "beta_dpo/gap_mean": 24.635099411010742, "beta_dpo/gap_std": 30.013864517211914, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.8529411764705882, "grad_norm": 2.793721914291382, "learning_rate": 3.349364905389032e-08, "logits/chosen": -0.6379111409187317, "logits/rejected": -0.5973175764083862, "loss": 0.5345, "step": 290 }, { "beta_dpo/beta_used": 0.03756168484687805, "beta_dpo/beta_used_raw": -0.01273317076265812, "beta_dpo/gap_mean": 24.830781936645508, "beta_dpo/gap_std": 30.81571388244629, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.8676470588235294, "grad_norm": 50.0855598449707, "learning_rate": 2.736501028272095e-08, "logits/chosen": -0.617714524269104, "logits/rejected": -0.6301193237304688, "loss": 0.5441, "step": 295 }, { "beta_dpo/beta_used": 0.044209837913513184, "beta_dpo/beta_used_raw": 0.004354533273726702, "beta_dpo/gap_mean": 24.904342651367188, "beta_dpo/gap_std": 31.082351684570312, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.8823529411764706, "grad_norm": 0.6376844048500061, "learning_rate": 2.1822907887504932e-08, "logits/chosen": -0.5964897274971008, "logits/rejected": -0.6028931736946106, "loss": 0.5831, "step": 300 }, { "epoch": 0.8823529411764706, "eval_beta_dpo/beta_used": 0.031669970601797104, "eval_beta_dpo/beta_used_raw": -0.11691396683454514, "eval_beta_dpo/gap_mean": 20.08871841430664, "eval_beta_dpo/gap_std": 30.078739166259766, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.6625580191612244, "eval_logits/rejected": -0.6206780672073364, "eval_loss": 0.6427361965179443, "eval_runtime": 20.3459, "eval_samples_per_second": 114.962, "eval_steps_per_second": 0.934, "step": 300 }, { "beta_dpo/beta_used": 0.20903603732585907, "beta_dpo/beta_used_raw": 0.20903603732585907, "beta_dpo/gap_mean": 19.461414337158203, "beta_dpo/gap_std": 29.93111801147461, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.8970588235294118, "grad_norm": 89.4169692993164, "learning_rate": 1.6881942648911074e-08, "logits/chosen": -0.6793561577796936, "logits/rejected": -0.6282657384872437, "loss": 0.5221, "step": 305 }, { "beta_dpo/beta_used": 0.1465708315372467, "beta_dpo/beta_used_raw": 0.13552138209342957, "beta_dpo/gap_mean": 21.82315444946289, "beta_dpo/gap_std": 30.26885414123535, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.9117647058823529, "grad_norm": 0.6243640780448914, "learning_rate": 1.2555131639630567e-08, "logits/chosen": -0.5958537459373474, "logits/rejected": -0.5621305704116821, "loss": 0.4927, "step": 310 }, { "beta_dpo/beta_used": 0.06637457758188248, "beta_dpo/beta_used_raw": -0.010266167111694813, "beta_dpo/gap_mean": 23.013385772705078, "beta_dpo/gap_std": 30.935138702392578, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.9264705882352942, "grad_norm": 0.5633993148803711, "learning_rate": 8.85387393063622e-09, "logits/chosen": -0.6413298845291138, "logits/rejected": -0.6052228808403015, "loss": 0.5593, "step": 315 }, { "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06694652885198593, "beta_dpo/gap_mean": 22.677587509155273, "beta_dpo/gap_std": 31.181507110595703, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.9411764705882353, "grad_norm": 0.6485550999641418, "learning_rate": 5.7879205600998296e-09, "logits/chosen": -0.6589199304580688, "logits/rejected": -0.6012631058692932, "loss": 0.6832, "step": 320 }, { "beta_dpo/beta_used": 0.03462111949920654, "beta_dpo/beta_used_raw": -0.054052434861660004, "beta_dpo/gap_mean": 23.140369415283203, "beta_dpo/gap_std": 31.43625259399414, "beta_dpo/mask_keep_frac": 0.8500000238418579, "epoch": 0.9558823529411765, "grad_norm": 0.5991944670677185, "learning_rate": 3.3653488440851253e-09, "logits/chosen": -0.6647250652313232, "logits/rejected": -0.6088197231292725, "loss": 0.6068, "step": 325 }, { "beta_dpo/beta_used": 0.03790256381034851, "beta_dpo/beta_used_raw": 0.029860854148864746, "beta_dpo/gap_mean": 24.296361923217773, "beta_dpo/gap_std": 31.577083587646484, "beta_dpo/mask_keep_frac": 0.7124999761581421, "epoch": 0.9705882352941176, "grad_norm": 0.614765465259552, "learning_rate": 1.592541096695571e-09, "logits/chosen": -0.6624591946601868, "logits/rejected": -0.62751704454422, "loss": 0.4828, "step": 330 }, { "beta_dpo/beta_used": 0.02071220614016056, "beta_dpo/beta_used_raw": -0.03980039432644844, "beta_dpo/gap_mean": 24.669193267822266, "beta_dpo/gap_std": 30.901264190673828, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9852941176470589, "grad_norm": 25.446868896484375, "learning_rate": 4.741678157389739e-10, "logits/chosen": -0.6353505849838257, "logits/rejected": -0.590802788734436, "loss": 0.5653, "step": 335 }, { "beta_dpo/beta_used": 0.04662991315126419, "beta_dpo/beta_used_raw": -0.04880411922931671, "beta_dpo/gap_mean": 25.268396377563477, "beta_dpo/gap_std": 30.97623062133789, "beta_dpo/mask_keep_frac": 0.75, "epoch": 1.0, "grad_norm": 0.6792064309120178, "learning_rate": 1.31753782067201e-11, "logits/chosen": -0.6614812016487122, "logits/rejected": -0.6312215924263, "loss": 0.6244, "step": 340 }, { "epoch": 1.0, "step": 340, "total_flos": 0.0, "train_loss": 0.5267414394546958, "train_runtime": 1440.2657, "train_samples_per_second": 30.271, "train_steps_per_second": 0.236 } ], "logging_steps": 5, "max_steps": 340, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }