{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta_used": 0.10009249299764633, "beta_dpo/beta_used_raw": 0.10009249299764633, "beta_dpo/gap_mean": 0.0012140885228291154, "beta_dpo/gap_std": 0.029596734791994095, "beta_dpo/mask_keep_frac": 0.9375, "epoch": 0.0030303030303030303, "grad_norm": 11.079418182373047, "learning_rate": 0.0, "logits/chosen": -0.818070113658905, "logits/rejected": -0.7612971663475037, "loss": 0.6929, "step": 1 }, { "beta_dpo/beta_used": 0.10004878044128418, "beta_dpo/beta_used_raw": 0.10004878044128418, "beta_dpo/gap_mean": -0.003181760897859931, "beta_dpo/gap_std": 0.09769059717655182, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.015151515151515152, "grad_norm": 12.246779441833496, "learning_rate": 6.060606060606061e-08, "logits/chosen": -0.8416346907615662, "logits/rejected": -0.8071619272232056, "loss": 0.6934, "step": 5 }, { "beta_dpo/beta_used": 0.10060784965753555, "beta_dpo/beta_used_raw": 0.10060784965753555, "beta_dpo/gap_mean": -0.0015905939508229494, "beta_dpo/gap_std": 0.1881129890680313, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.030303030303030304, "grad_norm": 11.778424263000488, "learning_rate": 1.3636363636363635e-07, "logits/chosen": -0.7911893129348755, "logits/rejected": -0.7587390542030334, "loss": 0.6928, "step": 10 }, { "beta_dpo/beta_used": 0.10040197521448135, "beta_dpo/beta_used_raw": 0.10040197521448135, "beta_dpo/gap_mean": 0.0006210329011082649, "beta_dpo/gap_std": 0.24522730708122253, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.045454545454545456, "grad_norm": 12.626185417175293, "learning_rate": 2.121212121212121e-07, "logits/chosen": -0.8082472085952759, "logits/rejected": -0.8093615770339966, "loss": 0.6928, "step": 15 }, { "beta_dpo/beta_used": 0.10040859878063202, "beta_dpo/beta_used_raw": 0.10040859878063202, "beta_dpo/gap_mean": 0.008134648203849792, "beta_dpo/gap_std": 0.2810249626636505, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.06060606060606061, "grad_norm": 12.163843154907227, "learning_rate": 2.878787878787879e-07, "logits/chosen": -0.7914258241653442, "logits/rejected": -0.7522870302200317, "loss": 0.6925, "step": 20 }, { "beta_dpo/beta_used": 0.10019676387310028, "beta_dpo/beta_used_raw": 0.10019676387310028, "beta_dpo/gap_mean": 0.007132118102163076, "beta_dpo/gap_std": 0.3137893080711365, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.07575757575757576, "grad_norm": 12.878430366516113, "learning_rate": 3.636363636363636e-07, "logits/chosen": -0.7768210172653198, "logits/rejected": -0.771538496017456, "loss": 0.6926, "step": 25 }, { "beta_dpo/beta_used": 0.10199077427387238, "beta_dpo/beta_used_raw": 0.10199077427387238, "beta_dpo/gap_mean": 0.015979086980223656, "beta_dpo/gap_std": 0.34232962131500244, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.09090909090909091, "grad_norm": 11.947314262390137, "learning_rate": 4.3939393939393937e-07, "logits/chosen": -0.8367147445678711, "logits/rejected": -0.8112382888793945, "loss": 0.6907, "step": 30 }, { "beta_dpo/beta_used": 0.10177697986364365, "beta_dpo/beta_used_raw": 0.10177697986364365, "beta_dpo/gap_mean": 0.0375533364713192, "beta_dpo/gap_std": 0.3859425187110901, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.10606060606060606, "grad_norm": 14.33592700958252, "learning_rate": 4.999860140229787e-07, "logits/chosen": -0.8096274137496948, "logits/rejected": -0.7928019762039185, "loss": 0.6898, "step": 35 }, { "beta_dpo/beta_used": 0.10338791459798813, "beta_dpo/beta_used_raw": 0.10338791459798813, "beta_dpo/gap_mean": 0.06975066661834717, "beta_dpo/gap_std": 0.45846351981163025, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.12121212121212122, "grad_norm": 11.904743194580078, "learning_rate": 4.994966691179711e-07, "logits/chosen": -0.7240467667579651, "logits/rejected": -0.6869294047355652, "loss": 0.6868, "step": 40 }, { "beta_dpo/beta_used": 0.105168916285038, "beta_dpo/beta_used_raw": 0.105168916285038, "beta_dpo/gap_mean": 0.14308178424835205, "beta_dpo/gap_std": 0.5644584894180298, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.13636363636363635, "grad_norm": 13.17418098449707, "learning_rate": 4.983095894354857e-07, "logits/chosen": -0.7734057307243347, "logits/rejected": -0.7477155923843384, "loss": 0.6818, "step": 45 }, { "beta_dpo/beta_used": 0.10223841667175293, "beta_dpo/beta_used_raw": 0.10223841667175293, "beta_dpo/gap_mean": 0.21264997124671936, "beta_dpo/gap_std": 0.7354207038879395, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.15151515151515152, "grad_norm": 12.405279159545898, "learning_rate": 4.964280947263676e-07, "logits/chosen": -0.7339795827865601, "logits/rejected": -0.7022608518600464, "loss": 0.6815, "step": 50 }, { "beta_dpo/beta_used": 0.10513879358768463, "beta_dpo/beta_used_raw": 0.10513879358768463, "beta_dpo/gap_mean": 0.27966898679733276, "beta_dpo/gap_std": 1.0065762996673584, "beta_dpo/mask_keep_frac": 0.875, "epoch": 0.16666666666666666, "grad_norm": 13.70584774017334, "learning_rate": 4.938574467213517e-07, "logits/chosen": -0.7537848949432373, "logits/rejected": -0.7295504808425903, "loss": 0.6752, "step": 55 }, { "beta_dpo/beta_used": 0.10337547957897186, "beta_dpo/beta_used_raw": 0.10337547957897186, "beta_dpo/gap_mean": 0.3844713568687439, "beta_dpo/gap_std": 1.2807694673538208, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.18181818181818182, "grad_norm": 12.184106826782227, "learning_rate": 4.906048344162676e-07, "logits/chosen": -0.7029341459274292, "logits/rejected": -0.6750706434249878, "loss": 0.6718, "step": 60 }, { "beta_dpo/beta_used": 0.10123707354068756, "beta_dpo/beta_used_raw": 0.10123707354068756, "beta_dpo/gap_mean": 0.5187833309173584, "beta_dpo/gap_std": 1.5582863092422485, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.19696969696969696, "grad_norm": 12.474862098693848, "learning_rate": 4.866793539675126e-07, "logits/chosen": -0.7182232737541199, "logits/rejected": -0.6864453554153442, "loss": 0.668, "step": 65 }, { "beta_dpo/beta_used": 0.10362961143255234, "beta_dpo/beta_used_raw": 0.10362961143255234, "beta_dpo/gap_mean": 0.6425492763519287, "beta_dpo/gap_std": 1.8649520874023438, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.21212121212121213, "grad_norm": 13.411380767822266, "learning_rate": 4.820919832540181e-07, "logits/chosen": -0.6498057842254639, "logits/rejected": -0.6468607783317566, "loss": 0.6611, "step": 70 }, { "beta_dpo/beta_used": 0.10772015154361725, "beta_dpo/beta_used_raw": 0.10772015154361725, "beta_dpo/gap_mean": 0.7031647562980652, "beta_dpo/gap_std": 2.167182683944702, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.22727272727272727, "grad_norm": 12.674415588378906, "learning_rate": 4.768555511768486e-07, "logits/chosen": -0.6153755187988281, "logits/rejected": -0.606307327747345, "loss": 0.653, "step": 75 }, { "beta_dpo/beta_used": 0.10870923101902008, "beta_dpo/beta_used_raw": 0.10870923101902008, "beta_dpo/gap_mean": 0.8461316227912903, "beta_dpo/gap_std": 2.5076112747192383, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.24242424242424243, "grad_norm": 13.425226211547852, "learning_rate": 4.7098470178228755e-07, "logits/chosen": -0.6497966647148132, "logits/rejected": -0.6329380869865417, "loss": 0.6466, "step": 80 }, { "beta_dpo/beta_used": 0.1060580238699913, "beta_dpo/beta_used_raw": 0.1060580238699913, "beta_dpo/gap_mean": 0.9982147216796875, "beta_dpo/gap_std": 2.806090831756592, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.25757575757575757, "grad_norm": 9.75727653503418, "learning_rate": 4.6449585330874425e-07, "logits/chosen": -0.6012470722198486, "logits/rejected": -0.5752061605453491, "loss": 0.6435, "step": 85 }, { "beta_dpo/beta_used": 0.11574982106685638, "beta_dpo/beta_used_raw": 0.11574982106685638, "beta_dpo/gap_mean": 1.2254174947738647, "beta_dpo/gap_std": 3.2572083473205566, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.2727272727272727, "grad_norm": 10.738388061523438, "learning_rate": 4.5740715227200897e-07, "logits/chosen": -0.650251567363739, "logits/rejected": -0.6243180632591248, "loss": 0.6219, "step": 90 }, { "beta_dpo/beta_used": 0.09826114773750305, "beta_dpo/beta_used_raw": 0.09826114773750305, "beta_dpo/gap_mean": 1.4264709949493408, "beta_dpo/gap_std": 3.7166686058044434, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.2878787878787879, "grad_norm": 13.121673583984375, "learning_rate": 4.4973842271726024e-07, "logits/chosen": -0.5675602555274963, "logits/rejected": -0.5547417402267456, "loss": 0.6362, "step": 95 }, { "beta_dpo/beta_used": 0.10674748569726944, "beta_dpo/beta_used_raw": 0.10674748569726944, "beta_dpo/gap_mean": 1.5260875225067139, "beta_dpo/gap_std": 4.1418657302856445, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.30303030303030304, "grad_norm": 15.6002197265625, "learning_rate": 4.415111107797445e-07, "logits/chosen": -0.5712032914161682, "logits/rejected": -0.5290790796279907, "loss": 0.6231, "step": 100 }, { "epoch": 0.30303030303030304, "eval_beta_dpo/beta_used": 0.11167524755001068, "eval_beta_dpo/beta_used_raw": 0.11167524755001068, "eval_beta_dpo/gap_mean": 1.9525233507156372, "eval_beta_dpo/gap_std": 4.847992897033691, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.5574179887771606, "eval_logits/rejected": -0.540048360824585, "eval_loss": 0.6185675263404846, "eval_runtime": 18.8608, "eval_samples_per_second": 122.105, "eval_steps_per_second": 0.954, "step": 100 }, { "beta_dpo/beta_used": 0.06386379897594452, "beta_dpo/beta_used_raw": 0.06386379897594452, "beta_dpo/gap_mean": 2.0449135303497314, "beta_dpo/gap_std": 5.11466121673584, "beta_dpo/mask_keep_frac": 0.887499988079071, "epoch": 0.3181818181818182, "grad_norm": 10.90100383758545, "learning_rate": 4.327482247091679e-07, "logits/chosen": -0.5555615425109863, "logits/rejected": -0.528151273727417, "loss": 0.6534, "step": 105 }, { "beta_dpo/beta_used": 0.08590348809957504, "beta_dpo/beta_used_raw": 0.08590348809957504, "beta_dpo/gap_mean": 2.1610352993011475, "beta_dpo/gap_std": 5.504552364349365, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.3333333333333333, "grad_norm": 7.672910690307617, "learning_rate": 4.234742705255272e-07, "logits/chosen": -0.4595974385738373, "logits/rejected": -0.45340991020202637, "loss": 0.6317, "step": 110 }, { "beta_dpo/beta_used": 0.10557971149682999, "beta_dpo/beta_used_raw": 0.10557971149682999, "beta_dpo/gap_mean": 2.390939474105835, "beta_dpo/gap_std": 5.818662166595459, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.3484848484848485, "grad_norm": 8.269521713256836, "learning_rate": 4.137151834863213e-07, "logits/chosen": -0.5435389280319214, "logits/rejected": -0.4987867474555969, "loss": 0.5959, "step": 115 }, { "beta_dpo/beta_used": 0.08998899161815643, "beta_dpo/beta_used_raw": 0.08998899161815643, "beta_dpo/gap_mean": 2.3944687843322754, "beta_dpo/gap_std": 6.05053186416626, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.36363636363636365, "grad_norm": 13.379582405090332, "learning_rate": 4.0349825555680045e-07, "logits/chosen": -0.5789726972579956, "logits/rejected": -0.5432100296020508, "loss": 0.6198, "step": 120 }, { "beta_dpo/beta_used": 0.08791515231132507, "beta_dpo/beta_used_raw": 0.08791515231132507, "beta_dpo/gap_mean": 2.5297319889068604, "beta_dpo/gap_std": 6.210949897766113, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.3787878787878788, "grad_norm": 7.562979221343994, "learning_rate": 3.9285205908608934e-07, "logits/chosen": -0.5596938729286194, "logits/rejected": -0.5469728708267212, "loss": 0.6146, "step": 125 }, { "beta_dpo/beta_used": 0.11058609187602997, "beta_dpo/beta_used_raw": 0.11058609187602997, "beta_dpo/gap_mean": 2.536633014678955, "beta_dpo/gap_std": 6.392093181610107, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.3939393939393939, "grad_norm": 23.452016830444336, "learning_rate": 3.818063669026256e-07, "logits/chosen": -0.5439124703407288, "logits/rejected": -0.5279029607772827, "loss": 0.5928, "step": 130 }, { "beta_dpo/beta_used": 0.10615509748458862, "beta_dpo/beta_used_raw": 0.10615509748458862, "beta_dpo/gap_mean": 2.8626952171325684, "beta_dpo/gap_std": 6.557906150817871, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.4090909090909091, "grad_norm": 16.79780387878418, "learning_rate": 3.7039206905237656e-07, "logits/chosen": -0.556363582611084, "logits/rejected": -0.5632845163345337, "loss": 0.5811, "step": 135 }, { "beta_dpo/beta_used": 0.1162651777267456, "beta_dpo/beta_used_raw": 0.1162651777267456, "beta_dpo/gap_mean": 3.088381290435791, "beta_dpo/gap_std": 6.59566593170166, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.42424242424242425, "grad_norm": 14.226531982421875, "learning_rate": 3.586410864126781e-07, "logits/chosen": -0.5420447587966919, "logits/rejected": -0.5301133990287781, "loss": 0.5488, "step": 140 }, { "beta_dpo/beta_used": 0.11434066295623779, "beta_dpo/beta_used_raw": 0.11434066295623779, "beta_dpo/gap_mean": 3.461772918701172, "beta_dpo/gap_std": 6.666165828704834, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4393939393939394, "grad_norm": 13.191394805908203, "learning_rate": 3.465862814232821e-07, "logits/chosen": -0.49957942962646484, "logits/rejected": -0.4835745394229889, "loss": 0.5499, "step": 145 }, { "beta_dpo/beta_used": 0.12056032568216324, "beta_dpo/beta_used_raw": 0.12056032568216324, "beta_dpo/gap_mean": 3.900587797164917, "beta_dpo/gap_std": 6.922667026519775, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.45454545454545453, "grad_norm": 10.217402458190918, "learning_rate": 3.3426136618426043e-07, "logits/chosen": -0.5163663625717163, "logits/rejected": -0.4923931062221527, "loss": 0.5155, "step": 150 }, { "beta_dpo/beta_used": 0.08996663987636566, "beta_dpo/beta_used_raw": 0.08996663987636566, "beta_dpo/gap_mean": 4.022343635559082, "beta_dpo/gap_std": 7.262037754058838, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.4696969696969697, "grad_norm": 6.328583240509033, "learning_rate": 3.2170080817777257e-07, "logits/chosen": -0.47460970282554626, "logits/rejected": -0.4646075665950775, "loss": 0.5723, "step": 155 }, { "beta_dpo/beta_used": 0.09257197380065918, "beta_dpo/beta_used_raw": 0.09257197380065918, "beta_dpo/gap_mean": 4.135162353515625, "beta_dpo/gap_std": 7.709047794342041, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.48484848484848486, "grad_norm": 2.340575933456421, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -0.549339234828949, "logits/rejected": -0.5254893898963928, "loss": 0.5706, "step": 160 }, { "beta_dpo/beta_used": 0.1215561255812645, "beta_dpo/beta_used_raw": 0.1215561255812645, "beta_dpo/gap_mean": 4.385509490966797, "beta_dpo/gap_std": 8.18330192565918, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5, "grad_norm": 27.537439346313477, "learning_rate": 2.9601383051430505e-07, "logits/chosen": -0.4928368926048279, "logits/rejected": -0.46984148025512695, "loss": 0.5273, "step": 165 }, { "beta_dpo/beta_used": 0.08485610783100128, "beta_dpo/beta_used_raw": 0.08485610783100128, "beta_dpo/gap_mean": 4.619694709777832, "beta_dpo/gap_std": 8.622313499450684, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.5151515151515151, "grad_norm": 10.716350555419922, "learning_rate": 2.8295924627584004e-07, "logits/chosen": -0.47423356771469116, "logits/rejected": -0.43696826696395874, "loss": 0.5656, "step": 170 }, { "beta_dpo/beta_used": 0.10904519259929657, "beta_dpo/beta_used_raw": 0.10904519259929657, "beta_dpo/gap_mean": 4.983495712280273, "beta_dpo/gap_std": 9.088811874389648, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.5303030303030303, "grad_norm": 16.4443416595459, "learning_rate": 2.698124892141971e-07, "logits/chosen": -0.4739559590816498, "logits/rejected": -0.452726274728775, "loss": 0.5275, "step": 175 }, { "beta_dpo/beta_used": 0.09932375699281693, "beta_dpo/beta_used_raw": 0.09932375699281693, "beta_dpo/gap_mean": 5.506978511810303, "beta_dpo/gap_std": 9.59619426727295, "beta_dpo/mask_keep_frac": 0.887499988079071, "epoch": 0.5454545454545454, "grad_norm": 6.31719446182251, "learning_rate": 2.5661032514931834e-07, "logits/chosen": -0.5071254968643188, "logits/rejected": -0.4881424307823181, "loss": 0.5367, "step": 180 }, { "beta_dpo/beta_used": 0.08257903903722763, "beta_dpo/beta_used_raw": 0.08257903903722763, "beta_dpo/gap_mean": 5.807556629180908, "beta_dpo/gap_std": 10.00381088256836, "beta_dpo/mask_keep_frac": 0.9125000238418579, "epoch": 0.5606060606060606, "grad_norm": 16.983186721801758, "learning_rate": 2.4338967485068164e-07, "logits/chosen": -0.44962626695632935, "logits/rejected": -0.4310552179813385, "loss": 0.5442, "step": 185 }, { "beta_dpo/beta_used": 0.1385645568370819, "beta_dpo/beta_used_raw": 0.1385645568370819, "beta_dpo/gap_mean": 5.958134651184082, "beta_dpo/gap_std": 10.562962532043457, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.5757575757575758, "grad_norm": 31.49508285522461, "learning_rate": 2.3018751078580283e-07, "logits/chosen": -0.4748384356498718, "logits/rejected": -0.4529237151145935, "loss": 0.4962, "step": 190 }, { "beta_dpo/beta_used": 0.10011277347803116, "beta_dpo/beta_used_raw": 0.09850181639194489, "beta_dpo/gap_mean": 6.100876808166504, "beta_dpo/gap_std": 11.020359992980957, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.5909090909090909, "grad_norm": 17.15842056274414, "learning_rate": 2.170407537241599e-07, "logits/chosen": -0.4534582495689392, "logits/rejected": -0.42914143204689026, "loss": 0.5502, "step": 195 }, { "beta_dpo/beta_used": 0.1180671900510788, "beta_dpo/beta_used_raw": 0.1180671900510788, "beta_dpo/gap_mean": 6.612210273742676, "beta_dpo/gap_std": 11.322927474975586, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.6060606060606061, "grad_norm": 13.65029239654541, "learning_rate": 2.0398616948569493e-07, "logits/chosen": -0.4936196208000183, "logits/rejected": -0.4612639546394348, "loss": 0.498, "step": 200 }, { "epoch": 0.6060606060606061, "eval_beta_dpo/beta_used": 0.10561517626047134, "eval_beta_dpo/beta_used_raw": 0.10561517626047134, "eval_beta_dpo/gap_mean": 6.780107498168945, "eval_beta_dpo/gap_std": 11.72070598602295, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.4722588062286377, "eval_logits/rejected": -0.45819586515426636, "eval_loss": 0.5506138801574707, "eval_runtime": 18.8213, "eval_samples_per_second": 122.361, "eval_steps_per_second": 0.956, "step": 200 }, { "beta_dpo/beta_used": 0.08741272985935211, "beta_dpo/beta_used_raw": 0.08735300600528717, "beta_dpo/gap_mean": 7.251504421234131, "beta_dpo/gap_std": 11.868724822998047, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.6212121212121212, "grad_norm": 0.15343494713306427, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -0.4946843981742859, "logits/rejected": -0.46265077590942383, "loss": 0.5233, "step": 205 }, { "beta_dpo/beta_used": 0.08492619544267654, "beta_dpo/beta_used_raw": 0.08492619544267654, "beta_dpo/gap_mean": 7.168964385986328, "beta_dpo/gap_std": 11.9141845703125, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.6363636363636364, "grad_norm": 38.745361328125, "learning_rate": 1.782991918222275e-07, "logits/chosen": -0.42799100279808044, "logits/rejected": -0.4196823239326477, "loss": 0.5237, "step": 210 }, { "beta_dpo/beta_used": 0.08925200998783112, "beta_dpo/beta_used_raw": 0.08484373241662979, "beta_dpo/gap_mean": 7.09285831451416, "beta_dpo/gap_std": 12.202669143676758, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.6515151515151515, "grad_norm": 39.51192092895508, "learning_rate": 1.6573863381573954e-07, "logits/chosen": -0.43246760964393616, "logits/rejected": -0.4298061430454254, "loss": 0.5466, "step": 215 }, { "beta_dpo/beta_used": 0.1373816877603531, "beta_dpo/beta_used_raw": 0.1373816877603531, "beta_dpo/gap_mean": 7.408307075500488, "beta_dpo/gap_std": 12.6698579788208, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6666666666666666, "grad_norm": 66.92206573486328, "learning_rate": 1.534137185767178e-07, "logits/chosen": -0.5049004554748535, "logits/rejected": -0.4828864634037018, "loss": 0.4731, "step": 220 }, { "beta_dpo/beta_used": 0.11999156326055527, "beta_dpo/beta_used_raw": 0.11999156326055527, "beta_dpo/gap_mean": 7.8069658279418945, "beta_dpo/gap_std": 12.916173934936523, "beta_dpo/mask_keep_frac": 0.7124999761581421, "epoch": 0.6818181818181818, "grad_norm": 5.55664587020874, "learning_rate": 1.4135891358732205e-07, "logits/chosen": -0.4607675075531006, "logits/rejected": -0.429083913564682, "loss": 0.4933, "step": 225 }, { "beta_dpo/beta_used": 0.11390962451696396, "beta_dpo/beta_used_raw": 0.11390962451696396, "beta_dpo/gap_mean": 7.83342981338501, "beta_dpo/gap_std": 12.932693481445312, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.696969696969697, "grad_norm": 32.68361282348633, "learning_rate": 1.2960793094762345e-07, "logits/chosen": -0.41661542654037476, "logits/rejected": -0.4079780578613281, "loss": 0.4954, "step": 230 }, { "beta_dpo/beta_used": 0.09100167453289032, "beta_dpo/beta_used_raw": 0.09100167453289032, "beta_dpo/gap_mean": 8.167860984802246, "beta_dpo/gap_std": 12.970059394836426, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.7121212121212122, "grad_norm": 1.9182671308517456, "learning_rate": 1.1819363309737438e-07, "logits/chosen": -0.4386097490787506, "logits/rejected": -0.42474693059921265, "loss": 0.5136, "step": 235 }, { "beta_dpo/beta_used": 0.11001662909984589, "beta_dpo/beta_used_raw": 0.11001662909984589, "beta_dpo/gap_mean": 8.317561149597168, "beta_dpo/gap_std": 13.424278259277344, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.7272727272727273, "grad_norm": 17.994626998901367, "learning_rate": 1.0714794091391072e-07, "logits/chosen": -0.4545617997646332, "logits/rejected": -0.4394044280052185, "loss": 0.4769, "step": 240 }, { "beta_dpo/beta_used": 0.07068195939064026, "beta_dpo/beta_used_raw": 0.07068195939064026, "beta_dpo/gap_mean": 8.271533966064453, "beta_dpo/gap_std": 13.785310745239258, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.7424242424242424, "grad_norm": 9.725923538208008, "learning_rate": 9.650174444319956e-08, "logits/chosen": -0.45390695333480835, "logits/rejected": -0.43619924783706665, "loss": 0.5268, "step": 245 }, { "beta_dpo/beta_used": 0.08607280999422073, "beta_dpo/beta_used_raw": 0.08015486598014832, "beta_dpo/gap_mean": 8.123547554016113, "beta_dpo/gap_std": 14.15746021270752, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.7575757575757576, "grad_norm": 19.712242126464844, "learning_rate": 8.628481651367875e-08, "logits/chosen": -0.4595223069190979, "logits/rejected": -0.4408304691314697, "loss": 0.5287, "step": 250 }, { "beta_dpo/beta_used": 0.0958368107676506, "beta_dpo/beta_used_raw": 0.08722580969333649, "beta_dpo/gap_mean": 8.267644882202148, "beta_dpo/gap_std": 14.14880657196045, "beta_dpo/mask_keep_frac": 0.8999999761581421, "epoch": 0.7727272727272727, "grad_norm": 61.9700927734375, "learning_rate": 7.652572947447272e-08, "logits/chosen": -0.44903382658958435, "logits/rejected": -0.4424815773963928, "loss": 0.5257, "step": 255 }, { "beta_dpo/beta_used": 0.07386674731969833, "beta_dpo/beta_used_raw": 0.06767500936985016, "beta_dpo/gap_mean": 8.649662017822266, "beta_dpo/gap_std": 14.375146865844727, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.7878787878787878, "grad_norm": 20.901798248291016, "learning_rate": 6.725177529083209e-08, "logits/chosen": -0.46160441637039185, "logits/rejected": -0.44480133056640625, "loss": 0.5284, "step": 260 }, { "beta_dpo/beta_used": 0.08889990299940109, "beta_dpo/beta_used_raw": 0.05368128418922424, "beta_dpo/gap_mean": 8.253731727600098, "beta_dpo/gap_std": 14.49620532989502, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.803030303030303, "grad_norm": 36.13115692138672, "learning_rate": 5.848888922025552e-08, "logits/chosen": -0.4071124196052551, "logits/rejected": -0.38313764333724976, "loss": 0.5524, "step": 265 }, { "beta_dpo/beta_used": 0.05102431774139404, "beta_dpo/beta_used_raw": 0.05102431774139404, "beta_dpo/gap_mean": 8.481303215026855, "beta_dpo/gap_std": 14.435537338256836, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.8181818181818182, "grad_norm": 4.406769275665283, "learning_rate": 5.026157728273966e-08, "logits/chosen": -0.43619123101234436, "logits/rejected": -0.40814194083213806, "loss": 0.5676, "step": 270 }, { "beta_dpo/beta_used": 0.08905264735221863, "beta_dpo/beta_used_raw": 0.08905264735221863, "beta_dpo/gap_mean": 8.75959587097168, "beta_dpo/gap_std": 14.441301345825195, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.8333333333333334, "grad_norm": 13.085917472839355, "learning_rate": 4.259284772799099e-08, "logits/chosen": -0.43446803092956543, "logits/rejected": -0.4283529818058014, "loss": 0.5225, "step": 275 }, { "beta_dpo/beta_used": 0.1104244738817215, "beta_dpo/beta_used_raw": 0.1104244738817215, "beta_dpo/gap_mean": 8.6881103515625, "beta_dpo/gap_std": 14.51659870147705, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.8484848484848485, "grad_norm": 47.124366760253906, "learning_rate": 3.550414669125573e-08, "logits/chosen": -0.4580152630805969, "logits/rejected": -0.4392933249473572, "loss": 0.4767, "step": 280 }, { "beta_dpo/beta_used": 0.14569848775863647, "beta_dpo/beta_used_raw": 0.14569848775863647, "beta_dpo/gap_mean": 9.179306030273438, "beta_dpo/gap_std": 14.847735404968262, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.8636363636363636, "grad_norm": 43.69351577758789, "learning_rate": 2.9015298217712453e-08, "logits/chosen": -0.42454952001571655, "logits/rejected": -0.3965614438056946, "loss": 0.4529, "step": 285 }, { "beta_dpo/beta_used": 0.06421518325805664, "beta_dpo/beta_used_raw": 0.056242913007736206, "beta_dpo/gap_mean": 9.178163528442383, "beta_dpo/gap_std": 14.94957160949707, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.8787878787878788, "grad_norm": 19.567977905273438, "learning_rate": 2.3144448823151392e-08, "logits/chosen": -0.4124082624912262, "logits/rejected": -0.38752835988998413, "loss": 0.5666, "step": 290 }, { "beta_dpo/beta_used": 0.11043484508991241, "beta_dpo/beta_used_raw": 0.11043484508991241, "beta_dpo/gap_mean": 9.004778861999512, "beta_dpo/gap_std": 15.063299179077148, "beta_dpo/mask_keep_frac": 0.737500011920929, "epoch": 0.8939393939393939, "grad_norm": 45.88330841064453, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -0.41249990463256836, "logits/rejected": -0.41048282384872437, "loss": 0.4783, "step": 295 }, { "beta_dpo/beta_used": 0.06652533262968063, "beta_dpo/beta_used_raw": 0.05020095035433769, "beta_dpo/gap_mean": 9.056544303894043, "beta_dpo/gap_std": 15.056539535522461, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.9090909090909091, "grad_norm": 0.25523823499679565, "learning_rate": 1.3320646032487393e-08, "logits/chosen": -0.4351003170013428, "logits/rejected": -0.42235302925109863, "loss": 0.5615, "step": 300 }, { "epoch": 0.9090909090909091, "eval_beta_dpo/beta_used": 0.10696752369403839, "eval_beta_dpo/beta_used_raw": 0.10696752369403839, "eval_beta_dpo/gap_mean": 8.805192947387695, "eval_beta_dpo/gap_std": 15.178271293640137, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.4217662513256073, "eval_logits/rejected": -0.4089266359806061, "eval_loss": 0.5633069276809692, "eval_runtime": 18.8692, "eval_samples_per_second": 122.051, "eval_steps_per_second": 0.954, "step": 300 }, { "beta_dpo/beta_used": 0.0679563358426094, "beta_dpo/beta_used_raw": 0.06361763179302216, "beta_dpo/gap_mean": 9.039968490600586, "beta_dpo/gap_std": 15.006390571594238, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.9242424242424242, "grad_norm": 26.64524269104004, "learning_rate": 9.395165583732379e-09, "logits/chosen": -0.40837812423706055, "logits/rejected": -0.3757531940937042, "loss": 0.5354, "step": 305 }, { "beta_dpo/beta_used": 0.09475517272949219, "beta_dpo/beta_used_raw": 0.09475517272949219, "beta_dpo/gap_mean": 9.129568099975586, "beta_dpo/gap_std": 14.912490844726562, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.9393939393939394, "grad_norm": 17.02347755432129, "learning_rate": 6.142553278648238e-09, "logits/chosen": -0.4192012846469879, "logits/rejected": -0.4020632803440094, "loss": 0.4862, "step": 310 }, { "beta_dpo/beta_used": 0.09896779805421829, "beta_dpo/beta_used_raw": 0.09896779805421829, "beta_dpo/gap_mean": 9.311323165893555, "beta_dpo/gap_std": 14.838136672973633, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.9545454545454546, "grad_norm": 13.178363800048828, "learning_rate": 3.5719052736323806e-09, "logits/chosen": -0.41689127683639526, "logits/rejected": -0.41213899850845337, "loss": 0.5065, "step": 315 }, { "beta_dpo/beta_used": 0.05972599983215332, "beta_dpo/beta_used_raw": 0.048868484795093536, "beta_dpo/gap_mean": 9.482072830200195, "beta_dpo/gap_std": 15.056081771850586, "beta_dpo/mask_keep_frac": 0.8999999761581421, "epoch": 0.9696969696969697, "grad_norm": 16.041927337646484, "learning_rate": 1.690410564514244e-09, "logits/chosen": -0.42210960388183594, "logits/rejected": -0.38882067799568176, "loss": 0.5702, "step": 320 }, { "beta_dpo/beta_used": 0.12381196022033691, "beta_dpo/beta_used_raw": 0.12381196022033691, "beta_dpo/gap_mean": 9.218812942504883, "beta_dpo/gap_std": 15.04699993133545, "beta_dpo/mask_keep_frac": 0.887499988079071, "epoch": 0.9848484848484849, "grad_norm": 30.680978775024414, "learning_rate": 5.033308820289184e-10, "logits/chosen": -0.4276047348976135, "logits/rejected": -0.4020787179470062, "loss": 0.4571, "step": 325 }, { "beta_dpo/beta_used": 0.08325864374637604, "beta_dpo/beta_used_raw": 0.07991620153188705, "beta_dpo/gap_mean": 9.292040824890137, "beta_dpo/gap_std": 15.013906478881836, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 1.0, "grad_norm": 12.934744834899902, "learning_rate": 1.3985977021235829e-11, "logits/chosen": -0.45221251249313354, "logits/rejected": -0.42801961302757263, "loss": 0.5248, "step": 330 }, { "epoch": 1.0, "step": 330, "total_flos": 0.0, "train_loss": 0.5772968926213005, "train_runtime": 1407.4268, "train_samples_per_second": 30.08, "train_steps_per_second": 0.234 } ], "logging_steps": 5, "max_steps": 330, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }