{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta_used": 0.10024853050708771, "beta_dpo/beta_used_raw": 0.10024853050708771, "beta_dpo/gap_mean": -0.0031278375536203384, "beta_dpo/gap_std": 0.09185527265071869, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.0020942408376963353, "grad_norm": 80.06067657470703, "learning_rate": 0.0, "logits/chosen": -0.6103914976119995, "logits/rejected": -0.6099507808685303, "loss": 1.3869, "step": 1 }, { "beta_dpo/beta_used": 0.10045824944972992, "beta_dpo/beta_used_raw": 0.10045824944972992, "beta_dpo/gap_mean": 0.0029368107207119465, "beta_dpo/gap_std": 0.47314706444740295, "beta_dpo/mask_keep_frac": 0.7916666865348816, "epoch": 0.020942408376963352, "grad_norm": 72.42662811279297, "learning_rate": 9.375e-08, "logits/chosen": -0.6866854429244995, "logits/rejected": -0.668829083442688, "loss": 1.386, "step": 10 }, { "beta_dpo/beta_used": 0.10218687355518341, "beta_dpo/beta_used_raw": 0.10218687355518341, "beta_dpo/gap_mean": 0.05031166225671768, "beta_dpo/gap_std": 0.731455385684967, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.041884816753926704, "grad_norm": 77.65188598632812, "learning_rate": 1.9791666666666664e-07, "logits/chosen": -0.6419292688369751, "logits/rejected": -0.6541769504547119, "loss": 1.3785, "step": 20 }, { "beta_dpo/beta_used": 0.10061170160770416, "beta_dpo/beta_used_raw": 0.10061170160770416, "beta_dpo/gap_mean": 0.0937122255563736, "beta_dpo/gap_std": 0.7656054496765137, "beta_dpo/mask_keep_frac": 0.8062499761581421, "epoch": 0.06282722513089005, "grad_norm": 74.03604125976562, "learning_rate": 3.020833333333333e-07, "logits/chosen": -0.6690393686294556, "logits/rejected": -0.6756961941719055, "loss": 1.3767, "step": 30 }, { "beta_dpo/beta_used": 0.10513947159051895, "beta_dpo/beta_used_raw": 0.10513947159051895, "beta_dpo/gap_mean": 0.3032568395137787, "beta_dpo/gap_std": 0.9986203908920288, "beta_dpo/mask_keep_frac": 0.856249988079071, "epoch": 0.08376963350785341, "grad_norm": 68.4834976196289, "learning_rate": 4.0625e-07, "logits/chosen": -0.6429699659347534, "logits/rejected": -0.6495934724807739, "loss": 1.3467, "step": 40 }, { "beta_dpo/beta_used": 0.104192815721035, "beta_dpo/beta_used_raw": 0.104192815721035, "beta_dpo/gap_mean": 0.7923426032066345, "beta_dpo/gap_std": 1.8291547298431396, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.10471204188481675, "grad_norm": 71.59126281738281, "learning_rate": 4.999932966293553e-07, "logits/chosen": -0.7035672068595886, "logits/rejected": -0.7120343446731567, "loss": 1.3039, "step": 50 }, { "beta_dpo/beta_used": 0.10957477241754532, "beta_dpo/beta_used_raw": 0.10957477241754532, "beta_dpo/gap_mean": 1.5687782764434814, "beta_dpo/gap_std": 3.4623851776123047, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.1256544502617801, "grad_norm": 82.82760620117188, "learning_rate": 4.991893270335525e-07, "logits/chosen": -0.6742374897003174, "logits/rejected": -0.6726926565170288, "loss": 1.2274, "step": 60 }, { "beta_dpo/beta_used": 0.10323189198970795, "beta_dpo/beta_used_raw": 0.10323189198970795, "beta_dpo/gap_mean": 2.4878456592559814, "beta_dpo/gap_std": 5.3841118812561035, "beta_dpo/mask_keep_frac": 0.737500011920929, "epoch": 0.14659685863874344, "grad_norm": 79.24715423583984, "learning_rate": 4.970496218214204e-07, "logits/chosen": -0.7053920030593872, "logits/rejected": -0.7138158679008484, "loss": 1.1847, "step": 70 }, { "beta_dpo/beta_used": 0.10442471504211426, "beta_dpo/beta_used_raw": 0.10279443114995956, "beta_dpo/gap_mean": 3.6363892555236816, "beta_dpo/gap_std": 7.359000205993652, "beta_dpo/mask_keep_frac": 0.831250011920929, "epoch": 0.16753926701570682, "grad_norm": 40.18954849243164, "learning_rate": 4.935856505068998e-07, "logits/chosen": -0.7026282548904419, "logits/rejected": -0.70656818151474, "loss": 1.1297, "step": 80 }, { "beta_dpo/beta_used": 0.09297941625118256, "beta_dpo/beta_used_raw": 0.0927402526140213, "beta_dpo/gap_mean": 4.5779619216918945, "beta_dpo/gap_std": 9.087356567382812, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.18848167539267016, "grad_norm": 45.59261703491211, "learning_rate": 4.8881598109976e-07, "logits/chosen": -0.6874291896820068, "logits/rejected": -0.7057452201843262, "loss": 1.1141, "step": 90 }, { "beta_dpo/beta_used": 0.10471361875534058, "beta_dpo/beta_used_raw": 0.10211487114429474, "beta_dpo/gap_mean": 5.183230400085449, "beta_dpo/gap_std": 10.404474258422852, "beta_dpo/mask_keep_frac": 0.8187500238418579, "epoch": 0.2094240837696335, "grad_norm": 66.85250854492188, "learning_rate": 4.827661805750437e-07, "logits/chosen": -0.6732321977615356, "logits/rejected": -0.6987311840057373, "loss": 1.1044, "step": 100 }, { "beta_dpo/beta_used": 0.1166844591498375, "beta_dpo/beta_used_raw": 0.1166844591498375, "beta_dpo/gap_mean": 6.204737663269043, "beta_dpo/gap_std": 11.558156967163086, "beta_dpo/mask_keep_frac": 0.8062499761581421, "epoch": 0.23036649214659685, "grad_norm": 54.56244659423828, "learning_rate": 4.75468677825789e-07, "logits/chosen": -0.7261234521865845, "logits/rejected": -0.7450467348098755, "loss": 1.0282, "step": 110 }, { "beta_dpo/beta_used": 0.08581940829753876, "beta_dpo/beta_used_raw": 0.0759856328368187, "beta_dpo/gap_mean": 6.777069091796875, "beta_dpo/gap_std": 12.461393356323242, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.2513089005235602, "grad_norm": 54.73094940185547, "learning_rate": 4.669625898336438e-07, "logits/chosen": -0.7630956768989563, "logits/rejected": -0.776543378829956, "loss": 1.1069, "step": 120 }, { "beta_dpo/beta_used": 0.10493312776088715, "beta_dpo/beta_used_raw": 0.09375782310962677, "beta_dpo/gap_mean": 7.0316290855407715, "beta_dpo/gap_std": 13.4308500289917, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.27225130890052357, "grad_norm": 53.551025390625, "learning_rate": 4.5729351198915705e-07, "logits/chosen": -0.7406284809112549, "logits/rejected": -0.7330573201179504, "loss": 1.091, "step": 130 }, { "beta_dpo/beta_used": 0.0665307343006134, "beta_dpo/beta_used_raw": 0.04071963578462601, "beta_dpo/gap_mean": 7.776385307312012, "beta_dpo/gap_std": 14.402565002441406, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.2931937172774869, "grad_norm": 107.44986724853516, "learning_rate": 4.4651327368569684e-07, "logits/chosen": -0.7388048768043518, "logits/rejected": -0.7451251745223999, "loss": 1.1576, "step": 140 }, { "beta_dpo/beta_used": 0.07846825569868088, "beta_dpo/beta_used_raw": 0.06488198786973953, "beta_dpo/gap_mean": 8.364961624145508, "beta_dpo/gap_std": 14.984090805053711, "beta_dpo/mask_keep_frac": 0.7875000238418579, "epoch": 0.31413612565445026, "grad_norm": 38.963260650634766, "learning_rate": 4.346796604970912e-07, "logits/chosen": -0.768231213092804, "logits/rejected": -0.7551404237747192, "loss": 1.1224, "step": 150 }, { "beta_dpo/beta_used": 0.11797045171260834, "beta_dpo/beta_used_raw": 0.09938563406467438, "beta_dpo/gap_mean": 9.785693168640137, "beta_dpo/gap_std": 15.681970596313477, "beta_dpo/mask_keep_frac": 0.856249988079071, "epoch": 0.33507853403141363, "grad_norm": 80.62310028076172, "learning_rate": 4.218561044282098e-07, "logits/chosen": -0.7575253844261169, "logits/rejected": -0.7614981532096863, "loss": 1.0544, "step": 160 }, { "beta_dpo/beta_used": 0.07409517467021942, "beta_dpo/beta_used_raw": 0.04705094173550606, "beta_dpo/gap_mean": 10.035483360290527, "beta_dpo/gap_std": 16.284427642822266, "beta_dpo/mask_keep_frac": 0.8187500238418579, "epoch": 0.35602094240837695, "grad_norm": 65.990966796875, "learning_rate": 4.081113438988443e-07, "logits/chosen": -0.7660126090049744, "logits/rejected": -0.7755380868911743, "loss": 1.0875, "step": 170 }, { "beta_dpo/beta_used": 0.07568483054637909, "beta_dpo/beta_used_raw": 0.06118815019726753, "beta_dpo/gap_mean": 9.977958679199219, "beta_dpo/gap_std": 16.553037643432617, "beta_dpo/mask_keep_frac": 0.793749988079071, "epoch": 0.3769633507853403, "grad_norm": 56.092166900634766, "learning_rate": 3.935190552834828e-07, "logits/chosen": -0.7195374965667725, "logits/rejected": -0.7341417074203491, "loss": 1.0689, "step": 180 }, { "beta_dpo/beta_used": 0.10011672973632812, "beta_dpo/beta_used_raw": 0.08130989223718643, "beta_dpo/gap_mean": 10.884498596191406, "beta_dpo/gap_std": 17.649686813354492, "beta_dpo/mask_keep_frac": 0.768750011920929, "epoch": 0.39790575916230364, "grad_norm": 47.546146392822266, "learning_rate": 3.781574579820464e-07, "logits/chosen": -0.7710455060005188, "logits/rejected": -0.783000648021698, "loss": 1.0703, "step": 190 }, { "beta_dpo/beta_used": 0.03816061466932297, "beta_dpo/beta_used_raw": 0.01525220274925232, "beta_dpo/gap_mean": 10.375402450561523, "beta_dpo/gap_std": 17.245559692382812, "beta_dpo/mask_keep_frac": 0.831250011920929, "epoch": 0.418848167539267, "grad_norm": 40.988670349121094, "learning_rate": 3.621088951385353e-07, "logits/chosen": -0.7636905312538147, "logits/rejected": -0.7812480330467224, "loss": 1.1971, "step": 200 }, { "epoch": 0.418848167539267, "eval_beta_dpo/beta_used": 0.12430721521377563, "eval_beta_dpo/beta_used_raw": 0.09974151104688644, "eval_beta_dpo/gap_mean": 11.01975154876709, "eval_beta_dpo/gap_std": 18.638986587524414, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.7570037245750427, "eval_logits/rejected": -0.7552843689918518, "eval_loss": 0.6548933386802673, "eval_runtime": 51.0397, "eval_samples_per_second": 39.185, "eval_steps_per_second": 0.627, "step": 200 }, { "beta_dpo/beta_used": 0.09783867746591568, "beta_dpo/beta_used_raw": 0.09206344187259674, "beta_dpo/gap_mean": 11.258265495300293, "beta_dpo/gap_std": 19.141300201416016, "beta_dpo/mask_keep_frac": 0.831250011920929, "epoch": 0.4397905759162304, "grad_norm": 106.01080322265625, "learning_rate": 3.454593922550693e-07, "logits/chosen": -0.7539916038513184, "logits/rejected": -0.7599259614944458, "loss": 1.0859, "step": 210 }, { "beta_dpo/beta_used": 0.13818596303462982, "beta_dpo/beta_used_raw": 0.118813656270504, "beta_dpo/gap_mean": 11.77585220336914, "beta_dpo/gap_std": 19.773366928100586, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.4607329842931937, "grad_norm": 128.11996459960938, "learning_rate": 3.2829819606729477e-07, "logits/chosen": -0.7987761497497559, "logits/rejected": -0.7768310308456421, "loss": 1.0097, "step": 220 }, { "beta_dpo/beta_used": 0.0800265297293663, "beta_dpo/beta_used_raw": 0.06512973457574844, "beta_dpo/gap_mean": 12.928131103515625, "beta_dpo/gap_std": 20.115745544433594, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.4816753926701571, "grad_norm": 41.492034912109375, "learning_rate": 3.1071729615293424e-07, "logits/chosen": -0.7944627404212952, "logits/rejected": -0.7826088070869446, "loss": 1.0617, "step": 230 }, { "beta_dpo/beta_used": 0.07821373641490936, "beta_dpo/beta_used_raw": 0.05508134886622429, "beta_dpo/gap_mean": 13.714938163757324, "beta_dpo/gap_std": 21.715341567993164, "beta_dpo/mask_keep_frac": 0.7749999761581421, "epoch": 0.5026178010471204, "grad_norm": 55.7053108215332, "learning_rate": 2.9281093183781403e-07, "logits/chosen": -0.7329837083816528, "logits/rejected": -0.7595623731613159, "loss": 1.1275, "step": 240 }, { "beta_dpo/beta_used": 0.08778323978185654, "beta_dpo/beta_used_raw": 0.048361603170633316, "beta_dpo/gap_mean": 13.810220718383789, "beta_dpo/gap_std": 22.46774673461914, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.5235602094240838, "grad_norm": 53.13675308227539, "learning_rate": 2.7467508704251135e-07, "logits/chosen": -0.787535548210144, "logits/rejected": -0.7830525636672974, "loss": 1.1019, "step": 250 }, { "beta_dpo/beta_used": 0.11194082349538803, "beta_dpo/beta_used_raw": 0.06594248861074448, "beta_dpo/gap_mean": 13.73353099822998, "beta_dpo/gap_std": 22.698503494262695, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.5445026178010471, "grad_norm": 0.9119361042976379, "learning_rate": 2.5640697577740815e-07, "logits/chosen": -0.7817746996879578, "logits/rejected": -0.7839881181716919, "loss": 1.1687, "step": 260 }, { "beta_dpo/beta_used": 0.09284855425357819, "beta_dpo/beta_used_raw": 0.08311768621206284, "beta_dpo/gap_mean": 13.976015090942383, "beta_dpo/gap_std": 22.33526039123535, "beta_dpo/mask_keep_frac": 0.8187500238418579, "epoch": 0.5654450261780105, "grad_norm": 136.4973602294922, "learning_rate": 2.381045210440644e-07, "logits/chosen": -0.7521445155143738, "logits/rejected": -0.7410815954208374, "loss": 1.0209, "step": 270 }, { "beta_dpo/beta_used": 0.10686023533344269, "beta_dpo/beta_used_raw": 0.06296978890895844, "beta_dpo/gap_mean": 14.858721733093262, "beta_dpo/gap_std": 22.79940414428711, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.5863874345549738, "grad_norm": 38.58131790161133, "learning_rate": 2.1986582993616925e-07, "logits/chosen": -0.7521171569824219, "logits/rejected": -0.7675251364707947, "loss": 1.058, "step": 280 }, { "beta_dpo/beta_used": 0.06835642457008362, "beta_dpo/beta_used_raw": 0.012238355353474617, "beta_dpo/gap_mean": 13.978078842163086, "beta_dpo/gap_std": 23.335269927978516, "beta_dpo/mask_keep_frac": 0.8374999761581421, "epoch": 0.6073298429319371, "grad_norm": 1.274525761604309, "learning_rate": 2.0178866775369774e-07, "logits/chosen": -0.7752319574356079, "logits/rejected": -0.7829610109329224, "loss": 1.2126, "step": 290 }, { "beta_dpo/beta_used": 0.08970650285482407, "beta_dpo/beta_used_raw": 0.0673152282834053, "beta_dpo/gap_mean": 13.71714973449707, "beta_dpo/gap_std": 23.238323211669922, "beta_dpo/mask_keep_frac": 0.75, "epoch": 0.6282722513089005, "grad_norm": 60.473148345947266, "learning_rate": 1.839699339491937e-07, "logits/chosen": -0.7769112586975098, "logits/rejected": -0.7637456655502319, "loss": 1.1287, "step": 300 }, { "beta_dpo/beta_used": 0.0964554101228714, "beta_dpo/beta_used_raw": 0.06809216737747192, "beta_dpo/gap_mean": 14.4856595993042, "beta_dpo/gap_std": 23.187442779541016, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6492146596858639, "grad_norm": 30.574621200561523, "learning_rate": 1.6650514271527465e-07, "logits/chosen": -0.7852055430412292, "logits/rejected": -0.7743746638298035, "loss": 1.1436, "step": 310 }, { "beta_dpo/beta_used": 0.0930468887090683, "beta_dpo/beta_used_raw": 0.057879697531461716, "beta_dpo/gap_mean": 15.27861213684082, "beta_dpo/gap_std": 23.997211456298828, "beta_dpo/mask_keep_frac": 0.8125, "epoch": 0.6701570680628273, "grad_norm": 266.17156982421875, "learning_rate": 1.4948791099758052e-07, "logits/chosen": -0.8031824827194214, "logits/rejected": -0.7853301763534546, "loss": 1.2318, "step": 320 }, { "beta_dpo/beta_used": 0.08731904625892639, "beta_dpo/beta_used_raw": 0.05920511484146118, "beta_dpo/gap_mean": 15.062555313110352, "beta_dpo/gap_std": 24.421737670898438, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.6910994764397905, "grad_norm": 54.84642791748047, "learning_rate": 1.3300945667758012e-07, "logits/chosen": -0.7694008946418762, "logits/rejected": -0.7609071135520935, "loss": 1.058, "step": 330 }, { "beta_dpo/beta_used": 0.07772944122552872, "beta_dpo/beta_used_raw": 0.04176778346300125, "beta_dpo/gap_mean": 15.674954414367676, "beta_dpo/gap_std": 25.302011489868164, "beta_dpo/mask_keep_frac": 0.762499988079071, "epoch": 0.7120418848167539, "grad_norm": 162.36752319335938, "learning_rate": 1.1715810961514072e-07, "logits/chosen": -0.8045889139175415, "logits/rejected": -0.8078791499137878, "loss": 1.1423, "step": 340 }, { "beta_dpo/beta_used": 0.09465853869915009, "beta_dpo/beta_used_raw": 0.03491034358739853, "beta_dpo/gap_mean": 15.350746154785156, "beta_dpo/gap_std": 25.115270614624023, "beta_dpo/mask_keep_frac": 0.7562500238418579, "epoch": 0.7329842931937173, "grad_norm": 122.0066146850586, "learning_rate": 1.0201883817182949e-07, "logits/chosen": -0.7979413866996765, "logits/rejected": -0.8106569051742554, "loss": 1.1516, "step": 350 }, { "beta_dpo/beta_used": 0.07950497418642044, "beta_dpo/beta_used_raw": 0.021852362900972366, "beta_dpo/gap_mean": 15.205873489379883, "beta_dpo/gap_std": 25.209131240844727, "beta_dpo/mask_keep_frac": 0.800000011920929, "epoch": 0.7539267015706806, "grad_norm": 93.26220703125, "learning_rate": 8.76727937529367e-08, "logits/chosen": -0.7563246488571167, "logits/rejected": -0.7660932540893555, "loss": 1.24, "step": 360 }, { "beta_dpo/beta_used": 0.10245828330516815, "beta_dpo/beta_used_raw": 0.05802968889474869, "beta_dpo/gap_mean": 16.286312103271484, "beta_dpo/gap_std": 25.74993896484375, "beta_dpo/mask_keep_frac": 0.768750011920929, "epoch": 0.774869109947644, "grad_norm": 143.22608947753906, "learning_rate": 7.419687580962222e-08, "logits/chosen": -0.7966378331184387, "logits/rejected": -0.8195791244506836, "loss": 1.1759, "step": 370 }, { "beta_dpo/beta_used": 0.04838007315993309, "beta_dpo/beta_used_raw": -0.006214796099811792, "beta_dpo/gap_mean": 15.983156204223633, "beta_dpo/gap_std": 24.809345245361328, "beta_dpo/mask_keep_frac": 0.7437499761581421, "epoch": 0.7958115183246073, "grad_norm": 36.29342269897461, "learning_rate": 6.166331963291519e-08, "logits/chosen": -0.7881544828414917, "logits/rejected": -0.786669909954071, "loss": 1.2336, "step": 380 }, { "beta_dpo/beta_used": 0.07021647691726685, "beta_dpo/beta_used_raw": 0.00572154950350523, "beta_dpo/gap_mean": 16.157865524291992, "beta_dpo/gap_std": 25.035715103149414, "beta_dpo/mask_keep_frac": 0.793749988079071, "epoch": 0.8167539267015707, "grad_norm": 27.86089324951172, "learning_rate": 5.013930914912476e-08, "logits/chosen": -0.8044806718826294, "logits/rejected": -0.8055523633956909, "loss": 1.1986, "step": 390 }, { "beta_dpo/beta_used": 0.0964551717042923, "beta_dpo/beta_used_raw": 0.04246100038290024, "beta_dpo/gap_mean": 16.26091766357422, "beta_dpo/gap_std": 25.67080307006836, "beta_dpo/mask_keep_frac": 0.793749988079071, "epoch": 0.837696335078534, "grad_norm": 203.63230895996094, "learning_rate": 3.968661679220467e-08, "logits/chosen": -0.8050006628036499, "logits/rejected": -0.7917808890342712, "loss": 1.2165, "step": 400 }, { "epoch": 0.837696335078534, "eval_beta_dpo/beta_used": 0.1434057652950287, "eval_beta_dpo/beta_used_raw": 0.09862707555294037, "eval_beta_dpo/gap_mean": 15.923084259033203, "eval_beta_dpo/gap_std": 25.965980529785156, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": -0.8034595847129822, "eval_logits/rejected": -0.7974430322647095, "eval_loss": 0.7667602896690369, "eval_runtime": 50.9741, "eval_samples_per_second": 39.236, "eval_steps_per_second": 0.628, "step": 400 }, { "beta_dpo/beta_used": 0.09968056529760361, "beta_dpo/beta_used_raw": 0.04236916825175285, "beta_dpo/gap_mean": 16.500282287597656, "beta_dpo/gap_std": 26.050161361694336, "beta_dpo/mask_keep_frac": 0.793749988079071, "epoch": 0.8586387434554974, "grad_norm": 82.50851440429688, "learning_rate": 3.036127238347164e-08, "logits/chosen": -0.827735424041748, "logits/rejected": -0.8203527331352234, "loss": 1.2025, "step": 410 }, { "beta_dpo/beta_used": 0.0970761626958847, "beta_dpo/beta_used_raw": 0.058871395885944366, "beta_dpo/gap_mean": 16.738262176513672, "beta_dpo/gap_std": 26.436817169189453, "beta_dpo/mask_keep_frac": 0.7562500238418579, "epoch": 0.8795811518324608, "grad_norm": 277.814453125, "learning_rate": 2.2213262793589482e-08, "logits/chosen": -0.7558459639549255, "logits/rejected": -0.7355632185935974, "loss": 1.1919, "step": 420 }, { "beta_dpo/beta_used": 0.07494507730007172, "beta_dpo/beta_used_raw": 0.037638500332832336, "beta_dpo/gap_mean": 17.993297576904297, "beta_dpo/gap_std": 27.201208114624023, "beta_dpo/mask_keep_frac": 0.84375, "epoch": 0.900523560209424, "grad_norm": 1.1577889919281006, "learning_rate": 1.5286263996730026e-08, "logits/chosen": -0.8132478594779968, "logits/rejected": -0.8199571371078491, "loss": 1.1747, "step": 430 }, { "beta_dpo/beta_used": 0.046172745525836945, "beta_dpo/beta_used_raw": -0.05054600164294243, "beta_dpo/gap_mean": 16.831357955932617, "beta_dpo/gap_std": 27.087594985961914, "beta_dpo/mask_keep_frac": 0.793749988079071, "epoch": 0.9214659685863874, "grad_norm": 8.6950101852417, "learning_rate": 9.617406953185136e-09, "logits/chosen": -0.8208335638046265, "logits/rejected": -0.8280296325683594, "loss": 1.2337, "step": 440 }, { "beta_dpo/beta_used": 0.11133173853158951, "beta_dpo/beta_used_raw": 0.07020476460456848, "beta_dpo/gap_mean": 16.71297264099121, "beta_dpo/gap_std": 26.49554443359375, "beta_dpo/mask_keep_frac": 0.824999988079071, "epoch": 0.9424083769633508, "grad_norm": 108.31566619873047, "learning_rate": 5.2370785753763356e-09, "logits/chosen": -0.7833819389343262, "logits/rejected": -0.7876101732254028, "loss": 1.171, "step": 450 }, { "beta_dpo/beta_used": 0.06652946025133133, "beta_dpo/beta_used_raw": -0.015655241906642914, "beta_dpo/gap_mean": 17.124013900756836, "beta_dpo/gap_std": 27.718246459960938, "beta_dpo/mask_keep_frac": 0.8062499761581421, "epoch": 0.9633507853403142, "grad_norm": 70.03536224365234, "learning_rate": 2.168758844148272e-09, "logits/chosen": -0.8030775785446167, "logits/rejected": -0.8030357360839844, "loss": 1.2041, "step": 460 }, { "beta_dpo/beta_used": 0.12787500023841858, "beta_dpo/beta_used_raw": 0.10427769273519516, "beta_dpo/gap_mean": 17.284704208374023, "beta_dpo/gap_std": 27.71035385131836, "beta_dpo/mask_keep_frac": 0.862500011920929, "epoch": 0.9842931937172775, "grad_norm": 215.3680877685547, "learning_rate": 4.288949484559934e-10, "logits/chosen": -0.7909310460090637, "logits/rejected": -0.7838017344474792, "loss": 1.2015, "step": 470 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 1.1642480231431045, "train_runtime": 4421.8255, "train_samples_per_second": 13.826, "train_steps_per_second": 0.108 } ], "logging_steps": 10, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }