{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 200, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0013532638549804688, "fcm_dpo/q_t": 0.5000336766242981, "grad_norm": 28.219865798950195, "learning_rate": 0.0, "logits/chosen": 0.13337239623069763, "logits/rejected": 0.12492948770523071, "logps/chosen": -64.5841293334961, "logps/ref_chosen": -64.61280822753906, "logps/ref_rejected": -64.17195129394531, "logps/rejected": -64.14192199707031, "loss": 1.3866, "margin_dpo/margin_mean": -0.0013527870178222656, "margin_dpo/margin_std": 0.2561596930027008, "step": 1 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.017816998064517975, "fcm_dpo/q_t": 0.49955499172210693, "grad_norm": 29.562381744384766, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.09712683409452438, "logits/rejected": 0.06875293701887131, "logps/chosen": -65.33859252929688, "logps/ref_chosen": -65.34695434570312, "logps/ref_rejected": -79.315673828125, "logps/rejected": -79.32512664794922, "loss": 1.3848, "margin_dpo/margin_mean": 0.01781691610813141, "margin_dpo/margin_std": 0.3064817190170288, "step": 5 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.005693185143172741, "fcm_dpo/q_t": 0.4998575747013092, "grad_norm": 29.699796676635742, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.11204878240823746, "logits/rejected": 0.07268079370260239, "logps/chosen": -56.658607482910156, "logps/ref_chosen": -56.65692901611328, "logps/ref_rejected": -80.12786865234375, "logps/rejected": -80.13523864746094, "loss": 1.386, "margin_dpo/margin_mean": 0.005692988634109497, "margin_dpo/margin_std": 0.2990571856498718, "step": 10 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.03819512203335762, "fcm_dpo/q_t": 0.49904537200927734, "grad_norm": 33.20173263549805, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.08240987360477448, "logits/rejected": 0.05311817675828934, "logps/chosen": -60.08363723754883, "logps/ref_chosen": -60.09392166137695, "logps/ref_rejected": -78.99056243896484, "logps/rejected": -79.01847839355469, "loss": 1.3827, "margin_dpo/margin_mean": 0.03819500282406807, "margin_dpo/margin_std": 0.31105566024780273, "step": 15 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.020285824313759804, "fcm_dpo/q_t": 0.4994930624961853, "grad_norm": 29.200054168701172, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.09025263786315918, "logits/rejected": 0.06288890540599823, "logps/chosen": -55.444427490234375, "logps/ref_chosen": -55.464561462402344, "logps/ref_rejected": -77.40013122558594, "logps/rejected": -77.40027618408203, "loss": 1.3845, "margin_dpo/margin_mean": 0.02028590813279152, "margin_dpo/margin_std": 0.30799758434295654, "step": 20 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.026140457019209862, "fcm_dpo/q_t": 0.4993467926979065, "grad_norm": 29.59698486328125, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.10830628871917725, "logits/rejected": 0.07654412090778351, "logps/chosen": -60.71149826049805, "logps/ref_chosen": -60.711814880371094, "logps/ref_rejected": -82.71756744384766, "logps/rejected": -82.7433853149414, "loss": 1.3839, "margin_dpo/margin_mean": 0.026140112429857254, "margin_dpo/margin_std": 0.30088797211647034, "step": 25 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.005647105164825916, "fcm_dpo/q_t": 0.5001412034034729, "grad_norm": 30.446046829223633, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.10900239646434784, "logits/rejected": 0.08392996340990067, "logps/chosen": -60.911338806152344, "logps/ref_chosen": -60.880210876464844, "logps/ref_rejected": -78.44148254394531, "logps/rejected": -78.46694946289062, "loss": 1.3871, "margin_dpo/margin_mean": -0.005647194571793079, "margin_dpo/margin_std": 0.3177885413169861, "step": 30 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.02348313294351101, "fcm_dpo/q_t": 0.4994131922721863, "grad_norm": 27.848325729370117, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.06021007150411606, "logits/rejected": 0.03433113545179367, "logps/chosen": -62.28969192504883, "logps/ref_chosen": -62.248138427734375, "logps/ref_rejected": -79.56475830078125, "logps/rejected": -79.62977600097656, "loss": 1.3843, "margin_dpo/margin_mean": 0.023483287543058395, "margin_dpo/margin_std": 0.3572625517845154, "step": 35 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05089018866419792, "fcm_dpo/q_t": 0.4987284243106842, "grad_norm": 31.253028869628906, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.10225018113851547, "logits/rejected": 0.05802680179476738, "logps/chosen": -58.953025817871094, "logps/ref_chosen": -58.87812423706055, "logps/ref_rejected": -84.22982025146484, "logps/rejected": -84.3556137084961, "loss": 1.3816, "margin_dpo/margin_mean": 0.0508904755115509, "margin_dpo/margin_std": 0.358222097158432, "step": 40 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04126746580004692, "fcm_dpo/q_t": 0.4989686906337738, "grad_norm": 31.914793014526367, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.07056603580713272, "logits/rejected": 0.04329410195350647, "logps/chosen": -66.03207397460938, "logps/ref_chosen": -65.88298034667969, "logps/ref_rejected": -83.87881469726562, "logps/rejected": -84.06917572021484, "loss": 1.3826, "margin_dpo/margin_mean": 0.04126756638288498, "margin_dpo/margin_std": 0.3951026499271393, "step": 45 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09929290413856506, "fcm_dpo/q_t": 0.49751925468444824, "grad_norm": 27.976877212524414, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.07899191230535507, "logits/rejected": 0.04546006768941879, "logps/chosen": -55.375457763671875, "logps/ref_chosen": -55.172386169433594, "logps/ref_rejected": -69.63300323486328, "logps/rejected": -69.93535614013672, "loss": 1.3769, "margin_dpo/margin_mean": 0.099293053150177, "margin_dpo/margin_std": 0.43157047033309937, "step": 50 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.20769624412059784, "fcm_dpo/q_t": 0.494814395904541, "grad_norm": 31.212663650512695, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.07141193002462387, "logits/rejected": 0.03525683656334877, "logps/chosen": -57.53242874145508, "logps/ref_chosen": -57.193580627441406, "logps/ref_rejected": -79.69940948486328, "logps/rejected": -80.24595642089844, "loss": 1.3664, "margin_dpo/margin_mean": 0.20769624412059784, "margin_dpo/margin_std": 0.5274912118911743, "step": 55 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2171248197555542, "fcm_dpo/q_t": 0.4945871829986572, "grad_norm": 29.49753761291504, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.11127179861068726, "logits/rejected": 0.07682862877845764, "logps/chosen": -60.594451904296875, "logps/ref_chosen": -60.068870544433594, "logps/ref_rejected": -74.41178894042969, "logps/rejected": -75.15449523925781, "loss": 1.3662, "margin_dpo/margin_mean": 0.21712493896484375, "margin_dpo/margin_std": 0.7363327741622925, "step": 60 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.29596805572509766, "fcm_dpo/q_t": 0.49262505769729614, "grad_norm": 30.78485870361328, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.15267546474933624, "logits/rejected": 0.12211690843105316, "logps/chosen": -58.8918342590332, "logps/ref_chosen": -58.1558952331543, "logps/ref_rejected": -76.06512451171875, "logps/rejected": -77.09703063964844, "loss": 1.3594, "margin_dpo/margin_mean": 0.29596781730651855, "margin_dpo/margin_std": 0.9618409276008606, "step": 65 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4171249270439148, "fcm_dpo/q_t": 0.489621102809906, "grad_norm": 28.725130081176758, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.09666416794061661, "logits/rejected": 0.06161420792341232, "logps/chosen": -68.41984558105469, "logps/ref_chosen": -67.35506439208984, "logps/ref_rejected": -82.24962615966797, "logps/rejected": -83.73153686523438, "loss": 1.3485, "margin_dpo/margin_mean": 0.41712498664855957, "margin_dpo/margin_std": 1.1459109783172607, "step": 70 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5088232755661011, "fcm_dpo/q_t": 0.48737573623657227, "grad_norm": 26.100767135620117, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.1186274066567421, "logits/rejected": 0.08292581140995026, "logps/chosen": -58.2871208190918, "logps/ref_chosen": -56.86763381958008, "logps/ref_rejected": -72.56938934326172, "logps/rejected": -74.4977035522461, "loss": 1.3414, "margin_dpo/margin_mean": 0.5088233947753906, "margin_dpo/margin_std": 1.4009206295013428, "step": 75 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6476309895515442, "fcm_dpo/q_t": 0.48400768637657166, "grad_norm": 28.427127838134766, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.13891419768333435, "logits/rejected": 0.09406773746013641, "logps/chosen": -59.4609489440918, "logps/ref_chosen": -57.687095642089844, "logps/ref_rejected": -78.06813049316406, "logps/rejected": -80.48960876464844, "loss": 1.3303, "margin_dpo/margin_mean": 0.6476308703422546, "margin_dpo/margin_std": 1.698277235031128, "step": 80 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.0366328954696655, "fcm_dpo/q_t": 0.47437912225723267, "grad_norm": 26.8502254486084, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.15370258688926697, "logits/rejected": 0.10728434473276138, "logps/chosen": -59.108184814453125, "logps/ref_chosen": -56.96040725708008, "logps/ref_rejected": -75.22166442871094, "logps/rejected": -78.40606689453125, "loss": 1.2938, "margin_dpo/margin_mean": 1.036632776260376, "margin_dpo/margin_std": 1.8020055294036865, "step": 85 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1005299091339111, "fcm_dpo/q_t": 0.47299760580062866, "grad_norm": 29.18059730529785, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.20202812552452087, "logits/rejected": 0.15330952405929565, "logps/chosen": -60.3327751159668, "logps/ref_chosen": -57.41730499267578, "logps/ref_rejected": -80.87986755371094, "logps/rejected": -84.89588165283203, "loss": 1.295, "margin_dpo/margin_mean": 1.1005302667617798, "margin_dpo/margin_std": 2.45450758934021, "step": 90 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.4387786388397217, "fcm_dpo/q_t": 0.46537190675735474, "grad_norm": 29.217578887939453, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.2133176326751709, "logits/rejected": 0.16870227456092834, "logps/chosen": -57.59110641479492, "logps/ref_chosen": -54.08087158203125, "logps/ref_rejected": -76.15860748291016, "logps/rejected": -81.10761260986328, "loss": 1.2756, "margin_dpo/margin_mean": 1.4387786388397217, "margin_dpo/margin_std": 3.3348701000213623, "step": 95 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1994415521621704, "fcm_dpo/q_t": 0.4720003604888916, "grad_norm": 33.0871467590332, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.22379426658153534, "logits/rejected": 0.19079172611236572, "logps/chosen": -68.65206909179688, "logps/ref_chosen": -63.875038146972656, "logps/ref_rejected": -82.077880859375, "logps/rejected": -88.05435180664062, "loss": 1.325, "margin_dpo/margin_mean": 1.1994414329528809, "margin_dpo/margin_std": 4.622773170471191, "step": 100 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 0.12098121643066406, "fcm_dpo/delta": 0.44622865319252014, "fcm_dpo/margin": 1.7318670749664307, "fcm_dpo/q_t": 0.45575252175331116, "grad_norm": 46.90412521362305, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.22520050406455994, "logits/rejected": 0.1741827428340912, "logps/chosen": -67.464599609375, "logps/ref_chosen": -62.572479248046875, "logps/ref_rejected": -80.93415069580078, "logps/rejected": -87.5581283569336, "loss": 1.2749, "margin_dpo/margin_mean": 1.7318668365478516, "margin_dpo/margin_std": 4.902867317199707, "step": 105 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 0.21049818396568298, "fcm_dpo/delta": 0.4119214117527008, "fcm_dpo/margin": 1.4851243495941162, "fcm_dpo/q_t": 0.4385649561882019, "grad_norm": 121.93978881835938, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.1968976855278015, "logits/rejected": 0.1738019436597824, "logps/chosen": -74.165283203125, "logps/ref_chosen": -68.67534637451172, "logps/ref_rejected": -78.82028198242188, "logps/rejected": -85.79533386230469, "loss": 1.3615, "margin_dpo/margin_mean": 1.4851243495941162, "margin_dpo/margin_std": 5.345554351806641, "step": 110 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 0.29616811871528625, "fcm_dpo/delta": 0.38898569345474243, "fcm_dpo/margin": 1.868272066116333, "fcm_dpo/q_t": 0.40087947249412537, "grad_norm": 101.64105987548828, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.23364102840423584, "logits/rejected": 0.17991967499256134, "logps/chosen": -63.528472900390625, "logps/ref_chosen": -58.65370559692383, "logps/ref_rejected": -81.89688873291016, "logps/rejected": -88.63993835449219, "loss": 1.2105, "margin_dpo/margin_mean": 1.8682724237442017, "margin_dpo/margin_std": 4.156613349914551, "step": 115 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 0.3783304691314697, "fcm_dpo/delta": 0.16895940899848938, "fcm_dpo/margin": 2.0111002922058105, "fcm_dpo/q_t": 0.36017656326293945, "grad_norm": 91.80081939697266, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.2208259552717209, "logits/rejected": 0.17146429419517517, "logps/chosen": -60.4900016784668, "logps/ref_chosen": -56.16423797607422, "logps/ref_rejected": -75.87689971923828, "logps/rejected": -82.2137680053711, "loss": 1.0835, "margin_dpo/margin_mean": 2.0111002922058105, "margin_dpo/margin_std": 3.2962546348571777, "step": 120 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 0.4266550540924072, "fcm_dpo/delta": 0.04714610427618027, "fcm_dpo/margin": 2.240276575088501, "fcm_dpo/q_t": 0.3378121256828308, "grad_norm": 101.21341705322266, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.22850540280342102, "logits/rejected": 0.1721857488155365, "logps/chosen": -64.10456085205078, "logps/ref_chosen": -59.744285583496094, "logps/ref_rejected": -86.77314758300781, "logps/rejected": -93.37370300292969, "loss": 0.9694, "margin_dpo/margin_mean": 2.24027681350708, "margin_dpo/margin_std": 3.2456538677215576, "step": 125 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 0.46494919061660767, "fcm_dpo/delta": 0.13100966811180115, "fcm_dpo/margin": 1.8871605396270752, "fcm_dpo/q_t": 0.352043092250824, "grad_norm": 143.7098846435547, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.15394529700279236, "logits/rejected": 0.12398996204137802, "logps/chosen": -68.61878967285156, "logps/ref_chosen": -64.15296936035156, "logps/ref_rejected": -75.17271423339844, "logps/rejected": -81.52568054199219, "loss": 1.0946, "margin_dpo/margin_mean": 1.8871605396270752, "margin_dpo/margin_std": 3.1590871810913086, "step": 130 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.4789491593837738, "fcm_dpo/delta": -0.02141920104622841, "fcm_dpo/margin": 2.124882221221924, "fcm_dpo/q_t": 0.326783150434494, "grad_norm": 103.89042663574219, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.200178861618042, "logits/rejected": 0.16137003898620605, "logps/chosen": -61.23235321044922, "logps/ref_chosen": -57.006690979003906, "logps/ref_rejected": -73.71768188476562, "logps/rejected": -80.06822204589844, "loss": 1.0191, "margin_dpo/margin_mean": 2.1248817443847656, "margin_dpo/margin_std": 3.1723520755767822, "step": 135 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.48734474182128906, "fcm_dpo/delta": -0.02495430037379265, "fcm_dpo/margin": 2.0965754985809326, "fcm_dpo/q_t": 0.3367246687412262, "grad_norm": 139.94981384277344, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.16469994187355042, "logits/rejected": 0.12346775829792023, "logps/chosen": -67.96693420410156, "logps/ref_chosen": -63.36246871948242, "logps/ref_rejected": -79.62621307373047, "logps/rejected": -86.32726287841797, "loss": 1.0969, "margin_dpo/margin_mean": 2.0965757369995117, "margin_dpo/margin_std": 3.4793498516082764, "step": 140 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.4783070683479309, "fcm_dpo/delta": 0.03718746080994606, "fcm_dpo/margin": 2.0187315940856934, "fcm_dpo/q_t": 0.34199976921081543, "grad_norm": 141.78863525390625, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.1696348637342453, "logits/rejected": 0.15050214529037476, "logps/chosen": -69.42279052734375, "logps/ref_chosen": -65.01470184326172, "logps/ref_rejected": -80.49073791503906, "logps/rejected": -86.91755676269531, "loss": 1.0997, "margin_dpo/margin_mean": 2.0187315940856934, "margin_dpo/margin_std": 3.3963191509246826, "step": 145 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.4847794473171234, "fcm_dpo/delta": -0.09536196291446686, "fcm_dpo/margin": 2.245018243789673, "fcm_dpo/q_t": 0.31795617938041687, "grad_norm": 95.42865753173828, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.16661684215068817, "logits/rejected": 0.12393312156200409, "logps/chosen": -63.37464141845703, "logps/ref_chosen": -59.19135284423828, "logps/ref_rejected": -74.0339126586914, "logps/rejected": -80.46221923828125, "loss": 0.9733, "margin_dpo/margin_mean": 2.2450177669525146, "margin_dpo/margin_std": 3.1093335151672363, "step": 150 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.4329656660556793, "fcm_dpo/delta": 0.01940056122839451, "fcm_dpo/margin": 2.265953302383423, "fcm_dpo/q_t": 0.3330842852592468, "grad_norm": 119.63710021972656, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.17574964463710785, "logits/rejected": 0.1448710560798645, "logps/chosen": -65.07331848144531, "logps/ref_chosen": -60.93949508666992, "logps/ref_rejected": -74.51151275634766, "logps/rejected": -80.91129302978516, "loss": 1.022, "margin_dpo/margin_mean": 2.2659528255462646, "margin_dpo/margin_std": 3.3525185585021973, "step": 155 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.4543333649635315, "fcm_dpo/delta": 0.024721205234527588, "fcm_dpo/margin": 2.1477513313293457, "fcm_dpo/q_t": 0.3333725333213806, "grad_norm": 114.04137420654297, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.14524099230766296, "logits/rejected": 0.11148606240749359, "logps/chosen": -62.98958206176758, "logps/ref_chosen": -58.763816833496094, "logps/ref_rejected": -74.94743347167969, "logps/rejected": -81.32093811035156, "loss": 1.0069, "margin_dpo/margin_mean": 2.147751569747925, "margin_dpo/margin_std": 3.1879055500030518, "step": 160 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.43008118867874146, "fcm_dpo/delta": -0.13953472673892975, "fcm_dpo/margin": 2.613295316696167, "fcm_dpo/q_t": 0.3099823296070099, "grad_norm": 84.81941986083984, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.17675867676734924, "logits/rejected": 0.13011161983013153, "logps/chosen": -60.25602340698242, "logps/ref_chosen": -55.70417022705078, "logps/ref_rejected": -76.59439849853516, "logps/rejected": -83.7595443725586, "loss": 0.9239, "margin_dpo/margin_mean": 2.613295793533325, "margin_dpo/margin_std": 3.334416151046753, "step": 165 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.43538564443588257, "fcm_dpo/delta": 0.10406246036291122, "fcm_dpo/margin": 2.0714168548583984, "fcm_dpo/q_t": 0.34412893652915955, "grad_norm": 95.16687774658203, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.1478302776813507, "logits/rejected": 0.11266426742076874, "logps/chosen": -65.52555847167969, "logps/ref_chosen": -61.169105529785156, "logps/ref_rejected": -77.21674346923828, "logps/rejected": -83.64461517333984, "loss": 1.0106, "margin_dpo/margin_mean": 2.0714163780212402, "margin_dpo/margin_std": 3.096672534942627, "step": 170 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.4220406115055084, "fcm_dpo/delta": -0.11823473125696182, "fcm_dpo/margin": 2.6146881580352783, "fcm_dpo/q_t": 0.3217321038246155, "grad_norm": 106.60182189941406, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.19509582221508026, "logits/rejected": 0.1433831751346588, "logps/chosen": -63.82081985473633, "logps/ref_chosen": -59.24176788330078, "logps/ref_rejected": -81.80384826660156, "logps/rejected": -88.99759674072266, "loss": 1.0154, "margin_dpo/margin_mean": 2.6146881580352783, "margin_dpo/margin_std": 3.812539577484131, "step": 175 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.39969393610954285, "fcm_dpo/delta": 0.044442176818847656, "fcm_dpo/margin": 2.3937482833862305, "fcm_dpo/q_t": 0.34041857719421387, "grad_norm": 114.9355697631836, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.16883400082588196, "logits/rejected": 0.1312471628189087, "logps/chosen": -68.145751953125, "logps/ref_chosen": -63.24883270263672, "logps/ref_rejected": -79.00736236572266, "logps/rejected": -86.29803466796875, "loss": 1.0492, "margin_dpo/margin_mean": 2.3937482833862305, "margin_dpo/margin_std": 3.709543228149414, "step": 180 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.4048680365085602, "fcm_dpo/delta": 0.024971742182970047, "fcm_dpo/margin": 2.408203601837158, "fcm_dpo/q_t": 0.3345295786857605, "grad_norm": 101.9957046508789, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.19345621764659882, "logits/rejected": 0.14936503767967224, "logps/chosen": -61.29529571533203, "logps/ref_chosen": -56.390625, "logps/ref_rejected": -76.81001281738281, "logps/rejected": -84.12287902832031, "loss": 1.018, "margin_dpo/margin_mean": 2.4082038402557373, "margin_dpo/margin_std": 3.511875867843628, "step": 185 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.41114169359207153, "fcm_dpo/delta": -0.07483033090829849, "fcm_dpo/margin": 2.591209888458252, "fcm_dpo/q_t": 0.31742408871650696, "grad_norm": 99.17886352539062, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.1815711259841919, "logits/rejected": 0.13424037396907806, "logps/chosen": -72.9486312866211, "logps/ref_chosen": -68.25389099121094, "logps/ref_rejected": -86.461181640625, "logps/rejected": -93.74713134765625, "loss": 0.9814, "margin_dpo/margin_mean": 2.591209888458252, "margin_dpo/margin_std": 3.6213302612304688, "step": 190 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.4059467911720276, "fcm_dpo/delta": 0.04542668163776398, "fcm_dpo/margin": 2.355367422103882, "fcm_dpo/q_t": 0.33451682329177856, "grad_norm": 103.90733337402344, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.19510746002197266, "logits/rejected": 0.17633949220180511, "logps/chosen": -67.19489288330078, "logps/ref_chosen": -62.1484260559082, "logps/ref_rejected": -71.33458709716797, "logps/rejected": -78.73640441894531, "loss": 1.0004, "margin_dpo/margin_mean": 2.3553671836853027, "margin_dpo/margin_std": 3.4357311725616455, "step": 195 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.40270957350730896, "fcm_dpo/delta": -0.10094480216503143, "fcm_dpo/margin": 2.705155611038208, "fcm_dpo/q_t": 0.31455981731414795, "grad_norm": 86.0372085571289, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.22762131690979004, "logits/rejected": 0.17773905396461487, "logps/chosen": -61.629058837890625, "logps/ref_chosen": -56.950096130371094, "logps/ref_rejected": -78.66989135742188, "logps/rejected": -86.05402374267578, "loss": 0.9878, "margin_dpo/margin_mean": 2.705155611038208, "margin_dpo/margin_std": 3.7367305755615234, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.44184160232543945, "eval_fcm_dpo/delta": 0.019765857607126236, "eval_fcm_dpo/margin": 2.217298984527588, "eval_fcm_dpo/q_t": 0.34747642278671265, "eval_logits/chosen": 0.23132538795471191, "eval_logits/rejected": 0.19121667742729187, "eval_logps/chosen": -79.51517486572266, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -86.4219970703125, "eval_loss": 0.5709094405174255, "eval_margin_dpo/margin_mean": 2.217298984527588, "eval_margin_dpo/margin_std": 3.880441665649414, "eval_runtime": 38.3936, "eval_samples_per_second": 59.984, "eval_steps_per_second": 1.875, "step": 200 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.4148198664188385, "fcm_dpo/delta": -0.02567175403237343, "fcm_dpo/margin": 2.4526686668395996, "fcm_dpo/q_t": 0.3341201841831207, "grad_norm": 142.11912536621094, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.22209465503692627, "logits/rejected": 0.16845160722732544, "logps/chosen": -62.80231475830078, "logps/ref_chosen": -57.99428176879883, "logps/ref_rejected": -83.5367431640625, "logps/rejected": -90.79744720458984, "loss": 1.0393, "margin_dpo/margin_mean": 2.4526686668395996, "margin_dpo/margin_std": 3.7842280864715576, "step": 205 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.42894989252090454, "fcm_dpo/delta": -0.02596813440322876, "fcm_dpo/margin": 2.375964879989624, "fcm_dpo/q_t": 0.3406800329685211, "grad_norm": 91.5486068725586, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.2209012508392334, "logits/rejected": 0.1760309338569641, "logps/chosen": -68.58443450927734, "logps/ref_chosen": -63.77195358276367, "logps/ref_rejected": -82.56491088867188, "logps/rejected": -89.75337219238281, "loss": 1.0863, "margin_dpo/margin_mean": 2.375964641571045, "margin_dpo/margin_std": 3.8247761726379395, "step": 210 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.399940550327301, "fcm_dpo/delta": -0.0751124769449234, "fcm_dpo/margin": 2.6684412956237793, "fcm_dpo/q_t": 0.3200802505016327, "grad_norm": 96.38282012939453, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.19080176949501038, "logits/rejected": 0.13846060633659363, "logps/chosen": -64.53096008300781, "logps/ref_chosen": -60.27800750732422, "logps/ref_rejected": -83.91607666015625, "logps/rejected": -90.83747863769531, "loss": 0.9493, "margin_dpo/margin_mean": 2.6684412956237793, "margin_dpo/margin_std": 3.6150002479553223, "step": 215 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.36988887190818787, "fcm_dpo/delta": 0.039208363741636276, "fcm_dpo/margin": 2.6028714179992676, "fcm_dpo/q_t": 0.33981165289878845, "grad_norm": 77.6009521484375, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.18831773102283478, "logits/rejected": 0.13733841478824615, "logps/chosen": -65.37849426269531, "logps/ref_chosen": -60.88572311401367, "logps/ref_rejected": -80.1805191040039, "logps/rejected": -87.27616119384766, "loss": 1.0103, "margin_dpo/margin_mean": 2.6028714179992676, "margin_dpo/margin_std": 3.930713653564453, "step": 220 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.3809080719947815, "fcm_dpo/delta": -0.03357607498764992, "fcm_dpo/margin": 2.6959452629089355, "fcm_dpo/q_t": 0.325172483921051, "grad_norm": 119.093017578125, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.23317813873291016, "logits/rejected": 0.17639882862567902, "logps/chosen": -65.24988555908203, "logps/ref_chosen": -61.02507781982422, "logps/ref_rejected": -91.92439270019531, "logps/rejected": -98.84513854980469, "loss": 0.9308, "margin_dpo/margin_mean": 2.6959452629089355, "margin_dpo/margin_std": 3.5449512004852295, "step": 225 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.3940550982952118, "fcm_dpo/delta": 0.04864387959241867, "fcm_dpo/margin": 2.42222261428833, "fcm_dpo/q_t": 0.3377589285373688, "grad_norm": 103.90534210205078, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.2553010582923889, "logits/rejected": 0.21687361598014832, "logps/chosen": -58.69348907470703, "logps/ref_chosen": -54.49797821044922, "logps/ref_rejected": -71.96363830566406, "logps/rejected": -78.58135986328125, "loss": 1.0312, "margin_dpo/margin_mean": 2.422222375869751, "margin_dpo/margin_std": 3.698529005050659, "step": 230 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.38651371002197266, "fcm_dpo/delta": -0.027495551854372025, "fcm_dpo/margin": 2.6372780799865723, "fcm_dpo/q_t": 0.3264053761959076, "grad_norm": 89.52863311767578, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.17925067245960236, "logits/rejected": 0.1555873155593872, "logps/chosen": -67.6564712524414, "logps/ref_chosen": -63.250282287597656, "logps/ref_rejected": -73.09049987792969, "logps/rejected": -80.13397216796875, "loss": 0.967, "margin_dpo/margin_mean": 2.637277603149414, "margin_dpo/margin_std": 3.5861003398895264, "step": 235 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.3813626766204834, "fcm_dpo/delta": -0.09251350164413452, "fcm_dpo/margin": 2.839731454849243, "fcm_dpo/q_t": 0.3197898268699646, "grad_norm": 71.10482025146484, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.22568419575691223, "logits/rejected": 0.18240895867347717, "logps/chosen": -69.98919677734375, "logps/ref_chosen": -65.26150512695312, "logps/ref_rejected": -87.60311126708984, "logps/rejected": -95.17052459716797, "loss": 0.9725, "margin_dpo/margin_mean": 2.839731454849243, "margin_dpo/margin_std": 3.927607774734497, "step": 240 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.3627152442932129, "fcm_dpo/delta": -0.06611888110637665, "fcm_dpo/margin": 2.9209442138671875, "fcm_dpo/q_t": 0.3157356381416321, "grad_norm": 111.84679412841797, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.2060960978269577, "logits/rejected": 0.17647871375083923, "logps/chosen": -70.17327117919922, "logps/ref_chosen": -65.73170471191406, "logps/ref_rejected": -75.19642639160156, "logps/rejected": -82.55892944335938, "loss": 0.9053, "margin_dpo/margin_mean": 2.9209437370300293, "margin_dpo/margin_std": 3.611414670944214, "step": 245 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.3453001379966736, "fcm_dpo/delta": 0.03979369252920151, "fcm_dpo/margin": 2.5490431785583496, "fcm_dpo/q_t": 0.34757569432258606, "grad_norm": 78.27101135253906, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.24320515990257263, "logits/rejected": 0.23427622020244598, "logps/chosen": -76.07374572753906, "logps/ref_chosen": -70.71224212646484, "logps/ref_rejected": -76.12723541259766, "logps/rejected": -84.03779602050781, "loss": 1.0729, "margin_dpo/margin_mean": 2.5490429401397705, "margin_dpo/margin_std": 4.084795951843262, "step": 250 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.36765122413635254, "fcm_dpo/delta": 0.06216100975871086, "fcm_dpo/margin": 2.552335739135742, "fcm_dpo/q_t": 0.3432448208332062, "grad_norm": 86.85004425048828, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.22906799614429474, "logits/rejected": 0.1917153000831604, "logps/chosen": -66.92555236816406, "logps/ref_chosen": -61.767662048339844, "logps/ref_rejected": -77.38813018798828, "logps/rejected": -85.09835052490234, "loss": 1.0684, "margin_dpo/margin_mean": 2.552335262298584, "margin_dpo/margin_std": 4.104119300842285, "step": 255 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.3392147421836853, "fcm_dpo/delta": -0.16018646955490112, "fcm_dpo/margin": 3.369103193283081, "fcm_dpo/q_t": 0.308208167552948, "grad_norm": 71.6130142211914, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.2075103521347046, "logits/rejected": 0.14859376847743988, "logps/chosen": -67.01765441894531, "logps/ref_chosen": -61.57584762573242, "logps/ref_rejected": -91.87513732910156, "logps/rejected": -100.68604278564453, "loss": 0.9038, "margin_dpo/margin_mean": 3.369102954864502, "margin_dpo/margin_std": 4.229114532470703, "step": 260 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.3240419924259186, "fcm_dpo/delta": 0.021506184712052345, "fcm_dpo/margin": 3.0214786529541016, "fcm_dpo/q_t": 0.33430081605911255, "grad_norm": 99.64929962158203, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.1896849423646927, "logits/rejected": 0.1461310237646103, "logps/chosen": -71.33131408691406, "logps/ref_chosen": -65.75422668457031, "logps/ref_rejected": -77.9569320678711, "logps/rejected": -86.55549621582031, "loss": 1.0053, "margin_dpo/margin_mean": 3.0214788913726807, "margin_dpo/margin_std": 4.428906440734863, "step": 265 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.33263370394706726, "fcm_dpo/delta": 0.0555957667529583, "fcm_dpo/margin": 2.8429040908813477, "fcm_dpo/q_t": 0.3359856605529785, "grad_norm": 92.8035659790039, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.22854265570640564, "logits/rejected": 0.1897846907377243, "logps/chosen": -67.77484893798828, "logps/ref_chosen": -62.27649688720703, "logps/ref_rejected": -76.56950378417969, "logps/rejected": -84.91075134277344, "loss": 1.01, "margin_dpo/margin_mean": 2.8429043292999268, "margin_dpo/margin_std": 4.193612098693848, "step": 270 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.33189326524734497, "fcm_dpo/delta": -0.07388236373662949, "fcm_dpo/margin": 3.2117831707000732, "fcm_dpo/q_t": 0.3266224265098572, "grad_norm": 73.1767349243164, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.21344022452831268, "logits/rejected": 0.1750694364309311, "logps/chosen": -67.38113403320312, "logps/ref_chosen": -61.854393005371094, "logps/ref_rejected": -77.22246551513672, "logps/rejected": -85.96098327636719, "loss": 0.9878, "margin_dpo/margin_mean": 3.211782455444336, "margin_dpo/margin_std": 4.597712516784668, "step": 275 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.30937570333480835, "fcm_dpo/delta": -0.10143546760082245, "fcm_dpo/margin": 3.528640031814575, "fcm_dpo/q_t": 0.30518898367881775, "grad_norm": 53.0545654296875, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.20327389240264893, "logits/rejected": 0.17730286717414856, "logps/chosen": -66.44621276855469, "logps/ref_chosen": -61.29896926879883, "logps/ref_rejected": -73.35762023925781, "logps/rejected": -82.03350830078125, "loss": 0.8753, "margin_dpo/margin_mean": 3.528640031814575, "margin_dpo/margin_std": 4.124866962432861, "step": 280 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.2883167862892151, "fcm_dpo/delta": 0.04716776683926582, "fcm_dpo/margin": 3.311861038208008, "fcm_dpo/q_t": 0.332479327917099, "grad_norm": 77.8005142211914, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.21987095475196838, "logits/rejected": 0.18027493357658386, "logps/chosen": -68.98224639892578, "logps/ref_chosen": -63.435462951660156, "logps/ref_rejected": -79.73661804199219, "logps/rejected": -88.59527587890625, "loss": 0.9495, "margin_dpo/margin_mean": 3.311861038208008, "margin_dpo/margin_std": 4.424164295196533, "step": 285 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.27836745977401733, "fcm_dpo/delta": -0.07125671207904816, "fcm_dpo/margin": 3.8071327209472656, "fcm_dpo/q_t": 0.31692713499069214, "grad_norm": 69.05030059814453, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.20964176952838898, "logits/rejected": 0.15969504415988922, "logps/chosen": -63.75525665283203, "logps/ref_chosen": -57.696876525878906, "logps/ref_rejected": -79.78132629394531, "logps/rejected": -89.64683532714844, "loss": 0.9226, "margin_dpo/margin_mean": 3.8071320056915283, "margin_dpo/margin_std": 4.934117794036865, "step": 290 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.270934522151947, "fcm_dpo/delta": -0.03950778394937515, "fcm_dpo/margin": 3.814711809158325, "fcm_dpo/q_t": 0.3220524191856384, "grad_norm": 65.58736419677734, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.2670535147190094, "logits/rejected": 0.2111613005399704, "logps/chosen": -61.7304573059082, "logps/ref_chosen": -55.430633544921875, "logps/ref_rejected": -78.1390151977539, "logps/rejected": -88.25354766845703, "loss": 0.931, "margin_dpo/margin_mean": 3.814711093902588, "margin_dpo/margin_std": 5.020625114440918, "step": 295 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.2575019299983978, "fcm_dpo/delta": -0.013848213478922844, "fcm_dpo/margin": 3.926861524581909, "fcm_dpo/q_t": 0.3218505382537842, "grad_norm": 71.43912506103516, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.23339371383190155, "logits/rejected": 0.189311683177948, "logps/chosen": -68.25260925292969, "logps/ref_chosen": -61.207069396972656, "logps/ref_rejected": -75.23294067382812, "logps/rejected": -86.2053451538086, "loss": 0.9303, "margin_dpo/margin_mean": 3.926861524581909, "margin_dpo/margin_std": 5.063118934631348, "step": 300 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.263566255569458, "fcm_dpo/delta": -0.02661963179707527, "fcm_dpo/margin": 3.8765950202941895, "fcm_dpo/q_t": 0.32818618416786194, "grad_norm": 68.17465209960938, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.2163856476545334, "logits/rejected": 0.1833379715681076, "logps/chosen": -70.30329895019531, "logps/ref_chosen": -63.06663131713867, "logps/ref_rejected": -78.45845031738281, "logps/rejected": -89.57170104980469, "loss": 0.9829, "margin_dpo/margin_mean": 3.8765950202941895, "margin_dpo/margin_std": 5.42116117477417, "step": 305 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.273120254278183, "fcm_dpo/delta": 0.1272657811641693, "fcm_dpo/margin": 3.23095965385437, "fcm_dpo/q_t": 0.35429221391677856, "grad_norm": 88.56037139892578, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.26219043135643005, "logits/rejected": 0.22065551578998566, "logps/chosen": -70.99202728271484, "logps/ref_chosen": -63.60908889770508, "logps/ref_rejected": -74.06394958496094, "logps/rejected": -84.67784881591797, "loss": 1.0785, "margin_dpo/margin_mean": 3.23095965385437, "margin_dpo/margin_std": 5.263034343719482, "step": 310 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.2818896472454071, "fcm_dpo/delta": -0.07698482275009155, "fcm_dpo/margin": 3.7881081104278564, "fcm_dpo/q_t": 0.319245308637619, "grad_norm": 60.92436599731445, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.23960921168327332, "logits/rejected": 0.192758709192276, "logps/chosen": -68.99839782714844, "logps/ref_chosen": -62.31493377685547, "logps/ref_rejected": -75.07472229003906, "logps/rejected": -85.54630279541016, "loss": 0.9575, "margin_dpo/margin_mean": 3.7881076335906982, "margin_dpo/margin_std": 5.181168556213379, "step": 315 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.2548847794532776, "fcm_dpo/delta": -0.0447889044880867, "fcm_dpo/margin": 4.059728622436523, "fcm_dpo/q_t": 0.32673633098602295, "grad_norm": 64.22303009033203, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.22829946875572205, "logits/rejected": 0.17820891737937927, "logps/chosen": -61.4847526550293, "logps/ref_chosen": -55.336036682128906, "logps/ref_rejected": -80.05536651611328, "logps/rejected": -90.26380920410156, "loss": 0.9724, "margin_dpo/margin_mean": 4.059728145599365, "margin_dpo/margin_std": 5.59163236618042, "step": 320 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.2541687786579132, "fcm_dpo/delta": -0.005901790224015713, "fcm_dpo/margin": 3.942878246307373, "fcm_dpo/q_t": 0.3219433128833771, "grad_norm": 60.860626220703125, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.22886808216571808, "logits/rejected": 0.18000295758247375, "logps/chosen": -64.44058227539062, "logps/ref_chosen": -57.90629959106445, "logps/ref_rejected": -74.2243881225586, "logps/rejected": -84.70155334472656, "loss": 0.9461, "margin_dpo/margin_mean": 3.942878007888794, "margin_dpo/margin_std": 5.178929328918457, "step": 325 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.27623167634010315, "fcm_dpo/delta": 0.06018294021487236, "fcm_dpo/margin": 3.404806137084961, "fcm_dpo/q_t": 0.3431999981403351, "grad_norm": 62.56698989868164, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.21622386574745178, "logits/rejected": 0.1816556602716446, "logps/chosen": -71.98468780517578, "logps/ref_chosen": -65.17555236816406, "logps/ref_rejected": -78.53681182861328, "logps/rejected": -88.7507553100586, "loss": 1.0657, "margin_dpo/margin_mean": 3.404806137084961, "margin_dpo/margin_std": 5.439192771911621, "step": 330 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.24731174111366272, "fcm_dpo/delta": -0.11190152168273926, "fcm_dpo/margin": 4.425177097320557, "fcm_dpo/q_t": 0.3129493296146393, "grad_norm": 56.091400146484375, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.250847190618515, "logits/rejected": 0.2109779417514801, "logps/chosen": -69.31832885742188, "logps/ref_chosen": -62.62797927856445, "logps/ref_rejected": -79.9095458984375, "logps/rejected": -91.02506256103516, "loss": 0.915, "margin_dpo/margin_mean": 4.425177574157715, "margin_dpo/margin_std": 5.686088562011719, "step": 335 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.2548673748970032, "fcm_dpo/delta": 0.03888889402151108, "fcm_dpo/margin": 3.7756190299987793, "fcm_dpo/q_t": 0.3394353687763214, "grad_norm": 57.22810745239258, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.20316573977470398, "logits/rejected": 0.1698659211397171, "logps/chosen": -68.96516418457031, "logps/ref_chosen": -61.1064567565918, "logps/ref_rejected": -76.71846008300781, "logps/rejected": -88.35279846191406, "loss": 1.0012, "margin_dpo/margin_mean": 3.7756195068359375, "margin_dpo/margin_std": 5.557743549346924, "step": 340 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.24767637252807617, "fcm_dpo/delta": 0.010343861766159534, "fcm_dpo/margin": 3.979682445526123, "fcm_dpo/q_t": 0.3323804438114166, "grad_norm": 55.119606018066406, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.24728891253471375, "logits/rejected": 0.2121780663728714, "logps/chosen": -67.48667907714844, "logps/ref_chosen": -60.12370681762695, "logps/ref_rejected": -78.58574676513672, "logps/rejected": -89.92839813232422, "loss": 0.9973, "margin_dpo/margin_mean": 3.9796817302703857, "margin_dpo/margin_std": 5.61331844329834, "step": 345 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.2374342978000641, "fcm_dpo/delta": -0.14152035117149353, "fcm_dpo/margin": 4.743535041809082, "fcm_dpo/q_t": 0.3163720965385437, "grad_norm": 61.54487228393555, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.28813233971595764, "logits/rejected": 0.2352372407913208, "logps/chosen": -62.17229461669922, "logps/ref_chosen": -55.104461669921875, "logps/ref_rejected": -80.63292694091797, "logps/rejected": -92.44429016113281, "loss": 0.9174, "margin_dpo/margin_mean": 4.743535041809082, "margin_dpo/margin_std": 6.253825664520264, "step": 350 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.22558502852916718, "fcm_dpo/delta": 0.006811001803725958, "fcm_dpo/margin": 4.399797439575195, "fcm_dpo/q_t": 0.32226401567459106, "grad_norm": 57.688541412353516, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.2438465654850006, "logits/rejected": 0.1791260838508606, "logps/chosen": -61.78578567504883, "logps/ref_chosen": -54.87224197387695, "logps/ref_rejected": -77.01316833496094, "logps/rejected": -88.32652282714844, "loss": 0.9512, "margin_dpo/margin_mean": 4.399797439575195, "margin_dpo/margin_std": 5.840807914733887, "step": 355 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.23347434401512146, "fcm_dpo/delta": 0.06479227542877197, "fcm_dpo/margin": 4.013222694396973, "fcm_dpo/q_t": 0.34076085686683655, "grad_norm": 62.883846282958984, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.2215835154056549, "logits/rejected": 0.1821989119052887, "logps/chosen": -68.06861114501953, "logps/ref_chosen": -60.75285720825195, "logps/ref_rejected": -75.21507263183594, "logps/rejected": -86.54405212402344, "loss": 1.0389, "margin_dpo/margin_mean": 4.013222694396973, "margin_dpo/margin_std": 6.100465297698975, "step": 360 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.2259870022535324, "fcm_dpo/delta": -0.08469346910715103, "fcm_dpo/margin": 4.753388404846191, "fcm_dpo/q_t": 0.3118368983268738, "grad_norm": 45.306915283203125, "learning_rate": 2.5e-07, "logits/chosen": 0.2682866156101227, "logits/rejected": 0.20303437113761902, "logps/chosen": -65.59626007080078, "logps/ref_chosen": -58.56513595581055, "logps/ref_rejected": -84.06403350830078, "logps/rejected": -95.8485336303711, "loss": 0.9033, "margin_dpo/margin_mean": 4.753389358520508, "margin_dpo/margin_std": 5.948471546173096, "step": 365 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.2373490035533905, "fcm_dpo/delta": 0.0985020250082016, "fcm_dpo/margin": 3.820103406906128, "fcm_dpo/q_t": 0.3401463031768799, "grad_norm": 58.69527816772461, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.21440906822681427, "logits/rejected": 0.17156557738780975, "logps/chosen": -66.21595764160156, "logps/ref_chosen": -59.443138122558594, "logps/ref_rejected": -75.80937194824219, "logps/rejected": -86.40229797363281, "loss": 1.042, "margin_dpo/margin_mean": 3.820103406906128, "margin_dpo/margin_std": 5.791916847229004, "step": 370 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.2474198043346405, "fcm_dpo/delta": 0.04625866562128067, "fcm_dpo/margin": 3.8570492267608643, "fcm_dpo/q_t": 0.3406515121459961, "grad_norm": 73.3750228881836, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.26100030541419983, "logits/rejected": 0.2167011946439743, "logps/chosen": -65.69026184082031, "logps/ref_chosen": -58.59185028076172, "logps/ref_rejected": -73.7529525756836, "logps/rejected": -84.70841979980469, "loss": 1.0779, "margin_dpo/margin_mean": 3.8570494651794434, "margin_dpo/margin_std": 6.083317279815674, "step": 375 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.23849084973335266, "fcm_dpo/delta": -0.10434339195489883, "fcm_dpo/margin": 4.571185111999512, "fcm_dpo/q_t": 0.31551235914230347, "grad_norm": 53.46732711791992, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.26094746589660645, "logits/rejected": 0.21969576179981232, "logps/chosen": -65.65458679199219, "logps/ref_chosen": -58.93424606323242, "logps/ref_rejected": -76.27055358886719, "logps/rejected": -87.56207275390625, "loss": 0.943, "margin_dpo/margin_mean": 4.5711846351623535, "margin_dpo/margin_std": 6.037328243255615, "step": 380 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.23582050204277039, "fcm_dpo/delta": 0.02780415117740631, "fcm_dpo/margin": 3.7493813037872314, "fcm_dpo/q_t": 0.3406650125980377, "grad_norm": 59.925880432128906, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.21887190639972687, "logits/rejected": 0.1941918432712555, "logps/chosen": -73.59498596191406, "logps/ref_chosen": -66.42684173583984, "logps/ref_rejected": -76.96304321289062, "logps/rejected": -87.88056945800781, "loss": 0.9882, "margin_dpo/margin_mean": 3.7493815422058105, "margin_dpo/margin_std": 5.2429423332214355, "step": 385 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.2368488758802414, "fcm_dpo/delta": 0.016512060537934303, "fcm_dpo/margin": 4.15440559387207, "fcm_dpo/q_t": 0.3241703510284424, "grad_norm": 48.153385162353516, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.24604515731334686, "logits/rejected": 0.2002202570438385, "logps/chosen": -67.92310333251953, "logps/ref_chosen": -60.984214782714844, "logps/ref_rejected": -79.54056549072266, "logps/rejected": -90.63386535644531, "loss": 0.9327, "margin_dpo/margin_mean": 4.15440559387207, "margin_dpo/margin_std": 5.388613224029541, "step": 390 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.22452032566070557, "fcm_dpo/delta": -0.045371972024440765, "fcm_dpo/margin": 4.619940757751465, "fcm_dpo/q_t": 0.32055288553237915, "grad_norm": 57.57057571411133, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.2859688103199005, "logits/rejected": 0.2365344762802124, "logps/chosen": -64.88384246826172, "logps/ref_chosen": -58.30937957763672, "logps/ref_rejected": -80.09587097167969, "logps/rejected": -91.290283203125, "loss": 0.9273, "margin_dpo/margin_mean": 4.619940757751465, "margin_dpo/margin_std": 5.92281436920166, "step": 395 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.22984282672405243, "fcm_dpo/delta": 0.02407177910208702, "fcm_dpo/margin": 4.2350172996521, "fcm_dpo/q_t": 0.3303438723087311, "grad_norm": 63.975730895996094, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.22668063640594482, "logits/rejected": 0.16040459275245667, "logps/chosen": -68.25566101074219, "logps/ref_chosen": -61.39867401123047, "logps/ref_rejected": -89.0177993774414, "logps/rejected": -100.10980224609375, "loss": 0.966, "margin_dpo/margin_mean": 4.2350172996521, "margin_dpo/margin_std": 5.741770267486572, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.2746705710887909, "eval_fcm_dpo/delta": 0.019361913204193115, "eval_fcm_dpo/margin": 3.5686707496643066, "eval_fcm_dpo/q_t": 0.3450649082660675, "eval_logits/chosen": 0.2557302713394165, "eval_logits/rejected": 0.21348130702972412, "eval_logps/chosen": -81.68497467041016, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -89.94316864013672, "eval_loss": 0.5573223829269409, "eval_margin_dpo/margin_mean": 3.5686707496643066, "eval_margin_dpo/margin_std": 6.046079635620117, "eval_runtime": 38.3156, "eval_samples_per_second": 60.106, "eval_steps_per_second": 1.879, "step": 400 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.22812703251838684, "fcm_dpo/delta": -0.1910632848739624, "fcm_dpo/margin": 5.0697832107543945, "fcm_dpo/q_t": 0.30286386609077454, "grad_norm": 43.19565200805664, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.2721787691116333, "logits/rejected": 0.2133924514055252, "logps/chosen": -62.4058837890625, "logps/ref_chosen": -55.953521728515625, "logps/ref_rejected": -77.67539978027344, "logps/rejected": -89.19754791259766, "loss": 0.8714, "margin_dpo/margin_mean": 5.0697832107543945, "margin_dpo/margin_std": 5.984508514404297, "step": 405 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.2223322093486786, "fcm_dpo/delta": 0.04787999764084816, "fcm_dpo/margin": 4.290686130523682, "fcm_dpo/q_t": 0.3321172595024109, "grad_norm": 56.05967712402344, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.24456481635570526, "logits/rejected": 0.19028040766716003, "logps/chosen": -70.0745620727539, "logps/ref_chosen": -63.40419387817383, "logps/ref_rejected": -80.85710144042969, "logps/rejected": -91.81815338134766, "loss": 0.9542, "margin_dpo/margin_mean": 4.29068660736084, "margin_dpo/margin_std": 5.697317123413086, "step": 410 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.22563305497169495, "fcm_dpo/delta": -0.03537094593048096, "fcm_dpo/margin": 4.56794548034668, "fcm_dpo/q_t": 0.3295273780822754, "grad_norm": 55.72998046875, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.2193383276462555, "logits/rejected": 0.17733624577522278, "logps/chosen": -65.1448745727539, "logps/ref_chosen": -57.6942024230957, "logps/ref_rejected": -71.74036407470703, "logps/rejected": -83.75898742675781, "loss": 0.988, "margin_dpo/margin_mean": 4.56794548034668, "margin_dpo/margin_std": 6.607818603515625, "step": 415 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.23104313015937805, "fcm_dpo/delta": 0.0747731551527977, "fcm_dpo/margin": 4.003242492675781, "fcm_dpo/q_t": 0.3458429276943207, "grad_norm": 71.31780242919922, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.24291574954986572, "logits/rejected": 0.20572228729724884, "logps/chosen": -66.96260070800781, "logps/ref_chosen": -59.169517517089844, "logps/ref_rejected": -69.47721099853516, "logps/rejected": -81.27352142333984, "loss": 1.0849, "margin_dpo/margin_mean": 4.0032429695129395, "margin_dpo/margin_std": 6.466610908508301, "step": 420 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.23313617706298828, "fcm_dpo/delta": -0.0782787948846817, "fcm_dpo/margin": 4.59422492980957, "fcm_dpo/q_t": 0.32748058438301086, "grad_norm": 48.63573455810547, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.24288901686668396, "logits/rejected": 0.2018076479434967, "logps/chosen": -65.39869689941406, "logps/ref_chosen": -58.09320831298828, "logps/ref_rejected": -73.98226165771484, "logps/rejected": -85.88197326660156, "loss": 0.9959, "margin_dpo/margin_mean": 4.594224452972412, "margin_dpo/margin_std": 6.688135623931885, "step": 425 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.23262247443199158, "fcm_dpo/delta": -0.010167494416236877, "fcm_dpo/margin": 4.329963684082031, "fcm_dpo/q_t": 0.3332260251045227, "grad_norm": 52.93145751953125, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.2085982859134674, "logits/rejected": 0.1877393275499344, "logps/chosen": -70.56092834472656, "logps/ref_chosen": -62.7039909362793, "logps/ref_rejected": -74.52284240722656, "logps/rejected": -86.70974731445312, "loss": 1.0091, "margin_dpo/margin_mean": 4.329963684082031, "margin_dpo/margin_std": 6.388288974761963, "step": 430 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.21396084129810333, "fcm_dpo/delta": -0.05841977149248123, "fcm_dpo/margin": 4.911672115325928, "fcm_dpo/q_t": 0.3213272988796234, "grad_norm": 51.059974670410156, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.2870796024799347, "logits/rejected": 0.249590203166008, "logps/chosen": -63.383323669433594, "logps/ref_chosen": -56.12516403198242, "logps/ref_rejected": -74.36073303222656, "logps/rejected": -86.53057861328125, "loss": 0.9411, "margin_dpo/margin_mean": 4.9116716384887695, "margin_dpo/margin_std": 6.554174900054932, "step": 435 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.20786520838737488, "fcm_dpo/delta": -0.039162855595350266, "fcm_dpo/margin": 4.9660820960998535, "fcm_dpo/q_t": 0.3247433304786682, "grad_norm": 46.13332748413086, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.21864204108715057, "logits/rejected": 0.15293407440185547, "logps/chosen": -63.088623046875, "logps/ref_chosen": -55.67548751831055, "logps/ref_rejected": -76.62055206298828, "logps/rejected": -88.9997787475586, "loss": 0.9627, "margin_dpo/margin_mean": 4.966081619262695, "margin_dpo/margin_std": 6.732314109802246, "step": 440 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.19659116864204407, "fcm_dpo/delta": -0.011401364579796791, "fcm_dpo/margin": 4.798353672027588, "fcm_dpo/q_t": 0.33275923132896423, "grad_norm": 54.457401275634766, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.23866701126098633, "logits/rejected": 0.18620404601097107, "logps/chosen": -67.8885269165039, "logps/ref_chosen": -59.903411865234375, "logps/ref_rejected": -82.02873229980469, "logps/rejected": -94.81220245361328, "loss": 0.9728, "margin_dpo/margin_mean": 4.79835319519043, "margin_dpo/margin_std": 6.5291314125061035, "step": 445 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.18794873356819153, "fcm_dpo/delta": 0.019849028438329697, "fcm_dpo/margin": 5.214951038360596, "fcm_dpo/q_t": 0.3214188814163208, "grad_norm": 53.57640838623047, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.2403557002544403, "logits/rejected": 0.17988719046115875, "logps/chosen": -63.68009567260742, "logps/ref_chosen": -55.83526611328125, "logps/ref_rejected": -79.63658142089844, "logps/rejected": -92.69636535644531, "loss": 0.9086, "margin_dpo/margin_mean": 5.214951038360596, "margin_dpo/margin_std": 6.322065830230713, "step": 450 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.20542562007904053, "fcm_dpo/delta": -0.014168155379593372, "fcm_dpo/margin": 4.929433345794678, "fcm_dpo/q_t": 0.3281521499156952, "grad_norm": 49.470947265625, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.19945120811462402, "logits/rejected": 0.1575821340084076, "logps/chosen": -68.32371520996094, "logps/ref_chosen": -60.59226608276367, "logps/ref_rejected": -73.37936401367188, "logps/rejected": -86.04025268554688, "loss": 0.9693, "margin_dpo/margin_mean": 4.929433345794678, "margin_dpo/margin_std": 6.865820407867432, "step": 455 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.19809255003929138, "fcm_dpo/delta": -0.07750917971134186, "fcm_dpo/margin": 5.395981788635254, "fcm_dpo/q_t": 0.3125055730342865, "grad_norm": 47.08208465576172, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.2629553973674774, "logits/rejected": 0.1926957219839096, "logps/chosen": -64.09815216064453, "logps/ref_chosen": -56.21283721923828, "logps/ref_rejected": -83.02075958251953, "logps/rejected": -96.30205535888672, "loss": 0.8901, "margin_dpo/margin_mean": 5.395981788635254, "margin_dpo/margin_std": 6.470156669616699, "step": 460 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.18275338411331177, "fcm_dpo/delta": 0.028541725128889084, "fcm_dpo/margin": 5.32494592666626, "fcm_dpo/q_t": 0.3231423497200012, "grad_norm": 48.6711540222168, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.27234551310539246, "logits/rejected": 0.2267368733882904, "logps/chosen": -67.08876037597656, "logps/ref_chosen": -59.0674934387207, "logps/ref_rejected": -74.53498840332031, "logps/rejected": -87.88118743896484, "loss": 0.9223, "margin_dpo/margin_mean": 5.32494592666626, "margin_dpo/margin_std": 6.607783317565918, "step": 465 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.19131594896316528, "fcm_dpo/delta": -0.0211162306368351, "fcm_dpo/margin": 5.313529014587402, "fcm_dpo/q_t": 0.31958022713661194, "grad_norm": 38.842506408691406, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.26374131441116333, "logits/rejected": 0.2210657298564911, "logps/chosen": -66.67548370361328, "logps/ref_chosen": -58.3397331237793, "logps/ref_rejected": -74.33660125732422, "logps/rejected": -87.98587799072266, "loss": 0.895, "margin_dpo/margin_mean": 5.313528537750244, "margin_dpo/margin_std": 6.436234474182129, "step": 470 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.17899836599826813, "fcm_dpo/delta": -0.07962033152580261, "fcm_dpo/margin": 5.9777021408081055, "fcm_dpo/q_t": 0.3140580356121063, "grad_norm": 35.938438415527344, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.2762961983680725, "logits/rejected": 0.21573925018310547, "logps/chosen": -62.51293182373047, "logps/ref_chosen": -54.60407638549805, "logps/ref_rejected": -79.94635009765625, "logps/rejected": -93.8329086303711, "loss": 0.9163, "margin_dpo/margin_mean": 5.9777021408081055, "margin_dpo/margin_std": 7.518684387207031, "step": 475 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.18707513809204102, "fcm_dpo/delta": 0.11744797229766846, "fcm_dpo/margin": 4.751503944396973, "fcm_dpo/q_t": 0.3464050590991974, "grad_norm": 58.852413177490234, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.21019065380096436, "logits/rejected": 0.1871183216571808, "logps/chosen": -72.26695251464844, "logps/ref_chosen": -63.0672492980957, "logps/ref_rejected": -68.59602355957031, "logps/rejected": -82.5472183227539, "loss": 1.0813, "margin_dpo/margin_mean": 4.751503944396973, "margin_dpo/margin_std": 7.717199802398682, "step": 480 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.18762502074241638, "fcm_dpo/delta": -0.02065492607653141, "fcm_dpo/margin": 5.407547950744629, "fcm_dpo/q_t": 0.3266620337963104, "grad_norm": 46.27238845825195, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.26643306016921997, "logits/rejected": 0.2090064287185669, "logps/chosen": -67.2301254272461, "logps/ref_chosen": -58.75799560546875, "logps/ref_rejected": -79.72233581542969, "logps/rejected": -93.60199737548828, "loss": 0.9668, "margin_dpo/margin_mean": 5.407547950744629, "margin_dpo/margin_std": 7.433469295501709, "step": 485 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.19302329421043396, "fcm_dpo/delta": 0.07708380371332169, "fcm_dpo/margin": 4.815155982971191, "fcm_dpo/q_t": 0.34535330533981323, "grad_norm": 64.28440856933594, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.2915256917476654, "logits/rejected": 0.23610806465148926, "logps/chosen": -70.24661254882812, "logps/ref_chosen": -61.394195556640625, "logps/ref_rejected": -81.1914291381836, "logps/rejected": -94.8590087890625, "loss": 1.0442, "margin_dpo/margin_mean": 4.815155982971191, "margin_dpo/margin_std": 7.485970497131348, "step": 490 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.205574631690979, "fcm_dpo/delta": 0.04338858649134636, "fcm_dpo/margin": 4.661031246185303, "fcm_dpo/q_t": 0.33606356382369995, "grad_norm": 53.13848114013672, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.2322273999452591, "logits/rejected": 0.1737196147441864, "logps/chosen": -68.44369506835938, "logps/ref_chosen": -59.85382843017578, "logps/ref_rejected": -80.63748931884766, "logps/rejected": -93.88838195800781, "loss": 1.0377, "margin_dpo/margin_mean": 4.661031723022461, "margin_dpo/margin_std": 7.028813362121582, "step": 495 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.1951872855424881, "fcm_dpo/delta": -0.0991867184638977, "fcm_dpo/margin": 5.557717323303223, "fcm_dpo/q_t": 0.325251966714859, "grad_norm": 56.068965911865234, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.2677212357521057, "logits/rejected": 0.22101454436779022, "logps/chosen": -74.45133209228516, "logps/ref_chosen": -66.17753601074219, "logps/ref_rejected": -83.75955200195312, "logps/rejected": -97.59107971191406, "loss": 0.9758, "margin_dpo/margin_mean": 5.557717323303223, "margin_dpo/margin_std": 7.881464958190918, "step": 500 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.1971052587032318, "fcm_dpo/delta": 0.01763380505144596, "fcm_dpo/margin": 4.979878902435303, "fcm_dpo/q_t": 0.3262530267238617, "grad_norm": 58.05271530151367, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.24825136363506317, "logits/rejected": 0.2213972508907318, "logps/chosen": -69.82575225830078, "logps/ref_chosen": -62.11005401611328, "logps/ref_rejected": -74.64705657958984, "logps/rejected": -87.34264373779297, "loss": 0.943, "margin_dpo/margin_mean": 4.979878902435303, "margin_dpo/margin_std": 6.590427398681641, "step": 505 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.19753201305866241, "fcm_dpo/delta": -0.0035357594024389982, "fcm_dpo/margin": 5.0731520652771, "fcm_dpo/q_t": 0.3300931751728058, "grad_norm": 55.001102447509766, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.2655506432056427, "logits/rejected": 0.20919163525104523, "logps/chosen": -72.16453552246094, "logps/ref_chosen": -64.42265319824219, "logps/ref_rejected": -87.00096130371094, "logps/rejected": -99.81599426269531, "loss": 0.989, "margin_dpo/margin_mean": 5.073152542114258, "margin_dpo/margin_std": 7.320687294006348, "step": 510 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.18178601562976837, "fcm_dpo/delta": -0.08417822420597076, "fcm_dpo/margin": 5.895651817321777, "fcm_dpo/q_t": 0.3174353837966919, "grad_norm": 43.048763275146484, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.262310266494751, "logits/rejected": 0.2180895060300827, "logps/chosen": -66.26689147949219, "logps/ref_chosen": -58.284393310546875, "logps/ref_rejected": -79.09356689453125, "logps/rejected": -92.97171020507812, "loss": 0.9102, "margin_dpo/margin_mean": 5.895651817321777, "margin_dpo/margin_std": 8.012345314025879, "step": 515 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.19615530967712402, "fcm_dpo/delta": 0.15623337030410767, "fcm_dpo/margin": 4.374403953552246, "fcm_dpo/q_t": 0.3525107502937317, "grad_norm": 52.284854888916016, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.2730554938316345, "logits/rejected": 0.23126861453056335, "logps/chosen": -69.12674713134766, "logps/ref_chosen": -61.03638458251953, "logps/ref_rejected": -72.15824890136719, "logps/rejected": -84.6230239868164, "loss": 1.0682, "margin_dpo/margin_mean": 4.374403953552246, "margin_dpo/margin_std": 7.115353584289551, "step": 520 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.20899620652198792, "fcm_dpo/delta": 0.03430444374680519, "fcm_dpo/margin": 4.636659145355225, "fcm_dpo/q_t": 0.3303223252296448, "grad_norm": 62.26331329345703, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.20217880606651306, "logits/rejected": 0.1589132845401764, "logps/chosen": -75.64788055419922, "logps/ref_chosen": -68.02732849121094, "logps/ref_rejected": -85.41429901123047, "logps/rejected": -97.6715087890625, "loss": 1.0154, "margin_dpo/margin_mean": 4.636659145355225, "margin_dpo/margin_std": 6.868948936462402, "step": 525 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.22276242077350616, "fcm_dpo/delta": -0.033804379403591156, "fcm_dpo/margin": 4.592731475830078, "fcm_dpo/q_t": 0.3295581638813019, "grad_norm": 47.57779312133789, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.23308193683624268, "logits/rejected": 0.17806780338287354, "logps/chosen": -66.49713134765625, "logps/ref_chosen": -58.67436599731445, "logps/ref_rejected": -79.38807678222656, "logps/rejected": -91.80358123779297, "loss": 1.0217, "margin_dpo/margin_mean": 4.5927324295043945, "margin_dpo/margin_std": 6.516105651855469, "step": 530 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.21365702152252197, "fcm_dpo/delta": 0.007706022821366787, "fcm_dpo/margin": 4.637831687927246, "fcm_dpo/q_t": 0.33375898003578186, "grad_norm": 52.6577262878418, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.23046866059303284, "logits/rejected": 0.17907290160655975, "logps/chosen": -65.31705474853516, "logps/ref_chosen": -57.640098571777344, "logps/ref_rejected": -77.25399780273438, "logps/rejected": -89.56879425048828, "loss": 1.0116, "margin_dpo/margin_mean": 4.637831211090088, "margin_dpo/margin_std": 6.8540802001953125, "step": 535 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.19856581091880798, "fcm_dpo/delta": -0.12930835783481598, "fcm_dpo/margin": 5.611011028289795, "fcm_dpo/q_t": 0.30457136034965515, "grad_norm": 49.485713958740234, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.2640102505683899, "logits/rejected": 0.199564129114151, "logps/chosen": -67.96708679199219, "logps/ref_chosen": -60.17341995239258, "logps/ref_rejected": -85.50316619873047, "logps/rejected": -98.90785217285156, "loss": 0.8669, "margin_dpo/margin_mean": 5.611011981964111, "margin_dpo/margin_std": 6.6042799949646, "step": 540 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.17933054268360138, "fcm_dpo/delta": -0.039366770535707474, "fcm_dpo/margin": 5.763962268829346, "fcm_dpo/q_t": 0.3183125853538513, "grad_norm": 49.12068557739258, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.2411998063325882, "logits/rejected": 0.19200275838375092, "logps/chosen": -64.20506286621094, "logps/ref_chosen": -56.985809326171875, "logps/ref_rejected": -73.21353912353516, "logps/rejected": -86.19674682617188, "loss": 0.9215, "margin_dpo/margin_mean": 5.7639617919921875, "margin_dpo/margin_std": 7.375536918640137, "step": 545 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.18896816670894623, "fcm_dpo/delta": 0.1245899647474289, "fcm_dpo/margin": 4.69591760635376, "fcm_dpo/q_t": 0.33968135714530945, "grad_norm": 46.302913665771484, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.2536870241165161, "logits/rejected": 0.20810946822166443, "logps/chosen": -67.05742645263672, "logps/ref_chosen": -59.600929260253906, "logps/ref_rejected": -75.24870300292969, "logps/rejected": -87.401123046875, "loss": 0.9894, "margin_dpo/margin_mean": 4.695918083190918, "margin_dpo/margin_std": 6.6248931884765625, "step": 550 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.20476670563220978, "fcm_dpo/delta": 0.011952433735132217, "fcm_dpo/margin": 4.818479537963867, "fcm_dpo/q_t": 0.33133333921432495, "grad_norm": 60.33503341674805, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.2548312544822693, "logits/rejected": 0.2083214819431305, "logps/chosen": -71.74968719482422, "logps/ref_chosen": -63.578895568847656, "logps/ref_rejected": -78.87867736816406, "logps/rejected": -91.8679428100586, "loss": 1.0108, "margin_dpo/margin_mean": 4.818479537963867, "margin_dpo/margin_std": 7.097817897796631, "step": 555 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.19495727121829987, "fcm_dpo/delta": -0.06875023245811462, "fcm_dpo/margin": 5.4297003746032715, "fcm_dpo/q_t": 0.32562586665153503, "grad_norm": 50.54167556762695, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.26778554916381836, "logits/rejected": 0.2139805108308792, "logps/chosen": -66.76048278808594, "logps/ref_chosen": -58.651512145996094, "logps/ref_rejected": -78.67181396484375, "logps/rejected": -92.21048736572266, "loss": 0.9439, "margin_dpo/margin_mean": 5.42970085144043, "margin_dpo/margin_std": 7.295182704925537, "step": 560 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.18790480494499207, "fcm_dpo/delta": -0.006370419170707464, "fcm_dpo/margin": 5.345377445220947, "fcm_dpo/q_t": 0.32594844698905945, "grad_norm": 53.373992919921875, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.2542131841182709, "logits/rejected": 0.2154400646686554, "logps/chosen": -68.05675506591797, "logps/ref_chosen": -60.3114128112793, "logps/ref_rejected": -78.25270080566406, "logps/rejected": -91.34342956542969, "loss": 0.951, "margin_dpo/margin_mean": 5.345377445220947, "margin_dpo/margin_std": 7.17926549911499, "step": 565 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.1877862513065338, "fcm_dpo/delta": -0.07747501134872437, "fcm_dpo/margin": 5.681626796722412, "fcm_dpo/q_t": 0.31316810846328735, "grad_norm": 37.85681915283203, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.24570398032665253, "logits/rejected": 0.18830426037311554, "logps/chosen": -65.35658264160156, "logps/ref_chosen": -57.752410888671875, "logps/ref_rejected": -76.99858093261719, "logps/rejected": -90.28437805175781, "loss": 0.9025, "margin_dpo/margin_mean": 5.681626319885254, "margin_dpo/margin_std": 6.906379699707031, "step": 570 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.18777219951152802, "fcm_dpo/delta": 0.09439138323068619, "fcm_dpo/margin": 4.848970890045166, "fcm_dpo/q_t": 0.3400737941265106, "grad_norm": 51.46808624267578, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.27044713497161865, "logits/rejected": 0.23382814228534698, "logps/chosen": -71.76069641113281, "logps/ref_chosen": -63.61958694458008, "logps/ref_rejected": -79.51353454589844, "logps/rejected": -92.50362396240234, "loss": 1.0032, "margin_dpo/margin_mean": 4.848970890045166, "margin_dpo/margin_std": 7.03188943862915, "step": 575 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.19913935661315918, "fcm_dpo/delta": 0.025188129395246506, "fcm_dpo/margin": 4.890405178070068, "fcm_dpo/q_t": 0.3381398320198059, "grad_norm": 47.50596618652344, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.25481683015823364, "logits/rejected": 0.21231190860271454, "logps/chosen": -64.9234390258789, "logps/ref_chosen": -57.3541145324707, "logps/ref_rejected": -73.14434051513672, "logps/rejected": -85.60404968261719, "loss": 1.0075, "margin_dpo/margin_mean": 4.89040470123291, "margin_dpo/margin_std": 7.159416198730469, "step": 580 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.2018500119447708, "fcm_dpo/delta": 0.039844244718551636, "fcm_dpo/margin": 4.77426815032959, "fcm_dpo/q_t": 0.3346126675605774, "grad_norm": 52.080108642578125, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.302755206823349, "logits/rejected": 0.2517249584197998, "logps/chosen": -63.728919982910156, "logps/ref_chosen": -56.0127067565918, "logps/ref_rejected": -77.16522216796875, "logps/rejected": -89.65570068359375, "loss": 1.0162, "margin_dpo/margin_mean": 4.774267673492432, "margin_dpo/margin_std": 7.161828517913818, "step": 585 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.1958218663930893, "fcm_dpo/delta": -0.1253487765789032, "fcm_dpo/margin": 5.685278415679932, "fcm_dpo/q_t": 0.3043304681777954, "grad_norm": 44.7454833984375, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.27173739671707153, "logits/rejected": 0.23606888949871063, "logps/chosen": -68.17321014404297, "logps/ref_chosen": -60.5894660949707, "logps/ref_rejected": -74.34771728515625, "logps/rejected": -87.61674499511719, "loss": 0.9031, "margin_dpo/margin_mean": 5.685278415679932, "margin_dpo/margin_std": 7.15541934967041, "step": 590 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.18476364016532898, "fcm_dpo/delta": 0.004565200302749872, "fcm_dpo/margin": 5.378830909729004, "fcm_dpo/q_t": 0.3307590186595917, "grad_norm": 45.189571380615234, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.2524837553501129, "logits/rejected": 0.1859164535999298, "logps/chosen": -62.0955924987793, "logps/ref_chosen": -54.77838897705078, "logps/ref_rejected": -78.102783203125, "logps/rejected": -90.79881286621094, "loss": 0.9849, "margin_dpo/margin_mean": 5.378830909729004, "margin_dpo/margin_std": 7.575669765472412, "step": 595 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.20738473534584045, "fcm_dpo/delta": 0.18134655058383942, "fcm_dpo/margin": 3.9983153343200684, "fcm_dpo/q_t": 0.3663412928581238, "grad_norm": 58.46778869628906, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.27015766501426697, "logits/rejected": 0.23463740944862366, "logps/chosen": -66.70384216308594, "logps/ref_chosen": -58.45500564575195, "logps/ref_rejected": -70.7367172241211, "logps/rejected": -82.9838638305664, "loss": 1.122, "margin_dpo/margin_mean": 3.9983153343200684, "margin_dpo/margin_std": 7.113587856292725, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.2267770618200302, "eval_fcm_dpo/delta": -0.0016693489160388708, "eval_fcm_dpo/margin": 4.408850193023682, "eval_fcm_dpo/q_t": 0.34116730093955994, "eval_logits/chosen": 0.27240264415740967, "eval_logits/rejected": 0.2290111631155014, "eval_logps/chosen": -82.5570068359375, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -91.6553726196289, "eval_loss": 0.5467005968093872, "eval_margin_dpo/margin_mean": 4.40885066986084, "eval_margin_dpo/margin_std": 7.250354290008545, "eval_runtime": 38.3062, "eval_samples_per_second": 60.121, "eval_steps_per_second": 1.88, "step": 600 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.20211096107959747, "fcm_dpo/delta": -0.07903344184160233, "fcm_dpo/margin": 5.284127235412598, "fcm_dpo/q_t": 0.31968027353286743, "grad_norm": 52.800880432128906, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.2609863877296448, "logits/rejected": 0.21765032410621643, "logps/chosen": -66.56036376953125, "logps/ref_chosen": -59.87483596801758, "logps/ref_rejected": -75.75318908691406, "logps/rejected": -87.72285461425781, "loss": 0.9413, "margin_dpo/margin_mean": 5.2841267585754395, "margin_dpo/margin_std": 7.117745399475098, "step": 605 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.19957733154296875, "fcm_dpo/delta": -0.02773882821202278, "fcm_dpo/margin": 5.118582248687744, "fcm_dpo/q_t": 0.3286603093147278, "grad_norm": 64.0735092163086, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.2576659619808197, "logits/rejected": 0.22138860821723938, "logps/chosen": -67.77408599853516, "logps/ref_chosen": -60.35883712768555, "logps/ref_rejected": -81.3543930053711, "logps/rejected": -93.88822937011719, "loss": 0.964, "margin_dpo/margin_mean": 5.118581771850586, "margin_dpo/margin_std": 7.064711093902588, "step": 610 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.19206738471984863, "fcm_dpo/delta": -0.05985846370458603, "fcm_dpo/margin": 5.479418754577637, "fcm_dpo/q_t": 0.31544992327690125, "grad_norm": 52.961814880371094, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.22922465205192566, "logits/rejected": 0.19491711258888245, "logps/chosen": -66.57718658447266, "logps/ref_chosen": -59.17219161987305, "logps/ref_rejected": -79.92167663574219, "logps/rejected": -92.80609130859375, "loss": 0.9192, "margin_dpo/margin_mean": 5.4794182777404785, "margin_dpo/margin_std": 6.942657470703125, "step": 615 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.19354508817195892, "fcm_dpo/delta": 0.08557742834091187, "fcm_dpo/margin": 4.7475481033325195, "fcm_dpo/q_t": 0.3413916826248169, "grad_norm": 58.06659698486328, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.2654898166656494, "logits/rejected": 0.21224336326122284, "logps/chosen": -65.464599609375, "logps/ref_chosen": -58.052696228027344, "logps/ref_rejected": -78.37252807617188, "logps/rejected": -90.53197479248047, "loss": 0.9722, "margin_dpo/margin_mean": 4.7475481033325195, "margin_dpo/margin_std": 6.606284141540527, "step": 620 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.20503361523151398, "fcm_dpo/delta": 0.02592085301876068, "fcm_dpo/margin": 4.755415916442871, "fcm_dpo/q_t": 0.3336712718009949, "grad_norm": 52.6633186340332, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.26381465792655945, "logits/rejected": 0.19538459181785583, "logps/chosen": -64.57536315917969, "logps/ref_chosen": -56.957862854003906, "logps/ref_rejected": -82.68255615234375, "logps/rejected": -95.05546569824219, "loss": 0.9651, "margin_dpo/margin_mean": 4.755415916442871, "margin_dpo/margin_std": 6.575987815856934, "step": 625 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.18615694344043732, "fcm_dpo/delta": -0.13678616285324097, "fcm_dpo/margin": 6.003110408782959, "fcm_dpo/q_t": 0.30930259823799133, "grad_norm": 38.42726135253906, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.26059260964393616, "logits/rejected": 0.20223116874694824, "logps/chosen": -63.660064697265625, "logps/ref_chosen": -56.71510696411133, "logps/ref_rejected": -82.94544219970703, "logps/rejected": -95.89350128173828, "loss": 0.8913, "margin_dpo/margin_mean": 6.003109931945801, "margin_dpo/margin_std": 7.36514949798584, "step": 630 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.17870579659938812, "fcm_dpo/delta": 0.08864951133728027, "fcm_dpo/margin": 5.159267425537109, "fcm_dpo/q_t": 0.34538301825523376, "grad_norm": 59.73903274536133, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.25905171036720276, "logits/rejected": 0.21522016823291779, "logps/chosen": -66.6440658569336, "logps/ref_chosen": -59.33793258666992, "logps/ref_rejected": -75.01703643798828, "logps/rejected": -87.4824447631836, "loss": 1.0145, "margin_dpo/margin_mean": 5.159267425537109, "margin_dpo/margin_std": 7.501204013824463, "step": 635 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.18961693346500397, "fcm_dpo/delta": -0.015742216259241104, "fcm_dpo/margin": 5.340132713317871, "fcm_dpo/q_t": 0.32362303137779236, "grad_norm": 49.96174240112305, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.2457093894481659, "logits/rejected": 0.1768445074558258, "logps/chosen": -66.05986022949219, "logps/ref_chosen": -58.1605339050293, "logps/ref_rejected": -79.85365295410156, "logps/rejected": -93.09309387207031, "loss": 0.9413, "margin_dpo/margin_mean": 5.340132713317871, "margin_dpo/margin_std": 7.175882816314697, "step": 640 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.20850110054016113, "fcm_dpo/delta": 0.11010245233774185, "fcm_dpo/margin": 4.303619384765625, "fcm_dpo/q_t": 0.3440888524055481, "grad_norm": 55.303855895996094, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.235604926943779, "logits/rejected": 0.20262756943702698, "logps/chosen": -71.58079528808594, "logps/ref_chosen": -63.45180130004883, "logps/ref_rejected": -74.18285369873047, "logps/rejected": -86.61546325683594, "loss": 1.0835, "margin_dpo/margin_mean": 4.303619384765625, "margin_dpo/margin_std": 7.141520023345947, "step": 645 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.19852934777736664, "fcm_dpo/delta": -0.0924367681145668, "fcm_dpo/margin": 5.4358906745910645, "fcm_dpo/q_t": 0.31767454743385315, "grad_norm": 64.30652618408203, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.2805251479148865, "logits/rejected": 0.22169442474842072, "logps/chosen": -67.74933624267578, "logps/ref_chosen": -59.75496292114258, "logps/ref_rejected": -84.31481170654297, "logps/rejected": -97.74507141113281, "loss": 0.9544, "margin_dpo/margin_mean": 5.4358906745910645, "margin_dpo/margin_std": 7.351178169250488, "step": 650 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.19725769758224487, "fcm_dpo/delta": 0.0005639016744680703, "fcm_dpo/margin": 5.0511932373046875, "fcm_dpo/q_t": 0.32906073331832886, "grad_norm": 39.39985656738281, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.2633139193058014, "logits/rejected": 0.1995994746685028, "logps/chosen": -65.23994445800781, "logps/ref_chosen": -57.817848205566406, "logps/ref_rejected": -79.81755065917969, "logps/rejected": -92.29084014892578, "loss": 0.9413, "margin_dpo/margin_mean": 5.0511932373046875, "margin_dpo/margin_std": 6.741064548492432, "step": 655 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.18813875317573547, "fcm_dpo/delta": -0.03273053467273712, "fcm_dpo/margin": 5.455845832824707, "fcm_dpo/q_t": 0.3237132430076599, "grad_norm": 46.501102447509766, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.3373282551765442, "logits/rejected": 0.2789747714996338, "logps/chosen": -67.12478637695312, "logps/ref_chosen": -59.12651443481445, "logps/ref_rejected": -79.42085266113281, "logps/rejected": -92.8749771118164, "loss": 0.9505, "margin_dpo/margin_mean": 5.455845832824707, "margin_dpo/margin_std": 7.354535102844238, "step": 660 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.0460260132617922, "train_runtime": 1747.5645, "train_samples_per_second": 24.226, "train_steps_per_second": 0.378 } ], "logging_steps": 5, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }