{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 200, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0013532638549804688, "fcm_dpo/q_t": 0.5000336766242981, "grad_norm": 28.21938133239746, "learning_rate": 0.0, "logits/chosen": 0.13337239623069763, "logits/rejected": 0.12492948770523071, "logps/chosen": -64.5841293334961, "logps/ref_chosen": -64.61280822753906, "logps/ref_rejected": -64.17195129394531, "logps/rejected": -64.14192199707031, "loss": 1.3866, "margin_dpo/margin_mean": -0.0013527870178222656, "margin_dpo/margin_std": 0.2561596930027008, "step": 1 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.012731105089187622, "fcm_dpo/q_t": 0.49968191981315613, "grad_norm": 29.578828811645508, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.09623775631189346, "logits/rejected": 0.06788332760334015, "logps/chosen": -65.3349380493164, "logps/ref_chosen": -65.34695434570312, "logps/ref_rejected": -79.315673828125, "logps/rejected": -79.31640625, "loss": 1.3853, "margin_dpo/margin_mean": 0.012730807065963745, "margin_dpo/margin_std": 0.3051193654537201, "step": 5 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0006597042083740234, "fcm_dpo/q_t": 0.5000167489051819, "grad_norm": 29.6705322265625, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.10582169145345688, "logits/rejected": 0.06683535873889923, "logps/chosen": -56.6657829284668, "logps/ref_chosen": -56.65692901611328, "logps/ref_rejected": -80.12786865234375, "logps/rejected": -80.13607025146484, "loss": 1.3866, "margin_dpo/margin_mean": -0.00065990089206025, "margin_dpo/margin_std": 0.3203383684158325, "step": 10 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.01577478088438511, "fcm_dpo/q_t": 0.49960583448410034, "grad_norm": 32.982215881347656, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.08618224412202835, "logits/rejected": 0.0566771999001503, "logps/chosen": -60.09851837158203, "logps/ref_chosen": -60.09392166137695, "logps/ref_rejected": -78.99056243896484, "logps/rejected": -79.01094818115234, "loss": 1.385, "margin_dpo/margin_mean": 0.01577501930296421, "margin_dpo/margin_std": 0.3348791301250458, "step": 15 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.004742377903312445, "fcm_dpo/q_t": 0.5001183748245239, "grad_norm": 29.284229278564453, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.09735535085201263, "logits/rejected": 0.06951850652694702, "logps/chosen": -55.4586296081543, "logps/ref_chosen": -55.464561462402344, "logps/ref_rejected": -77.40013122558594, "logps/rejected": -77.38945007324219, "loss": 1.387, "margin_dpo/margin_mean": -0.004742181394249201, "margin_dpo/margin_std": 0.29244670271873474, "step": 20 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.00360795552842319, "fcm_dpo/q_t": 0.4999099671840668, "grad_norm": 29.778076171875, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.10357926040887833, "logits/rejected": 0.07201702892780304, "logps/chosen": -60.72992706298828, "logps/ref_chosen": -60.711814880371094, "logps/ref_rejected": -82.71756744384766, "logps/rejected": -82.7392807006836, "loss": 1.3862, "margin_dpo/margin_mean": 0.0036078630946576595, "margin_dpo/margin_std": 0.30398499965667725, "step": 25 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.010024601593613625, "fcm_dpo/q_t": 0.5002505779266357, "grad_norm": 30.504179000854492, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.1048557385802269, "logits/rejected": 0.0802190899848938, "logps/chosen": -60.91057205200195, "logps/ref_chosen": -60.880210876464844, "logps/ref_rejected": -78.44148254394531, "logps/rejected": -78.4618148803711, "loss": 1.3875, "margin_dpo/margin_mean": -0.0100246611982584, "margin_dpo/margin_std": 0.30975908041000366, "step": 30 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.013202684931457043, "fcm_dpo/q_t": 0.49967002868652344, "grad_norm": 27.877357482910156, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.08469098061323166, "logits/rejected": 0.057735610753297806, "logps/chosen": -62.28917694091797, "logps/ref_chosen": -62.248138427734375, "logps/ref_rejected": -79.56475830078125, "logps/rejected": -79.61898803710938, "loss": 1.3853, "margin_dpo/margin_mean": 0.013202887959778309, "margin_dpo/margin_std": 0.32241854071617126, "step": 35 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05649406835436821, "fcm_dpo/q_t": 0.4985879957675934, "grad_norm": 31.444271087646484, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.11410780251026154, "logits/rejected": 0.06925268471240997, "logps/chosen": -58.951148986816406, "logps/ref_chosen": -58.87812423706055, "logps/ref_rejected": -84.22982025146484, "logps/rejected": -84.35932922363281, "loss": 1.381, "margin_dpo/margin_mean": 0.056494224816560745, "margin_dpo/margin_std": 0.3627670109272003, "step": 40 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.06682233512401581, "fcm_dpo/q_t": 0.498330295085907, "grad_norm": 31.839420318603516, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.05706251785159111, "logits/rejected": 0.03058524802327156, "logps/chosen": -66.00699615478516, "logps/ref_chosen": -65.88298034667969, "logps/ref_rejected": -83.87881469726562, "logps/rejected": -84.06964111328125, "loss": 1.3801, "margin_dpo/margin_mean": 0.06682238727807999, "margin_dpo/margin_std": 0.4172392785549164, "step": 45 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09804753959178925, "fcm_dpo/q_t": 0.4975499212741852, "grad_norm": 27.911935806274414, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.09519219398498535, "logits/rejected": 0.06103789061307907, "logps/chosen": -55.37559127807617, "logps/ref_chosen": -55.172386169433594, "logps/ref_rejected": -69.63300323486328, "logps/rejected": -69.93424224853516, "loss": 1.377, "margin_dpo/margin_mean": 0.09804768860340118, "margin_dpo/margin_std": 0.4135734438896179, "step": 50 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.18540987372398376, "fcm_dpo/q_t": 0.4953702390193939, "grad_norm": 31.428316116333008, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.0665198341012001, "logits/rejected": 0.030963122844696045, "logps/chosen": -57.539833068847656, "logps/ref_chosen": -57.193580627441406, "logps/ref_rejected": -79.69940948486328, "logps/rejected": -80.23106384277344, "loss": 1.3686, "margin_dpo/margin_mean": 0.18540982902050018, "margin_dpo/margin_std": 0.5479583144187927, "step": 55 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.20901694893836975, "fcm_dpo/q_t": 0.4947921633720398, "grad_norm": 29.466659545898438, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.11891994625329971, "logits/rejected": 0.0848398357629776, "logps/chosen": -60.59447479248047, "logps/ref_chosen": -60.068870544433594, "logps/ref_rejected": -74.41178894042969, "logps/rejected": -75.14640808105469, "loss": 1.3671, "margin_dpo/margin_mean": 0.20901694893836975, "margin_dpo/margin_std": 0.7362244129180908, "step": 60 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.3001774847507477, "fcm_dpo/q_t": 0.4925141930580139, "grad_norm": 30.807130813598633, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.13861653208732605, "logits/rejected": 0.10865757614374161, "logps/chosen": -58.88816452026367, "logps/ref_chosen": -58.1558952331543, "logps/ref_rejected": -76.06512451171875, "logps/rejected": -77.09757995605469, "loss": 1.3589, "margin_dpo/margin_mean": 0.3001771569252014, "margin_dpo/margin_std": 0.9452616572380066, "step": 65 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.39394986629486084, "fcm_dpo/q_t": 0.49018916487693787, "grad_norm": 28.67983627319336, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.10006751120090485, "logits/rejected": 0.06497758626937866, "logps/chosen": -68.39962005615234, "logps/ref_chosen": -67.35506439208984, "logps/ref_rejected": -82.24962615966797, "logps/rejected": -83.6881332397461, "loss": 1.3507, "margin_dpo/margin_mean": 0.39395004510879517, "margin_dpo/margin_std": 1.1363012790679932, "step": 70 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5148524045944214, "fcm_dpo/q_t": 0.48723092675209045, "grad_norm": 26.078073501586914, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.10058212280273438, "logits/rejected": 0.06556755304336548, "logps/chosen": -58.295387268066406, "logps/ref_chosen": -56.86763381958008, "logps/ref_rejected": -72.56938934326172, "logps/rejected": -74.51200103759766, "loss": 1.3408, "margin_dpo/margin_mean": 0.5148526430130005, "margin_dpo/margin_std": 1.398964762687683, "step": 75 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6505656242370605, "fcm_dpo/q_t": 0.48392024636268616, "grad_norm": 28.3653621673584, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.13842633366584778, "logits/rejected": 0.09329269081354141, "logps/chosen": -59.446983337402344, "logps/ref_chosen": -57.687095642089844, "logps/ref_rejected": -78.06813049316406, "logps/rejected": -80.47857666015625, "loss": 1.3297, "margin_dpo/margin_mean": 0.6505654454231262, "margin_dpo/margin_std": 1.6704308986663818, "step": 80 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.0345122814178467, "fcm_dpo/q_t": 0.47443389892578125, "grad_norm": 26.892316818237305, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.1578936129808426, "logits/rejected": 0.11112338304519653, "logps/chosen": -59.1240234375, "logps/ref_chosen": -56.96040725708008, "logps/ref_rejected": -75.22166442871094, "logps/rejected": -78.41979217529297, "loss": 1.2938, "margin_dpo/margin_mean": 1.0345120429992676, "margin_dpo/margin_std": 1.7866588830947876, "step": 85 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1209580898284912, "fcm_dpo/q_t": 0.47252073884010315, "grad_norm": 29.175752639770508, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.20981314778327942, "logits/rejected": 0.16077354550361633, "logps/chosen": -60.326324462890625, "logps/ref_chosen": -57.41730499267578, "logps/ref_rejected": -80.87986755371094, "logps/rejected": -84.90985870361328, "loss": 1.2935, "margin_dpo/margin_mean": 1.1209580898284912, "margin_dpo/margin_std": 2.4879467487335205, "step": 90 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.4390569925308228, "fcm_dpo/q_t": 0.46531516313552856, "grad_norm": 29.310806274414062, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.1912127137184143, "logits/rejected": 0.1479342132806778, "logps/chosen": -57.60295867919922, "logps/ref_chosen": -54.08087158203125, "logps/ref_rejected": -76.15860748291016, "logps/rejected": -81.1197509765625, "loss": 1.2749, "margin_dpo/margin_mean": 1.4390567541122437, "margin_dpo/margin_std": 3.292581558227539, "step": 95 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.211411714553833, "fcm_dpo/q_t": 0.47177332639694214, "grad_norm": 33.16886520385742, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.2135041207075119, "logits/rejected": 0.18092623353004456, "logps/chosen": -68.66203308105469, "logps/ref_chosen": -63.875038146972656, "logps/ref_rejected": -82.077880859375, "logps/rejected": -88.07627868652344, "loss": 1.3247, "margin_dpo/margin_mean": 1.211411714553833, "margin_dpo/margin_std": 4.652392387390137, "step": 100 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.7167237997055054, "fcm_dpo/q_t": 0.460963636636734, "grad_norm": 30.796314239501953, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.2201795130968094, "logits/rejected": 0.16930809617042542, "logps/chosen": -67.5002670288086, "logps/ref_chosen": -62.572479248046875, "logps/ref_rejected": -80.93415069580078, "logps/rejected": -87.57865905761719, "loss": 1.283, "margin_dpo/margin_mean": 1.7167232036590576, "margin_dpo/margin_std": 4.936122894287109, "step": 105 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5425608158111572, "fcm_dpo/q_t": 0.4638938009738922, "grad_norm": 44.27668380737305, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.1879446804523468, "logits/rejected": 0.16481170058250427, "logps/chosen": -74.3304672241211, "logps/ref_chosen": -68.67534637451172, "logps/ref_rejected": -78.82028198242188, "logps/rejected": -86.01795959472656, "loss": 1.3142, "margin_dpo/margin_mean": 1.5425606966018677, "margin_dpo/margin_std": 5.584108829498291, "step": 110 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.017230272293091, "fcm_dpo/q_t": 0.45290979743003845, "grad_norm": 28.293357849121094, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.23582999408245087, "logits/rejected": 0.18265566229820251, "logps/chosen": -64.07593536376953, "logps/ref_chosen": -58.65370559692383, "logps/ref_rejected": -81.89688873291016, "logps/rejected": -89.33635711669922, "loss": 1.2493, "margin_dpo/margin_mean": 2.0172300338745117, "margin_dpo/margin_std": 4.730603218078613, "step": 115 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.405763864517212, "fcm_dpo/q_t": 0.4429899752140045, "grad_norm": 28.859172821044922, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.24149473011493683, "logits/rejected": 0.19213181734085083, "logps/chosen": -61.583091735839844, "logps/ref_chosen": -56.16423797607422, "logps/ref_rejected": -75.87689971923828, "logps/rejected": -83.70152282714844, "loss": 1.208, "margin_dpo/margin_mean": 2.405764102935791, "margin_dpo/margin_std": 4.32560396194458, "step": 120 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.015474319458008, "fcm_dpo/q_t": 0.4313550889492035, "grad_norm": 29.275339126586914, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.26413899660110474, "logits/rejected": 0.20717649161815643, "logps/chosen": -66.0591049194336, "logps/ref_chosen": -59.744285583496094, "logps/ref_rejected": -86.77314758300781, "logps/rejected": -96.10343933105469, "loss": 1.1669, "margin_dpo/margin_mean": 3.015474319458008, "margin_dpo/margin_std": 5.040165901184082, "step": 125 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 0.09993546456098557, "fcm_dpo/delta": -0.0032323698978871107, "fcm_dpo/margin": 2.8389410972595215, "fcm_dpo/q_t": 0.4344428479671478, "grad_norm": 30.352113723754883, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.1967930942773819, "logits/rejected": 0.1665017306804657, "logps/chosen": -71.52397918701172, "logps/ref_chosen": -64.15296936035156, "logps/ref_rejected": -75.17271423339844, "logps/rejected": -85.38265991210938, "loss": 1.1919, "margin_dpo/margin_mean": 2.8389410972595215, "margin_dpo/margin_std": 5.271792411804199, "step": 130 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.09838803857564926, "fcm_dpo/delta": -0.024441083893179893, "fcm_dpo/margin": 3.772266387939453, "fcm_dpo/q_t": 0.4152294099330902, "grad_norm": 24.730987548828125, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.26162657141685486, "logits/rejected": 0.22124962508678436, "logps/chosen": -65.22847747802734, "logps/ref_chosen": -57.006690979003906, "logps/ref_rejected": -73.71768188476562, "logps/rejected": -85.71173095703125, "loss": 1.1324, "margin_dpo/margin_mean": 3.772266387939453, "margin_dpo/margin_std": 5.984399318695068, "step": 135 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.09531065821647644, "fcm_dpo/delta": -0.05013541504740715, "fcm_dpo/margin": 4.44573450088501, "fcm_dpo/q_t": 0.4060499668121338, "grad_norm": 29.4134464263916, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.25046736001968384, "logits/rejected": 0.2064397782087326, "logps/chosen": -74.15589904785156, "logps/ref_chosen": -63.36246871948242, "logps/ref_rejected": -79.62621307373047, "logps/rejected": -94.8653793334961, "loss": 1.1281, "margin_dpo/margin_mean": 4.445734977722168, "margin_dpo/margin_std": 7.5196404457092285, "step": 140 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.08971674740314484, "fcm_dpo/delta": -0.04721946269273758, "fcm_dpo/margin": 4.761581897735596, "fcm_dpo/q_t": 0.40657633543014526, "grad_norm": 30.589385986328125, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.2786180078983307, "logits/rejected": 0.2569752633571625, "logps/chosen": -77.37989044189453, "logps/ref_chosen": -65.01470184326172, "logps/ref_rejected": -80.49073791503906, "logps/rejected": -97.61750793457031, "loss": 1.1506, "margin_dpo/margin_mean": 4.761581897735596, "margin_dpo/margin_std": 8.919352531433105, "step": 145 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.08510036766529083, "fcm_dpo/delta": -0.11195192486047745, "fcm_dpo/margin": 5.9401068687438965, "fcm_dpo/q_t": 0.39034393429756165, "grad_norm": 22.236682891845703, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.2792455554008484, "logits/rejected": 0.23401157557964325, "logps/chosen": -72.20897674560547, "logps/ref_chosen": -59.19135284423828, "logps/ref_rejected": -74.0339126586914, "logps/rejected": -92.99165344238281, "loss": 1.0828, "margin_dpo/margin_mean": 5.9401068687438965, "margin_dpo/margin_std": 9.036436080932617, "step": 150 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.07190684974193573, "fcm_dpo/delta": -0.13100460171699524, "fcm_dpo/margin": 7.103701591491699, "fcm_dpo/q_t": 0.38805317878723145, "grad_norm": 21.95441246032715, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.3055272698402405, "logits/rejected": 0.2715781033039093, "logps/chosen": -74.09068298339844, "logps/ref_chosen": -60.93949508666992, "logps/ref_rejected": -74.51151275634766, "logps/rejected": -94.76640319824219, "loss": 1.0779, "margin_dpo/margin_mean": 7.103701114654541, "margin_dpo/margin_std": 10.82844352722168, "step": 155 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.06413034349679947, "fcm_dpo/delta": -0.11762239784002304, "fcm_dpo/margin": 7.9449782371521, "fcm_dpo/q_t": 0.38926568627357483, "grad_norm": 20.171205520629883, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.2932426929473877, "logits/rejected": 0.25532081723213196, "logps/chosen": -73.47798919677734, "logps/ref_chosen": -58.763816833496094, "logps/ref_rejected": -74.94743347167969, "logps/rejected": -97.60658264160156, "loss": 1.0755, "margin_dpo/margin_mean": 7.9449782371521, "margin_dpo/margin_std": 12.001934051513672, "step": 160 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.05613988637924194, "fcm_dpo/delta": -0.1726417988538742, "fcm_dpo/margin": 10.18576431274414, "fcm_dpo/q_t": 0.3745308518409729, "grad_norm": 17.679018020629883, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.3483496308326721, "logits/rejected": 0.2956962287425995, "logps/chosen": -72.38264465332031, "logps/ref_chosen": -55.70417022705078, "logps/ref_rejected": -76.59439849853516, "logps/rejected": -103.45863342285156, "loss": 1.0359, "margin_dpo/margin_mean": 10.18576431274414, "margin_dpo/margin_std": 13.736506462097168, "step": 165 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.04928728565573692, "fcm_dpo/delta": -0.09952159970998764, "fcm_dpo/margin": 10.129049301147461, "fcm_dpo/q_t": 0.3913528621196747, "grad_norm": 16.242919921875, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.3343687057495117, "logits/rejected": 0.2925954759120941, "logps/chosen": -76.33804321289062, "logps/ref_chosen": -61.169105529785156, "logps/ref_rejected": -77.21674346923828, "logps/rejected": -102.5147476196289, "loss": 1.0787, "margin_dpo/margin_mean": 10.129049301147461, "margin_dpo/margin_std": 15.241543769836426, "step": 170 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.04289032891392708, "fcm_dpo/delta": -0.16773784160614014, "fcm_dpo/margin": 12.669050216674805, "fcm_dpo/q_t": 0.38352128863334656, "grad_norm": 16.280622482299805, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.4107338786125183, "logits/rejected": 0.3484509587287903, "logps/chosen": -75.43938446044922, "logps/ref_chosen": -59.24176788330078, "logps/ref_rejected": -81.80384826660156, "logps/rejected": -110.67051696777344, "loss": 1.075, "margin_dpo/margin_mean": 12.669050216674805, "margin_dpo/margin_std": 19.41705322265625, "step": 175 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.03678743541240692, "fcm_dpo/delta": -0.11334402859210968, "fcm_dpo/margin": 13.086156845092773, "fcm_dpo/q_t": 0.3945137560367584, "grad_norm": 14.899900436401367, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.3705318868160248, "logits/rejected": 0.3279619812965393, "logps/chosen": -80.0919418334961, "logps/ref_chosen": -63.24883270263672, "logps/ref_rejected": -79.00736236572266, "logps/rejected": -108.93663024902344, "loss": 1.0966, "margin_dpo/margin_mean": 13.086158752441406, "margin_dpo/margin_std": 20.56978988647461, "step": 180 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.03294721618294716, "fcm_dpo/delta": -0.11023982614278793, "fcm_dpo/margin": 13.870523452758789, "fcm_dpo/q_t": 0.39916402101516724, "grad_norm": 14.874372482299805, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.43493133783340454, "logits/rejected": 0.38104137778282166, "logps/chosen": -73.21731567382812, "logps/ref_chosen": -56.390625, "logps/ref_rejected": -76.81001281738281, "logps/rejected": -107.5072250366211, "loss": 1.1048, "margin_dpo/margin_mean": 13.870523452758789, "margin_dpo/margin_std": 21.951461791992188, "step": 185 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.02976861596107483, "fcm_dpo/delta": -0.130234032869339, "fcm_dpo/margin": 17.27200698852539, "fcm_dpo/q_t": 0.3894996643066406, "grad_norm": 21.018056869506836, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.42057228088378906, "logits/rejected": 0.36500033736228943, "logps/chosen": -86.78327178955078, "logps/ref_chosen": -68.25389099121094, "logps/ref_rejected": -86.461181640625, "logps/rejected": -122.26255798339844, "loss": 1.0765, "margin_dpo/margin_mean": 17.27200698852539, "margin_dpo/margin_std": 26.312463760375977, "step": 190 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.026562869548797607, "fcm_dpo/delta": -0.1006811112165451, "fcm_dpo/margin": 17.410314559936523, "fcm_dpo/q_t": 0.3985130786895752, "grad_norm": 17.732221603393555, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.45999032258987427, "logits/rejected": 0.4397885799407959, "logps/chosen": -90.21788024902344, "logps/ref_chosen": -62.1484260559082, "logps/ref_rejected": -71.33458709716797, "logps/rejected": -116.81434631347656, "loss": 1.1108, "margin_dpo/margin_mean": 17.41031265258789, "margin_dpo/margin_std": 28.3375301361084, "step": 195 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.02375042252242565, "fcm_dpo/delta": -0.12756529450416565, "fcm_dpo/margin": 21.052818298339844, "fcm_dpo/q_t": 0.3899378478527069, "grad_norm": 14.28109073638916, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.5520531535148621, "logits/rejected": 0.4885140061378479, "logps/chosen": -89.1515121459961, "logps/ref_chosen": -56.950096130371094, "logps/ref_rejected": -78.66989135742188, "logps/rejected": -131.92413330078125, "loss": 1.0917, "margin_dpo/margin_mean": 21.052818298339844, "margin_dpo/margin_std": 32.75715637207031, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.022144686430692673, "eval_logits/chosen": 0.5289739966392517, "eval_logits/rejected": 0.47875019907951355, "eval_logps/chosen": -112.04022979736328, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -137.68849182128906, "eval_loss": 0.5616376996040344, "eval_margin_dpo/margin_mean": 20.95873260498047, "eval_margin_dpo/margin_std": 36.02712631225586, "eval_runtime": 39.0532, "eval_samples_per_second": 58.971, "eval_steps_per_second": 1.844, "step": 200 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.020863929763436317, "fcm_dpo/delta": -0.10121381282806396, "fcm_dpo/margin": 23.369535446166992, "fcm_dpo/q_t": 0.3925306797027588, "grad_norm": 18.312232971191406, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.5444221496582031, "logits/rejected": 0.47210827469825745, "logps/chosen": -100.59618377685547, "logps/ref_chosen": -57.99428176879883, "logps/ref_rejected": -83.5367431640625, "logps/rejected": -149.5081787109375, "loss": 1.0877, "margin_dpo/margin_mean": 23.369535446166992, "margin_dpo/margin_std": 35.94400405883789, "step": 205 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.019625190645456314, "fcm_dpo/delta": -0.06917699426412582, "fcm_dpo/margin": 21.765804290771484, "fcm_dpo/q_t": 0.4085807204246521, "grad_norm": 16.501283645629883, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.5628946423530579, "logits/rejected": 0.5068370699882507, "logps/chosen": -115.35340881347656, "logps/ref_chosen": -63.77195358276367, "logps/ref_rejected": -82.56491088867188, "logps/rejected": -155.9121856689453, "loss": 1.1576, "margin_dpo/margin_mean": 21.765806198120117, "margin_dpo/margin_std": 41.01616668701172, "step": 210 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.01737585850059986, "fcm_dpo/delta": -0.14525336027145386, "fcm_dpo/margin": 31.006107330322266, "fcm_dpo/q_t": 0.3825533390045166, "grad_norm": 14.236336708068848, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.55656498670578, "logits/rejected": 0.48811864852905273, "logps/chosen": -107.94661712646484, "logps/ref_chosen": -60.27800750732422, "logps/ref_rejected": -83.91607666015625, "logps/rejected": -162.59080505371094, "loss": 1.0518, "margin_dpo/margin_mean": 31.006107330322266, "margin_dpo/margin_std": 43.699501037597656, "step": 215 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.01522024255245924, "fcm_dpo/delta": -0.08151903748512268, "fcm_dpo/margin": 28.928936004638672, "fcm_dpo/q_t": 0.4007510244846344, "grad_norm": 13.642237663269043, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.5403038263320923, "logits/rejected": 0.47905245423316956, "logps/chosen": -108.02642822265625, "logps/ref_chosen": -60.88572311401367, "logps/ref_rejected": -80.1805191040039, "logps/rejected": -156.2501678466797, "loss": 1.103, "margin_dpo/margin_mean": 28.92893409729004, "margin_dpo/margin_std": 44.418190002441406, "step": 220 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.014115704223513603, "fcm_dpo/delta": -0.08850517123937607, "fcm_dpo/margin": 32.46453857421875, "fcm_dpo/q_t": 0.3981640040874481, "grad_norm": 12.134458541870117, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.6014004945755005, "logits/rejected": 0.5260181427001953, "logps/chosen": -108.61814880371094, "logps/ref_chosen": -61.02507781982422, "logps/ref_rejected": -91.92439270019531, "logps/rejected": -171.98199462890625, "loss": 1.0861, "margin_dpo/margin_mean": 32.464542388916016, "margin_dpo/margin_std": 47.451393127441406, "step": 225 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.013391288928687572, "fcm_dpo/delta": -0.04506213217973709, "fcm_dpo/margin": 31.46584701538086, "fcm_dpo/q_t": 0.4058153033256531, "grad_norm": 13.718859672546387, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.6671124696731567, "logits/rejected": 0.6181380748748779, "logps/chosen": -106.39383697509766, "logps/ref_chosen": -54.49797821044922, "logps/ref_rejected": -71.96363830566406, "logps/rejected": -155.32534790039062, "loss": 1.1278, "margin_dpo/margin_mean": 31.46584701538086, "margin_dpo/margin_std": 53.070556640625, "step": 230 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.012218359857797623, "fcm_dpo/delta": -0.10176394134759903, "fcm_dpo/margin": 35.79335403442383, "fcm_dpo/q_t": 0.4017709791660309, "grad_norm": 15.619464874267578, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.6200426816940308, "logits/rejected": 0.5975286364555359, "logps/chosen": -124.5421142578125, "logps/ref_chosen": -63.250282287597656, "logps/ref_rejected": -73.09049987792969, "logps/rejected": -170.17568969726562, "loss": 1.1063, "margin_dpo/margin_mean": 35.79335403442383, "margin_dpo/margin_std": 54.02311325073242, "step": 235 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.011470427736639977, "fcm_dpo/delta": -0.07215714454650879, "fcm_dpo/margin": 40.14154815673828, "fcm_dpo/q_t": 0.3966708779335022, "grad_norm": 12.957676887512207, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.6564071774482727, "logits/rejected": 0.601380467414856, "logps/chosen": -138.31361389160156, "logps/ref_chosen": -65.26150512695312, "logps/ref_rejected": -87.60311126708984, "logps/rejected": -200.7967529296875, "loss": 1.0907, "margin_dpo/margin_mean": 40.14154815673828, "margin_dpo/margin_std": 59.86652755737305, "step": 240 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.010648809373378754, "fcm_dpo/delta": -0.08149583637714386, "fcm_dpo/margin": 43.929100036621094, "fcm_dpo/q_t": 0.39428311586380005, "grad_norm": 14.245344161987305, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.6480621099472046, "logits/rejected": 0.6164118051528931, "logps/chosen": -135.7429656982422, "logps/ref_chosen": -65.73170471191406, "logps/ref_rejected": -75.19642639160156, "logps/rejected": -189.1367645263672, "loss": 1.0693, "margin_dpo/margin_mean": 43.92909622192383, "margin_dpo/margin_std": 59.444854736328125, "step": 245 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.009915231727063656, "fcm_dpo/delta": -0.041826874017715454, "fcm_dpo/margin": 35.88407897949219, "fcm_dpo/q_t": 0.4197370409965515, "grad_norm": 14.5069580078125, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.7002509832382202, "logits/rejected": 0.6972779035568237, "logps/chosen": -155.23341369628906, "logps/ref_chosen": -70.71224212646484, "logps/ref_rejected": -76.12723541259766, "logps/rejected": -196.532470703125, "loss": 1.1785, "margin_dpo/margin_mean": 35.88407516479492, "margin_dpo/margin_std": 69.27667999267578, "step": 250 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.009629678912460804, "fcm_dpo/delta": -0.012941457331180573, "fcm_dpo/margin": 34.634307861328125, "fcm_dpo/q_t": 0.4239214360713959, "grad_norm": 15.92194938659668, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.6967302560806274, "logits/rejected": 0.6524414420127869, "logps/chosen": -145.23605346679688, "logps/ref_chosen": -61.767662048339844, "logps/ref_rejected": -77.38813018798828, "logps/rejected": -195.49081420898438, "loss": 1.1757, "margin_dpo/margin_mean": 34.63430404663086, "margin_dpo/margin_std": 64.64119720458984, "step": 255 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.009221619926393032, "fcm_dpo/delta": -0.06601964682340622, "fcm_dpo/margin": 48.99907684326172, "fcm_dpo/q_t": 0.39814695715904236, "grad_norm": 19.110883712768555, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.6849242448806763, "logits/rejected": 0.603645384311676, "logps/chosen": -148.38861083984375, "logps/ref_chosen": -61.57584762573242, "logps/ref_rejected": -91.87513732910156, "logps/rejected": -227.6869659423828, "loss": 1.0902, "margin_dpo/margin_mean": 48.99907684326172, "margin_dpo/margin_std": 72.31168365478516, "step": 260 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.008941135369241238, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 41.382564544677734, "fcm_dpo/q_t": 0.41540035605430603, "grad_norm": 11.394349098205566, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.643638551235199, "logits/rejected": 0.5954387187957764, "logps/chosen": -147.72586059570312, "logps/ref_chosen": -65.75422668457031, "logps/ref_rejected": -77.9569320678711, "logps/rejected": -201.31112670898438, "loss": 1.1392, "margin_dpo/margin_mean": 41.38256072998047, "margin_dpo/margin_std": 68.18827819824219, "step": 265 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.008837602101266384, "fcm_dpo/delta": -0.02355731837451458, "fcm_dpo/margin": 42.396751403808594, "fcm_dpo/q_t": 0.41459059715270996, "grad_norm": 15.444929122924805, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.6901830434799194, "logits/rejected": 0.6426895260810852, "logps/chosen": -138.05111694335938, "logps/ref_chosen": -62.27649688720703, "logps/ref_rejected": -76.56950378417969, "logps/rejected": -194.7408905029297, "loss": 1.1371, "margin_dpo/margin_mean": 42.396751403808594, "margin_dpo/margin_std": 68.82071685791016, "step": 270 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.008553928695619106, "fcm_dpo/delta": -0.03487258031964302, "fcm_dpo/margin": 46.58140182495117, "fcm_dpo/q_t": 0.40970954298973083, "grad_norm": 20.471668243408203, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.6841222047805786, "logits/rejected": 0.6400257349014282, "logps/chosen": -143.00296020507812, "logps/ref_chosen": -61.854393005371094, "logps/ref_rejected": -77.22246551513672, "logps/rejected": -204.95242309570312, "loss": 1.1265, "margin_dpo/margin_mean": 46.58140182495117, "margin_dpo/margin_std": 74.56303405761719, "step": 275 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.008292925544083118, "fcm_dpo/delta": -0.060983072966337204, "fcm_dpo/margin": 49.10211944580078, "fcm_dpo/q_t": 0.4059298038482666, "grad_norm": 13.393473625183105, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.6645344495773315, "logits/rejected": 0.6346439719200134, "logps/chosen": -146.82679748535156, "logps/ref_chosen": -61.29896926879883, "logps/ref_rejected": -73.35762023925781, "logps/rejected": -207.98757934570312, "loss": 1.1041, "margin_dpo/margin_mean": 49.10211944580078, "margin_dpo/margin_std": 69.17396545410156, "step": 280 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.007784596644341946, "fcm_dpo/delta": -0.027562415227293968, "fcm_dpo/margin": 48.093055725097656, "fcm_dpo/q_t": 0.41425347328186035, "grad_norm": 16.781484603881836, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.6967326402664185, "logits/rejected": 0.6534906625747681, "logps/chosen": -159.16049194335938, "logps/ref_chosen": -63.435462951660156, "logps/ref_rejected": -79.73661804199219, "logps/rejected": -223.55471801757812, "loss": 1.1319, "margin_dpo/margin_mean": 48.093055725097656, "margin_dpo/margin_std": 75.91288757324219, "step": 285 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.007469588425010443, "fcm_dpo/delta": -0.042763665318489075, "fcm_dpo/margin": 55.20690155029297, "fcm_dpo/q_t": 0.40540584921836853, "grad_norm": 15.624042510986328, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.711457371711731, "logits/rejected": 0.650222659111023, "logps/chosen": -162.69920349121094, "logps/ref_chosen": -57.696876525878906, "logps/ref_rejected": -79.78132629394531, "logps/rejected": -239.9905548095703, "loss": 1.0921, "margin_dpo/margin_mean": 55.20690155029297, "margin_dpo/margin_std": 75.91992950439453, "step": 290 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.00719553604722023, "fcm_dpo/delta": -0.038509003818035126, "fcm_dpo/margin": 52.451446533203125, "fcm_dpo/q_t": 0.4142046570777893, "grad_norm": 17.099523544311523, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.7705697417259216, "logits/rejected": 0.6970144510269165, "logps/chosen": -168.26991271972656, "logps/ref_chosen": -55.430633544921875, "logps/ref_rejected": -78.1390151977539, "logps/rejected": -243.4297332763672, "loss": 1.1278, "margin_dpo/margin_mean": 52.451446533203125, "margin_dpo/margin_std": 81.40741729736328, "step": 295 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.007098735310137272, "fcm_dpo/delta": -0.0035702171735465527, "fcm_dpo/margin": 45.36979293823242, "fcm_dpo/q_t": 0.4261544346809387, "grad_norm": 12.7843656539917, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.707840621471405, "logits/rejected": 0.6562869548797607, "logps/chosen": -185.88197326660156, "logps/ref_chosen": -61.207069396972656, "logps/ref_rejected": -75.23294067382812, "logps/rejected": -245.2776336669922, "loss": 1.1763, "margin_dpo/margin_mean": 45.36979293823242, "margin_dpo/margin_std": 83.63997650146484, "step": 300 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.007054163608700037, "fcm_dpo/delta": -0.011994509026408195, "fcm_dpo/margin": 47.528289794921875, "fcm_dpo/q_t": 0.423266738653183, "grad_norm": 14.771221160888672, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.6945724487304688, "logits/rejected": 0.6594172716140747, "logps/chosen": -184.41525268554688, "logps/ref_chosen": -63.06663131713867, "logps/ref_rejected": -78.45845031738281, "logps/rejected": -247.3353729248047, "loss": 1.1687, "margin_dpo/margin_mean": 47.52829360961914, "margin_dpo/margin_std": 85.50675201416016, "step": 305 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.006967984139919281, "fcm_dpo/delta": -0.005523760803043842, "fcm_dpo/margin": 38.326297760009766, "fcm_dpo/q_t": 0.43881019949913025, "grad_norm": 12.633563995361328, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.7481607794761658, "logits/rejected": 0.7011669874191284, "logps/chosen": -181.23231506347656, "logps/ref_chosen": -63.60908889770508, "logps/ref_rejected": -74.06394958496094, "logps/rejected": -230.0135040283203, "loss": 1.2204, "margin_dpo/margin_mean": 38.3262939453125, "margin_dpo/margin_std": 83.02953338623047, "step": 310 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.006899350322782993, "fcm_dpo/delta": -0.015122666954994202, "fcm_dpo/margin": 49.54688262939453, "fcm_dpo/q_t": 0.4215819239616394, "grad_norm": 12.57418441772461, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.6818271279335022, "logits/rejected": 0.6326395869255066, "logps/chosen": -168.5291748046875, "logps/ref_chosen": -62.31493377685547, "logps/ref_rejected": -75.07472229003906, "logps/rejected": -230.8358612060547, "loss": 1.1467, "margin_dpo/margin_mean": 49.54688262939453, "margin_dpo/margin_std": 79.58061981201172, "step": 315 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.00665700901299715, "fcm_dpo/delta": -0.0466286763548851, "fcm_dpo/margin": 58.286048889160156, "fcm_dpo/q_t": 0.41121044754981995, "grad_norm": 11.143263816833496, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.6711269617080688, "logits/rejected": 0.6105794310569763, "logps/chosen": -152.8118133544922, "logps/ref_chosen": -55.336036682128906, "logps/ref_rejected": -80.05536651611328, "logps/rejected": -235.81716918945312, "loss": 1.1163, "margin_dpo/margin_mean": 58.286048889160156, "margin_dpo/margin_std": 85.82135009765625, "step": 320 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.006425836123526096, "fcm_dpo/delta": -0.025686081498861313, "fcm_dpo/margin": 57.461631774902344, "fcm_dpo/q_t": 0.4144781231880188, "grad_norm": 10.588150978088379, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.6904675364494324, "logits/rejected": 0.63264399766922, "logps/chosen": -163.8726043701172, "logps/ref_chosen": -57.90629959106445, "logps/ref_rejected": -74.2243881225586, "logps/rejected": -237.65234375, "loss": 1.1236, "margin_dpo/margin_mean": 57.461631774902344, "margin_dpo/margin_std": 84.99284362792969, "step": 325 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.006382169667631388, "fcm_dpo/delta": -0.0038101542741060257, "fcm_dpo/margin": 48.41600799560547, "fcm_dpo/q_t": 0.42902547121047974, "grad_norm": 11.327309608459473, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.6518532037734985, "logits/rejected": 0.615720272064209, "logps/chosen": -183.9138641357422, "logps/ref_chosen": -65.17555236816406, "logps/ref_rejected": -78.53681182861328, "logps/rejected": -245.69113159179688, "loss": 1.1791, "margin_dpo/margin_mean": 48.41600799560547, "margin_dpo/margin_std": 88.16541290283203, "step": 330 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.0062421816401183605, "fcm_dpo/delta": -0.024326926097273827, "fcm_dpo/margin": 60.58441162109375, "fcm_dpo/q_t": 0.4129098057746887, "grad_norm": 14.136266708374023, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.7140273451805115, "logits/rejected": 0.6678114533424377, "logps/chosen": -181.29025268554688, "logps/ref_chosen": -62.62797927856445, "logps/ref_rejected": -79.9095458984375, "logps/rejected": -259.15618896484375, "loss": 1.116, "margin_dpo/margin_mean": 60.58441162109375, "margin_dpo/margin_std": 88.1111831665039, "step": 335 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.006153796333819628, "fcm_dpo/delta": -0.008682211861014366, "fcm_dpo/margin": 52.959136962890625, "fcm_dpo/q_t": 0.4254566729068756, "grad_norm": 11.051826477050781, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.6959365010261536, "logits/rejected": 0.6593111753463745, "logps/chosen": -193.18997192382812, "logps/ref_chosen": -61.1064567565918, "logps/ref_rejected": -76.71846008300781, "logps/rejected": -261.7611083984375, "loss": 1.1653, "margin_dpo/margin_mean": 52.959144592285156, "margin_dpo/margin_std": 92.80432891845703, "step": 340 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.00603325804695487, "fcm_dpo/delta": -0.01978192664682865, "fcm_dpo/margin": 59.29703903198242, "fcm_dpo/q_t": 0.41810742020606995, "grad_norm": 11.418231010437012, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.6984354853630066, "logits/rejected": 0.6573707461357117, "logps/chosen": -188.5522003173828, "logps/ref_chosen": -60.12370681762695, "logps/ref_rejected": -78.58574676513672, "logps/rejected": -266.3112487792969, "loss": 1.1408, "margin_dpo/margin_mean": 59.29703903198242, "margin_dpo/margin_std": 95.01673889160156, "step": 345 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.005945051088929176, "fcm_dpo/delta": -0.04099785163998604, "fcm_dpo/margin": 68.73493957519531, "fcm_dpo/q_t": 0.40639615058898926, "grad_norm": 13.632902145385742, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.7776240706443787, "logits/rejected": 0.7138788104057312, "logps/chosen": -179.670166015625, "logps/ref_chosen": -55.104461669921875, "logps/ref_rejected": -80.63292694091797, "logps/rejected": -273.9335632324219, "loss": 1.0955, "margin_dpo/margin_mean": 68.73492431640625, "margin_dpo/margin_std": 94.61377716064453, "step": 350 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.005738373845815659, "fcm_dpo/delta": -0.02628186345100403, "fcm_dpo/margin": 70.14204406738281, "fcm_dpo/q_t": 0.4074961543083191, "grad_norm": 11.920957565307617, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.7566885352134705, "logits/rejected": 0.6853598356246948, "logps/chosen": -179.594970703125, "logps/ref_chosen": -54.87224197387695, "logps/ref_rejected": -77.01316833496094, "logps/rejected": -271.8779602050781, "loss": 1.1034, "margin_dpo/margin_mean": 70.14204406738281, "margin_dpo/margin_std": 100.20321655273438, "step": 355 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.005551532376557589, "fcm_dpo/delta": -0.027073120698332787, "fcm_dpo/margin": 58.166900634765625, "fcm_dpo/q_t": 0.42612725496292114, "grad_norm": 11.87866497039795, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.7266971468925476, "logits/rejected": 0.6876090168952942, "logps/chosen": -207.0313262939453, "logps/ref_chosen": -60.75285720825195, "logps/ref_rejected": -75.21507263183594, "logps/rejected": -279.6604309082031, "loss": 1.1756, "margin_dpo/margin_mean": 58.166900634765625, "margin_dpo/margin_std": 105.21885681152344, "step": 360 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.0053816549479961395, "fcm_dpo/delta": -0.03808388113975525, "fcm_dpo/margin": 80.42271423339844, "fcm_dpo/q_t": 0.4026545584201813, "grad_norm": 10.223109245300293, "learning_rate": 2.5e-07, "logits/chosen": 0.7709358930587769, "logits/rejected": 0.7040495872497559, "logps/chosen": -198.53555297851562, "logps/ref_chosen": -58.56513595581055, "logps/ref_rejected": -84.06403350830078, "logps/rejected": -304.4571533203125, "loss": 1.0842, "margin_dpo/margin_mean": 80.42271423339844, "margin_dpo/margin_std": 112.6312026977539, "step": 365 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.005276652052998543, "fcm_dpo/delta": -0.007817991077899933, "fcm_dpo/margin": 64.3998031616211, "fcm_dpo/q_t": 0.4214501976966858, "grad_norm": 12.37844181060791, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.7387269735336304, "logits/rejected": 0.6979778409004211, "logps/chosen": -199.47242736816406, "logps/ref_chosen": -59.443138122558594, "logps/ref_rejected": -75.80937194824219, "logps/rejected": -280.23846435546875, "loss": 1.1548, "margin_dpo/margin_mean": 64.3998031616211, "margin_dpo/margin_std": 108.75566101074219, "step": 370 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.005195076577365398, "fcm_dpo/delta": -0.016707830131053925, "fcm_dpo/margin": 63.93414306640625, "fcm_dpo/q_t": 0.4242404103279114, "grad_norm": 14.827937126159668, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.7876321077346802, "logits/rejected": 0.7421884536743164, "logps/chosen": -198.78579711914062, "logps/ref_chosen": -58.59185028076172, "logps/ref_rejected": -73.7529525756836, "logps/rejected": -277.88104248046875, "loss": 1.1759, "margin_dpo/margin_mean": 63.93414306640625, "margin_dpo/margin_std": 119.758544921875, "step": 375 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.005114838946610689, "fcm_dpo/delta": -0.013521865010261536, "fcm_dpo/margin": 71.66112518310547, "fcm_dpo/q_t": 0.41665583848953247, "grad_norm": 12.766351699829102, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.7841566801071167, "logits/rejected": 0.7393085956573486, "logps/chosen": -195.7206573486328, "logps/ref_chosen": -58.93424606323242, "logps/ref_rejected": -76.27055358886719, "logps/rejected": -284.71807861328125, "loss": 1.1354, "margin_dpo/margin_mean": 71.66112518310547, "margin_dpo/margin_std": 114.41337585449219, "step": 380 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.0050823139026761055, "fcm_dpo/delta": -0.010413008742034435, "fcm_dpo/margin": 60.48808670043945, "fcm_dpo/q_t": 0.4290579855442047, "grad_norm": 9.910415649414062, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.7422327399253845, "logits/rejected": 0.723129153251648, "logps/chosen": -211.396728515625, "logps/ref_chosen": -66.42684173583984, "logps/ref_rejected": -76.96304321289062, "logps/rejected": -282.4209899902344, "loss": 1.1822, "margin_dpo/margin_mean": 60.48808670043945, "margin_dpo/margin_std": 111.88542175292969, "step": 385 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.005034881643950939, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 65.50511169433594, "fcm_dpo/q_t": 0.42413240671157837, "grad_norm": 11.989714622497559, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.7661797404289246, "logits/rejected": 0.7171558141708374, "logps/chosen": -205.19943237304688, "logps/ref_chosen": -60.984214782714844, "logps/ref_rejected": -79.54056549072266, "logps/rejected": -289.26092529296875, "loss": 1.1583, "margin_dpo/margin_mean": 65.50511169433594, "margin_dpo/margin_std": 110.4152603149414, "step": 390 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.0049595460295677185, "fcm_dpo/delta": -0.023463377729058266, "fcm_dpo/margin": 75.90830993652344, "fcm_dpo/q_t": 0.4133135676383972, "grad_norm": 11.980950355529785, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.7978567481040955, "logits/rejected": 0.7469469308853149, "logps/chosen": -197.7019805908203, "logps/ref_chosen": -58.30937957763672, "logps/ref_rejected": -80.09587097167969, "logps/rejected": -295.39678955078125, "loss": 1.1188, "margin_dpo/margin_mean": 75.90830993652344, "margin_dpo/margin_std": 111.34185791015625, "step": 395 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.004786391276866198, "fcm_dpo/delta": -0.049943797290325165, "fcm_dpo/margin": 75.52302551269531, "fcm_dpo/q_t": 0.41768354177474976, "grad_norm": 14.266228675842285, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.7723232507705688, "logits/rejected": 0.6996694207191467, "logps/chosen": -212.7456512451172, "logps/ref_chosen": -61.39867401123047, "logps/ref_rejected": -89.0177993774414, "logps/rejected": -315.8878173828125, "loss": 1.134, "margin_dpo/margin_mean": 75.52302551269531, "margin_dpo/margin_std": 114.94172668457031, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.004678524564951658, "eval_logits/chosen": 0.7730867862701416, "eval_logits/rejected": 0.7383347749710083, "eval_logps/chosen": -231.1050567626953, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -297.61297607421875, "eval_loss": 0.5988211035728455, "eval_margin_dpo/margin_mean": 61.81840133666992, "eval_margin_dpo/margin_std": 122.34429931640625, "eval_runtime": 39.0005, "eval_samples_per_second": 59.051, "eval_steps_per_second": 1.846, "step": 400 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.004617620259523392, "fcm_dpo/delta": -0.021043911576271057, "fcm_dpo/margin": 78.88172149658203, "fcm_dpo/q_t": 0.41677242517471313, "grad_norm": 9.603548049926758, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.8381478190422058, "logits/rejected": 0.7813048958778381, "logps/chosen": -199.7872772216797, "logps/ref_chosen": -55.953521728515625, "logps/ref_rejected": -77.67539978027344, "logps/rejected": -300.390869140625, "loss": 1.1288, "margin_dpo/margin_mean": 78.88172149658203, "margin_dpo/margin_std": 118.5303726196289, "step": 405 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.0044863419607281685, "fcm_dpo/delta": -0.03810073807835579, "fcm_dpo/margin": 80.86897277832031, "fcm_dpo/q_t": 0.4153751730918884, "grad_norm": 10.528109550476074, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.800028920173645, "logits/rejected": 0.7490849494934082, "logps/chosen": -218.7476043701172, "logps/ref_chosen": -63.40419387817383, "logps/ref_rejected": -80.85710144042969, "logps/rejected": -317.06951904296875, "loss": 1.1232, "margin_dpo/margin_mean": 80.86897277832031, "margin_dpo/margin_std": 114.73890686035156, "step": 410 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.004409838933497667, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 74.53548431396484, "fcm_dpo/q_t": 0.42364954948425293, "grad_norm": 9.875075340270996, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.8113610148429871, "logits/rejected": 0.7791782021522522, "logps/chosen": -228.55026245117188, "logps/ref_chosen": -57.6942024230957, "logps/ref_rejected": -71.74036407470703, "logps/rejected": -317.13189697265625, "loss": 1.1495, "margin_dpo/margin_mean": 74.53548431396484, "margin_dpo/margin_std": 117.6064682006836, "step": 415 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.004353252239525318, "fcm_dpo/delta": -0.016652632504701614, "fcm_dpo/margin": 68.00981140136719, "fcm_dpo/q_t": 0.4319809377193451, "grad_norm": 12.201674461364746, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.8484581708908081, "logits/rejected": 0.819144606590271, "logps/chosen": -241.6039581298828, "logps/ref_chosen": -59.169517517089844, "logps/ref_rejected": -69.47721099853516, "logps/rejected": -319.92144775390625, "loss": 1.1926, "margin_dpo/margin_mean": 68.00981140136719, "margin_dpo/margin_std": 129.67337036132812, "step": 420 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.00428891833871603, "fcm_dpo/delta": -0.021387049928307533, "fcm_dpo/margin": 82.73738861083984, "fcm_dpo/q_t": 0.4183295667171478, "grad_norm": 9.915822982788086, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.8384604454040527, "logits/rejected": 0.809127151966095, "logps/chosen": -226.8582763671875, "logps/ref_chosen": -58.09320831298828, "logps/ref_rejected": -73.98226165771484, "logps/rejected": -325.4847412109375, "loss": 1.1363, "margin_dpo/margin_mean": 82.73738861083984, "margin_dpo/margin_std": 128.06600952148438, "step": 425 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.004245240706950426, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 70.30382537841797, "fcm_dpo/q_t": 0.4308013319969177, "grad_norm": 11.620451927185059, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.8153077363967896, "logits/rejected": 0.803315281867981, "logps/chosen": -252.0271759033203, "logps/ref_chosen": -62.7039909362793, "logps/ref_rejected": -74.52284240722656, "logps/rejected": -334.14984130859375, "loss": 1.1889, "margin_dpo/margin_mean": 70.30382537841797, "margin_dpo/margin_std": 133.47201538085938, "step": 430 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.004219419322907925, "fcm_dpo/delta": -0.015322555787861347, "fcm_dpo/margin": 82.38020324707031, "fcm_dpo/q_t": 0.41900143027305603, "grad_norm": 11.40176773071289, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.8734161257743835, "logits/rejected": 0.8401018977165222, "logps/chosen": -224.8971710205078, "logps/ref_chosen": -56.12516403198242, "logps/ref_rejected": -74.36073303222656, "logps/rejected": -325.5129089355469, "loss": 1.1412, "margin_dpo/margin_mean": 82.38020324707031, "margin_dpo/margin_std": 129.3118133544922, "step": 435 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.0041299303993582726, "fcm_dpo/delta": -0.023912524804472923, "fcm_dpo/margin": 82.6595230102539, "fcm_dpo/q_t": 0.4219323992729187, "grad_norm": 13.837409019470215, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.835965633392334, "logits/rejected": 0.7754732966423035, "logps/chosen": -231.00851440429688, "logps/ref_chosen": -55.67548751831055, "logps/ref_rejected": -76.62055206298828, "logps/rejected": -334.6131286621094, "loss": 1.1507, "margin_dpo/margin_mean": 82.65951538085938, "margin_dpo/margin_std": 135.90968322753906, "step": 440 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.004010509233921766, "fcm_dpo/delta": -0.024360598996281624, "fcm_dpo/margin": 94.17513275146484, "fcm_dpo/q_t": 0.41288742423057556, "grad_norm": 9.381706237792969, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.8353230357170105, "logits/rejected": 0.7917869091033936, "logps/chosen": -241.6427001953125, "logps/ref_chosen": -59.903411865234375, "logps/ref_rejected": -82.02873229980469, "logps/rejected": -357.94317626953125, "loss": 1.111, "margin_dpo/margin_mean": 94.17513275146484, "margin_dpo/margin_std": 131.37210083007812, "step": 445 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.003899561706930399, "fcm_dpo/delta": -0.024923671036958694, "fcm_dpo/margin": 86.66099548339844, "fcm_dpo/q_t": 0.4215773642063141, "grad_norm": 10.855829238891602, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.8515201807022095, "logits/rejected": 0.7979717254638672, "logps/chosen": -237.58364868164062, "logps/ref_chosen": -55.83526611328125, "logps/ref_rejected": -79.63658142089844, "logps/rejected": -348.04595947265625, "loss": 1.1448, "margin_dpo/margin_mean": 86.66099548339844, "margin_dpo/margin_std": 133.32977294921875, "step": 450 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.0038856077007949352, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 78.07716369628906, "fcm_dpo/q_t": 0.4294430613517761, "grad_norm": 11.119799613952637, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.8199604153633118, "logits/rejected": 0.7901821136474609, "logps/chosen": -240.78634643554688, "logps/ref_chosen": -60.59226608276367, "logps/ref_rejected": -73.37936401367188, "logps/rejected": -331.6506042480469, "loss": 1.1685, "margin_dpo/margin_mean": 78.07716369628906, "margin_dpo/margin_std": 132.1455535888672, "step": 455 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.0038638408295810223, "fcm_dpo/delta": -0.020884912461042404, "fcm_dpo/margin": 93.4942855834961, "fcm_dpo/q_t": 0.41626229882240295, "grad_norm": 9.670624732971191, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.8717799186706543, "logits/rejected": 0.808576226234436, "logps/chosen": -236.9020538330078, "logps/ref_chosen": -56.21283721923828, "logps/ref_rejected": -83.02075958251953, "logps/rejected": -357.20428466796875, "loss": 1.1265, "margin_dpo/margin_mean": 93.4942855834961, "margin_dpo/margin_std": 139.18475341796875, "step": 460 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.0037416815757751465, "fcm_dpo/delta": -0.01730378530919552, "fcm_dpo/margin": 90.57196044921875, "fcm_dpo/q_t": 0.4205476641654968, "grad_norm": 11.144495964050293, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.8878629803657532, "logits/rejected": 0.8561855554580688, "logps/chosen": -241.06063842773438, "logps/ref_chosen": -59.0674934387207, "logps/ref_rejected": -74.53498840332031, "logps/rejected": -347.10009765625, "loss": 1.1409, "margin_dpo/margin_mean": 90.57195281982422, "margin_dpo/margin_std": 135.90689086914062, "step": 465 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.0037400186993181705, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 81.56043243408203, "fcm_dpo/q_t": 0.4288889467716217, "grad_norm": 10.959892272949219, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.8900365829467773, "logits/rejected": 0.856399655342102, "logps/chosen": -243.0541229248047, "logps/ref_chosen": -58.3397331237793, "logps/ref_rejected": -74.33660125732422, "logps/rejected": -340.6114501953125, "loss": 1.1733, "margin_dpo/margin_mean": 81.56043243408203, "margin_dpo/margin_std": 142.09490966796875, "step": 470 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.0037331648636609316, "fcm_dpo/delta": -0.00920518022030592, "fcm_dpo/margin": 99.83604431152344, "fcm_dpo/q_t": 0.4153580069541931, "grad_norm": 7.998687267303467, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.8998042345046997, "logits/rejected": 0.8499566912651062, "logps/chosen": -225.0286102294922, "logps/ref_chosen": -54.60407638549805, "logps/ref_rejected": -79.94635009765625, "logps/rejected": -350.2068786621094, "loss": 1.1275, "margin_dpo/margin_mean": 99.83604431152344, "margin_dpo/margin_std": 155.26611328125, "step": 475 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.003702650312334299, "fcm_dpo/delta": -0.008397220633924007, "fcm_dpo/margin": 73.63870239257812, "fcm_dpo/q_t": 0.43734756112098694, "grad_norm": 14.331131935119629, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.853714108467102, "logits/rejected": 0.8454058766365051, "logps/chosen": -257.2554016113281, "logps/ref_chosen": -63.0672492980957, "logps/ref_rejected": -68.59602355957031, "logps/rejected": -336.4228820800781, "loss": 1.2098, "margin_dpo/margin_mean": 73.63871002197266, "margin_dpo/margin_std": 151.20285034179688, "step": 480 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.0036670055706053972, "fcm_dpo/delta": -0.004230070393532515, "fcm_dpo/margin": 85.2213363647461, "fcm_dpo/q_t": 0.4283173978328705, "grad_norm": 11.36840534210205, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.8761838674545288, "logits/rejected": 0.8311912417411804, "logps/chosen": -251.0254669189453, "logps/ref_chosen": -58.75799560546875, "logps/ref_rejected": -79.72233581542969, "logps/rejected": -357.21112060546875, "loss": 1.1635, "margin_dpo/margin_mean": 85.2213363647461, "margin_dpo/margin_std": 141.01046752929688, "step": 485 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.003649166552349925, "fcm_dpo/delta": -0.005526586435735226, "fcm_dpo/margin": 79.44273376464844, "fcm_dpo/q_t": 0.4346071183681488, "grad_norm": 9.696874618530273, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.9166892170906067, "logits/rejected": 0.8767145872116089, "logps/chosen": -262.38348388671875, "logps/ref_chosen": -61.394195556640625, "logps/ref_rejected": -81.1914291381836, "logps/rejected": -361.6234436035156, "loss": 1.1984, "margin_dpo/margin_mean": 79.44273376464844, "margin_dpo/margin_std": 160.96810913085938, "step": 490 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.0036221942864358425, "fcm_dpo/delta": -0.009324881248176098, "fcm_dpo/margin": 80.0656967163086, "fcm_dpo/q_t": 0.43500107526779175, "grad_norm": 14.121245384216309, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.8733257055282593, "logits/rejected": 0.8286750912666321, "logps/chosen": -253.50320434570312, "logps/ref_chosen": -59.85382843017578, "logps/ref_rejected": -80.63748931884766, "logps/rejected": -354.3525695800781, "loss": 1.1983, "margin_dpo/margin_mean": 80.06568908691406, "margin_dpo/margin_std": 160.94277954101562, "step": 495 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.0036053061485290527, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 94.49732971191406, "fcm_dpo/q_t": 0.4224638342857361, "grad_norm": 11.681142807006836, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.8845356106758118, "logits/rejected": 0.8533564805984497, "logps/chosen": -260.6972351074219, "logps/ref_chosen": -66.17753601074219, "logps/ref_rejected": -83.75955200195312, "logps/rejected": -372.7765808105469, "loss": 1.1603, "margin_dpo/margin_mean": 94.49732971191406, "margin_dpo/margin_std": 167.73483276367188, "step": 500 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.0035957619547843933, "fcm_dpo/delta": -0.008863715454936028, "fcm_dpo/margin": 81.07406616210938, "fcm_dpo/q_t": 0.431431382894516, "grad_norm": 11.598374366760254, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.8734153509140015, "logits/rejected": 0.8628484010696411, "logps/chosen": -250.24264526367188, "logps/ref_chosen": -62.11005401611328, "logps/ref_rejected": -74.64705657958984, "logps/rejected": -343.85369873046875, "loss": 1.1724, "margin_dpo/margin_mean": 81.07406616210938, "margin_dpo/margin_std": 132.01162719726562, "step": 505 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.0035542245022952557, "fcm_dpo/delta": -0.010841513983905315, "fcm_dpo/margin": 93.92044067382812, "fcm_dpo/q_t": 0.42539000511169434, "grad_norm": 10.781188011169434, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.8776324391365051, "logits/rejected": 0.8372354507446289, "logps/chosen": -260.9776916503906, "logps/ref_chosen": -64.42265319824219, "logps/ref_rejected": -87.00096130371094, "logps/rejected": -377.47650146484375, "loss": 1.1734, "margin_dpo/margin_mean": 93.92044830322266, "margin_dpo/margin_std": 176.3047637939453, "step": 510 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.0035349582321941853, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 90.1122055053711, "fcm_dpo/q_t": 0.42532816529273987, "grad_norm": 9.599004745483398, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.9000862240791321, "logits/rejected": 0.8689319491386414, "logps/chosen": -247.7484893798828, "logps/ref_chosen": -58.284393310546875, "logps/ref_rejected": -79.09356689453125, "logps/rejected": -358.6697998046875, "loss": 1.1575, "margin_dpo/margin_mean": 90.11221313476562, "margin_dpo/margin_std": 144.56326293945312, "step": 515 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.0035349582321941853, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 71.46282958984375, "fcm_dpo/q_t": 0.4412451684474945, "grad_norm": 13.013392448425293, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.9094135165214539, "logits/rejected": 0.8865984678268433, "logps/chosen": -254.99520874023438, "logps/ref_chosen": -61.03638458251953, "logps/ref_rejected": -72.15824890136719, "logps/rejected": -337.57989501953125, "loss": 1.2161, "margin_dpo/margin_mean": 71.46283721923828, "margin_dpo/margin_std": 148.10073852539062, "step": 520 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.0035349582321941853, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 81.25996398925781, "fcm_dpo/q_t": 0.43352437019348145, "grad_norm": 13.817595481872559, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.8353425860404968, "logits/rejected": 0.804462730884552, "logps/chosen": -264.1961669921875, "logps/ref_chosen": -68.02732849121094, "logps/ref_rejected": -85.41429901123047, "logps/rejected": -362.8431396484375, "loss": 1.1911, "margin_dpo/margin_mean": 81.25996398925781, "margin_dpo/margin_std": 154.6202850341797, "step": 525 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.0035310834646224976, "fcm_dpo/delta": -0.003660431830212474, "fcm_dpo/margin": 77.5444564819336, "fcm_dpo/q_t": 0.4368818402290344, "grad_norm": 12.4998779296875, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.8905463218688965, "logits/rejected": 0.8446179628372192, "logps/chosen": -255.26919555664062, "logps/ref_chosen": -58.67436599731445, "logps/ref_rejected": -79.38807678222656, "logps/rejected": -353.52734375, "loss": 1.1992, "margin_dpo/margin_mean": 77.5444564819336, "margin_dpo/margin_std": 147.32528686523438, "step": 530 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.003522042650729418, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 82.69160461425781, "fcm_dpo/q_t": 0.4328169822692871, "grad_norm": 11.253222465515137, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.8829957842826843, "logits/rejected": 0.842154324054718, "logps/chosen": -248.08706665039062, "logps/ref_chosen": -57.640098571777344, "logps/ref_rejected": -77.25399780273438, "logps/rejected": -350.39251708984375, "loss": 1.1841, "margin_dpo/margin_mean": 82.69161224365234, "margin_dpo/margin_std": 150.93914794921875, "step": 535 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.0035197685938328505, "fcm_dpo/delta": -0.006476428359746933, "fcm_dpo/margin": 101.35395050048828, "fcm_dpo/q_t": 0.4183521270751953, "grad_norm": 9.733626365661621, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.9086763262748718, "logits/rejected": 0.8577451705932617, "logps/chosen": -256.18011474609375, "logps/ref_chosen": -60.17341995239258, "logps/ref_rejected": -85.50316619873047, "logps/rejected": -382.8638000488281, "loss": 1.1288, "margin_dpo/margin_mean": 101.35395050048828, "margin_dpo/margin_std": 152.10256958007812, "step": 540 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.00349930627271533, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 94.39086151123047, "fcm_dpo/q_t": 0.4234938621520996, "grad_norm": 12.908324241638184, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.917340874671936, "logits/rejected": 0.8850774765014648, "logps/chosen": -242.275390625, "logps/ref_chosen": -56.985809326171875, "logps/ref_rejected": -73.21353912353516, "logps/rejected": -352.8939514160156, "loss": 1.1519, "margin_dpo/margin_mean": 94.39086151123047, "margin_dpo/margin_std": 151.55343627929688, "step": 545 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.0034850898664444685, "fcm_dpo/delta": -0.00509117916226387, "fcm_dpo/margin": 76.22270202636719, "fcm_dpo/q_t": 0.4382683336734772, "grad_norm": 11.022492408752441, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.912948489189148, "logits/rejected": 0.8859111666679382, "logps/chosen": -255.4915008544922, "logps/ref_chosen": -59.600929260253906, "logps/ref_rejected": -75.24870300292969, "logps/rejected": -347.3619689941406, "loss": 1.2114, "margin_dpo/margin_mean": 76.22270202636719, "margin_dpo/margin_std": 156.75267028808594, "step": 550 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.003481535706669092, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 83.40065002441406, "fcm_dpo/q_t": 0.43253573775291443, "grad_norm": 11.873709678649902, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.9214051365852356, "logits/rejected": 0.8947726488113403, "logps/chosen": -266.4524841308594, "logps/ref_chosen": -63.578895568847656, "logps/ref_rejected": -78.87867736816406, "logps/rejected": -365.1529235839844, "loss": 1.1841, "margin_dpo/margin_mean": 83.4006576538086, "margin_dpo/margin_std": 151.18142700195312, "step": 555 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.003460574196651578, "fcm_dpo/delta": -0.009147383272647858, "fcm_dpo/margin": 102.399658203125, "fcm_dpo/q_t": 0.4190526604652405, "grad_norm": 12.284520149230957, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.9413592219352722, "logits/rejected": 0.9070916175842285, "logps/chosen": -256.690673828125, "logps/ref_chosen": -58.651512145996094, "logps/ref_rejected": -78.67181396484375, "logps/rejected": -379.1106262207031, "loss": 1.1302, "margin_dpo/margin_mean": 102.399658203125, "margin_dpo/margin_std": 151.68862915039062, "step": 560 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.0034138336777687073, "fcm_dpo/delta": -0.013129929080605507, "fcm_dpo/margin": 100.43526458740234, "fcm_dpo/q_t": 0.4204806387424469, "grad_norm": 12.020137786865234, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.9278671145439148, "logits/rejected": 0.9034161567687988, "logps/chosen": -253.25778198242188, "logps/ref_chosen": -60.3114128112793, "logps/ref_rejected": -78.25270080566406, "logps/rejected": -371.63433837890625, "loss": 1.1388, "margin_dpo/margin_mean": 100.43526458740234, "margin_dpo/margin_std": 153.38381958007812, "step": 565 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.0033486653119325638, "fcm_dpo/delta": -0.02772103250026703, "fcm_dpo/margin": 99.43392944335938, "fcm_dpo/q_t": 0.42399096488952637, "grad_norm": 12.34563159942627, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.9331613779067993, "logits/rejected": 0.8943105936050415, "logps/chosen": -249.2666473388672, "logps/ref_chosen": -57.752410888671875, "logps/ref_rejected": -76.99858093261719, "logps/rejected": -367.94671630859375, "loss": 1.1578, "margin_dpo/margin_mean": 99.43392944335938, "margin_dpo/margin_std": 166.1993865966797, "step": 570 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.0033004791475832462, "fcm_dpo/delta": -0.011403532698750496, "fcm_dpo/margin": 86.78084564208984, "fcm_dpo/q_t": 0.4353984296321869, "grad_norm": 11.008131980895996, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.9364798665046692, "logits/rejected": 0.9186896085739136, "logps/chosen": -265.30657958984375, "logps/ref_chosen": -63.61958694458008, "logps/ref_rejected": -79.51353454589844, "logps/rejected": -367.98138427734375, "loss": 1.1951, "margin_dpo/margin_mean": 86.78085327148438, "margin_dpo/margin_std": 166.95652770996094, "step": 575 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.0032326322980225086, "fcm_dpo/delta": -0.012774638831615448, "fcm_dpo/margin": 98.78877258300781, "fcm_dpo/q_t": 0.42647188901901245, "grad_norm": 12.614327430725098, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.9242687225341797, "logits/rejected": 0.9064335823059082, "logps/chosen": -247.09750366210938, "logps/ref_chosen": -57.3541145324707, "logps/ref_rejected": -73.14434051513672, "logps/rejected": -361.676513671875, "loss": 1.1637, "margin_dpo/margin_mean": 98.78877258300781, "margin_dpo/margin_std": 166.97918701171875, "step": 580 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.0032326322980225086, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 91.58312225341797, "fcm_dpo/q_t": 0.43279165029525757, "grad_norm": 11.919511795043945, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.9634637832641602, "logits/rejected": 0.9281566739082336, "logps/chosen": -253.19546508789062, "logps/ref_chosen": -56.0127067565918, "logps/ref_rejected": -77.16522216796875, "logps/rejected": -365.93109130859375, "loss": 1.1847, "margin_dpo/margin_mean": 91.5831298828125, "margin_dpo/margin_std": 169.7579803466797, "step": 585 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.0032326322980225086, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 87.02014923095703, "fcm_dpo/q_t": 0.43297773599624634, "grad_norm": 13.126020431518555, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.9554249048233032, "logits/rejected": 0.9359169006347656, "logps/chosen": -263.68682861328125, "logps/ref_chosen": -60.5894660949707, "logps/ref_rejected": -74.34771728515625, "logps/rejected": -364.4652404785156, "loss": 1.1894, "margin_dpo/margin_mean": 87.02014923095703, "margin_dpo/margin_std": 158.75521850585938, "step": 590 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.00319870188832283, "fcm_dpo/delta": -0.010551819577813148, "fcm_dpo/margin": 100.10346984863281, "fcm_dpo/q_t": 0.4260531961917877, "grad_norm": 10.075697898864746, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.9538490176200867, "logits/rejected": 0.9052039384841919, "logps/chosen": -240.1796875, "logps/ref_chosen": -54.77838897705078, "logps/ref_rejected": -78.102783203125, "logps/rejected": -363.6075744628906, "loss": 1.1566, "margin_dpo/margin_mean": 100.10346221923828, "margin_dpo/margin_std": 161.6309356689453, "step": 595 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.00319870188832283, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 65.01690673828125, "fcm_dpo/q_t": 0.45148009061813354, "grad_norm": 12.22407341003418, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.9378520846366882, "logits/rejected": 0.9170206189155579, "logps/chosen": -263.9869384765625, "logps/ref_chosen": -58.45500564575195, "logps/ref_rejected": -70.7367172241211, "logps/rejected": -341.2855529785156, "loss": 1.242, "margin_dpo/margin_mean": 65.01690673828125, "margin_dpo/margin_std": 143.92819213867188, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.00319870188832283, "eval_logits/chosen": 0.8913569450378418, "eval_logits/rejected": 0.8742244243621826, "eval_logps/chosen": -278.61993408203125, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -364.2364501953125, "eval_loss": 0.6074615716934204, "eval_margin_dpo/margin_mean": 80.9269790649414, "eval_margin_dpo/margin_std": 168.3527069091797, "eval_runtime": 39.0585, "eval_samples_per_second": 58.963, "eval_steps_per_second": 1.843, "step": 600 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.00319870188832283, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 91.87323760986328, "fcm_dpo/q_t": 0.4317256510257721, "grad_norm": 10.033231735229492, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.9322928190231323, "logits/rejected": 0.9066828489303589, "logps/chosen": -246.4706573486328, "logps/ref_chosen": -59.87483596801758, "logps/ref_rejected": -75.75318908691406, "logps/rejected": -354.2222595214844, "loss": 1.1779, "margin_dpo/margin_mean": 91.87324523925781, "margin_dpo/margin_std": 161.2457733154297, "step": 605 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.003174326615408063, "fcm_dpo/delta": -0.009571181610226631, "fcm_dpo/margin": 99.8558120727539, "fcm_dpo/q_t": 0.42549604177474976, "grad_norm": 13.667858123779297, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.9424371719360352, "logits/rejected": 0.9166472554206848, "logps/chosen": -255.5699462890625, "logps/ref_chosen": -60.35883712768555, "logps/ref_rejected": -81.3543930053711, "logps/rejected": -376.42132568359375, "loss": 1.1567, "margin_dpo/margin_mean": 99.85580444335938, "margin_dpo/margin_std": 156.640869140625, "step": 610 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.0031617539934813976, "fcm_dpo/delta": -0.010276397690176964, "fcm_dpo/margin": 101.62857818603516, "fcm_dpo/q_t": 0.4245510995388031, "grad_norm": 9.765281677246094, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.9229475855827332, "logits/rejected": 0.8982959985733032, "logps/chosen": -253.18075561523438, "logps/ref_chosen": -59.17219161987305, "logps/ref_rejected": -79.92167663574219, "logps/rejected": -375.55877685546875, "loss": 1.1485, "margin_dpo/margin_mean": 101.62857818603516, "margin_dpo/margin_std": 152.48818969726562, "step": 615 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.0031358408741652966, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 89.38777923583984, "fcm_dpo/q_t": 0.4349850118160248, "grad_norm": 10.843006134033203, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.9537204504013062, "logits/rejected": 0.9183050394058228, "logps/chosen": -253.4351043701172, "logps/ref_chosen": -58.052696228027344, "logps/ref_rejected": -78.37252807617188, "logps/rejected": -363.1427307128906, "loss": 1.1848, "margin_dpo/margin_mean": 89.38777160644531, "margin_dpo/margin_std": 156.2245635986328, "step": 620 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.0031358408741652966, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 96.6719970703125, "fcm_dpo/q_t": 0.4313376545906067, "grad_norm": 10.53563404083252, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.9534462094306946, "logits/rejected": 0.900356113910675, "logps/chosen": -253.8286895751953, "logps/ref_chosen": -56.957862854003906, "logps/ref_rejected": -82.68255615234375, "logps/rejected": -376.225341796875, "loss": 1.1779, "margin_dpo/margin_mean": 96.6719970703125, "margin_dpo/margin_std": 174.75888061523438, "step": 625 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.0031358408741652966, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 105.3574447631836, "fcm_dpo/q_t": 0.42352789640426636, "grad_norm": 10.356474876403809, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.9468274116516113, "logits/rejected": 0.9034187197685242, "logps/chosen": -247.63818359375, "logps/ref_chosen": -56.71510696411133, "logps/ref_rejected": -82.94544219970703, "logps/rejected": -379.2259521484375, "loss": 1.1485, "margin_dpo/margin_mean": 105.35743713378906, "margin_dpo/margin_std": 166.2731475830078, "step": 630 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.0030982757452875376, "fcm_dpo/delta": -0.012051684781908989, "fcm_dpo/margin": 89.59659576416016, "fcm_dpo/q_t": 0.4353605806827545, "grad_norm": 10.570072174072266, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.9560381174087524, "logits/rejected": 0.9319430589675903, "logps/chosen": -254.63623046875, "logps/ref_chosen": -59.33793258666992, "logps/ref_rejected": -75.01703643798828, "logps/rejected": -359.91192626953125, "loss": 1.1902, "margin_dpo/margin_mean": 89.59659576416016, "margin_dpo/margin_std": 159.2709197998047, "step": 635 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.0030982757452875376, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 97.72769927978516, "fcm_dpo/q_t": 0.42967167496681213, "grad_norm": 10.967521667480469, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.9475260972976685, "logits/rejected": 0.903438925743103, "logps/chosen": -259.0896301269531, "logps/ref_chosen": -58.1605339050293, "logps/ref_rejected": -79.85365295410156, "logps/rejected": -378.5104064941406, "loss": 1.1668, "margin_dpo/margin_mean": 97.72769165039062, "margin_dpo/margin_std": 162.35629272460938, "step": 640 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.0030982757452875376, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 77.14000701904297, "fcm_dpo/q_t": 0.44398292899131775, "grad_norm": 15.546424865722656, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.9349179267883301, "logits/rejected": 0.922415554523468, "logps/chosen": -264.6257629394531, "logps/ref_chosen": -63.45180130004883, "logps/ref_rejected": -74.18285369873047, "logps/rejected": -352.49676513671875, "loss": 1.2142, "margin_dpo/margin_mean": 77.14000701904297, "margin_dpo/margin_std": 147.8017120361328, "step": 645 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.0030982757452875376, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 95.89753723144531, "fcm_dpo/q_t": 0.4306652545928955, "grad_norm": 9.394474029541016, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.9502288103103638, "logits/rejected": 0.9044377207756042, "logps/chosen": -268.394775390625, "logps/ref_chosen": -59.75496292114258, "logps/ref_rejected": -84.31481170654297, "logps/rejected": -388.8521423339844, "loss": 1.1769, "margin_dpo/margin_mean": 95.89753723144531, "margin_dpo/margin_std": 167.29580688476562, "step": 650 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.0030882812570780516, "fcm_dpo/delta": -0.016260221600532532, "fcm_dpo/margin": 95.9172592163086, "fcm_dpo/q_t": 0.43137580156326294, "grad_norm": 10.780631065368652, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.9268029928207397, "logits/rejected": 0.8841239809989929, "logps/chosen": -252.7764892578125, "logps/ref_chosen": -57.817848205566406, "logps/ref_rejected": -79.81755065917969, "logps/rejected": -370.6934814453125, "loss": 1.1725, "margin_dpo/margin_mean": 95.9172592163086, "margin_dpo/margin_std": 159.18435668945312, "step": 655 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.003048304468393326, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 98.31352996826172, "fcm_dpo/q_t": 0.43072566390037537, "grad_norm": 11.600972175598145, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.9769344329833984, "logits/rejected": 0.9424291849136353, "logps/chosen": -260.94500732421875, "logps/ref_chosen": -59.12651443481445, "logps/ref_rejected": -79.42085266113281, "logps/rejected": -379.5528869628906, "loss": 1.1768, "margin_dpo/margin_mean": 98.31353759765625, "margin_dpo/margin_std": 174.72268676757812, "step": 660 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.1812975648311552, "train_runtime": 1809.2515, "train_samples_per_second": 23.4, "train_steps_per_second": 0.365 } ], "logging_steps": 5, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }